aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-05-02 14:23:14 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-05-02 14:23:14 -0400
commitb66e1f11ebc429569a3784aaf64123633d9e3ed1 (patch)
treed49f96acc682aaf29416921428110da5fd78fea4
parent1be1d6b7f3f6e3a87f872dd5e7a867d03d8a6851 (diff)
parent5c598b3428c372a1209597cee99a70da20625876 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6: [PATCH] fix sysctl_nr_open bugs [PATCH] sanitize anon_inode_getfd() [PATCH] split linux/file.h [PATCH] make osf_select() use core_sys_select() [PATCH] remove horrors with irix tty ioctls handling [PATCH] fix file and descriptor handling in perfmon
-rw-r--r--arch/alpha/kernel/osf_sys.c69
-rw-r--r--arch/mips/kernel/irixioctl.c55
-rw-r--r--arch/mips/kernel/kspd.c1
-rw-r--r--arch/powerpc/platforms/cell/spufs/coredump.c1
-rw-r--r--drivers/char/tty_audit.c1
-rw-r--r--drivers/char/tty_io.c1
-rw-r--r--fs/anon_inodes.c13
-rw-r--r--fs/compat.c1
-rw-r--r--fs/dnotify.c2
-rw-r--r--fs/eventfd.c15
-rw-r--r--fs/eventpoll.c23
-rw-r--r--fs/exec.c1
-rw-r--r--fs/fcntl.c1
-rw-r--r--fs/file.c23
-rw-r--r--fs/file_table.c1
-rw-r--r--fs/locks.c1
-rw-r--r--fs/open.c1
-rw-r--r--fs/proc/array.c1
-rw-r--r--fs/proc/base.c1
-rw-r--r--fs/select.c3
-rw-r--r--fs/signalfd.c17
-rw-r--r--fs/timerfd.c11
-rw-r--r--include/linux/anon_inodes.h3
-rw-r--r--include/linux/fdtable.h99
-rw-r--r--include/linux/file.h86
-rw-r--r--include/linux/init_task.h2
-rw-r--r--include/linux/poll.h2
-rw-r--r--kernel/exit.c1
-rw-r--r--kernel/fork.c1
-rw-r--r--kernel/kmod.c1
-rw-r--r--security/selinux/hooks.c1
-rw-r--r--virt/kvm/kvm_main.c21
32 files changed, 186 insertions, 274 deletions
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 9fee37e2596f..32ca1b927307 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -981,27 +981,18 @@ asmlinkage int
981osf_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, 981osf_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp,
982 struct timeval32 __user *tvp) 982 struct timeval32 __user *tvp)
983{ 983{
984 fd_set_bits fds; 984 s64 timeout = MAX_SCHEDULE_TIMEOUT;
985 char *bits;
986 size_t size;
987 long timeout;
988 int ret = -EINVAL;
989 struct fdtable *fdt;
990 int max_fds;
991
992 timeout = MAX_SCHEDULE_TIMEOUT;
993 if (tvp) { 985 if (tvp) {
994 time_t sec, usec; 986 time_t sec, usec;
995 987
996 if (!access_ok(VERIFY_READ, tvp, sizeof(*tvp)) 988 if (!access_ok(VERIFY_READ, tvp, sizeof(*tvp))
997 || __get_user(sec, &tvp->tv_sec) 989 || __get_user(sec, &tvp->tv_sec)
998 || __get_user(usec, &tvp->tv_usec)) { 990 || __get_user(usec, &tvp->tv_usec)) {
999 ret = -EFAULT; 991 return -EFAULT;
1000 goto out_nofds;
1001 } 992 }
1002 993
1003 if (sec < 0 || usec < 0) 994 if (sec < 0 || usec < 0)
1004 goto out_nofds; 995 return -EINVAL;
1005 996
1006 if ((unsigned long) sec < MAX_SELECT_SECONDS) { 997 if ((unsigned long) sec < MAX_SELECT_SECONDS) {
1007 timeout = (usec + 1000000/HZ - 1) / (1000000/HZ); 998 timeout = (usec + 1000000/HZ - 1) / (1000000/HZ);
@@ -1009,60 +1000,8 @@ osf_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp,
1009 } 1000 }
1010 } 1001 }
1011 1002
1012 rcu_read_lock();
1013 fdt = files_fdtable(current->files);
1014 max_fds = fdt->max_fds;
1015 rcu_read_unlock();
1016 if (n < 0 || n > max_fds)
1017 goto out_nofds;
1018
1019 /*
1020 * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
1021 * since we used fdset we need to allocate memory in units of
1022 * long-words.
1023 */
1024 ret = -ENOMEM;
1025 size = FDS_BYTES(n);
1026 bits = kmalloc(6 * size, GFP_KERNEL);
1027 if (!bits)
1028 goto out_nofds;
1029 fds.in = (unsigned long *) bits;
1030 fds.out = (unsigned long *) (bits + size);
1031 fds.ex = (unsigned long *) (bits + 2*size);
1032 fds.res_in = (unsigned long *) (bits + 3*size);
1033 fds.res_out = (unsigned long *) (bits + 4*size);
1034 fds.res_ex = (unsigned long *) (bits + 5*size);
1035
1036 if ((ret = get_fd_set(n, inp->fds_bits, fds.in)) ||
1037 (ret = get_fd_set(n, outp->fds_bits, fds.out)) ||
1038 (ret = get_fd_set(n, exp->fds_bits, fds.ex)))
1039 goto out;
1040 zero_fd_set(n, fds.res_in);
1041 zero_fd_set(n, fds.res_out);
1042 zero_fd_set(n, fds.res_ex);
1043
1044 ret = do_select(n, &fds, &timeout);
1045
1046 /* OSF does not copy back the remaining time. */ 1003 /* OSF does not copy back the remaining time. */
1047 1004 return core_sys_select(n, inp, outp, exp, &timeout);
1048 if (ret < 0)
1049 goto out;
1050 if (!ret) {
1051 ret = -ERESTARTNOHAND;
1052 if (signal_pending(current))
1053 goto out;
1054 ret = 0;
1055 }
1056
1057 if (set_fd_set(n, inp->fds_bits, fds.res_in) ||
1058 set_fd_set(n, outp->fds_bits, fds.res_out) ||
1059 set_fd_set(n, exp->fds_bits, fds.res_ex))
1060 ret = -EFAULT;
1061
1062 out:
1063 kfree(bits);
1064 out_nofds:
1065 return ret;
1066} 1005}
1067 1006
1068struct rusage32 { 1007struct rusage32 {
diff --git a/arch/mips/kernel/irixioctl.c b/arch/mips/kernel/irixioctl.c
index 2bde200d5ad0..b39bdba82e02 100644
--- a/arch/mips/kernel/irixioctl.c
+++ b/arch/mips/kernel/irixioctl.c
@@ -27,33 +27,6 @@ struct irix_termios {
27 cc_t c_cc[NCCS]; 27 cc_t c_cc[NCCS];
28}; 28};
29 29
30extern void start_tty(struct tty_struct *tty);
31static struct tty_struct *get_tty(int fd)
32{
33 struct file *filp;
34 struct tty_struct *ttyp = NULL;
35
36 rcu_read_lock();
37 filp = fcheck(fd);
38 if(filp && filp->private_data) {
39 ttyp = (struct tty_struct *) filp->private_data;
40
41 if(ttyp->magic != TTY_MAGIC)
42 ttyp =NULL;
43 }
44 rcu_read_unlock();
45 return ttyp;
46}
47
48static struct tty_struct *get_real_tty(struct tty_struct *tp)
49{
50 if (tp->driver->type == TTY_DRIVER_TYPE_PTY &&
51 tp->driver->subtype == PTY_TYPE_MASTER)
52 return tp->link;
53 else
54 return tp;
55}
56
57asmlinkage int irix_ioctl(int fd, unsigned long cmd, unsigned long arg) 30asmlinkage int irix_ioctl(int fd, unsigned long cmd, unsigned long arg)
58{ 31{
59 struct tty_struct *tp, *rtp; 32 struct tty_struct *tp, *rtp;
@@ -146,34 +119,24 @@ asmlinkage int irix_ioctl(int fd, unsigned long cmd, unsigned long arg)
146 error = sys_ioctl(fd, TIOCNOTTY, arg); 119 error = sys_ioctl(fd, TIOCNOTTY, arg);
147 break; 120 break;
148 121
149 case 0x00007416: 122 case 0x00007416: {
123 pid_t pid;
150#ifdef DEBUG_IOCTLS 124#ifdef DEBUG_IOCTLS
151 printk("TIOCGSID, %08lx) ", arg); 125 printk("TIOCGSID, %08lx) ", arg);
152#endif 126#endif
153 tp = get_tty(fd); 127 old_fs = get_fs(); set_fs(get_ds());
154 if(!tp) { 128 error = sys_ioctl(fd, TIOCGSID, (unsigned long)&pid);
155 error = -EINVAL; 129 set_fs(old_fs);
156 break; 130 if (!error)
157 } 131 error = put_user(pid, (unsigned long __user *) arg);
158 rtp = get_real_tty(tp);
159#ifdef DEBUG_IOCTLS
160 printk("rtp->session=%d ", rtp->session);
161#endif
162 error = put_user(rtp->session, (unsigned long __user *) arg);
163 break; 132 break;
164 133 }
165 case 0x746e: 134 case 0x746e:
166 /* TIOCSTART, same effect as hitting ^Q */ 135 /* TIOCSTART, same effect as hitting ^Q */
167#ifdef DEBUG_IOCTLS 136#ifdef DEBUG_IOCTLS
168 printk("TIOCSTART, %08lx) ", arg); 137 printk("TIOCSTART, %08lx) ", arg);
169#endif 138#endif
170 tp = get_tty(fd); 139 error = sys_ioctl(fd, TCXONC, TCOON);
171 if(!tp) {
172 error = -EINVAL;
173 break;
174 }
175 rtp = get_real_tty(tp);
176 start_tty(rtp);
177 break; 140 break;
178 141
179 case 0x20006968: 142 case 0x20006968:
diff --git a/arch/mips/kernel/kspd.c b/arch/mips/kernel/kspd.c
index 998c4efcce88..ceb62dce1c9c 100644
--- a/arch/mips/kernel/kspd.c
+++ b/arch/mips/kernel/kspd.c
@@ -20,6 +20,7 @@
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/unistd.h> 21#include <linux/unistd.h>
22#include <linux/file.h> 22#include <linux/file.h>
23#include <linux/fdtable.h>
23#include <linux/fs.h> 24#include <linux/fs.h>
24#include <linux/syscalls.h> 25#include <linux/syscalls.h>
25#include <linux/workqueue.h> 26#include <linux/workqueue.h>
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index b962c3ab470c..af116aadba10 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -22,6 +22,7 @@
22 22
23#include <linux/elf.h> 23#include <linux/elf.h>
24#include <linux/file.h> 24#include <linux/file.h>
25#include <linux/fdtable.h>
25#include <linux/fs.h> 26#include <linux/fs.h>
26#include <linux/list.h> 27#include <linux/list.h>
27#include <linux/module.h> 28#include <linux/module.h>
diff --git a/drivers/char/tty_audit.c b/drivers/char/tty_audit.c
index 6342b0534f4d..3582f43345a8 100644
--- a/drivers/char/tty_audit.c
+++ b/drivers/char/tty_audit.c
@@ -11,6 +11,7 @@
11 11
12#include <linux/audit.h> 12#include <linux/audit.h>
13#include <linux/file.h> 13#include <linux/file.h>
14#include <linux/fdtable.h>
14#include <linux/tty.h> 15#include <linux/tty.h>
15 16
16struct tty_audit_buf { 17struct tty_audit_buf {
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 1d298c2cf930..49c1a2267a55 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -78,6 +78,7 @@
78#include <linux/tty_flip.h> 78#include <linux/tty_flip.h>
79#include <linux/devpts_fs.h> 79#include <linux/devpts_fs.h>
80#include <linux/file.h> 80#include <linux/file.h>
81#include <linux/fdtable.h>
81#include <linux/console.h> 82#include <linux/console.h>
82#include <linux/timer.h> 83#include <linux/timer.h>
83#include <linux/ctype.h> 84#include <linux/ctype.h>
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index f42be069e085..977ef208c051 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -57,9 +57,6 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
57 * anonymous inode, and a dentry that describe the "class" 57 * anonymous inode, and a dentry that describe the "class"
58 * of the file 58 * of the file
59 * 59 *
60 * @pfd: [out] pointer to the file descriptor
61 * @dpinode: [out] pointer to the inode
62 * @pfile: [out] pointer to the file struct
63 * @name: [in] name of the "class" of the new file 60 * @name: [in] name of the "class" of the new file
64 * @fops [in] file operations for the new file 61 * @fops [in] file operations for the new file
65 * @priv [in] private data for the new file (will be file's private_data) 62 * @priv [in] private data for the new file (will be file's private_data)
@@ -68,10 +65,9 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
68 * that do not need to have a full-fledged inode in order to operate correctly. 65 * that do not need to have a full-fledged inode in order to operate correctly.
69 * All the files created with anon_inode_getfd() will share a single inode, 66 * All the files created with anon_inode_getfd() will share a single inode,
70 * hence saving memory and avoiding code duplication for the file/inode/dentry 67 * hence saving memory and avoiding code duplication for the file/inode/dentry
71 * setup. 68 * setup. Returns new descriptor or -error.
72 */ 69 */
73int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile, 70int anon_inode_getfd(const char *name, const struct file_operations *fops,
74 const char *name, const struct file_operations *fops,
75 void *priv) 71 void *priv)
76{ 72{
77 struct qstr this; 73 struct qstr this;
@@ -125,10 +121,7 @@ int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile,
125 121
126 fd_install(fd, file); 122 fd_install(fd, file);
127 123
128 *pfd = fd; 124 return fd;
129 *pinode = anon_inode_inode;
130 *pfile = file;
131 return 0;
132 125
133err_dput: 126err_dput:
134 dput(dentry); 127 dput(dentry);
diff --git a/fs/compat.c b/fs/compat.c
index 139dc93c092d..332a869d2c53 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -24,6 +24,7 @@
24#include <linux/fcntl.h> 24#include <linux/fcntl.h>
25#include <linux/namei.h> 25#include <linux/namei.h>
26#include <linux/file.h> 26#include <linux/file.h>
27#include <linux/fdtable.h>
27#include <linux/vfs.h> 28#include <linux/vfs.h>
28#include <linux/ioctl.h> 29#include <linux/ioctl.h>
29#include <linux/init.h> 30#include <linux/init.h>
diff --git a/fs/dnotify.c b/fs/dnotify.c
index eaecc4cfe540..676073b8dda5 100644
--- a/fs/dnotify.c
+++ b/fs/dnotify.c
@@ -20,7 +20,7 @@
20#include <linux/init.h> 20#include <linux/init.h>
21#include <linux/spinlock.h> 21#include <linux/spinlock.h>
22#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/file.h> 23#include <linux/fdtable.h>
24 24
25int dir_notify_enable __read_mostly = 1; 25int dir_notify_enable __read_mostly = 1;
26 26
diff --git a/fs/eventfd.c b/fs/eventfd.c
index a9f130cd50ac..343942deeec1 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -200,10 +200,8 @@ struct file *eventfd_fget(int fd)
200 200
201asmlinkage long sys_eventfd(unsigned int count) 201asmlinkage long sys_eventfd(unsigned int count)
202{ 202{
203 int error, fd; 203 int fd;
204 struct eventfd_ctx *ctx; 204 struct eventfd_ctx *ctx;
205 struct file *file;
206 struct inode *inode;
207 205
208 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 206 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
209 if (!ctx) 207 if (!ctx)
@@ -216,12 +214,9 @@ asmlinkage long sys_eventfd(unsigned int count)
216 * When we call this, the initialization must be complete, since 214 * When we call this, the initialization must be complete, since
217 * anon_inode_getfd() will install the fd. 215 * anon_inode_getfd() will install the fd.
218 */ 216 */
219 error = anon_inode_getfd(&fd, &inode, &file, "[eventfd]", 217 fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx);
220 &eventfd_fops, ctx); 218 if (fd < 0)
221 if (!error) 219 kfree(ctx);
222 return fd; 220 return fd;
223
224 kfree(ctx);
225 return error;
226} 221}
227 222
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 221086fef174..990c01d2d66b 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1050,8 +1050,6 @@ asmlinkage long sys_epoll_create(int size)
1050{ 1050{
1051 int error, fd = -1; 1051 int error, fd = -1;
1052 struct eventpoll *ep; 1052 struct eventpoll *ep;
1053 struct inode *inode;
1054 struct file *file;
1055 1053
1056 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n", 1054 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
1057 current, size)); 1055 current, size));
@@ -1061,29 +1059,24 @@ asmlinkage long sys_epoll_create(int size)
1061 * structure ( "struct eventpoll" ). 1059 * structure ( "struct eventpoll" ).
1062 */ 1060 */
1063 error = -EINVAL; 1061 error = -EINVAL;
1064 if (size <= 0 || (error = ep_alloc(&ep)) != 0) 1062 if (size <= 0 || (error = ep_alloc(&ep)) < 0) {
1063 fd = error;
1065 goto error_return; 1064 goto error_return;
1065 }
1066 1066
1067 /* 1067 /*
1068 * Creates all the items needed to setup an eventpoll file. That is, 1068 * Creates all the items needed to setup an eventpoll file. That is,
1069 * a file structure, and inode and a free file descriptor. 1069 * a file structure and a free file descriptor.
1070 */ 1070 */
1071 error = anon_inode_getfd(&fd, &inode, &file, "[eventpoll]", 1071 fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep);
1072 &eventpoll_fops, ep); 1072 if (fd < 0)
1073 if (error) 1073 ep_free(ep);
1074 goto error_free;
1075 1074
1075error_return:
1076 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", 1076 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
1077 current, size, fd)); 1077 current, size, fd));
1078 1078
1079 return fd; 1079 return fd;
1080
1081error_free:
1082 ep_free(ep);
1083error_return:
1084 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
1085 current, size, error));
1086 return error;
1087} 1080}
1088 1081
1089/* 1082/*
diff --git a/fs/exec.c b/fs/exec.c
index 9f9f931ef949..aeaa9791d8be 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -24,6 +24,7 @@
24 24
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/file.h> 26#include <linux/file.h>
27#include <linux/fdtable.h>
27#include <linux/mman.h> 28#include <linux/mman.h>
28#include <linux/a.out.h> 29#include <linux/a.out.h>
29#include <linux/stat.h> 30#include <linux/stat.h>
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 3f3ac630ccde..bfd776509a72 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -9,6 +9,7 @@
9#include <linux/mm.h> 9#include <linux/mm.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/file.h> 11#include <linux/file.h>
12#include <linux/fdtable.h>
12#include <linux/capability.h> 13#include <linux/capability.h>
13#include <linux/dnotify.h> 14#include <linux/dnotify.h>
14#include <linux/smp_lock.h> 15#include <linux/smp_lock.h>
diff --git a/fs/file.c b/fs/file.c
index 5110acb1c9ef..4c6f0ea12c41 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -12,6 +12,7 @@
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/vmalloc.h> 13#include <linux/vmalloc.h>
14#include <linux/file.h> 14#include <linux/file.h>
15#include <linux/fdtable.h>
15#include <linux/bitops.h> 16#include <linux/bitops.h>
16#include <linux/interrupt.h> 17#include <linux/interrupt.h>
17#include <linux/spinlock.h> 18#include <linux/spinlock.h>
@@ -149,8 +150,16 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
149 nr /= (1024 / sizeof(struct file *)); 150 nr /= (1024 / sizeof(struct file *));
150 nr = roundup_pow_of_two(nr + 1); 151 nr = roundup_pow_of_two(nr + 1);
151 nr *= (1024 / sizeof(struct file *)); 152 nr *= (1024 / sizeof(struct file *));
152 if (nr > sysctl_nr_open) 153 /*
153 nr = sysctl_nr_open; 154 * Note that this can drive nr *below* what we had passed if sysctl_nr_open
155 * had been set lower between the check in expand_files() and here. Deal
156 * with that in caller, it's cheaper that way.
157 *
158 * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise
159 * bitmaps handling below becomes unpleasant, to put it mildly...
160 */
161 if (unlikely(nr > sysctl_nr_open))
162 nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
154 163
155 fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL); 164 fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL);
156 if (!fdt) 165 if (!fdt)
@@ -199,6 +208,16 @@ static int expand_fdtable(struct files_struct *files, int nr)
199 if (!new_fdt) 208 if (!new_fdt)
200 return -ENOMEM; 209 return -ENOMEM;
201 /* 210 /*
211 * extremely unlikely race - sysctl_nr_open decreased between the check in
212 * caller and alloc_fdtable(). Cheaper to catch it here...
213 */
214 if (unlikely(new_fdt->max_fds <= nr)) {
215 free_fdarr(new_fdt);
216 free_fdset(new_fdt);
217 kfree(new_fdt);
218 return -EMFILE;
219 }
220 /*
202 * Check again since another task may have expanded the fd table while 221 * Check again since another task may have expanded the fd table while
203 * we dropped the lock 222 * we dropped the lock
204 */ 223 */
diff --git a/fs/file_table.c b/fs/file_table.c
index 7a0a9b872251..83084225b4c3 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -8,6 +8,7 @@
8#include <linux/string.h> 8#include <linux/string.h>
9#include <linux/slab.h> 9#include <linux/slab.h>
10#include <linux/file.h> 10#include <linux/file.h>
11#include <linux/fdtable.h>
11#include <linux/init.h> 12#include <linux/init.h>
12#include <linux/module.h> 13#include <linux/module.h>
13#include <linux/fs.h> 14#include <linux/fs.h>
diff --git a/fs/locks.c b/fs/locks.c
index 44d9a6a7ec50..663c069b59b3 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -116,6 +116,7 @@
116 116
117#include <linux/capability.h> 117#include <linux/capability.h>
118#include <linux/file.h> 118#include <linux/file.h>
119#include <linux/fdtable.h>
119#include <linux/fs.h> 120#include <linux/fs.h>
120#include <linux/init.h> 121#include <linux/init.h>
121#include <linux/module.h> 122#include <linux/module.h>
diff --git a/fs/open.c b/fs/open.c
index 7af1f05d5978..a1450086e92f 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -7,6 +7,7 @@
7#include <linux/string.h> 7#include <linux/string.h>
8#include <linux/mm.h> 8#include <linux/mm.h>
9#include <linux/file.h> 9#include <linux/file.h>
10#include <linux/fdtable.h>
10#include <linux/quotaops.h> 11#include <linux/quotaops.h>
11#include <linux/fsnotify.h> 12#include <linux/fsnotify.h>
12#include <linux/module.h> 13#include <linux/module.h>
diff --git a/fs/proc/array.c b/fs/proc/array.c
index c135cbdd9127..dca997a93bff 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -73,6 +73,7 @@
73#include <linux/signal.h> 73#include <linux/signal.h>
74#include <linux/highmem.h> 74#include <linux/highmem.h>
75#include <linux/file.h> 75#include <linux/file.h>
76#include <linux/fdtable.h>
76#include <linux/times.h> 77#include <linux/times.h>
77#include <linux/cpuset.h> 78#include <linux/cpuset.h>
78#include <linux/rcupdate.h> 79#include <linux/rcupdate.h>
diff --git a/fs/proc/base.c b/fs/proc/base.c
index fcf02f2deeba..808cbdc193d3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -56,6 +56,7 @@
56#include <linux/init.h> 56#include <linux/init.h>
57#include <linux/capability.h> 57#include <linux/capability.h>
58#include <linux/file.h> 58#include <linux/file.h>
59#include <linux/fdtable.h>
59#include <linux/string.h> 60#include <linux/string.h>
60#include <linux/seq_file.h> 61#include <linux/seq_file.h>
61#include <linux/namei.h> 62#include <linux/namei.h>
diff --git a/fs/select.c b/fs/select.c
index 2c292146e246..8dda969614a9 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -21,6 +21,7 @@
21#include <linux/poll.h> 21#include <linux/poll.h>
22#include <linux/personality.h> /* for STICKY_TIMEOUTS */ 22#include <linux/personality.h> /* for STICKY_TIMEOUTS */
23#include <linux/file.h> 23#include <linux/file.h>
24#include <linux/fdtable.h>
24#include <linux/fs.h> 25#include <linux/fs.h>
25#include <linux/rcupdate.h> 26#include <linux/rcupdate.h>
26 27
@@ -298,7 +299,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
298#define MAX_SELECT_SECONDS \ 299#define MAX_SELECT_SECONDS \
299 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) 300 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
300 301
301static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, 302int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
302 fd_set __user *exp, s64 *timeout) 303 fd_set __user *exp, s64 *timeout)
303{ 304{
304 fd_set_bits fds; 305 fd_set_bits fds;
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 8ead0db35933..619725644c75 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -207,11 +207,8 @@ static const struct file_operations signalfd_fops = {
207 207
208asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask) 208asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask)
209{ 209{
210 int error;
211 sigset_t sigmask; 210 sigset_t sigmask;
212 struct signalfd_ctx *ctx; 211 struct signalfd_ctx *ctx;
213 struct file *file;
214 struct inode *inode;
215 212
216 if (sizemask != sizeof(sigset_t) || 213 if (sizemask != sizeof(sigset_t) ||
217 copy_from_user(&sigmask, user_mask, sizeof(sigmask))) 214 copy_from_user(&sigmask, user_mask, sizeof(sigmask)))
@@ -230,12 +227,11 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
230 * When we call this, the initialization must be complete, since 227 * When we call this, the initialization must be complete, since
231 * anon_inode_getfd() will install the fd. 228 * anon_inode_getfd() will install the fd.
232 */ 229 */
233 error = anon_inode_getfd(&ufd, &inode, &file, "[signalfd]", 230 ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx);
234 &signalfd_fops, ctx); 231 if (ufd < 0)
235 if (error) 232 kfree(ctx);
236 goto err_fdalloc;
237 } else { 233 } else {
238 file = fget(ufd); 234 struct file *file = fget(ufd);
239 if (!file) 235 if (!file)
240 return -EBADF; 236 return -EBADF;
241 ctx = file->private_data; 237 ctx = file->private_data;
@@ -252,9 +248,4 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
252 } 248 }
253 249
254 return ufd; 250 return ufd;
255
256err_fdalloc:
257 kfree(ctx);
258 return error;
259} 251}
260
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 5400524e9cb1..d87d354ec424 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -181,10 +181,8 @@ static struct file *timerfd_fget(int fd)
181 181
182asmlinkage long sys_timerfd_create(int clockid, int flags) 182asmlinkage long sys_timerfd_create(int clockid, int flags)
183{ 183{
184 int error, ufd; 184 int ufd;
185 struct timerfd_ctx *ctx; 185 struct timerfd_ctx *ctx;
186 struct file *file;
187 struct inode *inode;
188 186
189 if (flags) 187 if (flags)
190 return -EINVAL; 188 return -EINVAL;
@@ -200,12 +198,9 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
200 ctx->clockid = clockid; 198 ctx->clockid = clockid;
201 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); 199 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
202 200
203 error = anon_inode_getfd(&ufd, &inode, &file, "[timerfd]", 201 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx);
204 &timerfd_fops, ctx); 202 if (ufd < 0)
205 if (error) {
206 kfree(ctx); 203 kfree(ctx);
207 return error;
208 }
209 204
210 return ufd; 205 return ufd;
211} 206}
diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h
index b2e1ba325b9a..6129e58ca7c9 100644
--- a/include/linux/anon_inodes.h
+++ b/include/linux/anon_inodes.h
@@ -8,8 +8,7 @@
8#ifndef _LINUX_ANON_INODES_H 8#ifndef _LINUX_ANON_INODES_H
9#define _LINUX_ANON_INODES_H 9#define _LINUX_ANON_INODES_H
10 10
11int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile, 11int anon_inode_getfd(const char *name, const struct file_operations *fops,
12 const char *name, const struct file_operations *fops,
13 void *priv); 12 void *priv);
14 13
15#endif /* _LINUX_ANON_INODES_H */ 14#endif /* _LINUX_ANON_INODES_H */
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
new file mode 100644
index 000000000000..a118f3c0b240
--- /dev/null
+++ b/include/linux/fdtable.h
@@ -0,0 +1,99 @@
1/*
2 * descriptor table internals; you almost certainly want file.h instead.
3 */
4
5#ifndef __LINUX_FDTABLE_H
6#define __LINUX_FDTABLE_H
7
8#include <asm/atomic.h>
9#include <linux/posix_types.h>
10#include <linux/compiler.h>
11#include <linux/spinlock.h>
12#include <linux/rcupdate.h>
13#include <linux/types.h>
14
15/*
16 * The default fd array needs to be at least BITS_PER_LONG,
17 * as this is the granularity returned by copy_fdset().
18 */
19#define NR_OPEN_DEFAULT BITS_PER_LONG
20
21/*
22 * The embedded_fd_set is a small fd_set,
23 * suitable for most tasks (which open <= BITS_PER_LONG files)
24 */
25struct embedded_fd_set {
26 unsigned long fds_bits[1];
27};
28
29struct fdtable {
30 unsigned int max_fds;
31 struct file ** fd; /* current fd array */
32 fd_set *close_on_exec;
33 fd_set *open_fds;
34 struct rcu_head rcu;
35 struct fdtable *next;
36};
37
38/*
39 * Open file table structure
40 */
41struct files_struct {
42 /*
43 * read mostly part
44 */
45 atomic_t count;
46 struct fdtable *fdt;
47 struct fdtable fdtab;
48 /*
49 * written part on a separate cache line in SMP
50 */
51 spinlock_t file_lock ____cacheline_aligned_in_smp;
52 int next_fd;
53 struct embedded_fd_set close_on_exec_init;
54 struct embedded_fd_set open_fds_init;
55 struct file * fd_array[NR_OPEN_DEFAULT];
56};
57
58#define files_fdtable(files) (rcu_dereference((files)->fdt))
59
60extern struct kmem_cache *filp_cachep;
61
62struct file_operations;
63struct vfsmount;
64struct dentry;
65
66extern int expand_files(struct files_struct *, int nr);
67extern void free_fdtable_rcu(struct rcu_head *rcu);
68extern void __init files_defer_init(void);
69
70static inline void free_fdtable(struct fdtable *fdt)
71{
72 call_rcu(&fdt->rcu, free_fdtable_rcu);
73}
74
75static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd)
76{
77 struct file * file = NULL;
78 struct fdtable *fdt = files_fdtable(files);
79
80 if (fd < fdt->max_fds)
81 file = rcu_dereference(fdt->fd[fd]);
82 return file;
83}
84
85/*
86 * Check whether the specified fd has an open file.
87 */
88#define fcheck(fd) fcheck_files(current->files, fd)
89
90struct task_struct;
91
92struct files_struct *get_files_struct(struct task_struct *);
93void put_files_struct(struct files_struct *fs);
94void reset_files_struct(struct files_struct *);
95int unshare_files(struct files_struct **);
96
97extern struct kmem_cache *files_cachep;
98
99#endif /* __LINUX_FDTABLE_H */
diff --git a/include/linux/file.h b/include/linux/file.h
index 69baf5a4f0a5..27c64bdc68c9 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -5,59 +5,11 @@
5#ifndef __LINUX_FILE_H 5#ifndef __LINUX_FILE_H
6#define __LINUX_FILE_H 6#define __LINUX_FILE_H
7 7
8#include <asm/atomic.h>
9#include <linux/posix_types.h>
10#include <linux/compiler.h> 8#include <linux/compiler.h>
11#include <linux/spinlock.h>
12#include <linux/rcupdate.h>
13#include <linux/types.h> 9#include <linux/types.h>
10#include <linux/posix_types.h>
14 11
15/* 12struct file;
16 * The default fd array needs to be at least BITS_PER_LONG,
17 * as this is the granularity returned by copy_fdset().
18 */
19#define NR_OPEN_DEFAULT BITS_PER_LONG
20
21/*
22 * The embedded_fd_set is a small fd_set,
23 * suitable for most tasks (which open <= BITS_PER_LONG files)
24 */
25struct embedded_fd_set {
26 unsigned long fds_bits[1];
27};
28
29struct fdtable {
30 unsigned int max_fds;
31 struct file ** fd; /* current fd array */
32 fd_set *close_on_exec;
33 fd_set *open_fds;
34 struct rcu_head rcu;
35 struct fdtable *next;
36};
37
38/*
39 * Open file table structure
40 */
41struct files_struct {
42 /*
43 * read mostly part
44 */
45 atomic_t count;
46 struct fdtable *fdt;
47 struct fdtable fdtab;
48 /*
49 * written part on a separate cache line in SMP
50 */
51 spinlock_t file_lock ____cacheline_aligned_in_smp;
52 int next_fd;
53 struct embedded_fd_set close_on_exec_init;
54 struct embedded_fd_set open_fds_init;
55 struct file * fd_array[NR_OPEN_DEFAULT];
56};
57
58#define files_fdtable(files) (rcu_dereference((files)->fdt))
59
60extern struct kmem_cache *filp_cachep;
61 13
62extern void __fput(struct file *); 14extern void __fput(struct file *);
63extern void fput(struct file *); 15extern void fput(struct file *);
@@ -85,41 +37,7 @@ extern void put_filp(struct file *);
85extern int get_unused_fd(void); 37extern int get_unused_fd(void);
86extern int get_unused_fd_flags(int flags); 38extern int get_unused_fd_flags(int flags);
87extern void put_unused_fd(unsigned int fd); 39extern void put_unused_fd(unsigned int fd);
88struct kmem_cache;
89
90extern int expand_files(struct files_struct *, int nr);
91extern void free_fdtable_rcu(struct rcu_head *rcu);
92extern void __init files_defer_init(void);
93
94static inline void free_fdtable(struct fdtable *fdt)
95{
96 call_rcu(&fdt->rcu, free_fdtable_rcu);
97}
98
99static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd)
100{
101 struct file * file = NULL;
102 struct fdtable *fdt = files_fdtable(files);
103
104 if (fd < fdt->max_fds)
105 file = rcu_dereference(fdt->fd[fd]);
106 return file;
107}
108
109/*
110 * Check whether the specified fd has an open file.
111 */
112#define fcheck(fd) fcheck_files(current->files, fd)
113 40
114extern void fd_install(unsigned int fd, struct file *file); 41extern void fd_install(unsigned int fd, struct file *file);
115 42
116struct task_struct;
117
118struct files_struct *get_files_struct(struct task_struct *);
119void put_files_struct(struct files_struct *fs);
120void reset_files_struct(struct files_struct *);
121int unshare_files(struct files_struct **);
122
123extern struct kmem_cache *files_cachep;
124
125#endif /* __LINUX_FILE_H */ 43#endif /* __LINUX_FILE_H */
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index bf6b8a61f8db..b24c2875aa05 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -1,7 +1,7 @@
1#ifndef _LINUX__INIT_TASK_H 1#ifndef _LINUX__INIT_TASK_H
2#define _LINUX__INIT_TASK_H 2#define _LINUX__INIT_TASK_H
3 3
4#include <linux/file.h> 4#include <linux/fdtable.h>
5#include <linux/rcupdate.h> 5#include <linux/rcupdate.h>
6#include <linux/irqflags.h> 6#include <linux/irqflags.h>
7#include <linux/utsname.h> 7#include <linux/utsname.h>
diff --git a/include/linux/poll.h b/include/linux/poll.h
index 16d813b364ef..ef453828877a 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -117,6 +117,8 @@ void zero_fd_set(unsigned long nr, unsigned long *fdset)
117extern int do_select(int n, fd_set_bits *fds, s64 *timeout); 117extern int do_select(int n, fd_set_bits *fds, s64 *timeout);
118extern int do_sys_poll(struct pollfd __user * ufds, unsigned int nfds, 118extern int do_sys_poll(struct pollfd __user * ufds, unsigned int nfds,
119 s64 *timeout); 119 s64 *timeout);
120extern int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
121 fd_set __user *exp, s64 *timeout);
120 122
121#endif /* KERNEL */ 123#endif /* KERNEL */
122 124
diff --git a/kernel/exit.c b/kernel/exit.c
index d3ad54677f9c..1510f78a0ffa 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -19,6 +19,7 @@
19#include <linux/acct.h> 19#include <linux/acct.h>
20#include <linux/tsacct_kern.h> 20#include <linux/tsacct_kern.h>
21#include <linux/file.h> 21#include <linux/file.h>
22#include <linux/fdtable.h>
22#include <linux/binfmts.h> 23#include <linux/binfmts.h>
23#include <linux/nsproxy.h> 24#include <linux/nsproxy.h>
24#include <linux/pid_namespace.h> 25#include <linux/pid_namespace.h>
diff --git a/kernel/fork.c b/kernel/fork.c
index 2bb675af4de3..933e60ebccae 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -22,6 +22,7 @@
22#include <linux/mempolicy.h> 22#include <linux/mempolicy.h>
23#include <linux/sem.h> 23#include <linux/sem.h>
24#include <linux/file.h> 24#include <linux/file.h>
25#include <linux/fdtable.h>
25#include <linux/key.h> 26#include <linux/key.h>
26#include <linux/binfmts.h> 27#include <linux/binfmts.h>
27#include <linux/mman.h> 28#include <linux/mman.h>
diff --git a/kernel/kmod.c b/kernel/kmod.c
index e2764047ec03..8df97d3dfda8 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -27,6 +27,7 @@
27#include <linux/mnt_namespace.h> 27#include <linux/mnt_namespace.h>
28#include <linux/completion.h> 28#include <linux/completion.h>
29#include <linux/file.h> 29#include <linux/file.h>
30#include <linux/fdtable.h>
30#include <linux/workqueue.h> 31#include <linux/workqueue.h>
31#include <linux/security.h> 32#include <linux/security.h>
32#include <linux/mount.h> 33#include <linux/mount.h>
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 1b50a6ebc55f..1c864c0efe2b 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -39,6 +39,7 @@
39#include <linux/spinlock.h> 39#include <linux/spinlock.h>
40#include <linux/syscalls.h> 40#include <linux/syscalls.h>
41#include <linux/file.h> 41#include <linux/file.h>
42#include <linux/fdtable.h>
42#include <linux/namei.h> 43#include <linux/namei.h>
43#include <linux/mount.h> 44#include <linux/mount.h>
44#include <linux/ext2_fs.h> 45#include <linux/ext2_fs.h>
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c82cf15730a1..e89338e2b043 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -834,16 +834,9 @@ static const struct file_operations kvm_vcpu_fops = {
834 */ 834 */
835static int create_vcpu_fd(struct kvm_vcpu *vcpu) 835static int create_vcpu_fd(struct kvm_vcpu *vcpu)
836{ 836{
837 int fd, r; 837 int fd = anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu);
838 struct inode *inode; 838 if (fd < 0)
839 struct file *file;
840
841 r = anon_inode_getfd(&fd, &inode, &file,
842 "kvm-vcpu", &kvm_vcpu_fops, vcpu);
843 if (r) {
844 kvm_put_kvm(vcpu->kvm); 839 kvm_put_kvm(vcpu->kvm);
845 return r;
846 }
847 return fd; 840 return fd;
848} 841}
849 842
@@ -1168,19 +1161,15 @@ static const struct file_operations kvm_vm_fops = {
1168 1161
1169static int kvm_dev_ioctl_create_vm(void) 1162static int kvm_dev_ioctl_create_vm(void)
1170{ 1163{
1171 int fd, r; 1164 int fd;
1172 struct inode *inode;
1173 struct file *file;
1174 struct kvm *kvm; 1165 struct kvm *kvm;
1175 1166
1176 kvm = kvm_create_vm(); 1167 kvm = kvm_create_vm();
1177 if (IS_ERR(kvm)) 1168 if (IS_ERR(kvm))
1178 return PTR_ERR(kvm); 1169 return PTR_ERR(kvm);
1179 r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm); 1170 fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm);
1180 if (r) { 1171 if (fd < 0)
1181 kvm_put_kvm(kvm); 1172 kvm_put_kvm(kvm);
1182 return r;
1183 }
1184 1173
1185 return fd; 1174 return fd;
1186} 1175}