aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDipankar Sarma <dipankar@in.ibm.com>2005-09-09 16:04:10 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2005-09-09 16:57:55 -0400
commitbadf16621c1f9d1ac753be056fce11b43d6e0be5 (patch)
tree3fdf833fdf2e3d3a439090743539680449ec3428
parentc0dfb2905126e9e94edebbce8d3e05001301f52d (diff)
[PATCH] files: break up files struct
In order for the RCU to work, the file table array, sets and their sizes must be updated atomically. Instead of ensuring this through too many memory barriers, we put the arrays and their sizes in a separate structure. This patch takes the first step of putting the file table elements in a separate structure fdtable that is embedded within files_struct. It also changes all the users to refer to the file table using the files_fdtable() macro. Subsequent application of RCU becomes easier after this. Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com> Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--arch/alpha/kernel/osf_sys.c4
-rw-r--r--arch/ia64/kernel/perfmon.c7
-rw-r--r--arch/sparc64/solaris/ioctl.c8
-rw-r--r--drivers/char/tty_io.c4
-rw-r--r--fs/exec.c8
-rw-r--r--fs/fcntl.c47
-rw-r--r--fs/file.c42
-rw-r--r--fs/locks.c8
-rw-r--r--fs/open.c41
-rw-r--r--fs/proc/array.c5
-rw-r--r--fs/proc/base.c4
-rw-r--r--fs/select.c12
-rw-r--r--include/linux/file.h23
-rw-r--r--include/linux/init_task.h13
-rw-r--r--kernel/exit.c21
-rw-r--r--kernel/fork.c82
-rw-r--r--security/selinux/hooks.c6
17 files changed, 211 insertions, 124 deletions
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 167fd89f8707..2b034182a0ca 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -974,6 +974,7 @@ osf_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp,
974 size_t size; 974 size_t size;
975 long timeout; 975 long timeout;
976 int ret = -EINVAL; 976 int ret = -EINVAL;
977 struct fdtable *fdt;
977 978
978 timeout = MAX_SCHEDULE_TIMEOUT; 979 timeout = MAX_SCHEDULE_TIMEOUT;
979 if (tvp) { 980 if (tvp) {
@@ -995,7 +996,8 @@ osf_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp,
995 } 996 }
996 } 997 }
997 998
998 if (n < 0 || n > current->files->max_fdset) 999 fdt = files_fdtable(current->files);
1000 if (n < 0 || n > fdt->max_fdset)
999 goto out_nofds; 1001 goto out_nofds;
1000 1002
1001 /* 1003 /*
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index f1201ac8a116..4ad97b3b39dc 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -38,6 +38,7 @@
38#include <linux/pagemap.h> 38#include <linux/pagemap.h>
39#include <linux/mount.h> 39#include <linux/mount.h>
40#include <linux/bitops.h> 40#include <linux/bitops.h>
41#include <linux/rcupdate.h>
41 42
42#include <asm/errno.h> 43#include <asm/errno.h>
43#include <asm/intrinsics.h> 44#include <asm/intrinsics.h>
@@ -2217,15 +2218,17 @@ static void
2217pfm_free_fd(int fd, struct file *file) 2218pfm_free_fd(int fd, struct file *file)
2218{ 2219{
2219 struct files_struct *files = current->files; 2220 struct files_struct *files = current->files;
2221 struct fdtable *fdt = files_fdtable(files);
2220 2222
2221 /* 2223 /*
2222 * there ie no fd_uninstall(), so we do it here 2224 * there ie no fd_uninstall(), so we do it here
2223 */ 2225 */
2224 spin_lock(&files->file_lock); 2226 spin_lock(&files->file_lock);
2225 files->fd[fd] = NULL; 2227 rcu_assign_pointer(fdt->fd[fd], NULL);
2226 spin_unlock(&files->file_lock); 2228 spin_unlock(&files->file_lock);
2227 2229
2228 if (file) put_filp(file); 2230 if (file)
2231 put_filp(file);
2229 put_unused_fd(fd); 2232 put_unused_fd(fd);
2230} 2233}
2231 2234
diff --git a/arch/sparc64/solaris/ioctl.c b/arch/sparc64/solaris/ioctl.c
index cac0a1cf0050..374766455f5e 100644
--- a/arch/sparc64/solaris/ioctl.c
+++ b/arch/sparc64/solaris/ioctl.c
@@ -293,11 +293,13 @@ static struct module_info {
293static inline int solaris_sockmod(unsigned int fd, unsigned int cmd, u32 arg) 293static inline int solaris_sockmod(unsigned int fd, unsigned int cmd, u32 arg)
294{ 294{
295 struct inode *ino; 295 struct inode *ino;
296 struct fdtable *fdt;
296 /* I wonder which of these tests are superfluous... --patrik */ 297 /* I wonder which of these tests are superfluous... --patrik */
297 spin_lock(&current->files->file_lock); 298 spin_lock(&current->files->file_lock);
298 if (! current->files->fd[fd] || 299 fdt = files_fdtable(current->files);
299 ! current->files->fd[fd]->f_dentry || 300 if (! fdt->fd[fd] ||
300 ! (ino = current->files->fd[fd]->f_dentry->d_inode) || 301 ! fdt->fd[fd]->f_dentry ||
302 ! (ino = fdt->fd[fd]->f_dentry->d_inode) ||
301 ! S_ISSOCK(ino->i_mode)) { 303 ! S_ISSOCK(ino->i_mode)) {
302 spin_unlock(&current->files->file_lock); 304 spin_unlock(&current->files->file_lock);
303 return TBADF; 305 return TBADF;
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 6a56ae4f7725..0bfc7af68917 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -2454,6 +2454,7 @@ static void __do_SAK(void *arg)
2454 int i; 2454 int i;
2455 struct file *filp; 2455 struct file *filp;
2456 struct tty_ldisc *disc; 2456 struct tty_ldisc *disc;
2457 struct fdtable *fdt;
2457 2458
2458 if (!tty) 2459 if (!tty)
2459 return; 2460 return;
@@ -2480,7 +2481,8 @@ static void __do_SAK(void *arg)
2480 task_lock(p); 2481 task_lock(p);
2481 if (p->files) { 2482 if (p->files) {
2482 spin_lock(&p->files->file_lock); 2483 spin_lock(&p->files->file_lock);
2483 for (i=0; i < p->files->max_fds; i++) { 2484 fdt = files_fdtable(p->files);
2485 for (i=0; i < fdt->max_fds; i++) {
2484 filp = fcheck_files(p->files, i); 2486 filp = fcheck_files(p->files, i);
2485 if (!filp) 2487 if (!filp)
2486 continue; 2488 continue;
diff --git a/fs/exec.c b/fs/exec.c
index 222ab1c572d8..14dd03907ccb 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -798,6 +798,7 @@ no_thread_group:
798static inline void flush_old_files(struct files_struct * files) 798static inline void flush_old_files(struct files_struct * files)
799{ 799{
800 long j = -1; 800 long j = -1;
801 struct fdtable *fdt;
801 802
802 spin_lock(&files->file_lock); 803 spin_lock(&files->file_lock);
803 for (;;) { 804 for (;;) {
@@ -805,12 +806,13 @@ static inline void flush_old_files(struct files_struct * files)
805 806
806 j++; 807 j++;
807 i = j * __NFDBITS; 808 i = j * __NFDBITS;
808 if (i >= files->max_fds || i >= files->max_fdset) 809 fdt = files_fdtable(files);
810 if (i >= fdt->max_fds || i >= fdt->max_fdset)
809 break; 811 break;
810 set = files->close_on_exec->fds_bits[j]; 812 set = fdt->close_on_exec->fds_bits[j];
811 if (!set) 813 if (!set)
812 continue; 814 continue;
813 files->close_on_exec->fds_bits[j] = 0; 815 fdt->close_on_exec->fds_bits[j] = 0;
814 spin_unlock(&files->file_lock); 816 spin_unlock(&files->file_lock);
815 for ( ; set ; i++,set >>= 1) { 817 for ( ; set ; i++,set >>= 1) {
816 if (set & 1) { 818 if (set & 1) {
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 6fbc9d8fcc36..bfecc6238083 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -24,20 +24,24 @@
24void fastcall set_close_on_exec(unsigned int fd, int flag) 24void fastcall set_close_on_exec(unsigned int fd, int flag)
25{ 25{
26 struct files_struct *files = current->files; 26 struct files_struct *files = current->files;
27 struct fdtable *fdt;
27 spin_lock(&files->file_lock); 28 spin_lock(&files->file_lock);
29 fdt = files_fdtable(files);
28 if (flag) 30 if (flag)
29 FD_SET(fd, files->close_on_exec); 31 FD_SET(fd, fdt->close_on_exec);
30 else 32 else
31 FD_CLR(fd, files->close_on_exec); 33 FD_CLR(fd, fdt->close_on_exec);
32 spin_unlock(&files->file_lock); 34 spin_unlock(&files->file_lock);
33} 35}
34 36
35static inline int get_close_on_exec(unsigned int fd) 37static inline int get_close_on_exec(unsigned int fd)
36{ 38{
37 struct files_struct *files = current->files; 39 struct files_struct *files = current->files;
40 struct fdtable *fdt;
38 int res; 41 int res;
39 spin_lock(&files->file_lock); 42 spin_lock(&files->file_lock);
40 res = FD_ISSET(fd, files->close_on_exec); 43 fdt = files_fdtable(files);
44 res = FD_ISSET(fd, fdt->close_on_exec);
41 spin_unlock(&files->file_lock); 45 spin_unlock(&files->file_lock);
42 return res; 46 return res;
43} 47}
@@ -54,24 +58,26 @@ static int locate_fd(struct files_struct *files,
54 unsigned int newfd; 58 unsigned int newfd;
55 unsigned int start; 59 unsigned int start;
56 int error; 60 int error;
61 struct fdtable *fdt;
57 62
58 error = -EINVAL; 63 error = -EINVAL;
59 if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) 64 if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
60 goto out; 65 goto out;
61 66
67 fdt = files_fdtable(files);
62repeat: 68repeat:
63 /* 69 /*
64 * Someone might have closed fd's in the range 70 * Someone might have closed fd's in the range
65 * orig_start..files->next_fd 71 * orig_start..fdt->next_fd
66 */ 72 */
67 start = orig_start; 73 start = orig_start;
68 if (start < files->next_fd) 74 if (start < fdt->next_fd)
69 start = files->next_fd; 75 start = fdt->next_fd;
70 76
71 newfd = start; 77 newfd = start;
72 if (start < files->max_fdset) { 78 if (start < fdt->max_fdset) {
73 newfd = find_next_zero_bit(files->open_fds->fds_bits, 79 newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
74 files->max_fdset, start); 80 fdt->max_fdset, start);
75 } 81 }
76 82
77 error = -EMFILE; 83 error = -EMFILE;
@@ -89,8 +95,8 @@ repeat:
89 if (error) 95 if (error)
90 goto repeat; 96 goto repeat;
91 97
92 if (start <= files->next_fd) 98 if (start <= fdt->next_fd)
93 files->next_fd = newfd + 1; 99 fdt->next_fd = newfd + 1;
94 100
95 error = newfd; 101 error = newfd;
96 102
@@ -101,13 +107,16 @@ out:
101static int dupfd(struct file *file, unsigned int start) 107static int dupfd(struct file *file, unsigned int start)
102{ 108{
103 struct files_struct * files = current->files; 109 struct files_struct * files = current->files;
110 struct fdtable *fdt;
104 int fd; 111 int fd;
105 112
106 spin_lock(&files->file_lock); 113 spin_lock(&files->file_lock);
107 fd = locate_fd(files, file, start); 114 fd = locate_fd(files, file, start);
108 if (fd >= 0) { 115 if (fd >= 0) {
109 FD_SET(fd, files->open_fds); 116 /* locate_fd() may have expanded fdtable, load the ptr */
110 FD_CLR(fd, files->close_on_exec); 117 fdt = files_fdtable(files);
118 FD_SET(fd, fdt->open_fds);
119 FD_CLR(fd, fdt->close_on_exec);
111 spin_unlock(&files->file_lock); 120 spin_unlock(&files->file_lock);
112 fd_install(fd, file); 121 fd_install(fd, file);
113 } else { 122 } else {
@@ -123,6 +132,7 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
123 int err = -EBADF; 132 int err = -EBADF;
124 struct file * file, *tofree; 133 struct file * file, *tofree;
125 struct files_struct * files = current->files; 134 struct files_struct * files = current->files;
135 struct fdtable *fdt;
126 136
127 spin_lock(&files->file_lock); 137 spin_lock(&files->file_lock);
128 if (!(file = fcheck(oldfd))) 138 if (!(file = fcheck(oldfd)))
@@ -148,13 +158,14 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
148 158
149 /* Yes. It's a race. In user space. Nothing sane to do */ 159 /* Yes. It's a race. In user space. Nothing sane to do */
150 err = -EBUSY; 160 err = -EBUSY;
151 tofree = files->fd[newfd]; 161 fdt = files_fdtable(files);
152 if (!tofree && FD_ISSET(newfd, files->open_fds)) 162 tofree = fdt->fd[newfd];
163 if (!tofree && FD_ISSET(newfd, fdt->open_fds))
153 goto out_fput; 164 goto out_fput;
154 165
155 files->fd[newfd] = file; 166 fdt->fd[newfd] = file;
156 FD_SET(newfd, files->open_fds); 167 FD_SET(newfd, fdt->open_fds);
157 FD_CLR(newfd, files->close_on_exec); 168 FD_CLR(newfd, fdt->close_on_exec);
158 spin_unlock(&files->file_lock); 169 spin_unlock(&files->file_lock);
159 170
160 if (tofree) 171 if (tofree)
diff --git a/fs/file.c b/fs/file.c
index 92b5f25985d2..f5926ce73f37 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -59,13 +59,15 @@ static int expand_fd_array(struct files_struct *files, int nr)
59{ 59{
60 struct file **new_fds; 60 struct file **new_fds;
61 int error, nfds; 61 int error, nfds;
62 struct fdtable *fdt;
62 63
63 64
64 error = -EMFILE; 65 error = -EMFILE;
65 if (files->max_fds >= NR_OPEN || nr >= NR_OPEN) 66 fdt = files_fdtable(files);
67 if (fdt->max_fds >= NR_OPEN || nr >= NR_OPEN)
66 goto out; 68 goto out;
67 69
68 nfds = files->max_fds; 70 nfds = fdt->max_fds;
69 spin_unlock(&files->file_lock); 71 spin_unlock(&files->file_lock);
70 72
71 /* 73 /*
@@ -95,13 +97,14 @@ static int expand_fd_array(struct files_struct *files, int nr)
95 goto out; 97 goto out;
96 98
97 /* Copy the existing array and install the new pointer */ 99 /* Copy the existing array and install the new pointer */
100 fdt = files_fdtable(files);
98 101
99 if (nfds > files->max_fds) { 102 if (nfds > fdt->max_fds) {
100 struct file **old_fds; 103 struct file **old_fds;
101 int i; 104 int i;
102 105
103 old_fds = xchg(&files->fd, new_fds); 106 old_fds = xchg(&fdt->fd, new_fds);
104 i = xchg(&files->max_fds, nfds); 107 i = xchg(&fdt->max_fds, nfds);
105 108
106 /* Don't copy/clear the array if we are creating a new 109 /* Don't copy/clear the array if we are creating a new
107 fd array for fork() */ 110 fd array for fork() */
@@ -164,12 +167,14 @@ static int expand_fdset(struct files_struct *files, int nr)
164{ 167{
165 fd_set *new_openset = NULL, *new_execset = NULL; 168 fd_set *new_openset = NULL, *new_execset = NULL;
166 int error, nfds = 0; 169 int error, nfds = 0;
170 struct fdtable *fdt;
167 171
168 error = -EMFILE; 172 error = -EMFILE;
169 if (files->max_fdset >= NR_OPEN || nr >= NR_OPEN) 173 fdt = files_fdtable(files);
174 if (fdt->max_fdset >= NR_OPEN || nr >= NR_OPEN)
170 goto out; 175 goto out;
171 176
172 nfds = files->max_fdset; 177 nfds = fdt->max_fdset;
173 spin_unlock(&files->file_lock); 178 spin_unlock(&files->file_lock);
174 179
175 /* Expand to the max in easy steps */ 180 /* Expand to the max in easy steps */
@@ -193,24 +198,25 @@ static int expand_fdset(struct files_struct *files, int nr)
193 error = 0; 198 error = 0;
194 199
195 /* Copy the existing tables and install the new pointers */ 200 /* Copy the existing tables and install the new pointers */
196 if (nfds > files->max_fdset) { 201 fdt = files_fdtable(files);
197 int i = files->max_fdset / (sizeof(unsigned long) * 8); 202 if (nfds > fdt->max_fdset) {
198 int count = (nfds - files->max_fdset) / 8; 203 int i = fdt->max_fdset / (sizeof(unsigned long) * 8);
204 int count = (nfds - fdt->max_fdset) / 8;
199 205
200 /* 206 /*
201 * Don't copy the entire array if the current fdset is 207 * Don't copy the entire array if the current fdset is
202 * not yet initialised. 208 * not yet initialised.
203 */ 209 */
204 if (i) { 210 if (i) {
205 memcpy (new_openset, files->open_fds, files->max_fdset/8); 211 memcpy (new_openset, fdt->open_fds, fdt->max_fdset/8);
206 memcpy (new_execset, files->close_on_exec, files->max_fdset/8); 212 memcpy (new_execset, fdt->close_on_exec, fdt->max_fdset/8);
207 memset (&new_openset->fds_bits[i], 0, count); 213 memset (&new_openset->fds_bits[i], 0, count);
208 memset (&new_execset->fds_bits[i], 0, count); 214 memset (&new_execset->fds_bits[i], 0, count);
209 } 215 }
210 216
211 nfds = xchg(&files->max_fdset, nfds); 217 nfds = xchg(&fdt->max_fdset, nfds);
212 new_openset = xchg(&files->open_fds, new_openset); 218 new_openset = xchg(&fdt->open_fds, new_openset);
213 new_execset = xchg(&files->close_on_exec, new_execset); 219 new_execset = xchg(&fdt->close_on_exec, new_execset);
214 spin_unlock(&files->file_lock); 220 spin_unlock(&files->file_lock);
215 free_fdset (new_openset, nfds); 221 free_fdset (new_openset, nfds);
216 free_fdset (new_execset, nfds); 222 free_fdset (new_execset, nfds);
@@ -237,13 +243,15 @@ out:
237int expand_files(struct files_struct *files, int nr) 243int expand_files(struct files_struct *files, int nr)
238{ 244{
239 int err, expand = 0; 245 int err, expand = 0;
246 struct fdtable *fdt;
240 247
241 if (nr >= files->max_fdset) { 248 fdt = files_fdtable(files);
249 if (nr >= fdt->max_fdset) {
242 expand = 1; 250 expand = 1;
243 if ((err = expand_fdset(files, nr))) 251 if ((err = expand_fdset(files, nr)))
244 goto out; 252 goto out;
245 } 253 }
246 if (nr >= files->max_fds) { 254 if (nr >= fdt->max_fds) {
247 expand = 1; 255 expand = 1;
248 if ((err = expand_fd_array(files, nr))) 256 if ((err = expand_fd_array(files, nr)))
249 goto out; 257 goto out;
diff --git a/fs/locks.c b/fs/locks.c
index 11956b6179ff..c2c09b4798d6 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2198,21 +2198,23 @@ void steal_locks(fl_owner_t from)
2198{ 2198{
2199 struct files_struct *files = current->files; 2199 struct files_struct *files = current->files;
2200 int i, j; 2200 int i, j;
2201 struct fdtable *fdt;
2201 2202
2202 if (from == files) 2203 if (from == files)
2203 return; 2204 return;
2204 2205
2205 lock_kernel(); 2206 lock_kernel();
2206 j = 0; 2207 j = 0;
2208 fdt = files_fdtable(files);
2207 for (;;) { 2209 for (;;) {
2208 unsigned long set; 2210 unsigned long set;
2209 i = j * __NFDBITS; 2211 i = j * __NFDBITS;
2210 if (i >= files->max_fdset || i >= files->max_fds) 2212 if (i >= fdt->max_fdset || i >= fdt->max_fds)
2211 break; 2213 break;
2212 set = files->open_fds->fds_bits[j++]; 2214 set = fdt->open_fds->fds_bits[j++];
2213 while (set) { 2215 while (set) {
2214 if (set & 1) { 2216 if (set & 1) {
2215 struct file *file = files->fd[i]; 2217 struct file *file = fdt->fd[i];
2216 if (file) 2218 if (file)
2217 __steal_locks(file, from); 2219 __steal_locks(file, from);
2218 } 2220 }
diff --git a/fs/open.c b/fs/open.c
index 4ee2dcc31c28..b6542516a0ca 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -842,14 +842,16 @@ int get_unused_fd(void)
842{ 842{
843 struct files_struct * files = current->files; 843 struct files_struct * files = current->files;
844 int fd, error; 844 int fd, error;
845 struct fdtable *fdt;
845 846
846 error = -EMFILE; 847 error = -EMFILE;
847 spin_lock(&files->file_lock); 848 spin_lock(&files->file_lock);
848 849
849repeat: 850repeat:
850 fd = find_next_zero_bit(files->open_fds->fds_bits, 851 fdt = files_fdtable(files);
851 files->max_fdset, 852 fd = find_next_zero_bit(fdt->open_fds->fds_bits,
852 files->next_fd); 853 fdt->max_fdset,
854 fdt->next_fd);
853 855
854 /* 856 /*
855 * N.B. For clone tasks sharing a files structure, this test 857 * N.B. For clone tasks sharing a files structure, this test
@@ -872,14 +874,14 @@ repeat:
872 goto repeat; 874 goto repeat;
873 } 875 }
874 876
875 FD_SET(fd, files->open_fds); 877 FD_SET(fd, fdt->open_fds);
876 FD_CLR(fd, files->close_on_exec); 878 FD_CLR(fd, fdt->close_on_exec);
877 files->next_fd = fd + 1; 879 fdt->next_fd = fd + 1;
878#if 1 880#if 1
879 /* Sanity check */ 881 /* Sanity check */
880 if (files->fd[fd] != NULL) { 882 if (fdt->fd[fd] != NULL) {
881 printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd); 883 printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);
882 files->fd[fd] = NULL; 884 fdt->fd[fd] = NULL;
883 } 885 }
884#endif 886#endif
885 error = fd; 887 error = fd;
@@ -893,9 +895,10 @@ EXPORT_SYMBOL(get_unused_fd);
893 895
894static inline void __put_unused_fd(struct files_struct *files, unsigned int fd) 896static inline void __put_unused_fd(struct files_struct *files, unsigned int fd)
895{ 897{
896 __FD_CLR(fd, files->open_fds); 898 struct fdtable *fdt = files_fdtable(files);
897 if (fd < files->next_fd) 899 __FD_CLR(fd, fdt->open_fds);
898 files->next_fd = fd; 900 if (fd < fdt->next_fd)
901 fdt->next_fd = fd;
899} 902}
900 903
901void fastcall put_unused_fd(unsigned int fd) 904void fastcall put_unused_fd(unsigned int fd)
@@ -924,10 +927,12 @@ EXPORT_SYMBOL(put_unused_fd);
924void fastcall fd_install(unsigned int fd, struct file * file) 927void fastcall fd_install(unsigned int fd, struct file * file)
925{ 928{
926 struct files_struct *files = current->files; 929 struct files_struct *files = current->files;
930 struct fdtable *fdt;
927 spin_lock(&files->file_lock); 931 spin_lock(&files->file_lock);
928 if (unlikely(files->fd[fd] != NULL)) 932 fdt = files_fdtable(files);
933 if (unlikely(fdt->fd[fd] != NULL))
929 BUG(); 934 BUG();
930 files->fd[fd] = file; 935 fdt->fd[fd] = file;
931 spin_unlock(&files->file_lock); 936 spin_unlock(&files->file_lock);
932} 937}
933 938
@@ -1010,15 +1015,17 @@ asmlinkage long sys_close(unsigned int fd)
1010{ 1015{
1011 struct file * filp; 1016 struct file * filp;
1012 struct files_struct *files = current->files; 1017 struct files_struct *files = current->files;
1018 struct fdtable *fdt;
1013 1019
1014 spin_lock(&files->file_lock); 1020 spin_lock(&files->file_lock);
1015 if (fd >= files->max_fds) 1021 fdt = files_fdtable(files);
1022 if (fd >= fdt->max_fds)
1016 goto out_unlock; 1023 goto out_unlock;
1017 filp = files->fd[fd]; 1024 filp = fdt->fd[fd];
1018 if (!filp) 1025 if (!filp)
1019 goto out_unlock; 1026 goto out_unlock;
1020 files->fd[fd] = NULL; 1027 fdt->fd[fd] = NULL;
1021 FD_CLR(fd, files->close_on_exec); 1028 FD_CLR(fd, fdt->close_on_exec);
1022 __put_unused_fd(files, fd); 1029 __put_unused_fd(files, fd);
1023 spin_unlock(&files->file_lock); 1030 spin_unlock(&files->file_lock);
1024 return filp_close(filp, files); 1031 return filp_close(filp, files);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 37668fe998ad..d88d518d30f6 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -159,6 +159,7 @@ static inline char * task_state(struct task_struct *p, char *buffer)
159{ 159{
160 struct group_info *group_info; 160 struct group_info *group_info;
161 int g; 161 int g;
162 struct fdtable *fdt = NULL;
162 163
163 read_lock(&tasklist_lock); 164 read_lock(&tasklist_lock);
164 buffer += sprintf(buffer, 165 buffer += sprintf(buffer,
@@ -179,10 +180,12 @@ static inline char * task_state(struct task_struct *p, char *buffer)
179 p->gid, p->egid, p->sgid, p->fsgid); 180 p->gid, p->egid, p->sgid, p->fsgid);
180 read_unlock(&tasklist_lock); 181 read_unlock(&tasklist_lock);
181 task_lock(p); 182 task_lock(p);
183 if (p->files)
184 fdt = files_fdtable(p->files);
182 buffer += sprintf(buffer, 185 buffer += sprintf(buffer,
183 "FDSize:\t%d\n" 186 "FDSize:\t%d\n"
184 "Groups:\t", 187 "Groups:\t",
185 p->files ? p->files->max_fds : 0); 188 fdt ? fdt->max_fds : 0);
186 189
187 group_info = p->group_info; 190 group_info = p->group_info;
188 get_group_info(group_info); 191 get_group_info(group_info);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 84751f3f52d5..d0087a0b024b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1039,6 +1039,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1039 int retval; 1039 int retval;
1040 char buf[NUMBUF]; 1040 char buf[NUMBUF];
1041 struct files_struct * files; 1041 struct files_struct * files;
1042 struct fdtable *fdt;
1042 1043
1043 retval = -ENOENT; 1044 retval = -ENOENT;
1044 if (!pid_alive(p)) 1045 if (!pid_alive(p))
@@ -1062,8 +1063,9 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1062 if (!files) 1063 if (!files)
1063 goto out; 1064 goto out;
1064 spin_lock(&files->file_lock); 1065 spin_lock(&files->file_lock);
1066 fdt = files_fdtable(files);
1065 for (fd = filp->f_pos-2; 1067 for (fd = filp->f_pos-2;
1066 fd < files->max_fds; 1068 fd < fdt->max_fds;
1067 fd++, filp->f_pos++) { 1069 fd++, filp->f_pos++) {
1068 unsigned int i,j; 1070 unsigned int i,j;
1069 1071
diff --git a/fs/select.c b/fs/select.c
index b80e7eb0ac0d..2e56325c73c4 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -132,11 +132,13 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds)
132 unsigned long *open_fds; 132 unsigned long *open_fds;
133 unsigned long set; 133 unsigned long set;
134 int max; 134 int max;
135 struct fdtable *fdt;
135 136
136 /* handle last in-complete long-word first */ 137 /* handle last in-complete long-word first */
137 set = ~(~0UL << (n & (__NFDBITS-1))); 138 set = ~(~0UL << (n & (__NFDBITS-1)));
138 n /= __NFDBITS; 139 n /= __NFDBITS;
139 open_fds = current->files->open_fds->fds_bits+n; 140 fdt = files_fdtable(current->files);
141 open_fds = fdt->open_fds->fds_bits+n;
140 max = 0; 142 max = 0;
141 if (set) { 143 if (set) {
142 set &= BITS(fds, n); 144 set &= BITS(fds, n);
@@ -299,6 +301,7 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
299 char *bits; 301 char *bits;
300 long timeout; 302 long timeout;
301 int ret, size, max_fdset; 303 int ret, size, max_fdset;
304 struct fdtable *fdt;
302 305
303 timeout = MAX_SCHEDULE_TIMEOUT; 306 timeout = MAX_SCHEDULE_TIMEOUT;
304 if (tvp) { 307 if (tvp) {
@@ -326,7 +329,8 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
326 goto out_nofds; 329 goto out_nofds;
327 330
328 /* max_fdset can increase, so grab it once to avoid race */ 331 /* max_fdset can increase, so grab it once to avoid race */
329 max_fdset = current->files->max_fdset; 332 fdt = files_fdtable(current->files);
333 max_fdset = fdt->max_fdset;
330 if (n > max_fdset) 334 if (n > max_fdset)
331 n = max_fdset; 335 n = max_fdset;
332 336
@@ -464,9 +468,11 @@ asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long ti
464 unsigned int i; 468 unsigned int i;
465 struct poll_list *head; 469 struct poll_list *head;
466 struct poll_list *walk; 470 struct poll_list *walk;
471 struct fdtable *fdt;
467 472
468 /* Do a sanity check on nfds ... */ 473 /* Do a sanity check on nfds ... */
469 if (nfds > current->files->max_fdset && nfds > OPEN_MAX) 474 fdt = files_fdtable(current->files);
475 if (nfds > fdt->max_fdset && nfds > OPEN_MAX)
470 return -EINVAL; 476 return -EINVAL;
471 477
472 if (timeout) { 478 if (timeout) {
diff --git a/include/linux/file.h b/include/linux/file.h
index 5206beb9a80e..db372230848e 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -16,23 +16,29 @@
16 */ 16 */
17#define NR_OPEN_DEFAULT BITS_PER_LONG 17#define NR_OPEN_DEFAULT BITS_PER_LONG
18 18
19struct fdtable {
20 unsigned int max_fds;
21 int max_fdset;
22 int next_fd;
23 struct file ** fd; /* current fd array */
24 fd_set *close_on_exec;
25 fd_set *open_fds;
26};
27
19/* 28/*
20 * Open file table structure 29 * Open file table structure
21 */ 30 */
22struct files_struct { 31struct files_struct {
23 atomic_t count; 32 atomic_t count;
24 spinlock_t file_lock; /* Protects all the below members. Nests inside tsk->alloc_lock */ 33 spinlock_t file_lock; /* Protects all the below members. Nests inside tsk->alloc_lock */
25 int max_fds; 34 struct fdtable fdtab;
26 int max_fdset;
27 int next_fd;
28 struct file ** fd; /* current fd array */
29 fd_set *close_on_exec;
30 fd_set *open_fds;
31 fd_set close_on_exec_init; 35 fd_set close_on_exec_init;
32 fd_set open_fds_init; 36 fd_set open_fds_init;
33 struct file * fd_array[NR_OPEN_DEFAULT]; 37 struct file * fd_array[NR_OPEN_DEFAULT];
34}; 38};
35 39
40#define files_fdtable(files) (&(files)->fdtab)
41
36extern void FASTCALL(__fput(struct file *)); 42extern void FASTCALL(__fput(struct file *));
37extern void FASTCALL(fput(struct file *)); 43extern void FASTCALL(fput(struct file *));
38 44
@@ -63,9 +69,10 @@ extern int expand_files(struct files_struct *, int nr);
63static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd) 69static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd)
64{ 70{
65 struct file * file = NULL; 71 struct file * file = NULL;
72 struct fdtable *fdt = files_fdtable(files);
66 73
67 if (fd < files->max_fds) 74 if (fd < fdt->max_fds)
68 file = files->fd[fd]; 75 file = fdt->fd[fd];
69 return file; 76 return file;
70} 77}
71 78
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index c727c195a91a..94aefa54a1b5 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -3,16 +3,21 @@
3 3
4#include <linux/file.h> 4#include <linux/file.h>
5 5
6#define INIT_FILES \ 6#define INIT_FDTABLE \
7{ \ 7{ \
8 .count = ATOMIC_INIT(1), \
9 .file_lock = SPIN_LOCK_UNLOCKED, \
10 .max_fds = NR_OPEN_DEFAULT, \ 8 .max_fds = NR_OPEN_DEFAULT, \
11 .max_fdset = __FD_SETSIZE, \ 9 .max_fdset = __FD_SETSIZE, \
12 .next_fd = 0, \ 10 .next_fd = 0, \
13 .fd = &init_files.fd_array[0], \ 11 .fd = &init_files.fd_array[0], \
14 .close_on_exec = &init_files.close_on_exec_init, \ 12 .close_on_exec = &init_files.close_on_exec_init, \
15 .open_fds = &init_files.open_fds_init, \ 13 .open_fds = &init_files.open_fds_init, \
14}
15
16#define INIT_FILES \
17{ \
18 .count = ATOMIC_INIT(1), \
19 .file_lock = SPIN_LOCK_UNLOCKED, \
20 .fdtab = INIT_FDTABLE, \
16 .close_on_exec_init = { { 0, } }, \ 21 .close_on_exec_init = { { 0, } }, \
17 .open_fds_init = { { 0, } }, \ 22 .open_fds_init = { { 0, } }, \
18 .fd_array = { NULL, } \ 23 .fd_array = { NULL, } \
diff --git a/kernel/exit.c b/kernel/exit.c
index 5b0fb9f09f21..83beb1e93b18 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -368,17 +368,19 @@ EXPORT_SYMBOL(daemonize);
368static inline void close_files(struct files_struct * files) 368static inline void close_files(struct files_struct * files)
369{ 369{
370 int i, j; 370 int i, j;
371 struct fdtable *fdt;
371 372
372 j = 0; 373 j = 0;
374 fdt = files_fdtable(files);
373 for (;;) { 375 for (;;) {
374 unsigned long set; 376 unsigned long set;
375 i = j * __NFDBITS; 377 i = j * __NFDBITS;
376 if (i >= files->max_fdset || i >= files->max_fds) 378 if (i >= fdt->max_fdset || i >= fdt->max_fds)
377 break; 379 break;
378 set = files->open_fds->fds_bits[j++]; 380 set = fdt->open_fds->fds_bits[j++];
379 while (set) { 381 while (set) {
380 if (set & 1) { 382 if (set & 1) {
381 struct file * file = xchg(&files->fd[i], NULL); 383 struct file * file = xchg(&fdt->fd[i], NULL);
382 if (file) 384 if (file)
383 filp_close(file, files); 385 filp_close(file, files);
384 } 386 }
@@ -403,16 +405,19 @@ struct files_struct *get_files_struct(struct task_struct *task)
403 405
404void fastcall put_files_struct(struct files_struct *files) 406void fastcall put_files_struct(struct files_struct *files)
405{ 407{
408 struct fdtable *fdt;
409
406 if (atomic_dec_and_test(&files->count)) { 410 if (atomic_dec_and_test(&files->count)) {
407 close_files(files); 411 close_files(files);
408 /* 412 /*
409 * Free the fd and fdset arrays if we expanded them. 413 * Free the fd and fdset arrays if we expanded them.
410 */ 414 */
411 if (files->fd != &files->fd_array[0]) 415 fdt = files_fdtable(files);
412 free_fd_array(files->fd, files->max_fds); 416 if (fdt->fd != &files->fd_array[0])
413 if (files->max_fdset > __FD_SETSIZE) { 417 free_fd_array(fdt->fd, fdt->max_fds);
414 free_fdset(files->open_fds, files->max_fdset); 418 if (fdt->max_fdset > __FD_SETSIZE) {
415 free_fdset(files->close_on_exec, files->max_fdset); 419 free_fdset(fdt->open_fds, fdt->max_fdset);
420 free_fdset(fdt->close_on_exec, fdt->max_fdset);
416 } 421 }
417 kmem_cache_free(files_cachep, files); 422 kmem_cache_free(files_cachep, files);
418 } 423 }
diff --git a/kernel/fork.c b/kernel/fork.c
index b25802065031..ecc694debb50 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -568,21 +568,47 @@ static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
568static int count_open_files(struct files_struct *files, int size) 568static int count_open_files(struct files_struct *files, int size)
569{ 569{
570 int i; 570 int i;
571 struct fdtable *fdt;
571 572
572 /* Find the last open fd */ 573 /* Find the last open fd */
574 fdt = files_fdtable(files);
573 for (i = size/(8*sizeof(long)); i > 0; ) { 575 for (i = size/(8*sizeof(long)); i > 0; ) {
574 if (files->open_fds->fds_bits[--i]) 576 if (fdt->open_fds->fds_bits[--i])
575 break; 577 break;
576 } 578 }
577 i = (i+1) * 8 * sizeof(long); 579 i = (i+1) * 8 * sizeof(long);
578 return i; 580 return i;
579} 581}
580 582
583static struct files_struct *alloc_files(void)
584{
585 struct files_struct *newf;
586 struct fdtable *fdt;
587
588 newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
589 if (!newf)
590 goto out;
591
592 atomic_set(&newf->count, 1);
593
594 spin_lock_init(&newf->file_lock);
595 fdt = files_fdtable(newf);
596 fdt->next_fd = 0;
597 fdt->max_fds = NR_OPEN_DEFAULT;
598 fdt->max_fdset = __FD_SETSIZE;
599 fdt->close_on_exec = &newf->close_on_exec_init;
600 fdt->open_fds = &newf->open_fds_init;
601 fdt->fd = &newf->fd_array[0];
602out:
603 return newf;
604}
605
581static int copy_files(unsigned long clone_flags, struct task_struct * tsk) 606static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
582{ 607{
583 struct files_struct *oldf, *newf; 608 struct files_struct *oldf, *newf;
584 struct file **old_fds, **new_fds; 609 struct file **old_fds, **new_fds;
585 int open_files, size, i, error = 0, expand; 610 int open_files, size, i, error = 0, expand;
611 struct fdtable *old_fdt, *new_fdt;
586 612
587 /* 613 /*
588 * A background process may not have any files ... 614 * A background process may not have any files ...
@@ -603,35 +629,27 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
603 */ 629 */
604 tsk->files = NULL; 630 tsk->files = NULL;
605 error = -ENOMEM; 631 error = -ENOMEM;
606 newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL); 632 newf = alloc_files();
607 if (!newf) 633 if (!newf)
608 goto out; 634 goto out;
609 635
610 atomic_set(&newf->count, 1);
611
612 spin_lock_init(&newf->file_lock);
613 newf->next_fd = 0;
614 newf->max_fds = NR_OPEN_DEFAULT;
615 newf->max_fdset = __FD_SETSIZE;
616 newf->close_on_exec = &newf->close_on_exec_init;
617 newf->open_fds = &newf->open_fds_init;
618 newf->fd = &newf->fd_array[0];
619
620 spin_lock(&oldf->file_lock); 636 spin_lock(&oldf->file_lock);
621 637 old_fdt = files_fdtable(oldf);
622 open_files = count_open_files(oldf, oldf->max_fdset); 638 new_fdt = files_fdtable(newf);
639 size = old_fdt->max_fdset;
640 open_files = count_open_files(oldf, old_fdt->max_fdset);
623 expand = 0; 641 expand = 0;
624 642
625 /* 643 /*
626 * Check whether we need to allocate a larger fd array or fd set. 644 * Check whether we need to allocate a larger fd array or fd set.
627 * Note: we're not a clone task, so the open count won't change. 645 * Note: we're not a clone task, so the open count won't change.
628 */ 646 */
629 if (open_files > newf->max_fdset) { 647 if (open_files > new_fdt->max_fdset) {
630 newf->max_fdset = 0; 648 new_fdt->max_fdset = 0;
631 expand = 1; 649 expand = 1;
632 } 650 }
633 if (open_files > newf->max_fds) { 651 if (open_files > new_fdt->max_fds) {
634 newf->max_fds = 0; 652 new_fdt->max_fds = 0;
635 expand = 1; 653 expand = 1;
636 } 654 }
637 655
@@ -646,11 +664,11 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
646 spin_lock(&oldf->file_lock); 664 spin_lock(&oldf->file_lock);
647 } 665 }
648 666
649 old_fds = oldf->fd; 667 old_fds = old_fdt->fd;
650 new_fds = newf->fd; 668 new_fds = new_fdt->fd;
651 669
652 memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8); 670 memcpy(new_fdt->open_fds->fds_bits, old_fdt->open_fds->fds_bits, open_files/8);
653 memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8); 671 memcpy(new_fdt->close_on_exec->fds_bits, old_fdt->close_on_exec->fds_bits, open_files/8);
654 672
655 for (i = open_files; i != 0; i--) { 673 for (i = open_files; i != 0; i--) {
656 struct file *f = *old_fds++; 674 struct file *f = *old_fds++;
@@ -663,24 +681,24 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
663 * is partway through open(). So make sure that this 681 * is partway through open(). So make sure that this
664 * fd is available to the new process. 682 * fd is available to the new process.
665 */ 683 */
666 FD_CLR(open_files - i, newf->open_fds); 684 FD_CLR(open_files - i, new_fdt->open_fds);
667 } 685 }
668 *new_fds++ = f; 686 *new_fds++ = f;
669 } 687 }
670 spin_unlock(&oldf->file_lock); 688 spin_unlock(&oldf->file_lock);
671 689
672 /* compute the remainder to be cleared */ 690 /* compute the remainder to be cleared */
673 size = (newf->max_fds - open_files) * sizeof(struct file *); 691 size = (new_fdt->max_fds - open_files) * sizeof(struct file *);
674 692
675 /* This is long word aligned thus could use a optimized version */ 693 /* This is long word aligned thus could use a optimized version */
676 memset(new_fds, 0, size); 694 memset(new_fds, 0, size);
677 695
678 if (newf->max_fdset > open_files) { 696 if (new_fdt->max_fdset > open_files) {
679 int left = (newf->max_fdset-open_files)/8; 697 int left = (new_fdt->max_fdset-open_files)/8;
680 int start = open_files / (8 * sizeof(unsigned long)); 698 int start = open_files / (8 * sizeof(unsigned long));
681 699
682 memset(&newf->open_fds->fds_bits[start], 0, left); 700 memset(&new_fdt->open_fds->fds_bits[start], 0, left);
683 memset(&newf->close_on_exec->fds_bits[start], 0, left); 701 memset(&new_fdt->close_on_exec->fds_bits[start], 0, left);
684 } 702 }
685 703
686 tsk->files = newf; 704 tsk->files = newf;
@@ -689,9 +707,9 @@ out:
689 return error; 707 return error;
690 708
691out_release: 709out_release:
692 free_fdset (newf->close_on_exec, newf->max_fdset); 710 free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset);
693 free_fdset (newf->open_fds, newf->max_fdset); 711 free_fdset (new_fdt->open_fds, new_fdt->max_fdset);
694 free_fd_array(newf->fd, newf->max_fds); 712 free_fd_array(new_fdt->fd, new_fdt->max_fds);
695 kmem_cache_free(files_cachep, newf); 713 kmem_cache_free(files_cachep, newf);
696 goto out; 714 goto out;
697} 715}
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 3f0b533be92c..acb5a495a902 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1594,6 +1594,7 @@ static inline void flush_unauthorized_files(struct files_struct * files)
1594 struct avc_audit_data ad; 1594 struct avc_audit_data ad;
1595 struct file *file, *devnull = NULL; 1595 struct file *file, *devnull = NULL;
1596 struct tty_struct *tty = current->signal->tty; 1596 struct tty_struct *tty = current->signal->tty;
1597 struct fdtable *fdt;
1597 long j = -1; 1598 long j = -1;
1598 1599
1599 if (tty) { 1600 if (tty) {
@@ -1627,9 +1628,10 @@ static inline void flush_unauthorized_files(struct files_struct * files)
1627 1628
1628 j++; 1629 j++;
1629 i = j * __NFDBITS; 1630 i = j * __NFDBITS;
1630 if (i >= files->max_fds || i >= files->max_fdset) 1631 fdt = files_fdtable(files);
1632 if (i >= fdt->max_fds || i >= fdt->max_fdset)
1631 break; 1633 break;
1632 set = files->open_fds->fds_bits[j]; 1634 set = fdt->open_fds->fds_bits[j];
1633 if (!set) 1635 if (!set)
1634 continue; 1636 continue;
1635 spin_unlock(&files->file_lock); 1637 spin_unlock(&files->file_lock);