aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/fcntl.c9
-rw-r--r--fs/file.c34
-rw-r--r--fs/open.c8
-rw-r--r--include/linux/file.h28
-rw-r--r--include/linux/init_task.h10
-rw-r--r--kernel/fork.c8
6 files changed, 55 insertions, 42 deletions
diff --git a/fs/fcntl.c b/fs/fcntl.c
index dc4a7007f4e7..03c789560fb8 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -73,8 +73,8 @@ repeat:
73 * orig_start..fdt->next_fd 73 * orig_start..fdt->next_fd
74 */ 74 */
75 start = orig_start; 75 start = orig_start;
76 if (start < fdt->next_fd) 76 if (start < files->next_fd)
77 start = fdt->next_fd; 77 start = files->next_fd;
78 78
79 newfd = start; 79 newfd = start;
80 if (start < fdt->max_fdset) { 80 if (start < fdt->max_fdset) {
@@ -102,9 +102,8 @@ repeat:
102 * we reacquire the fdtable pointer and use it while holding 102 * we reacquire the fdtable pointer and use it while holding
103 * the lock, no one can free it during that time. 103 * the lock, no one can free it during that time.
104 */ 104 */
105 fdt = files_fdtable(files); 105 if (start <= files->next_fd)
106 if (start <= fdt->next_fd) 106 files->next_fd = newfd + 1;
107 fdt->next_fd = newfd + 1;
108 107
109 error = newfd; 108 error = newfd;
110 109
diff --git a/fs/file.c b/fs/file.c
index cea7cbea11d0..bbc743314730 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -125,7 +125,8 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
125 kmem_cache_free(files_cachep, fdt->free_files); 125 kmem_cache_free(files_cachep, fdt->free_files);
126 return; 126 return;
127 } 127 }
128 if (fdt->max_fdset <= __FD_SETSIZE && fdt->max_fds <= NR_OPEN_DEFAULT) { 128 if (fdt->max_fdset <= EMBEDDED_FD_SET_SIZE &&
129 fdt->max_fds <= NR_OPEN_DEFAULT) {
129 /* 130 /*
130 * The fdtable was embedded 131 * The fdtable was embedded
131 */ 132 */
@@ -155,8 +156,9 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
155 156
156void free_fdtable(struct fdtable *fdt) 157void free_fdtable(struct fdtable *fdt)
157{ 158{
158 if (fdt->free_files || fdt->max_fdset > __FD_SETSIZE || 159 if (fdt->free_files ||
159 fdt->max_fds > NR_OPEN_DEFAULT) 160 fdt->max_fdset > EMBEDDED_FD_SET_SIZE ||
161 fdt->max_fds > NR_OPEN_DEFAULT)
160 call_rcu(&fdt->rcu, free_fdtable_rcu); 162 call_rcu(&fdt->rcu, free_fdtable_rcu);
161} 163}
162 164
@@ -199,7 +201,6 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt)
199 (nfdt->max_fds - fdt->max_fds) * 201 (nfdt->max_fds - fdt->max_fds) *
200 sizeof(struct file *)); 202 sizeof(struct file *));
201 } 203 }
202 nfdt->next_fd = fdt->next_fd;
203} 204}
204 205
205/* 206/*
@@ -220,11 +221,9 @@ fd_set * alloc_fdset(int num)
220 221
221void free_fdset(fd_set *array, int num) 222void free_fdset(fd_set *array, int num)
222{ 223{
223 int size = num / 8; 224 if (num <= EMBEDDED_FD_SET_SIZE) /* Don't free an embedded fdset */
224
225 if (num <= __FD_SETSIZE) /* Don't free an embedded fdset */
226 return; 225 return;
227 else if (size <= PAGE_SIZE) 226 else if (num <= 8 * PAGE_SIZE)
228 kfree(array); 227 kfree(array);
229 else 228 else
230 vfree(array); 229 vfree(array);
@@ -237,22 +236,17 @@ static struct fdtable *alloc_fdtable(int nr)
237 fd_set *new_openset = NULL, *new_execset = NULL; 236 fd_set *new_openset = NULL, *new_execset = NULL;
238 struct file **new_fds; 237 struct file **new_fds;
239 238
240 fdt = kmalloc(sizeof(*fdt), GFP_KERNEL); 239 fdt = kzalloc(sizeof(*fdt), GFP_KERNEL);
241 if (!fdt) 240 if (!fdt)
242 goto out; 241 goto out;
243 memset(fdt, 0, sizeof(*fdt));
244 242
245 nfds = __FD_SETSIZE; 243 nfds = 8 * L1_CACHE_BYTES;
246 /* Expand to the max in easy steps */ 244 /* Expand to the max in easy steps */
247 do { 245 while (nfds <= nr) {
248 if (nfds < (PAGE_SIZE * 8)) 246 nfds = nfds * 2;
249 nfds = PAGE_SIZE * 8; 247 if (nfds > NR_OPEN)
250 else { 248 nfds = NR_OPEN;
251 nfds = nfds * 2; 249 }
252 if (nfds > NR_OPEN)
253 nfds = NR_OPEN;
254 }
255 } while (nfds <= nr);
256 250
257 new_openset = alloc_fdset(nfds); 251 new_openset = alloc_fdset(nfds);
258 new_execset = alloc_fdset(nfds); 252 new_execset = alloc_fdset(nfds);
diff --git a/fs/open.c b/fs/open.c
index 70e0230d8e77..1091dadd6c38 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -973,7 +973,7 @@ repeat:
973 fdt = files_fdtable(files); 973 fdt = files_fdtable(files);
974 fd = find_next_zero_bit(fdt->open_fds->fds_bits, 974 fd = find_next_zero_bit(fdt->open_fds->fds_bits,
975 fdt->max_fdset, 975 fdt->max_fdset,
976 fdt->next_fd); 976 files->next_fd);
977 977
978 /* 978 /*
979 * N.B. For clone tasks sharing a files structure, this test 979 * N.B. For clone tasks sharing a files structure, this test
@@ -998,7 +998,7 @@ repeat:
998 998
999 FD_SET(fd, fdt->open_fds); 999 FD_SET(fd, fdt->open_fds);
1000 FD_CLR(fd, fdt->close_on_exec); 1000 FD_CLR(fd, fdt->close_on_exec);
1001 fdt->next_fd = fd + 1; 1001 files->next_fd = fd + 1;
1002#if 1 1002#if 1
1003 /* Sanity check */ 1003 /* Sanity check */
1004 if (fdt->fd[fd] != NULL) { 1004 if (fdt->fd[fd] != NULL) {
@@ -1019,8 +1019,8 @@ static void __put_unused_fd(struct files_struct *files, unsigned int fd)
1019{ 1019{
1020 struct fdtable *fdt = files_fdtable(files); 1020 struct fdtable *fdt = files_fdtable(files);
1021 __FD_CLR(fd, fdt->open_fds); 1021 __FD_CLR(fd, fdt->open_fds);
1022 if (fd < fdt->next_fd) 1022 if (fd < files->next_fd)
1023 fdt->next_fd = fd; 1023 files->next_fd = fd;
1024} 1024}
1025 1025
1026void fastcall put_unused_fd(unsigned int fd) 1026void fastcall put_unused_fd(unsigned int fd)
diff --git a/include/linux/file.h b/include/linux/file.h
index 9901b850f2e4..9f7c2513866f 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -10,6 +10,7 @@
10#include <linux/compiler.h> 10#include <linux/compiler.h>
11#include <linux/spinlock.h> 11#include <linux/spinlock.h>
12#include <linux/rcupdate.h> 12#include <linux/rcupdate.h>
13#include <linux/types.h>
13 14
14/* 15/*
15 * The default fd array needs to be at least BITS_PER_LONG, 16 * The default fd array needs to be at least BITS_PER_LONG,
@@ -17,10 +18,22 @@
17 */ 18 */
18#define NR_OPEN_DEFAULT BITS_PER_LONG 19#define NR_OPEN_DEFAULT BITS_PER_LONG
19 20
21/*
22 * The embedded_fd_set is a small fd_set,
23 * suitable for most tasks (which open <= BITS_PER_LONG files)
24 */
25struct embedded_fd_set {
26 unsigned long fds_bits[1];
27};
28
29/*
30 * More than this number of fds: we use a separately allocated fd_set
31 */
32#define EMBEDDED_FD_SET_SIZE (BITS_PER_BYTE * sizeof(struct embedded_fd_set))
33
20struct fdtable { 34struct fdtable {
21 unsigned int max_fds; 35 unsigned int max_fds;
22 int max_fdset; 36 int max_fdset;
23 int next_fd;
24 struct file ** fd; /* current fd array */ 37 struct file ** fd; /* current fd array */
25 fd_set *close_on_exec; 38 fd_set *close_on_exec;
26 fd_set *open_fds; 39 fd_set *open_fds;
@@ -33,13 +46,20 @@ struct fdtable {
33 * Open file table structure 46 * Open file table structure
34 */ 47 */
35struct files_struct { 48struct files_struct {
49 /*
50 * read mostly part
51 */
36 atomic_t count; 52 atomic_t count;
37 struct fdtable *fdt; 53 struct fdtable *fdt;
38 struct fdtable fdtab; 54 struct fdtable fdtab;
39 fd_set close_on_exec_init; 55 /*
40 fd_set open_fds_init; 56 * written part on a separate cache line in SMP
57 */
58 spinlock_t file_lock ____cacheline_aligned_in_smp;
59 int next_fd;
60 struct embedded_fd_set close_on_exec_init;
61 struct embedded_fd_set open_fds_init;
41 struct file * fd_array[NR_OPEN_DEFAULT]; 62 struct file * fd_array[NR_OPEN_DEFAULT];
42 spinlock_t file_lock; /* Protects concurrent writers. Nests inside tsk->alloc_lock */
43}; 63};
44 64
45#define files_fdtable(files) (rcu_dereference((files)->fdt)) 65#define files_fdtable(files) (rcu_dereference((files)->fdt))
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index dcfd2ecccb5d..92146f3b7423 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -7,11 +7,10 @@
7#define INIT_FDTABLE \ 7#define INIT_FDTABLE \
8{ \ 8{ \
9 .max_fds = NR_OPEN_DEFAULT, \ 9 .max_fds = NR_OPEN_DEFAULT, \
10 .max_fdset = __FD_SETSIZE, \ 10 .max_fdset = EMBEDDED_FD_SET_SIZE, \
11 .next_fd = 0, \
12 .fd = &init_files.fd_array[0], \ 11 .fd = &init_files.fd_array[0], \
13 .close_on_exec = &init_files.close_on_exec_init, \ 12 .close_on_exec = (fd_set *)&init_files.close_on_exec_init, \
14 .open_fds = &init_files.open_fds_init, \ 13 .open_fds = (fd_set *)&init_files.open_fds_init, \
15 .rcu = RCU_HEAD_INIT, \ 14 .rcu = RCU_HEAD_INIT, \
16 .free_files = NULL, \ 15 .free_files = NULL, \
17 .next = NULL, \ 16 .next = NULL, \
@@ -20,9 +19,10 @@
20#define INIT_FILES \ 19#define INIT_FILES \
21{ \ 20{ \
22 .count = ATOMIC_INIT(1), \ 21 .count = ATOMIC_INIT(1), \
23 .file_lock = SPIN_LOCK_UNLOCKED, \
24 .fdt = &init_files.fdtab, \ 22 .fdt = &init_files.fdtab, \
25 .fdtab = INIT_FDTABLE, \ 23 .fdtab = INIT_FDTABLE, \
24 .file_lock = SPIN_LOCK_UNLOCKED, \
25 .next_fd = 0, \
26 .close_on_exec_init = { { 0, } }, \ 26 .close_on_exec_init = { { 0, } }, \
27 .open_fds_init = { { 0, } }, \ 27 .open_fds_init = { { 0, } }, \
28 .fd_array = { NULL, } \ 28 .fd_array = { NULL, } \
diff --git a/kernel/fork.c b/kernel/fork.c
index 9bd7b65ee418..c79ae0b19a49 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -607,12 +607,12 @@ static struct files_struct *alloc_files(void)
607 atomic_set(&newf->count, 1); 607 atomic_set(&newf->count, 1);
608 608
609 spin_lock_init(&newf->file_lock); 609 spin_lock_init(&newf->file_lock);
610 newf->next_fd = 0;
610 fdt = &newf->fdtab; 611 fdt = &newf->fdtab;
611 fdt->next_fd = 0;
612 fdt->max_fds = NR_OPEN_DEFAULT; 612 fdt->max_fds = NR_OPEN_DEFAULT;
613 fdt->max_fdset = __FD_SETSIZE; 613 fdt->max_fdset = EMBEDDED_FD_SET_SIZE;
614 fdt->close_on_exec = &newf->close_on_exec_init; 614 fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init;
615 fdt->open_fds = &newf->open_fds_init; 615 fdt->open_fds = (fd_set *)&newf->open_fds_init;
616 fdt->fd = &newf->fd_array[0]; 616 fdt->fd = &newf->fd_array[0];
617 INIT_RCU_HEAD(&fdt->rcu); 617 INIT_RCU_HEAD(&fdt->rcu);
618 fdt->free_files = NULL; 618 fdt->free_files = NULL;