diff options
-rw-r--r-- | fs/fcntl.c | 9 | ||||
-rw-r--r-- | fs/file.c | 34 | ||||
-rw-r--r-- | fs/open.c | 8 | ||||
-rw-r--r-- | include/linux/file.h | 28 | ||||
-rw-r--r-- | include/linux/init_task.h | 10 | ||||
-rw-r--r-- | kernel/fork.c | 8 |
6 files changed, 55 insertions, 42 deletions
diff --git a/fs/fcntl.c b/fs/fcntl.c index dc4a7007f4e7..03c789560fb8 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
@@ -73,8 +73,8 @@ repeat: | |||
73 | * orig_start..fdt->next_fd | 73 | * orig_start..fdt->next_fd |
74 | */ | 74 | */ |
75 | start = orig_start; | 75 | start = orig_start; |
76 | if (start < fdt->next_fd) | 76 | if (start < files->next_fd) |
77 | start = fdt->next_fd; | 77 | start = files->next_fd; |
78 | 78 | ||
79 | newfd = start; | 79 | newfd = start; |
80 | if (start < fdt->max_fdset) { | 80 | if (start < fdt->max_fdset) { |
@@ -102,9 +102,8 @@ repeat: | |||
102 | * we reacquire the fdtable pointer and use it while holding | 102 | * we reacquire the fdtable pointer and use it while holding |
103 | * the lock, no one can free it during that time. | 103 | * the lock, no one can free it during that time. |
104 | */ | 104 | */ |
105 | fdt = files_fdtable(files); | 105 | if (start <= files->next_fd) |
106 | if (start <= fdt->next_fd) | 106 | files->next_fd = newfd + 1; |
107 | fdt->next_fd = newfd + 1; | ||
108 | 107 | ||
109 | error = newfd; | 108 | error = newfd; |
110 | 109 | ||
@@ -125,7 +125,8 @@ static void free_fdtable_rcu(struct rcu_head *rcu) | |||
125 | kmem_cache_free(files_cachep, fdt->free_files); | 125 | kmem_cache_free(files_cachep, fdt->free_files); |
126 | return; | 126 | return; |
127 | } | 127 | } |
128 | if (fdt->max_fdset <= __FD_SETSIZE && fdt->max_fds <= NR_OPEN_DEFAULT) { | 128 | if (fdt->max_fdset <= EMBEDDED_FD_SET_SIZE && |
129 | fdt->max_fds <= NR_OPEN_DEFAULT) { | ||
129 | /* | 130 | /* |
130 | * The fdtable was embedded | 131 | * The fdtable was embedded |
131 | */ | 132 | */ |
@@ -155,8 +156,9 @@ static void free_fdtable_rcu(struct rcu_head *rcu) | |||
155 | 156 | ||
156 | void free_fdtable(struct fdtable *fdt) | 157 | void free_fdtable(struct fdtable *fdt) |
157 | { | 158 | { |
158 | if (fdt->free_files || fdt->max_fdset > __FD_SETSIZE || | 159 | if (fdt->free_files || |
159 | fdt->max_fds > NR_OPEN_DEFAULT) | 160 | fdt->max_fdset > EMBEDDED_FD_SET_SIZE || |
161 | fdt->max_fds > NR_OPEN_DEFAULT) | ||
160 | call_rcu(&fdt->rcu, free_fdtable_rcu); | 162 | call_rcu(&fdt->rcu, free_fdtable_rcu); |
161 | } | 163 | } |
162 | 164 | ||
@@ -199,7 +201,6 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt) | |||
199 | (nfdt->max_fds - fdt->max_fds) * | 201 | (nfdt->max_fds - fdt->max_fds) * |
200 | sizeof(struct file *)); | 202 | sizeof(struct file *)); |
201 | } | 203 | } |
202 | nfdt->next_fd = fdt->next_fd; | ||
203 | } | 204 | } |
204 | 205 | ||
205 | /* | 206 | /* |
@@ -220,11 +221,9 @@ fd_set * alloc_fdset(int num) | |||
220 | 221 | ||
221 | void free_fdset(fd_set *array, int num) | 222 | void free_fdset(fd_set *array, int num) |
222 | { | 223 | { |
223 | int size = num / 8; | 224 | if (num <= EMBEDDED_FD_SET_SIZE) /* Don't free an embedded fdset */ |
224 | |||
225 | if (num <= __FD_SETSIZE) /* Don't free an embedded fdset */ | ||
226 | return; | 225 | return; |
227 | else if (size <= PAGE_SIZE) | 226 | else if (num <= 8 * PAGE_SIZE) |
228 | kfree(array); | 227 | kfree(array); |
229 | else | 228 | else |
230 | vfree(array); | 229 | vfree(array); |
@@ -237,22 +236,17 @@ static struct fdtable *alloc_fdtable(int nr) | |||
237 | fd_set *new_openset = NULL, *new_execset = NULL; | 236 | fd_set *new_openset = NULL, *new_execset = NULL; |
238 | struct file **new_fds; | 237 | struct file **new_fds; |
239 | 238 | ||
240 | fdt = kmalloc(sizeof(*fdt), GFP_KERNEL); | 239 | fdt = kzalloc(sizeof(*fdt), GFP_KERNEL); |
241 | if (!fdt) | 240 | if (!fdt) |
242 | goto out; | 241 | goto out; |
243 | memset(fdt, 0, sizeof(*fdt)); | ||
244 | 242 | ||
245 | nfds = __FD_SETSIZE; | 243 | nfds = 8 * L1_CACHE_BYTES; |
246 | /* Expand to the max in easy steps */ | 244 | /* Expand to the max in easy steps */ |
247 | do { | 245 | while (nfds <= nr) { |
248 | if (nfds < (PAGE_SIZE * 8)) | 246 | nfds = nfds * 2; |
249 | nfds = PAGE_SIZE * 8; | 247 | if (nfds > NR_OPEN) |
250 | else { | 248 | nfds = NR_OPEN; |
251 | nfds = nfds * 2; | 249 | } |
252 | if (nfds > NR_OPEN) | ||
253 | nfds = NR_OPEN; | ||
254 | } | ||
255 | } while (nfds <= nr); | ||
256 | 250 | ||
257 | new_openset = alloc_fdset(nfds); | 251 | new_openset = alloc_fdset(nfds); |
258 | new_execset = alloc_fdset(nfds); | 252 | new_execset = alloc_fdset(nfds); |
@@ -973,7 +973,7 @@ repeat: | |||
973 | fdt = files_fdtable(files); | 973 | fdt = files_fdtable(files); |
974 | fd = find_next_zero_bit(fdt->open_fds->fds_bits, | 974 | fd = find_next_zero_bit(fdt->open_fds->fds_bits, |
975 | fdt->max_fdset, | 975 | fdt->max_fdset, |
976 | fdt->next_fd); | 976 | files->next_fd); |
977 | 977 | ||
978 | /* | 978 | /* |
979 | * N.B. For clone tasks sharing a files structure, this test | 979 | * N.B. For clone tasks sharing a files structure, this test |
@@ -998,7 +998,7 @@ repeat: | |||
998 | 998 | ||
999 | FD_SET(fd, fdt->open_fds); | 999 | FD_SET(fd, fdt->open_fds); |
1000 | FD_CLR(fd, fdt->close_on_exec); | 1000 | FD_CLR(fd, fdt->close_on_exec); |
1001 | fdt->next_fd = fd + 1; | 1001 | files->next_fd = fd + 1; |
1002 | #if 1 | 1002 | #if 1 |
1003 | /* Sanity check */ | 1003 | /* Sanity check */ |
1004 | if (fdt->fd[fd] != NULL) { | 1004 | if (fdt->fd[fd] != NULL) { |
@@ -1019,8 +1019,8 @@ static void __put_unused_fd(struct files_struct *files, unsigned int fd) | |||
1019 | { | 1019 | { |
1020 | struct fdtable *fdt = files_fdtable(files); | 1020 | struct fdtable *fdt = files_fdtable(files); |
1021 | __FD_CLR(fd, fdt->open_fds); | 1021 | __FD_CLR(fd, fdt->open_fds); |
1022 | if (fd < fdt->next_fd) | 1022 | if (fd < files->next_fd) |
1023 | fdt->next_fd = fd; | 1023 | files->next_fd = fd; |
1024 | } | 1024 | } |
1025 | 1025 | ||
1026 | void fastcall put_unused_fd(unsigned int fd) | 1026 | void fastcall put_unused_fd(unsigned int fd) |
diff --git a/include/linux/file.h b/include/linux/file.h index 9901b850f2e4..9f7c2513866f 100644 --- a/include/linux/file.h +++ b/include/linux/file.h | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/compiler.h> | 10 | #include <linux/compiler.h> |
11 | #include <linux/spinlock.h> | 11 | #include <linux/spinlock.h> |
12 | #include <linux/rcupdate.h> | 12 | #include <linux/rcupdate.h> |
13 | #include <linux/types.h> | ||
13 | 14 | ||
14 | /* | 15 | /* |
15 | * The default fd array needs to be at least BITS_PER_LONG, | 16 | * The default fd array needs to be at least BITS_PER_LONG, |
@@ -17,10 +18,22 @@ | |||
17 | */ | 18 | */ |
18 | #define NR_OPEN_DEFAULT BITS_PER_LONG | 19 | #define NR_OPEN_DEFAULT BITS_PER_LONG |
19 | 20 | ||
21 | /* | ||
22 | * The embedded_fd_set is a small fd_set, | ||
23 | * suitable for most tasks (which open <= BITS_PER_LONG files) | ||
24 | */ | ||
25 | struct embedded_fd_set { | ||
26 | unsigned long fds_bits[1]; | ||
27 | }; | ||
28 | |||
29 | /* | ||
30 | * More than this number of fds: we use a separately allocated fd_set | ||
31 | */ | ||
32 | #define EMBEDDED_FD_SET_SIZE (BITS_PER_BYTE * sizeof(struct embedded_fd_set)) | ||
33 | |||
20 | struct fdtable { | 34 | struct fdtable { |
21 | unsigned int max_fds; | 35 | unsigned int max_fds; |
22 | int max_fdset; | 36 | int max_fdset; |
23 | int next_fd; | ||
24 | struct file ** fd; /* current fd array */ | 37 | struct file ** fd; /* current fd array */ |
25 | fd_set *close_on_exec; | 38 | fd_set *close_on_exec; |
26 | fd_set *open_fds; | 39 | fd_set *open_fds; |
@@ -33,13 +46,20 @@ struct fdtable { | |||
33 | * Open file table structure | 46 | * Open file table structure |
34 | */ | 47 | */ |
35 | struct files_struct { | 48 | struct files_struct { |
49 | /* | ||
50 | * read mostly part | ||
51 | */ | ||
36 | atomic_t count; | 52 | atomic_t count; |
37 | struct fdtable *fdt; | 53 | struct fdtable *fdt; |
38 | struct fdtable fdtab; | 54 | struct fdtable fdtab; |
39 | fd_set close_on_exec_init; | 55 | /* |
40 | fd_set open_fds_init; | 56 | * written part on a separate cache line in SMP |
57 | */ | ||
58 | spinlock_t file_lock ____cacheline_aligned_in_smp; | ||
59 | int next_fd; | ||
60 | struct embedded_fd_set close_on_exec_init; | ||
61 | struct embedded_fd_set open_fds_init; | ||
41 | struct file * fd_array[NR_OPEN_DEFAULT]; | 62 | struct file * fd_array[NR_OPEN_DEFAULT]; |
42 | spinlock_t file_lock; /* Protects concurrent writers. Nests inside tsk->alloc_lock */ | ||
43 | }; | 63 | }; |
44 | 64 | ||
45 | #define files_fdtable(files) (rcu_dereference((files)->fdt)) | 65 | #define files_fdtable(files) (rcu_dereference((files)->fdt)) |
diff --git a/include/linux/init_task.h b/include/linux/init_task.h index dcfd2ecccb5d..92146f3b7423 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h | |||
@@ -7,11 +7,10 @@ | |||
7 | #define INIT_FDTABLE \ | 7 | #define INIT_FDTABLE \ |
8 | { \ | 8 | { \ |
9 | .max_fds = NR_OPEN_DEFAULT, \ | 9 | .max_fds = NR_OPEN_DEFAULT, \ |
10 | .max_fdset = __FD_SETSIZE, \ | 10 | .max_fdset = EMBEDDED_FD_SET_SIZE, \ |
11 | .next_fd = 0, \ | ||
12 | .fd = &init_files.fd_array[0], \ | 11 | .fd = &init_files.fd_array[0], \ |
13 | .close_on_exec = &init_files.close_on_exec_init, \ | 12 | .close_on_exec = (fd_set *)&init_files.close_on_exec_init, \ |
14 | .open_fds = &init_files.open_fds_init, \ | 13 | .open_fds = (fd_set *)&init_files.open_fds_init, \ |
15 | .rcu = RCU_HEAD_INIT, \ | 14 | .rcu = RCU_HEAD_INIT, \ |
16 | .free_files = NULL, \ | 15 | .free_files = NULL, \ |
17 | .next = NULL, \ | 16 | .next = NULL, \ |
@@ -20,9 +19,10 @@ | |||
20 | #define INIT_FILES \ | 19 | #define INIT_FILES \ |
21 | { \ | 20 | { \ |
22 | .count = ATOMIC_INIT(1), \ | 21 | .count = ATOMIC_INIT(1), \ |
23 | .file_lock = SPIN_LOCK_UNLOCKED, \ | ||
24 | .fdt = &init_files.fdtab, \ | 22 | .fdt = &init_files.fdtab, \ |
25 | .fdtab = INIT_FDTABLE, \ | 23 | .fdtab = INIT_FDTABLE, \ |
24 | .file_lock = SPIN_LOCK_UNLOCKED, \ | ||
25 | .next_fd = 0, \ | ||
26 | .close_on_exec_init = { { 0, } }, \ | 26 | .close_on_exec_init = { { 0, } }, \ |
27 | .open_fds_init = { { 0, } }, \ | 27 | .open_fds_init = { { 0, } }, \ |
28 | .fd_array = { NULL, } \ | 28 | .fd_array = { NULL, } \ |
diff --git a/kernel/fork.c b/kernel/fork.c index 9bd7b65ee418..c79ae0b19a49 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -607,12 +607,12 @@ static struct files_struct *alloc_files(void) | |||
607 | atomic_set(&newf->count, 1); | 607 | atomic_set(&newf->count, 1); |
608 | 608 | ||
609 | spin_lock_init(&newf->file_lock); | 609 | spin_lock_init(&newf->file_lock); |
610 | newf->next_fd = 0; | ||
610 | fdt = &newf->fdtab; | 611 | fdt = &newf->fdtab; |
611 | fdt->next_fd = 0; | ||
612 | fdt->max_fds = NR_OPEN_DEFAULT; | 612 | fdt->max_fds = NR_OPEN_DEFAULT; |
613 | fdt->max_fdset = __FD_SETSIZE; | 613 | fdt->max_fdset = EMBEDDED_FD_SET_SIZE; |
614 | fdt->close_on_exec = &newf->close_on_exec_init; | 614 | fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; |
615 | fdt->open_fds = &newf->open_fds_init; | 615 | fdt->open_fds = (fd_set *)&newf->open_fds_init; |
616 | fdt->fd = &newf->fd_array[0]; | 616 | fdt->fd = &newf->fd_array[0]; |
617 | INIT_RCU_HEAD(&fdt->rcu); | 617 | INIT_RCU_HEAD(&fdt->rcu); |
618 | fdt->free_files = NULL; | 618 | fdt->free_files = NULL; |