aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Dumazet <dada1@cosmosbay.com>2006-03-23 06:00:12 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2006-03-23 10:38:09 -0500
commit0c9e63fd38a2fb2181668a0cdd622a3c23cfd567 (patch)
tree8fdb91603347b1da2e83a095ebcaab44b2c3c237
parentd8733c2956968a01394a4d2a9e97a8b431a78776 (diff)
[PATCH] Shrink sizeof(files_struct) and improve its layout
1) Reduce the size of struct fdtable to exactly 64 bytes on 32-bit platforms, lowering the kmalloc()-allocated space by 50%. 2) Reduce the size of struct files_struct by using a special 32-bit (or 64-bit) embedded_fd_set, instead of a 1024-bit fd_set, for the close_on_exec_init and open_fds_init fields. This saves some RAM (248 bytes per task), as most tasks don't open more than 32 files. The D-cache footprint for such tasks is also reduced to the minimum. 3) Reduce the size of the allocated fdset. Currently two full pages are allocated, that is 32768 bits on x86 for example, which is far too much. The minimum is now L1_CACHE_BYTES. Both UP and SMP should benefit from this patch, because most tasks will touch only one cache line when they open()/close() stdin/stdout/stderr (0/1/2): next_fd, close_on_exec_init, open_fds_init and fd_array[0 .. 2] are all in the same cache line. Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--fs/fcntl.c9
-rw-r--r--fs/file.c34
-rw-r--r--fs/open.c8
-rw-r--r--include/linux/file.h28
-rw-r--r--include/linux/init_task.h10
-rw-r--r--kernel/fork.c8
6 files changed, 55 insertions, 42 deletions
diff --git a/fs/fcntl.c b/fs/fcntl.c
index dc4a7007f4e7..03c789560fb8 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -73,8 +73,8 @@ repeat:
73 * orig_start..fdt->next_fd 73 * orig_start..fdt->next_fd
74 */ 74 */
75 start = orig_start; 75 start = orig_start;
76 if (start < fdt->next_fd) 76 if (start < files->next_fd)
77 start = fdt->next_fd; 77 start = files->next_fd;
78 78
79 newfd = start; 79 newfd = start;
80 if (start < fdt->max_fdset) { 80 if (start < fdt->max_fdset) {
@@ -102,9 +102,8 @@ repeat:
102 * we reacquire the fdtable pointer and use it while holding 102 * we reacquire the fdtable pointer and use it while holding
103 * the lock, no one can free it during that time. 103 * the lock, no one can free it during that time.
104 */ 104 */
105 fdt = files_fdtable(files); 105 if (start <= files->next_fd)
106 if (start <= fdt->next_fd) 106 files->next_fd = newfd + 1;
107 fdt->next_fd = newfd + 1;
108 107
109 error = newfd; 108 error = newfd;
110 109
diff --git a/fs/file.c b/fs/file.c
index cea7cbea11d0..bbc743314730 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -125,7 +125,8 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
125 kmem_cache_free(files_cachep, fdt->free_files); 125 kmem_cache_free(files_cachep, fdt->free_files);
126 return; 126 return;
127 } 127 }
128 if (fdt->max_fdset <= __FD_SETSIZE && fdt->max_fds <= NR_OPEN_DEFAULT) { 128 if (fdt->max_fdset <= EMBEDDED_FD_SET_SIZE &&
129 fdt->max_fds <= NR_OPEN_DEFAULT) {
129 /* 130 /*
130 * The fdtable was embedded 131 * The fdtable was embedded
131 */ 132 */
@@ -155,8 +156,9 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
155 156
156void free_fdtable(struct fdtable *fdt) 157void free_fdtable(struct fdtable *fdt)
157{ 158{
158 if (fdt->free_files || fdt->max_fdset > __FD_SETSIZE || 159 if (fdt->free_files ||
159 fdt->max_fds > NR_OPEN_DEFAULT) 160 fdt->max_fdset > EMBEDDED_FD_SET_SIZE ||
161 fdt->max_fds > NR_OPEN_DEFAULT)
160 call_rcu(&fdt->rcu, free_fdtable_rcu); 162 call_rcu(&fdt->rcu, free_fdtable_rcu);
161} 163}
162 164
@@ -199,7 +201,6 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt)
199 (nfdt->max_fds - fdt->max_fds) * 201 (nfdt->max_fds - fdt->max_fds) *
200 sizeof(struct file *)); 202 sizeof(struct file *));
201 } 203 }
202 nfdt->next_fd = fdt->next_fd;
203} 204}
204 205
205/* 206/*
@@ -220,11 +221,9 @@ fd_set * alloc_fdset(int num)
220 221
221void free_fdset(fd_set *array, int num) 222void free_fdset(fd_set *array, int num)
222{ 223{
223 int size = num / 8; 224 if (num <= EMBEDDED_FD_SET_SIZE) /* Don't free an embedded fdset */
224
225 if (num <= __FD_SETSIZE) /* Don't free an embedded fdset */
226 return; 225 return;
227 else if (size <= PAGE_SIZE) 226 else if (num <= 8 * PAGE_SIZE)
228 kfree(array); 227 kfree(array);
229 else 228 else
230 vfree(array); 229 vfree(array);
@@ -237,22 +236,17 @@ static struct fdtable *alloc_fdtable(int nr)
237 fd_set *new_openset = NULL, *new_execset = NULL; 236 fd_set *new_openset = NULL, *new_execset = NULL;
238 struct file **new_fds; 237 struct file **new_fds;
239 238
240 fdt = kmalloc(sizeof(*fdt), GFP_KERNEL); 239 fdt = kzalloc(sizeof(*fdt), GFP_KERNEL);
241 if (!fdt) 240 if (!fdt)
242 goto out; 241 goto out;
243 memset(fdt, 0, sizeof(*fdt));
244 242
245 nfds = __FD_SETSIZE; 243 nfds = 8 * L1_CACHE_BYTES;
246 /* Expand to the max in easy steps */ 244 /* Expand to the max in easy steps */
247 do { 245 while (nfds <= nr) {
248 if (nfds < (PAGE_SIZE * 8)) 246 nfds = nfds * 2;
249 nfds = PAGE_SIZE * 8; 247 if (nfds > NR_OPEN)
250 else { 248 nfds = NR_OPEN;
251 nfds = nfds * 2; 249 }
252 if (nfds > NR_OPEN)
253 nfds = NR_OPEN;
254 }
255 } while (nfds <= nr);
256 250
257 new_openset = alloc_fdset(nfds); 251 new_openset = alloc_fdset(nfds);
258 new_execset = alloc_fdset(nfds); 252 new_execset = alloc_fdset(nfds);
diff --git a/fs/open.c b/fs/open.c
index 70e0230d8e77..1091dadd6c38 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -973,7 +973,7 @@ repeat:
973 fdt = files_fdtable(files); 973 fdt = files_fdtable(files);
974 fd = find_next_zero_bit(fdt->open_fds->fds_bits, 974 fd = find_next_zero_bit(fdt->open_fds->fds_bits,
975 fdt->max_fdset, 975 fdt->max_fdset,
976 fdt->next_fd); 976 files->next_fd);
977 977
978 /* 978 /*
979 * N.B. For clone tasks sharing a files structure, this test 979 * N.B. For clone tasks sharing a files structure, this test
@@ -998,7 +998,7 @@ repeat:
998 998
999 FD_SET(fd, fdt->open_fds); 999 FD_SET(fd, fdt->open_fds);
1000 FD_CLR(fd, fdt->close_on_exec); 1000 FD_CLR(fd, fdt->close_on_exec);
1001 fdt->next_fd = fd + 1; 1001 files->next_fd = fd + 1;
1002#if 1 1002#if 1
1003 /* Sanity check */ 1003 /* Sanity check */
1004 if (fdt->fd[fd] != NULL) { 1004 if (fdt->fd[fd] != NULL) {
@@ -1019,8 +1019,8 @@ static void __put_unused_fd(struct files_struct *files, unsigned int fd)
1019{ 1019{
1020 struct fdtable *fdt = files_fdtable(files); 1020 struct fdtable *fdt = files_fdtable(files);
1021 __FD_CLR(fd, fdt->open_fds); 1021 __FD_CLR(fd, fdt->open_fds);
1022 if (fd < fdt->next_fd) 1022 if (fd < files->next_fd)
1023 fdt->next_fd = fd; 1023 files->next_fd = fd;
1024} 1024}
1025 1025
1026void fastcall put_unused_fd(unsigned int fd) 1026void fastcall put_unused_fd(unsigned int fd)
diff --git a/include/linux/file.h b/include/linux/file.h
index 9901b850f2e4..9f7c2513866f 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -10,6 +10,7 @@
10#include <linux/compiler.h> 10#include <linux/compiler.h>
11#include <linux/spinlock.h> 11#include <linux/spinlock.h>
12#include <linux/rcupdate.h> 12#include <linux/rcupdate.h>
13#include <linux/types.h>
13 14
14/* 15/*
15 * The default fd array needs to be at least BITS_PER_LONG, 16 * The default fd array needs to be at least BITS_PER_LONG,
@@ -17,10 +18,22 @@
17 */ 18 */
18#define NR_OPEN_DEFAULT BITS_PER_LONG 19#define NR_OPEN_DEFAULT BITS_PER_LONG
19 20
21/*
22 * The embedded_fd_set is a small fd_set,
23 * suitable for most tasks (which open <= BITS_PER_LONG files)
24 */
25struct embedded_fd_set {
26 unsigned long fds_bits[1];
27};
28
29/*
30 * More than this number of fds: we use a separately allocated fd_set
31 */
32#define EMBEDDED_FD_SET_SIZE (BITS_PER_BYTE * sizeof(struct embedded_fd_set))
33
20struct fdtable { 34struct fdtable {
21 unsigned int max_fds; 35 unsigned int max_fds;
22 int max_fdset; 36 int max_fdset;
23 int next_fd;
24 struct file ** fd; /* current fd array */ 37 struct file ** fd; /* current fd array */
25 fd_set *close_on_exec; 38 fd_set *close_on_exec;
26 fd_set *open_fds; 39 fd_set *open_fds;
@@ -33,13 +46,20 @@ struct fdtable {
33 * Open file table structure 46 * Open file table structure
34 */ 47 */
35struct files_struct { 48struct files_struct {
49 /*
50 * read mostly part
51 */
36 atomic_t count; 52 atomic_t count;
37 struct fdtable *fdt; 53 struct fdtable *fdt;
38 struct fdtable fdtab; 54 struct fdtable fdtab;
39 fd_set close_on_exec_init; 55 /*
40 fd_set open_fds_init; 56 * written part on a separate cache line in SMP
57 */
58 spinlock_t file_lock ____cacheline_aligned_in_smp;
59 int next_fd;
60 struct embedded_fd_set close_on_exec_init;
61 struct embedded_fd_set open_fds_init;
41 struct file * fd_array[NR_OPEN_DEFAULT]; 62 struct file * fd_array[NR_OPEN_DEFAULT];
42 spinlock_t file_lock; /* Protects concurrent writers. Nests inside tsk->alloc_lock */
43}; 63};
44 64
45#define files_fdtable(files) (rcu_dereference((files)->fdt)) 65#define files_fdtable(files) (rcu_dereference((files)->fdt))
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index dcfd2ecccb5d..92146f3b7423 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -7,11 +7,10 @@
7#define INIT_FDTABLE \ 7#define INIT_FDTABLE \
8{ \ 8{ \
9 .max_fds = NR_OPEN_DEFAULT, \ 9 .max_fds = NR_OPEN_DEFAULT, \
10 .max_fdset = __FD_SETSIZE, \ 10 .max_fdset = EMBEDDED_FD_SET_SIZE, \
11 .next_fd = 0, \
12 .fd = &init_files.fd_array[0], \ 11 .fd = &init_files.fd_array[0], \
13 .close_on_exec = &init_files.close_on_exec_init, \ 12 .close_on_exec = (fd_set *)&init_files.close_on_exec_init, \
14 .open_fds = &init_files.open_fds_init, \ 13 .open_fds = (fd_set *)&init_files.open_fds_init, \
15 .rcu = RCU_HEAD_INIT, \ 14 .rcu = RCU_HEAD_INIT, \
16 .free_files = NULL, \ 15 .free_files = NULL, \
17 .next = NULL, \ 16 .next = NULL, \
@@ -20,9 +19,10 @@
20#define INIT_FILES \ 19#define INIT_FILES \
21{ \ 20{ \
22 .count = ATOMIC_INIT(1), \ 21 .count = ATOMIC_INIT(1), \
23 .file_lock = SPIN_LOCK_UNLOCKED, \
24 .fdt = &init_files.fdtab, \ 22 .fdt = &init_files.fdtab, \
25 .fdtab = INIT_FDTABLE, \ 23 .fdtab = INIT_FDTABLE, \
24 .file_lock = SPIN_LOCK_UNLOCKED, \
25 .next_fd = 0, \
26 .close_on_exec_init = { { 0, } }, \ 26 .close_on_exec_init = { { 0, } }, \
27 .open_fds_init = { { 0, } }, \ 27 .open_fds_init = { { 0, } }, \
28 .fd_array = { NULL, } \ 28 .fd_array = { NULL, } \
diff --git a/kernel/fork.c b/kernel/fork.c
index 9bd7b65ee418..c79ae0b19a49 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -607,12 +607,12 @@ static struct files_struct *alloc_files(void)
607 atomic_set(&newf->count, 1); 607 atomic_set(&newf->count, 1);
608 608
609 spin_lock_init(&newf->file_lock); 609 spin_lock_init(&newf->file_lock);
610 newf->next_fd = 0;
610 fdt = &newf->fdtab; 611 fdt = &newf->fdtab;
611 fdt->next_fd = 0;
612 fdt->max_fds = NR_OPEN_DEFAULT; 612 fdt->max_fds = NR_OPEN_DEFAULT;
613 fdt->max_fdset = __FD_SETSIZE; 613 fdt->max_fdset = EMBEDDED_FD_SET_SIZE;
614 fdt->close_on_exec = &newf->close_on_exec_init; 614 fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init;
615 fdt->open_fds = &newf->open_fds_init; 615 fdt->open_fds = (fd_set *)&newf->open_fds_init;
616 fdt->fd = &newf->fd_array[0]; 616 fdt->fd = &newf->fd_array[0];
617 INIT_RCU_HEAD(&fdt->rcu); 617 INIT_RCU_HEAD(&fdt->rcu);
618 fdt->free_files = NULL; 618 fdt->free_files = NULL;