about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: David Howells <dhowells@redhat.com> 2012-02-16 12:49:54 -0500
committer: H. Peter Anvin <hpa@zytor.com> 2012-02-19 13:30:57 -0500
commit: 1fd36adcd98c14d2fd97f545293c488775cb2823 (patch)
tree: c13ab1934a15aebe0d81601d910ce5a3c6fa2c6f
parent: 1dce27c5aa6770e9d195f2bb7db1db3d4dde5591 (diff)
Replace the fd_sets in struct fdtable with an array of unsigned longs
Replace the fd_sets in struct fdtable with an array of unsigned longs and then use the standard non-atomic bit operations rather than the FD_* macros.

This:

 (1) Removes the abuses of struct fd_set:

     (a) Since we don't want to allocate a full fd_set the vast majority of the time, we actually, in effect, just allocate a just-big-enough array of unsigned longs and cast it to an fd_set type - so why bother with the fd_set at all?

     (b) Some places outside of the core fdtable handling code (such as SELinux) want to look inside the array of unsigned longs hidden inside the fd_set struct for more efficient iteration over the entire set.

 (2) Eliminates the use of FD_*() macros in the kernel completely.

 (3) Permits the __FD_*() macros to be deleted entirely where not exposed to userspace.

Signed-off-by: David Howells <dhowells@redhat.com>
Link: http://lkml.kernel.org/r/20120216174954.23314.48147.stgit@warthog.procyon.org.uk
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r--  fs/exec.c                 4
-rw-r--r--  fs/file.c                46
-rw-r--r--  fs/select.c               2
-rw-r--r--  include/linux/fdtable.h  28
-rw-r--r--  kernel/exit.c             2
-rw-r--r--  security/selinux/hooks.c  2
6 files changed, 37 insertions, 47 deletions
diff --git a/fs/exec.c b/fs/exec.c
index 22cc38d9e79f..cfd5e3047bd8 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1026,10 +1026,10 @@ static void flush_old_files(struct files_struct * files)
1026 fdt = files_fdtable(files); 1026 fdt = files_fdtable(files);
1027 if (i >= fdt->max_fds) 1027 if (i >= fdt->max_fds)
1028 break; 1028 break;
1029 set = fdt->close_on_exec->fds_bits[j]; 1029 set = fdt->close_on_exec[j];
1030 if (!set) 1030 if (!set)
1031 continue; 1031 continue;
1032 fdt->close_on_exec->fds_bits[j] = 0; 1032 fdt->close_on_exec[j] = 0;
1033 spin_unlock(&files->file_lock); 1033 spin_unlock(&files->file_lock);
1034 for ( ; set ; i++,set >>= 1) { 1034 for ( ; set ; i++,set >>= 1) {
1035 if (set & 1) { 1035 if (set & 1) {
diff --git a/fs/file.c b/fs/file.c
index 114fea0a2cec..2d479dd8484e 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -40,7 +40,7 @@ int sysctl_nr_open_max = 1024 * 1024; /* raised later */
40 */ 40 */
41static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); 41static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
42 42
43static void *alloc_fdmem(unsigned int size) 43static void *alloc_fdmem(size_t size)
44{ 44{
45 /* 45 /*
46 * Very large allocations can stress page reclaim, so fall back to 46 * Very large allocations can stress page reclaim, so fall back to
@@ -142,7 +142,7 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
142static struct fdtable * alloc_fdtable(unsigned int nr) 142static struct fdtable * alloc_fdtable(unsigned int nr)
143{ 143{
144 struct fdtable *fdt; 144 struct fdtable *fdt;
145 char *data; 145 void *data;
146 146
147 /* 147 /*
148 * Figure out how many fds we actually want to support in this fdtable. 148 * Figure out how many fds we actually want to support in this fdtable.
@@ -172,14 +172,15 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
172 data = alloc_fdmem(nr * sizeof(struct file *)); 172 data = alloc_fdmem(nr * sizeof(struct file *));
173 if (!data) 173 if (!data)
174 goto out_fdt; 174 goto out_fdt;
175 fdt->fd = (struct file **)data; 175 fdt->fd = data;
176 data = alloc_fdmem(max_t(unsigned int, 176
177 data = alloc_fdmem(max_t(size_t,
177 2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES)); 178 2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES));
178 if (!data) 179 if (!data)
179 goto out_arr; 180 goto out_arr;
180 fdt->open_fds = (fd_set *)data; 181 fdt->open_fds = data;
181 data += nr / BITS_PER_BYTE; 182 data += nr / BITS_PER_LONG;
182 fdt->close_on_exec = (fd_set *)data; 183 fdt->close_on_exec = data;
183 fdt->next = NULL; 184 fdt->next = NULL;
184 185
185 return fdt; 186 return fdt;
@@ -275,11 +276,11 @@ static int count_open_files(struct fdtable *fdt)
275 int i; 276 int i;
276 277
277 /* Find the last open fd */ 278 /* Find the last open fd */
278 for (i = size/(8*sizeof(long)); i > 0; ) { 279 for (i = size / BITS_PER_LONG; i > 0; ) {
279 if (fdt->open_fds->fds_bits[--i]) 280 if (fdt->open_fds[--i])
280 break; 281 break;
281 } 282 }
282 i = (i+1) * 8 * sizeof(long); 283 i = (i + 1) * BITS_PER_LONG;
283 return i; 284 return i;
284} 285}
285 286
@@ -306,8 +307,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
306 newf->next_fd = 0; 307 newf->next_fd = 0;
307 new_fdt = &newf->fdtab; 308 new_fdt = &newf->fdtab;
308 new_fdt->max_fds = NR_OPEN_DEFAULT; 309 new_fdt->max_fds = NR_OPEN_DEFAULT;
309 new_fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; 310 new_fdt->close_on_exec = newf->close_on_exec_init;
310 new_fdt->open_fds = (fd_set *)&newf->open_fds_init; 311 new_fdt->open_fds = newf->open_fds_init;
311 new_fdt->fd = &newf->fd_array[0]; 312 new_fdt->fd = &newf->fd_array[0];
312 new_fdt->next = NULL; 313 new_fdt->next = NULL;
313 314
@@ -350,10 +351,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
350 old_fds = old_fdt->fd; 351 old_fds = old_fdt->fd;
351 new_fds = new_fdt->fd; 352 new_fds = new_fdt->fd;
352 353
353 memcpy(new_fdt->open_fds->fds_bits, 354 memcpy(new_fdt->open_fds, old_fdt->open_fds, open_files / 8);
354 old_fdt->open_fds->fds_bits, open_files/8); 355 memcpy(new_fdt->close_on_exec, old_fdt->close_on_exec, open_files / 8);
355 memcpy(new_fdt->close_on_exec->fds_bits,
356 old_fdt->close_on_exec->fds_bits, open_files/8);
357 356
358 for (i = open_files; i != 0; i--) { 357 for (i = open_files; i != 0; i--) {
359 struct file *f = *old_fds++; 358 struct file *f = *old_fds++;
@@ -379,11 +378,11 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
379 memset(new_fds, 0, size); 378 memset(new_fds, 0, size);
380 379
381 if (new_fdt->max_fds > open_files) { 380 if (new_fdt->max_fds > open_files) {
382 int left = (new_fdt->max_fds-open_files)/8; 381 int left = (new_fdt->max_fds - open_files) / 8;
383 int start = open_files / (8 * sizeof(unsigned long)); 382 int start = open_files / BITS_PER_LONG;
384 383
385 memset(&new_fdt->open_fds->fds_bits[start], 0, left); 384 memset(&new_fdt->open_fds[start], 0, left);
386 memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); 385 memset(&new_fdt->close_on_exec[start], 0, left);
387 } 386 }
388 387
389 rcu_assign_pointer(newf->fdt, new_fdt); 388 rcu_assign_pointer(newf->fdt, new_fdt);
@@ -419,8 +418,8 @@ struct files_struct init_files = {
419 .fdtab = { 418 .fdtab = {
420 .max_fds = NR_OPEN_DEFAULT, 419 .max_fds = NR_OPEN_DEFAULT,
421 .fd = &init_files.fd_array[0], 420 .fd = &init_files.fd_array[0],
422 .close_on_exec = (fd_set *)&init_files.close_on_exec_init, 421 .close_on_exec = init_files.close_on_exec_init,
423 .open_fds = (fd_set *)&init_files.open_fds_init, 422 .open_fds = init_files.open_fds_init,
424 }, 423 },
425 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), 424 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock),
426}; 425};
@@ -443,8 +442,7 @@ repeat:
443 fd = files->next_fd; 442 fd = files->next_fd;
444 443
445 if (fd < fdt->max_fds) 444 if (fd < fdt->max_fds)
446 fd = find_next_zero_bit(fdt->open_fds->fds_bits, 445 fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd);
447 fdt->max_fds, fd);
448 446
449 error = expand_files(files, fd); 447 error = expand_files(files, fd);
450 if (error < 0) 448 if (error < 0)
diff --git a/fs/select.c b/fs/select.c
index d33418fdc858..2e7fbe8a092c 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -348,7 +348,7 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds)
348 set = ~(~0UL << (n & (__NFDBITS-1))); 348 set = ~(~0UL << (n & (__NFDBITS-1)));
349 n /= __NFDBITS; 349 n /= __NFDBITS;
350 fdt = files_fdtable(current->files); 350 fdt = files_fdtable(current->files);
351 open_fds = fdt->open_fds->fds_bits+n; 351 open_fds = fdt->open_fds + n;
352 max = 0; 352 max = 0;
353 if (set) { 353 if (set) {
354 set &= BITS(fds, n); 354 set &= BITS(fds, n);
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 7675da2c18f7..158a41eed314 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -21,51 +21,43 @@
21 */ 21 */
22#define NR_OPEN_DEFAULT BITS_PER_LONG 22#define NR_OPEN_DEFAULT BITS_PER_LONG
23 23
24/*
25 * The embedded_fd_set is a small fd_set,
26 * suitable for most tasks (which open <= BITS_PER_LONG files)
27 */
28struct embedded_fd_set {
29 unsigned long fds_bits[1];
30};
31
32struct fdtable { 24struct fdtable {
33 unsigned int max_fds; 25 unsigned int max_fds;
34 struct file __rcu **fd; /* current fd array */ 26 struct file __rcu **fd; /* current fd array */
35 fd_set *close_on_exec; 27 unsigned long *close_on_exec;
36 fd_set *open_fds; 28 unsigned long *open_fds;
37 struct rcu_head rcu; 29 struct rcu_head rcu;
38 struct fdtable *next; 30 struct fdtable *next;
39}; 31};
40 32
41static inline void __set_close_on_exec(int fd, struct fdtable *fdt) 33static inline void __set_close_on_exec(int fd, struct fdtable *fdt)
42{ 34{
43 FD_SET(fd, fdt->close_on_exec); 35 __set_bit(fd, fdt->close_on_exec);
44} 36}
45 37
46static inline void __clear_close_on_exec(int fd, struct fdtable *fdt) 38static inline void __clear_close_on_exec(int fd, struct fdtable *fdt)
47{ 39{
48 FD_CLR(fd, fdt->close_on_exec); 40 __clear_bit(fd, fdt->close_on_exec);
49} 41}
50 42
51static inline bool close_on_exec(int fd, const struct fdtable *fdt) 43static inline bool close_on_exec(int fd, const struct fdtable *fdt)
52{ 44{
53 return FD_ISSET(fd, fdt->close_on_exec); 45 return test_bit(fd, fdt->close_on_exec);
54} 46}
55 47
56static inline void __set_open_fd(int fd, struct fdtable *fdt) 48static inline void __set_open_fd(int fd, struct fdtable *fdt)
57{ 49{
58 FD_SET(fd, fdt->open_fds); 50 __set_bit(fd, fdt->open_fds);
59} 51}
60 52
61static inline void __clear_open_fd(int fd, struct fdtable *fdt) 53static inline void __clear_open_fd(int fd, struct fdtable *fdt)
62{ 54{
63 FD_CLR(fd, fdt->open_fds); 55 __clear_bit(fd, fdt->open_fds);
64} 56}
65 57
66static inline bool fd_is_open(int fd, const struct fdtable *fdt) 58static inline bool fd_is_open(int fd, const struct fdtable *fdt)
67{ 59{
68 return FD_ISSET(fd, fdt->open_fds); 60 return test_bit(fd, fdt->open_fds);
69} 61}
70 62
71/* 63/*
@@ -83,8 +75,8 @@ struct files_struct {
83 */ 75 */
84 spinlock_t file_lock ____cacheline_aligned_in_smp; 76 spinlock_t file_lock ____cacheline_aligned_in_smp;
85 int next_fd; 77 int next_fd;
86 struct embedded_fd_set close_on_exec_init; 78 unsigned long close_on_exec_init[1];
87 struct embedded_fd_set open_fds_init; 79 unsigned long open_fds_init[1];
88 struct file __rcu * fd_array[NR_OPEN_DEFAULT]; 80 struct file __rcu * fd_array[NR_OPEN_DEFAULT];
89}; 81};
90 82
diff --git a/kernel/exit.c b/kernel/exit.c
index 4b4042f9bc6a..4db020015f14 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -473,7 +473,7 @@ static void close_files(struct files_struct * files)
473 i = j * __NFDBITS; 473 i = j * __NFDBITS;
474 if (i >= fdt->max_fds) 474 if (i >= fdt->max_fds)
475 break; 475 break;
476 set = fdt->open_fds->fds_bits[j++]; 476 set = fdt->open_fds[j++];
477 while (set) { 477 while (set) {
478 if (set & 1) { 478 if (set & 1) {
479 struct file * file = xchg(&fdt->fd[i], NULL); 479 struct file * file = xchg(&fdt->fd[i], NULL);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 6a3683e28426..421c990a20b2 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2145,7 +2145,7 @@ static inline void flush_unauthorized_files(const struct cred *cred,
2145 fdt = files_fdtable(files); 2145 fdt = files_fdtable(files);
2146 if (i >= fdt->max_fds) 2146 if (i >= fdt->max_fds)
2147 break; 2147 break;
2148 set = fdt->open_fds->fds_bits[j]; 2148 set = fdt->open_fds[j];
2149 if (!set) 2149 if (!set)
2150 continue; 2150 continue;
2151 spin_unlock(&files->file_lock); 2151 spin_unlock(&files->file_lock);