diff options
author | David Howells <dhowells@redhat.com> | 2012-02-16 12:49:54 -0500 |
---|---|---|
committer | H. Peter Anvin <hpa@zytor.com> | 2012-02-19 13:30:57 -0500 |
commit | 1fd36adcd98c14d2fd97f545293c488775cb2823 (patch) | |
tree | c13ab1934a15aebe0d81601d910ce5a3c6fa2c6f | |
parent | 1dce27c5aa6770e9d195f2bb7db1db3d4dde5591 (diff) |
Replace the fd_sets in struct fdtable with an array of unsigned longs
Replace the fd_sets in struct fdtable with an array of unsigned longs and then
use the standard non-atomic bit operations rather than the FD_* macros.
This:
(1) Removes the abuses of struct fd_set:
(a) Since we don't want to allocate a full fd_set the vast majority of the
time, we in effect allocate a just-big-enough array of unsigned longs
and cast it to an fd_set type - so why bother with the fd_set at all?
(b) Some places outside of the core fdtable handling code (such as
SELinux) want to look inside the array of unsigned longs hidden inside
the fd_set struct for more efficient iteration over the entire set.
(2) Eliminates the use of FD_*() macros in the kernel completely.
(3) Permits the __FD_*() macros to be deleted entirely where not exposed to
userspace.
Signed-off-by: David Howells <dhowells@redhat.com>
Link: http://lkml.kernel.org/r/20120216174954.23314.48147.stgit@warthog.procyon.org.uk
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r-- | fs/exec.c | 4 | ||||
-rw-r--r-- | fs/file.c | 46 | ||||
-rw-r--r-- | fs/select.c | 2 | ||||
-rw-r--r-- | include/linux/fdtable.h | 28 | ||||
-rw-r--r-- | kernel/exit.c | 2 | ||||
-rw-r--r-- | security/selinux/hooks.c | 2 |
6 files changed, 37 insertions, 47 deletions
@@ -1026,10 +1026,10 @@ static void flush_old_files(struct files_struct * files) | |||
1026 | fdt = files_fdtable(files); | 1026 | fdt = files_fdtable(files); |
1027 | if (i >= fdt->max_fds) | 1027 | if (i >= fdt->max_fds) |
1028 | break; | 1028 | break; |
1029 | set = fdt->close_on_exec->fds_bits[j]; | 1029 | set = fdt->close_on_exec[j]; |
1030 | if (!set) | 1030 | if (!set) |
1031 | continue; | 1031 | continue; |
1032 | fdt->close_on_exec->fds_bits[j] = 0; | 1032 | fdt->close_on_exec[j] = 0; |
1033 | spin_unlock(&files->file_lock); | 1033 | spin_unlock(&files->file_lock); |
1034 | for ( ; set ; i++,set >>= 1) { | 1034 | for ( ; set ; i++,set >>= 1) { |
1035 | if (set & 1) { | 1035 | if (set & 1) { |
@@ -40,7 +40,7 @@ int sysctl_nr_open_max = 1024 * 1024; /* raised later */ | |||
40 | */ | 40 | */ |
41 | static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); | 41 | static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); |
42 | 42 | ||
43 | static void *alloc_fdmem(unsigned int size) | 43 | static void *alloc_fdmem(size_t size) |
44 | { | 44 | { |
45 | /* | 45 | /* |
46 | * Very large allocations can stress page reclaim, so fall back to | 46 | * Very large allocations can stress page reclaim, so fall back to |
@@ -142,7 +142,7 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) | |||
142 | static struct fdtable * alloc_fdtable(unsigned int nr) | 142 | static struct fdtable * alloc_fdtable(unsigned int nr) |
143 | { | 143 | { |
144 | struct fdtable *fdt; | 144 | struct fdtable *fdt; |
145 | char *data; | 145 | void *data; |
146 | 146 | ||
147 | /* | 147 | /* |
148 | * Figure out how many fds we actually want to support in this fdtable. | 148 | * Figure out how many fds we actually want to support in this fdtable. |
@@ -172,14 +172,15 @@ static struct fdtable * alloc_fdtable(unsigned int nr) | |||
172 | data = alloc_fdmem(nr * sizeof(struct file *)); | 172 | data = alloc_fdmem(nr * sizeof(struct file *)); |
173 | if (!data) | 173 | if (!data) |
174 | goto out_fdt; | 174 | goto out_fdt; |
175 | fdt->fd = (struct file **)data; | 175 | fdt->fd = data; |
176 | data = alloc_fdmem(max_t(unsigned int, | 176 | |
177 | data = alloc_fdmem(max_t(size_t, | ||
177 | 2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES)); | 178 | 2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES)); |
178 | if (!data) | 179 | if (!data) |
179 | goto out_arr; | 180 | goto out_arr; |
180 | fdt->open_fds = (fd_set *)data; | 181 | fdt->open_fds = data; |
181 | data += nr / BITS_PER_BYTE; | 182 | data += nr / BITS_PER_BYTE; |
182 | fdt->close_on_exec = (fd_set *)data; | 183 | fdt->close_on_exec = data; |
183 | fdt->next = NULL; | 184 | fdt->next = NULL; |
184 | 185 | ||
185 | return fdt; | 186 | return fdt; |
@@ -275,11 +276,11 @@ static int count_open_files(struct fdtable *fdt) | |||
275 | int i; | 276 | int i; |
276 | 277 | ||
277 | /* Find the last open fd */ | 278 | /* Find the last open fd */ |
278 | for (i = size/(8*sizeof(long)); i > 0; ) { | 279 | for (i = size / BITS_PER_LONG; i > 0; ) { |
279 | if (fdt->open_fds->fds_bits[--i]) | 280 | if (fdt->open_fds[--i]) |
280 | break; | 281 | break; |
281 | } | 282 | } |
282 | i = (i+1) * 8 * sizeof(long); | 283 | i = (i + 1) * BITS_PER_LONG; |
283 | return i; | 284 | return i; |
284 | } | 285 | } |
285 | 286 | ||
@@ -306,8 +307,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
306 | newf->next_fd = 0; | 307 | newf->next_fd = 0; |
307 | new_fdt = &newf->fdtab; | 308 | new_fdt = &newf->fdtab; |
308 | new_fdt->max_fds = NR_OPEN_DEFAULT; | 309 | new_fdt->max_fds = NR_OPEN_DEFAULT; |
309 | new_fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; | 310 | new_fdt->close_on_exec = newf->close_on_exec_init; |
310 | new_fdt->open_fds = (fd_set *)&newf->open_fds_init; | 311 | new_fdt->open_fds = newf->open_fds_init; |
311 | new_fdt->fd = &newf->fd_array[0]; | 312 | new_fdt->fd = &newf->fd_array[0]; |
312 | new_fdt->next = NULL; | 313 | new_fdt->next = NULL; |
313 | 314 | ||
@@ -350,10 +351,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
350 | old_fds = old_fdt->fd; | 351 | old_fds = old_fdt->fd; |
351 | new_fds = new_fdt->fd; | 352 | new_fds = new_fdt->fd; |
352 | 353 | ||
353 | memcpy(new_fdt->open_fds->fds_bits, | 354 | memcpy(new_fdt->open_fds, old_fdt->open_fds, open_files / 8); |
354 | old_fdt->open_fds->fds_bits, open_files/8); | 355 | memcpy(new_fdt->close_on_exec, old_fdt->close_on_exec, open_files / 8); |
355 | memcpy(new_fdt->close_on_exec->fds_bits, | ||
356 | old_fdt->close_on_exec->fds_bits, open_files/8); | ||
357 | 356 | ||
358 | for (i = open_files; i != 0; i--) { | 357 | for (i = open_files; i != 0; i--) { |
359 | struct file *f = *old_fds++; | 358 | struct file *f = *old_fds++; |
@@ -379,11 +378,11 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
379 | memset(new_fds, 0, size); | 378 | memset(new_fds, 0, size); |
380 | 379 | ||
381 | if (new_fdt->max_fds > open_files) { | 380 | if (new_fdt->max_fds > open_files) { |
382 | int left = (new_fdt->max_fds-open_files)/8; | 381 | int left = (new_fdt->max_fds - open_files) / 8; |
383 | int start = open_files / (8 * sizeof(unsigned long)); | 382 | int start = open_files / BITS_PER_LONG; |
384 | 383 | ||
385 | memset(&new_fdt->open_fds->fds_bits[start], 0, left); | 384 | memset(&new_fdt->open_fds[start], 0, left); |
386 | memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); | 385 | memset(&new_fdt->close_on_exec[start], 0, left); |
387 | } | 386 | } |
388 | 387 | ||
389 | rcu_assign_pointer(newf->fdt, new_fdt); | 388 | rcu_assign_pointer(newf->fdt, new_fdt); |
@@ -419,8 +418,8 @@ struct files_struct init_files = { | |||
419 | .fdtab = { | 418 | .fdtab = { |
420 | .max_fds = NR_OPEN_DEFAULT, | 419 | .max_fds = NR_OPEN_DEFAULT, |
421 | .fd = &init_files.fd_array[0], | 420 | .fd = &init_files.fd_array[0], |
422 | .close_on_exec = (fd_set *)&init_files.close_on_exec_init, | 421 | .close_on_exec = init_files.close_on_exec_init, |
423 | .open_fds = (fd_set *)&init_files.open_fds_init, | 422 | .open_fds = init_files.open_fds_init, |
424 | }, | 423 | }, |
425 | .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), | 424 | .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), |
426 | }; | 425 | }; |
@@ -443,8 +442,7 @@ repeat: | |||
443 | fd = files->next_fd; | 442 | fd = files->next_fd; |
444 | 443 | ||
445 | if (fd < fdt->max_fds) | 444 | if (fd < fdt->max_fds) |
446 | fd = find_next_zero_bit(fdt->open_fds->fds_bits, | 445 | fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd); |
447 | fdt->max_fds, fd); | ||
448 | 446 | ||
449 | error = expand_files(files, fd); | 447 | error = expand_files(files, fd); |
450 | if (error < 0) | 448 | if (error < 0) |
diff --git a/fs/select.c b/fs/select.c index d33418fdc858..2e7fbe8a092c 100644 --- a/fs/select.c +++ b/fs/select.c | |||
@@ -348,7 +348,7 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds) | |||
348 | set = ~(~0UL << (n & (__NFDBITS-1))); | 348 | set = ~(~0UL << (n & (__NFDBITS-1))); |
349 | n /= __NFDBITS; | 349 | n /= __NFDBITS; |
350 | fdt = files_fdtable(current->files); | 350 | fdt = files_fdtable(current->files); |
351 | open_fds = fdt->open_fds->fds_bits+n; | 351 | open_fds = fdt->open_fds + n; |
352 | max = 0; | 352 | max = 0; |
353 | if (set) { | 353 | if (set) { |
354 | set &= BITS(fds, n); | 354 | set &= BITS(fds, n); |
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 7675da2c18f7..158a41eed314 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h | |||
@@ -21,51 +21,43 @@ | |||
21 | */ | 21 | */ |
22 | #define NR_OPEN_DEFAULT BITS_PER_LONG | 22 | #define NR_OPEN_DEFAULT BITS_PER_LONG |
23 | 23 | ||
24 | /* | ||
25 | * The embedded_fd_set is a small fd_set, | ||
26 | * suitable for most tasks (which open <= BITS_PER_LONG files) | ||
27 | */ | ||
28 | struct embedded_fd_set { | ||
29 | unsigned long fds_bits[1]; | ||
30 | }; | ||
31 | |||
32 | struct fdtable { | 24 | struct fdtable { |
33 | unsigned int max_fds; | 25 | unsigned int max_fds; |
34 | struct file __rcu **fd; /* current fd array */ | 26 | struct file __rcu **fd; /* current fd array */ |
35 | fd_set *close_on_exec; | 27 | unsigned long *close_on_exec; |
36 | fd_set *open_fds; | 28 | unsigned long *open_fds; |
37 | struct rcu_head rcu; | 29 | struct rcu_head rcu; |
38 | struct fdtable *next; | 30 | struct fdtable *next; |
39 | }; | 31 | }; |
40 | 32 | ||
41 | static inline void __set_close_on_exec(int fd, struct fdtable *fdt) | 33 | static inline void __set_close_on_exec(int fd, struct fdtable *fdt) |
42 | { | 34 | { |
43 | FD_SET(fd, fdt->close_on_exec); | 35 | __set_bit(fd, fdt->close_on_exec); |
44 | } | 36 | } |
45 | 37 | ||
46 | static inline void __clear_close_on_exec(int fd, struct fdtable *fdt) | 38 | static inline void __clear_close_on_exec(int fd, struct fdtable *fdt) |
47 | { | 39 | { |
48 | FD_CLR(fd, fdt->close_on_exec); | 40 | __clear_bit(fd, fdt->close_on_exec); |
49 | } | 41 | } |
50 | 42 | ||
51 | static inline bool close_on_exec(int fd, const struct fdtable *fdt) | 43 | static inline bool close_on_exec(int fd, const struct fdtable *fdt) |
52 | { | 44 | { |
53 | return FD_ISSET(fd, fdt->close_on_exec); | 45 | return test_bit(fd, fdt->close_on_exec); |
54 | } | 46 | } |
55 | 47 | ||
56 | static inline void __set_open_fd(int fd, struct fdtable *fdt) | 48 | static inline void __set_open_fd(int fd, struct fdtable *fdt) |
57 | { | 49 | { |
58 | FD_SET(fd, fdt->open_fds); | 50 | __set_bit(fd, fdt->open_fds); |
59 | } | 51 | } |
60 | 52 | ||
61 | static inline void __clear_open_fd(int fd, struct fdtable *fdt) | 53 | static inline void __clear_open_fd(int fd, struct fdtable *fdt) |
62 | { | 54 | { |
63 | FD_CLR(fd, fdt->open_fds); | 55 | __clear_bit(fd, fdt->open_fds); |
64 | } | 56 | } |
65 | 57 | ||
66 | static inline bool fd_is_open(int fd, const struct fdtable *fdt) | 58 | static inline bool fd_is_open(int fd, const struct fdtable *fdt) |
67 | { | 59 | { |
68 | return FD_ISSET(fd, fdt->open_fds); | 60 | return test_bit(fd, fdt->open_fds); |
69 | } | 61 | } |
70 | 62 | ||
71 | /* | 63 | /* |
@@ -83,8 +75,8 @@ struct files_struct { | |||
83 | */ | 75 | */ |
84 | spinlock_t file_lock ____cacheline_aligned_in_smp; | 76 | spinlock_t file_lock ____cacheline_aligned_in_smp; |
85 | int next_fd; | 77 | int next_fd; |
86 | struct embedded_fd_set close_on_exec_init; | 78 | unsigned long close_on_exec_init[1]; |
87 | struct embedded_fd_set open_fds_init; | 79 | unsigned long open_fds_init[1]; |
88 | struct file __rcu * fd_array[NR_OPEN_DEFAULT]; | 80 | struct file __rcu * fd_array[NR_OPEN_DEFAULT]; |
89 | }; | 81 | }; |
90 | 82 | ||
diff --git a/kernel/exit.c b/kernel/exit.c index 4b4042f9bc6a..4db020015f14 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -473,7 +473,7 @@ static void close_files(struct files_struct * files) | |||
473 | i = j * __NFDBITS; | 473 | i = j * __NFDBITS; |
474 | if (i >= fdt->max_fds) | 474 | if (i >= fdt->max_fds) |
475 | break; | 475 | break; |
476 | set = fdt->open_fds->fds_bits[j++]; | 476 | set = fdt->open_fds[j++]; |
477 | while (set) { | 477 | while (set) { |
478 | if (set & 1) { | 478 | if (set & 1) { |
479 | struct file * file = xchg(&fdt->fd[i], NULL); | 479 | struct file * file = xchg(&fdt->fd[i], NULL); |
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 6a3683e28426..421c990a20b2 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c | |||
@@ -2145,7 +2145,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, | |||
2145 | fdt = files_fdtable(files); | 2145 | fdt = files_fdtable(files); |
2146 | if (i >= fdt->max_fds) | 2146 | if (i >= fdt->max_fds) |
2147 | break; | 2147 | break; |
2148 | set = fdt->open_fds->fds_bits[j]; | 2148 | set = fdt->open_fds[j]; |
2149 | if (!set) | 2149 | if (!set) |
2150 | continue; | 2150 | continue; |
2151 | spin_unlock(&files->file_lock); | 2151 | spin_unlock(&files->file_lock); |