diff options
| author | David Howells <dhowells@redhat.com> | 2012-02-16 12:49:54 -0500 |
|---|---|---|
| committer | H. Peter Anvin <hpa@zytor.com> | 2012-02-19 13:30:57 -0500 |
| commit | 1fd36adcd98c14d2fd97f545293c488775cb2823 (patch) | |
| tree | c13ab1934a15aebe0d81601d910ce5a3c6fa2c6f | |
| parent | 1dce27c5aa6770e9d195f2bb7db1db3d4dde5591 (diff) | |
Replace the fd_sets in struct fdtable with an array of unsigned longs
Replace the fd_sets in struct fdtable with an array of unsigned longs and then
use the standard non-atomic bit operations rather than the FD_* macros.
This:
(1) Removes the abuses of struct fd_set:
(a) Since we almost never want to allocate a full fd_set, we in effect
allocate a just-big-enough array of unsigned longs and cast it to an
fd_set type - so why bother with the fd_set at all?
(b) Some places outside of the core fdtable handling code (such as
SELinux) want to look inside the array of unsigned longs hidden inside
the fd_set struct for more efficient iteration over the entire set.
(2) Eliminates the use of FD_*() macros in the kernel completely.
(3) Permits the __FD_*() macros to be deleted entirely where not exposed to
userspace.
Signed-off-by: David Howells <dhowells@redhat.com>
Link: http://lkml.kernel.org/r/20120216174954.23314.48147.stgit@warthog.procyon.org.uk
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
| -rw-r--r-- | fs/exec.c | 4 | ||||
| -rw-r--r-- | fs/file.c | 46 | ||||
| -rw-r--r-- | fs/select.c | 2 | ||||
| -rw-r--r-- | include/linux/fdtable.h | 28 | ||||
| -rw-r--r-- | kernel/exit.c | 2 | ||||
| -rw-r--r-- | security/selinux/hooks.c | 2 |
6 files changed, 37 insertions, 47 deletions
diff --git a/fs/exec.c b/fs/exec.c --- a/fs/exec.c +++ b/fs/exec.c | |||
| @@ -1026,10 +1026,10 @@ static void flush_old_files(struct files_struct * files) | |||
| 1026 | fdt = files_fdtable(files); | 1026 | fdt = files_fdtable(files); |
| 1027 | if (i >= fdt->max_fds) | 1027 | if (i >= fdt->max_fds) |
| 1028 | break; | 1028 | break; |
| 1029 | set = fdt->close_on_exec->fds_bits[j]; | 1029 | set = fdt->close_on_exec[j]; |
| 1030 | if (!set) | 1030 | if (!set) |
| 1031 | continue; | 1031 | continue; |
| 1032 | fdt->close_on_exec->fds_bits[j] = 0; | 1032 | fdt->close_on_exec[j] = 0; |
| 1033 | spin_unlock(&files->file_lock); | 1033 | spin_unlock(&files->file_lock); |
| 1034 | for ( ; set ; i++,set >>= 1) { | 1034 | for ( ; set ; i++,set >>= 1) { |
| 1035 | if (set & 1) { | 1035 | if (set & 1) { |
diff --git a/fs/file.c b/fs/file.c --- a/fs/file.c +++ b/fs/file.c | |||
| @@ -40,7 +40,7 @@ int sysctl_nr_open_max = 1024 * 1024; /* raised later */ | |||
| 40 | */ | 40 | */ |
| 41 | static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); | 41 | static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); |
| 42 | 42 | ||
| 43 | static void *alloc_fdmem(unsigned int size) | 43 | static void *alloc_fdmem(size_t size) |
| 44 | { | 44 | { |
| 45 | /* | 45 | /* |
| 46 | * Very large allocations can stress page reclaim, so fall back to | 46 | * Very large allocations can stress page reclaim, so fall back to |
| @@ -142,7 +142,7 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) | |||
| 142 | static struct fdtable * alloc_fdtable(unsigned int nr) | 142 | static struct fdtable * alloc_fdtable(unsigned int nr) |
| 143 | { | 143 | { |
| 144 | struct fdtable *fdt; | 144 | struct fdtable *fdt; |
| 145 | char *data; | 145 | void *data; |
| 146 | 146 | ||
| 147 | /* | 147 | /* |
| 148 | * Figure out how many fds we actually want to support in this fdtable. | 148 | * Figure out how many fds we actually want to support in this fdtable. |
| @@ -172,14 +172,15 @@ static struct fdtable * alloc_fdtable(unsigned int nr) | |||
| 172 | data = alloc_fdmem(nr * sizeof(struct file *)); | 172 | data = alloc_fdmem(nr * sizeof(struct file *)); |
| 173 | if (!data) | 173 | if (!data) |
| 174 | goto out_fdt; | 174 | goto out_fdt; |
| 175 | fdt->fd = (struct file **)data; | 175 | fdt->fd = data; |
| 176 | data = alloc_fdmem(max_t(unsigned int, | 176 | |
| 177 | data = alloc_fdmem(max_t(size_t, | ||
| 177 | 2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES)); | 178 | 2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES)); |
| 178 | if (!data) | 179 | if (!data) |
| 179 | goto out_arr; | 180 | goto out_arr; |
| 180 | fdt->open_fds = (fd_set *)data; | 181 | fdt->open_fds = data; |
| 181 | data += nr / BITS_PER_BYTE; | 182 | data += nr / BITS_PER_BYTE; |
| 182 | fdt->close_on_exec = (fd_set *)data; | 183 | fdt->close_on_exec = data; |
| 183 | fdt->next = NULL; | 184 | fdt->next = NULL; |
| 184 | 185 | ||
| 185 | return fdt; | 186 | return fdt; |
| @@ -275,11 +276,11 @@ static int count_open_files(struct fdtable *fdt) | |||
| 275 | int i; | 276 | int i; |
| 276 | 277 | ||
| 277 | /* Find the last open fd */ | 278 | /* Find the last open fd */ |
| 278 | for (i = size/(8*sizeof(long)); i > 0; ) { | 279 | for (i = size / BITS_PER_LONG; i > 0; ) { |
| 279 | if (fdt->open_fds->fds_bits[--i]) | 280 | if (fdt->open_fds[--i]) |
| 280 | break; | 281 | break; |
| 281 | } | 282 | } |
| 282 | i = (i+1) * 8 * sizeof(long); | 283 | i = (i + 1) * BITS_PER_LONG; |
| 283 | return i; | 284 | return i; |
| 284 | } | 285 | } |
| 285 | 286 | ||
| @@ -306,8 +307,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
| 306 | newf->next_fd = 0; | 307 | newf->next_fd = 0; |
| 307 | new_fdt = &newf->fdtab; | 308 | new_fdt = &newf->fdtab; |
| 308 | new_fdt->max_fds = NR_OPEN_DEFAULT; | 309 | new_fdt->max_fds = NR_OPEN_DEFAULT; |
| 309 | new_fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; | 310 | new_fdt->close_on_exec = newf->close_on_exec_init; |
| 310 | new_fdt->open_fds = (fd_set *)&newf->open_fds_init; | 311 | new_fdt->open_fds = newf->open_fds_init; |
| 311 | new_fdt->fd = &newf->fd_array[0]; | 312 | new_fdt->fd = &newf->fd_array[0]; |
| 312 | new_fdt->next = NULL; | 313 | new_fdt->next = NULL; |
| 313 | 314 | ||
| @@ -350,10 +351,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
| 350 | old_fds = old_fdt->fd; | 351 | old_fds = old_fdt->fd; |
| 351 | new_fds = new_fdt->fd; | 352 | new_fds = new_fdt->fd; |
| 352 | 353 | ||
| 353 | memcpy(new_fdt->open_fds->fds_bits, | 354 | memcpy(new_fdt->open_fds, old_fdt->open_fds, open_files / 8); |
| 354 | old_fdt->open_fds->fds_bits, open_files/8); | 355 | memcpy(new_fdt->close_on_exec, old_fdt->close_on_exec, open_files / 8); |
| 355 | memcpy(new_fdt->close_on_exec->fds_bits, | ||
| 356 | old_fdt->close_on_exec->fds_bits, open_files/8); | ||
| 357 | 356 | ||
| 358 | for (i = open_files; i != 0; i--) { | 357 | for (i = open_files; i != 0; i--) { |
| 359 | struct file *f = *old_fds++; | 358 | struct file *f = *old_fds++; |
| @@ -379,11 +378,11 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
| 379 | memset(new_fds, 0, size); | 378 | memset(new_fds, 0, size); |
| 380 | 379 | ||
| 381 | if (new_fdt->max_fds > open_files) { | 380 | if (new_fdt->max_fds > open_files) { |
| 382 | int left = (new_fdt->max_fds-open_files)/8; | 381 | int left = (new_fdt->max_fds - open_files) / 8; |
| 383 | int start = open_files / (8 * sizeof(unsigned long)); | 382 | int start = open_files / BITS_PER_LONG; |
| 384 | 383 | ||
| 385 | memset(&new_fdt->open_fds->fds_bits[start], 0, left); | 384 | memset(&new_fdt->open_fds[start], 0, left); |
| 386 | memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); | 385 | memset(&new_fdt->close_on_exec[start], 0, left); |
| 387 | } | 386 | } |
| 388 | 387 | ||
| 389 | rcu_assign_pointer(newf->fdt, new_fdt); | 388 | rcu_assign_pointer(newf->fdt, new_fdt); |
| @@ -419,8 +418,8 @@ struct files_struct init_files = { | |||
| 419 | .fdtab = { | 418 | .fdtab = { |
| 420 | .max_fds = NR_OPEN_DEFAULT, | 419 | .max_fds = NR_OPEN_DEFAULT, |
| 421 | .fd = &init_files.fd_array[0], | 420 | .fd = &init_files.fd_array[0], |
| 422 | .close_on_exec = (fd_set *)&init_files.close_on_exec_init, | 421 | .close_on_exec = init_files.close_on_exec_init, |
| 423 | .open_fds = (fd_set *)&init_files.open_fds_init, | 422 | .open_fds = init_files.open_fds_init, |
| 424 | }, | 423 | }, |
| 425 | .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), | 424 | .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), |
| 426 | }; | 425 | }; |
| @@ -443,8 +442,7 @@ repeat: | |||
| 443 | fd = files->next_fd; | 442 | fd = files->next_fd; |
| 444 | 443 | ||
| 445 | if (fd < fdt->max_fds) | 444 | if (fd < fdt->max_fds) |
| 446 | fd = find_next_zero_bit(fdt->open_fds->fds_bits, | 445 | fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd); |
| 447 | fdt->max_fds, fd); | ||
| 448 | 446 | ||
| 449 | error = expand_files(files, fd); | 447 | error = expand_files(files, fd); |
| 450 | if (error < 0) | 448 | if (error < 0) |
diff --git a/fs/select.c b/fs/select.c index d33418fdc858..2e7fbe8a092c 100644 --- a/fs/select.c +++ b/fs/select.c | |||
| @@ -348,7 +348,7 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds) | |||
| 348 | set = ~(~0UL << (n & (__NFDBITS-1))); | 348 | set = ~(~0UL << (n & (__NFDBITS-1))); |
| 349 | n /= __NFDBITS; | 349 | n /= __NFDBITS; |
| 350 | fdt = files_fdtable(current->files); | 350 | fdt = files_fdtable(current->files); |
| 351 | open_fds = fdt->open_fds->fds_bits+n; | 351 | open_fds = fdt->open_fds + n; |
| 352 | max = 0; | 352 | max = 0; |
| 353 | if (set) { | 353 | if (set) { |
| 354 | set &= BITS(fds, n); | 354 | set &= BITS(fds, n); |
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 7675da2c18f7..158a41eed314 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h | |||
| @@ -21,51 +21,43 @@ | |||
| 21 | */ | 21 | */ |
| 22 | #define NR_OPEN_DEFAULT BITS_PER_LONG | 22 | #define NR_OPEN_DEFAULT BITS_PER_LONG |
| 23 | 23 | ||
| 24 | /* | ||
| 25 | * The embedded_fd_set is a small fd_set, | ||
| 26 | * suitable for most tasks (which open <= BITS_PER_LONG files) | ||
| 27 | */ | ||
| 28 | struct embedded_fd_set { | ||
| 29 | unsigned long fds_bits[1]; | ||
| 30 | }; | ||
| 31 | |||
| 32 | struct fdtable { | 24 | struct fdtable { |
| 33 | unsigned int max_fds; | 25 | unsigned int max_fds; |
| 34 | struct file __rcu **fd; /* current fd array */ | 26 | struct file __rcu **fd; /* current fd array */ |
| 35 | fd_set *close_on_exec; | 27 | unsigned long *close_on_exec; |
| 36 | fd_set *open_fds; | 28 | unsigned long *open_fds; |
| 37 | struct rcu_head rcu; | 29 | struct rcu_head rcu; |
| 38 | struct fdtable *next; | 30 | struct fdtable *next; |
| 39 | }; | 31 | }; |
| 40 | 32 | ||
| 41 | static inline void __set_close_on_exec(int fd, struct fdtable *fdt) | 33 | static inline void __set_close_on_exec(int fd, struct fdtable *fdt) |
| 42 | { | 34 | { |
| 43 | FD_SET(fd, fdt->close_on_exec); | 35 | __set_bit(fd, fdt->close_on_exec); |
| 44 | } | 36 | } |
| 45 | 37 | ||
| 46 | static inline void __clear_close_on_exec(int fd, struct fdtable *fdt) | 38 | static inline void __clear_close_on_exec(int fd, struct fdtable *fdt) |
| 47 | { | 39 | { |
| 48 | FD_CLR(fd, fdt->close_on_exec); | 40 | __clear_bit(fd, fdt->close_on_exec); |
| 49 | } | 41 | } |
| 50 | 42 | ||
| 51 | static inline bool close_on_exec(int fd, const struct fdtable *fdt) | 43 | static inline bool close_on_exec(int fd, const struct fdtable *fdt) |
| 52 | { | 44 | { |
| 53 | return FD_ISSET(fd, fdt->close_on_exec); | 45 | return test_bit(fd, fdt->close_on_exec); |
| 54 | } | 46 | } |
| 55 | 47 | ||
| 56 | static inline void __set_open_fd(int fd, struct fdtable *fdt) | 48 | static inline void __set_open_fd(int fd, struct fdtable *fdt) |
| 57 | { | 49 | { |
| 58 | FD_SET(fd, fdt->open_fds); | 50 | __set_bit(fd, fdt->open_fds); |
| 59 | } | 51 | } |
| 60 | 52 | ||
| 61 | static inline void __clear_open_fd(int fd, struct fdtable *fdt) | 53 | static inline void __clear_open_fd(int fd, struct fdtable *fdt) |
| 62 | { | 54 | { |
| 63 | FD_CLR(fd, fdt->open_fds); | 55 | __clear_bit(fd, fdt->open_fds); |
| 64 | } | 56 | } |
| 65 | 57 | ||
| 66 | static inline bool fd_is_open(int fd, const struct fdtable *fdt) | 58 | static inline bool fd_is_open(int fd, const struct fdtable *fdt) |
| 67 | { | 59 | { |
| 68 | return FD_ISSET(fd, fdt->open_fds); | 60 | return test_bit(fd, fdt->open_fds); |
| 69 | } | 61 | } |
| 70 | 62 | ||
| 71 | /* | 63 | /* |
| @@ -83,8 +75,8 @@ struct files_struct { | |||
| 83 | */ | 75 | */ |
| 84 | spinlock_t file_lock ____cacheline_aligned_in_smp; | 76 | spinlock_t file_lock ____cacheline_aligned_in_smp; |
| 85 | int next_fd; | 77 | int next_fd; |
| 86 | struct embedded_fd_set close_on_exec_init; | 78 | unsigned long close_on_exec_init[1]; |
| 87 | struct embedded_fd_set open_fds_init; | 79 | unsigned long open_fds_init[1]; |
| 88 | struct file __rcu * fd_array[NR_OPEN_DEFAULT]; | 80 | struct file __rcu * fd_array[NR_OPEN_DEFAULT]; |
| 89 | }; | 81 | }; |
| 90 | 82 | ||
diff --git a/kernel/exit.c b/kernel/exit.c index 4b4042f9bc6a..4db020015f14 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
| @@ -473,7 +473,7 @@ static void close_files(struct files_struct * files) | |||
| 473 | i = j * __NFDBITS; | 473 | i = j * __NFDBITS; |
| 474 | if (i >= fdt->max_fds) | 474 | if (i >= fdt->max_fds) |
| 475 | break; | 475 | break; |
| 476 | set = fdt->open_fds->fds_bits[j++]; | 476 | set = fdt->open_fds[j++]; |
| 477 | while (set) { | 477 | while (set) { |
| 478 | if (set & 1) { | 478 | if (set & 1) { |
| 479 | struct file * file = xchg(&fdt->fd[i], NULL); | 479 | struct file * file = xchg(&fdt->fd[i], NULL); |
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 6a3683e28426..421c990a20b2 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c | |||
| @@ -2145,7 +2145,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, | |||
| 2145 | fdt = files_fdtable(files); | 2145 | fdt = files_fdtable(files); |
| 2146 | if (i >= fdt->max_fds) | 2146 | if (i >= fdt->max_fds) |
| 2147 | break; | 2147 | break; |
| 2148 | set = fdt->open_fds->fds_bits[j]; | 2148 | set = fdt->open_fds[j]; |
| 2149 | if (!set) | 2149 | if (!set) |
| 2150 | continue; | 2150 | continue; |
| 2151 | spin_unlock(&files->file_lock); | 2151 | spin_unlock(&files->file_lock); |
