author    Linus Torvalds <torvalds@linux-foundation.org>  2015-10-30 19:53:57 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2015-10-31 19:12:10 -0400
commit    f3f86e33dc3da437fa4f204588ce7c78ea756982 (patch)
tree      b74105990139f8102016b786bff0e29768ef7828 /fs/file.c
parent    8a28d67457b613258aa0578ccece206d166f2b9f (diff)
vfs: Fix pathological performance case for __alloc_fd()
Al Viro points out that:

 > > * [Linux-specific aside] our __alloc_fd() can degrade quite badly
 > > with some use patterns. The cacheline pingpong in the bitmap is probably
 > > inevitable, unless we accept considerably heavier memory footprint,
 > > but we also have a case when alloc_fd() takes O(n) and it's _not_ hard
 > > to trigger - close(3);open(...); will have the next open() after that
 > > scanning the entire in-use bitmap.

And Eric Dumazet has a somewhat realistic multithreaded microbenchmark
that opens and closes a lot of sockets with minimal work per socket.

This patch largely fixes it.  We keep a 2nd-level bitmap of the open
file bitmaps, showing which words are already full.  So then we can
traverse that second-level bitmap to efficiently skip already allocated
file descriptors.

On his benchmark, this improves performance by up to an order of
magnitude, by avoiding the excessive open file bitmap scanning.

Tested-and-acked-by: Eric Dumazet <edumazet@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
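The mechanism is easy to model outside the kernel. Below is a minimal userspace sketch of the same two-level scheme, not the patch itself: the array sizes, helper names and naive bit loops are made up for illustration, whereas the real code uses BITS_TO_LONGS(), find_next_zero_bit() and the files_struct spinlock.

/*
 * Userspace sketch of the two-level bitmap idea described above.
 * Everything here (names, fixed sizes, naive bit loops) is illustrative.
 */
#include <stdio.h>
#include <limits.h>

#define MAX_FDS   1024
#define WORD_BITS (sizeof(unsigned long) * CHAR_BIT)
#define WORDS     (MAX_FDS / WORD_BITS)

static unsigned long open_fds[WORDS];                                  /* one bit per fd        */
static unsigned long full_words[(WORDS + WORD_BITS - 1) / WORD_BITS];  /* one bit per full word */

static void set_open(unsigned int fd)
{
        open_fds[fd / WORD_BITS] |= 1UL << (fd % WORD_BITS);
        if (!~open_fds[fd / WORD_BITS])         /* word just became all ones */
                full_words[(fd / WORD_BITS) / WORD_BITS] |= 1UL << ((fd / WORD_BITS) % WORD_BITS);
}

static void clear_open(unsigned int fd)
{
        open_fds[fd / WORD_BITS] &= ~(1UL << (fd % WORD_BITS));
        full_words[(fd / WORD_BITS) / WORD_BITS] &= ~(1UL << ((fd / WORD_BITS) % WORD_BITS));
}

static unsigned int next_free(unsigned int start)
{
        unsigned int w = start / WORD_BITS, fd;

        /* level 1: skip whole words that are known to be full */
        while (w < WORDS && (full_words[w / WORD_BITS] & (1UL << (w % WORD_BITS))))
                w++;
        if (w * WORD_BITS > start)
                start = w * WORD_BITS;

        /* level 2: ordinary scan of the per-fd bitmap from there */
        for (fd = start; fd < MAX_FDS; fd++)
                if (!(open_fds[fd / WORD_BITS] & (1UL << (fd % WORD_BITS))))
                        return fd;
        return MAX_FDS;                         /* no free descriptor */
}

int main(void)
{
        unsigned int fd;

        for (fd = 0; fd < 200; fd++)            /* open fds 0..199 */
                set_open(fd);
        clear_open(3);                          /* close(3) */

        printf("lowest free fd: %u\n", next_free(0));   /* prints 3 */
        set_open(3);                            /* reopen it */
        printf("lowest free fd: %u\n", next_free(0));   /* prints 200, after skipping 3 full words */
        return 0;
}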
Diffstat (limited to 'fs/file.c')
-rw-r--r--  fs/file.c  39
1 file changed, 35 insertions(+), 4 deletions(-)
diff --git a/fs/file.c b/fs/file.c
index 6c672ad329e9..6f6eb2b03af5 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -56,6 +56,9 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
 	__free_fdtable(container_of(rcu, struct fdtable, rcu));
 }
 
+#define BITBIT_NR(nr)	BITS_TO_LONGS(BITS_TO_LONGS(nr))
+#define BITBIT_SIZE(nr)	(BITBIT_NR(nr) * sizeof(long))
+
 /*
  * Expand the fdset in the files_struct.  Called with the files spinlock
  * held for write.
@@ -77,6 +80,11 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
 	memset((char *)(nfdt->open_fds) + cpy, 0, set);
 	memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy);
 	memset((char *)(nfdt->close_on_exec) + cpy, 0, set);
+
+	cpy = BITBIT_SIZE(ofdt->max_fds);
+	set = BITBIT_SIZE(nfdt->max_fds) - cpy;
+	memcpy(nfdt->full_fds_bits, ofdt->full_fds_bits, cpy);
+	memset(cpy+(char *)nfdt->full_fds_bits, 0, set);
 }
 
 static struct fdtable * alloc_fdtable(unsigned int nr)
@@ -115,12 +123,14 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
 	fdt->fd = data;
 
 	data = alloc_fdmem(max_t(size_t,
-				 2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES));
+				 2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), L1_CACHE_BYTES));
 	if (!data)
 		goto out_arr;
 	fdt->open_fds = data;
 	data += nr / BITS_PER_BYTE;
 	fdt->close_on_exec = data;
+	data += nr / BITS_PER_BYTE;
+	fdt->full_fds_bits = data;
 
 	return fdt;
 
@@ -229,14 +239,18 @@ static inline void __clear_close_on_exec(int fd, struct fdtable *fdt)
 	__clear_bit(fd, fdt->close_on_exec);
 }
 
-static inline void __set_open_fd(int fd, struct fdtable *fdt)
+static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt)
 {
 	__set_bit(fd, fdt->open_fds);
+	fd /= BITS_PER_LONG;
+	if (!~fdt->open_fds[fd])
+		__set_bit(fd, fdt->full_fds_bits);
 }
 
-static inline void __clear_open_fd(int fd, struct fdtable *fdt)
+static inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt)
 {
 	__clear_bit(fd, fdt->open_fds);
+	__clear_bit(fd / BITS_PER_LONG, fdt->full_fds_bits);
 }
 
 static int count_open_files(struct fdtable *fdt)
@@ -280,6 +294,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 	new_fdt->max_fds = NR_OPEN_DEFAULT;
 	new_fdt->close_on_exec = newf->close_on_exec_init;
 	new_fdt->open_fds = newf->open_fds_init;
+	new_fdt->full_fds_bits = newf->full_fds_bits_init;
 	new_fdt->fd = &newf->fd_array[0];
 
 	spin_lock(&oldf->file_lock);
@@ -323,6 +338,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 
 	memcpy(new_fdt->open_fds, old_fdt->open_fds, open_files / 8);
 	memcpy(new_fdt->close_on_exec, old_fdt->close_on_exec, open_files / 8);
+	memcpy(new_fdt->full_fds_bits, old_fdt->full_fds_bits, BITBIT_SIZE(open_files));
 
 	for (i = open_files; i != 0; i--) {
 		struct file *f = *old_fds++;
@@ -454,10 +470,25 @@ struct files_struct init_files = {
 		.fd		= &init_files.fd_array[0],
 		.close_on_exec	= init_files.close_on_exec_init,
 		.open_fds	= init_files.open_fds_init,
+		.full_fds_bits	= init_files.full_fds_bits_init,
 	},
 	.file_lock	= __SPIN_LOCK_UNLOCKED(init_files.file_lock),
 };
 
+static unsigned long find_next_fd(struct fdtable *fdt, unsigned long start)
+{
+	unsigned long maxfd = fdt->max_fds;
+	unsigned long maxbit = maxfd / BITS_PER_LONG;
+	unsigned long bitbit = start / BITS_PER_LONG;
+
+	bitbit = find_next_zero_bit(fdt->full_fds_bits, maxbit, bitbit) * BITS_PER_LONG;
+	if (bitbit > maxfd)
+		return maxfd;
+	if (bitbit > start)
+		start = bitbit;
+	return find_next_zero_bit(fdt->open_fds, maxfd, start);
+}
+
 /*
  * allocate a file descriptor, mark it busy.
  */
@@ -476,7 +507,7 @@ repeat:
 	fd = files->next_fd;
 
 	if (fd < fdt->max_fds)
-		fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd);
+		fd = find_next_fd(fdt, fd);
 
 	/*
 	 * N.B. For clone tasks sharing a files structure, this test
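To see what the new find_next_fd() buys, take a concrete case (assuming 64-bit longs): descriptors 0..191 are all open and files->next_fd is 0. full_fds_bits then has bits 0, 1 and 2 set, so the first find_next_zero_bit() returns word 3, the scan of open_fds starts at bit 192, and the three fully populated words are skipped entirely; previously every such open() rescanned all 192 in-use bits from the start.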