diff options
author | Eric Dumazet <edumazet@google.com> | 2015-06-30 09:54:08 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2015-07-01 02:30:09 -0400 |
commit | 8a81252b774b53e628a8a0fe18e2b8fc236d92cc (patch) | |
tree | 2176cd782e2b578da7ac1072f4430bf68e22c274 /fs | |
parent | 1af95de6f0119d5bde02d3a811a9f3a3661e954e (diff) |
fs/file.c: don't acquire files->file_lock in fd_install()
Mateusz Guzik reported:
Currently obtaining a new file descriptor results in locking fdtable
twice - once in order to reserve a slot and second time to fill it.
Holding the spinlock in __fd_install() is needed in case a resize is
done, or to prevent a resize.
Mateusz provided an RFC patch and a micro-benchmark:
http://people.redhat.com/~mguzik/pipebench.c
A resize is an unlikely operation in a process lifetime,
as table size is at least doubled at every resize.
We can use RCU instead of the spinlock.
__fd_install() must wait if a resize is in progress.
The resize must block new __fd_install() callers from starting,
and wait until ongoing installs are finished (synchronize_sched()).
A resize should be attempted by a single thread, to avoid wasting resources.
rcu_sched variant is used, as __fd_install() and expand_fdtable() run
from process context.
It gives us a ~30% speedup using pipebench on a dual Intel(R) Xeon(R)
CPU E5-2696 v2 @ 2.50GHz
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Mateusz Guzik <mguzik@redhat.com>
Acked-by: Mateusz Guzik <mguzik@redhat.com>
Tested-by: Mateusz Guzik <mguzik@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/file.c | 67 |
1 file changed, 48 insertions, 19 deletions
@@ -147,6 +147,13 @@ static int expand_fdtable(struct files_struct *files, int nr) | |||
147 | 147 | ||
148 | spin_unlock(&files->file_lock); | 148 | spin_unlock(&files->file_lock); |
149 | new_fdt = alloc_fdtable(nr); | 149 | new_fdt = alloc_fdtable(nr); |
150 | |||
151 | /* make sure all __fd_install() have seen resize_in_progress | ||
152 | * or have finished their rcu_read_lock_sched() section. | ||
153 | */ | ||
154 | if (atomic_read(&files->count) > 1) | ||
155 | synchronize_sched(); | ||
156 | |||
150 | spin_lock(&files->file_lock); | 157 | spin_lock(&files->file_lock); |
151 | if (!new_fdt) | 158 | if (!new_fdt) |
152 | return -ENOMEM; | 159 | return -ENOMEM; |
@@ -158,21 +165,14 @@ static int expand_fdtable(struct files_struct *files, int nr) | |||
158 | __free_fdtable(new_fdt); | 165 | __free_fdtable(new_fdt); |
159 | return -EMFILE; | 166 | return -EMFILE; |
160 | } | 167 | } |
161 | /* | ||
162 | * Check again since another task may have expanded the fd table while | ||
163 | * we dropped the lock | ||
164 | */ | ||
165 | cur_fdt = files_fdtable(files); | 168 | cur_fdt = files_fdtable(files); |
166 | if (nr >= cur_fdt->max_fds) { | 169 | BUG_ON(nr < cur_fdt->max_fds); |
167 | /* Continue as planned */ | 170 | copy_fdtable(new_fdt, cur_fdt); |
168 | copy_fdtable(new_fdt, cur_fdt); | 171 | rcu_assign_pointer(files->fdt, new_fdt); |
169 | rcu_assign_pointer(files->fdt, new_fdt); | 172 | if (cur_fdt != &files->fdtab) |
170 | if (cur_fdt != &files->fdtab) | 173 | call_rcu(&cur_fdt->rcu, free_fdtable_rcu); |
171 | call_rcu(&cur_fdt->rcu, free_fdtable_rcu); | 174 | /* coupled with smp_rmb() in __fd_install() */ |
172 | } else { | 175 | smp_wmb(); |
173 | /* Somebody else expanded, so undo our attempt */ | ||
174 | __free_fdtable(new_fdt); | ||
175 | } | ||
176 | return 1; | 176 | return 1; |
177 | } | 177 | } |
178 | 178 | ||
@@ -185,21 +185,38 @@ static int expand_fdtable(struct files_struct *files, int nr) | |||
185 | * The files->file_lock should be held on entry, and will be held on exit. | 185 | * The files->file_lock should be held on entry, and will be held on exit. |
186 | */ | 186 | */ |
187 | static int expand_files(struct files_struct *files, int nr) | 187 | static int expand_files(struct files_struct *files, int nr) |
188 | __releases(files->file_lock) | ||
189 | __acquires(files->file_lock) | ||
188 | { | 190 | { |
189 | struct fdtable *fdt; | 191 | struct fdtable *fdt; |
192 | int expanded = 0; | ||
190 | 193 | ||
194 | repeat: | ||
191 | fdt = files_fdtable(files); | 195 | fdt = files_fdtable(files); |
192 | 196 | ||
193 | /* Do we need to expand? */ | 197 | /* Do we need to expand? */ |
194 | if (nr < fdt->max_fds) | 198 | if (nr < fdt->max_fds) |
195 | return 0; | 199 | return expanded; |
196 | 200 | ||
197 | /* Can we expand? */ | 201 | /* Can we expand? */ |
198 | if (nr >= sysctl_nr_open) | 202 | if (nr >= sysctl_nr_open) |
199 | return -EMFILE; | 203 | return -EMFILE; |
200 | 204 | ||
205 | if (unlikely(files->resize_in_progress)) { | ||
206 | spin_unlock(&files->file_lock); | ||
207 | expanded = 1; | ||
208 | wait_event(files->resize_wait, !files->resize_in_progress); | ||
209 | spin_lock(&files->file_lock); | ||
210 | goto repeat; | ||
211 | } | ||
212 | |||
201 | /* All good, so we try */ | 213 | /* All good, so we try */ |
202 | return expand_fdtable(files, nr); | 214 | files->resize_in_progress = true; |
215 | expanded = expand_fdtable(files, nr); | ||
216 | files->resize_in_progress = false; | ||
217 | |||
218 | wake_up_all(&files->resize_wait); | ||
219 | return expanded; | ||
203 | } | 220 | } |
204 | 221 | ||
205 | static inline void __set_close_on_exec(int fd, struct fdtable *fdt) | 222 | static inline void __set_close_on_exec(int fd, struct fdtable *fdt) |
@@ -256,6 +273,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
256 | atomic_set(&newf->count, 1); | 273 | atomic_set(&newf->count, 1); |
257 | 274 | ||
258 | spin_lock_init(&newf->file_lock); | 275 | spin_lock_init(&newf->file_lock); |
276 | newf->resize_in_progress = false; | ||
277 | init_waitqueue_head(&newf->resize_wait); | ||
259 | newf->next_fd = 0; | 278 | newf->next_fd = 0; |
260 | new_fdt = &newf->fdtab; | 279 | new_fdt = &newf->fdtab; |
261 | new_fdt->max_fds = NR_OPEN_DEFAULT; | 280 | new_fdt->max_fds = NR_OPEN_DEFAULT; |
@@ -553,11 +572,21 @@ void __fd_install(struct files_struct *files, unsigned int fd, | |||
553 | struct file *file) | 572 | struct file *file) |
554 | { | 573 | { |
555 | struct fdtable *fdt; | 574 | struct fdtable *fdt; |
556 | spin_lock(&files->file_lock); | 575 | |
557 | fdt = files_fdtable(files); | 576 | might_sleep(); |
577 | rcu_read_lock_sched(); | ||
578 | |||
579 | while (unlikely(files->resize_in_progress)) { | ||
580 | rcu_read_unlock_sched(); | ||
581 | wait_event(files->resize_wait, !files->resize_in_progress); | ||
582 | rcu_read_lock_sched(); | ||
583 | } | ||
584 | /* coupled with smp_wmb() in expand_fdtable() */ | ||
585 | smp_rmb(); | ||
586 | fdt = rcu_dereference_sched(files->fdt); | ||
558 | BUG_ON(fdt->fd[fd] != NULL); | 587 | BUG_ON(fdt->fd[fd] != NULL); |
559 | rcu_assign_pointer(fdt->fd[fd], file); | 588 | rcu_assign_pointer(fdt->fd[fd], file); |
560 | spin_unlock(&files->file_lock); | 589 | rcu_read_unlock_sched(); |
561 | } | 590 | } |
562 | 591 | ||
563 | void fd_install(unsigned int fd, struct file *file) | 592 | void fd_install(unsigned int fd, struct file *file) |