summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2015-06-30 09:54:08 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2015-07-01 02:30:09 -0400
commit8a81252b774b53e628a8a0fe18e2b8fc236d92cc (patch)
tree2176cd782e2b578da7ac1072f4430bf68e22c274 /fs
parent1af95de6f0119d5bde02d3a811a9f3a3661e954e (diff)
fs/file.c: don't acquire files->file_lock in fd_install()
Mateusz Guzik reported : Currently obtaining a new file descriptor results in locking fdtable twice - once in order to reserve a slot and second time to fill it. Holding the spinlock in __fd_install() is needed in case a resize is done, or to prevent a resize. Mateusz provided an RFC patch and a micro benchmark : http://people.redhat.com/~mguzik/pipebench.c A resize is an unlikely operation in a process lifetime, as table size is at least doubled at every resize. We can use RCU instead of the spinlock. __fd_install() must wait if a resize is in progress. The resize must block new __fd_install() callers from starting, and wait that ongoing install are finished (synchronize_sched()) resize should be attempted by a single thread to not waste resources. rcu_sched variant is used, as __fd_install() and expand_fdtable() run from process context. It gives us a ~30% speedup using pipebench on a dual Intel(R) Xeon(R) CPU E5-2696 v2 @ 2.50GHz Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: Mateusz Guzik <mguzik@redhat.com> Acked-by: Mateusz Guzik <mguzik@redhat.com> Tested-by: Mateusz Guzik <mguzik@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs')
-rw-r--r--fs/file.c67
1 files changed, 48 insertions, 19 deletions
diff --git a/fs/file.c b/fs/file.c
index 93c5f89c248b..3d2eb4c542a4 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -147,6 +147,13 @@ static int expand_fdtable(struct files_struct *files, int nr)
147 147
148 spin_unlock(&files->file_lock); 148 spin_unlock(&files->file_lock);
149 new_fdt = alloc_fdtable(nr); 149 new_fdt = alloc_fdtable(nr);
150
151 /* make sure all __fd_install() have seen resize_in_progress
152 * or have finished their rcu_read_lock_sched() section.
153 */
154 if (atomic_read(&files->count) > 1)
155 synchronize_sched();
156
150 spin_lock(&files->file_lock); 157 spin_lock(&files->file_lock);
151 if (!new_fdt) 158 if (!new_fdt)
152 return -ENOMEM; 159 return -ENOMEM;
@@ -158,21 +165,14 @@ static int expand_fdtable(struct files_struct *files, int nr)
158 __free_fdtable(new_fdt); 165 __free_fdtable(new_fdt);
159 return -EMFILE; 166 return -EMFILE;
160 } 167 }
161 /*
162 * Check again since another task may have expanded the fd table while
163 * we dropped the lock
164 */
165 cur_fdt = files_fdtable(files); 168 cur_fdt = files_fdtable(files);
166 if (nr >= cur_fdt->max_fds) { 169 BUG_ON(nr < cur_fdt->max_fds);
167 /* Continue as planned */ 170 copy_fdtable(new_fdt, cur_fdt);
168 copy_fdtable(new_fdt, cur_fdt); 171 rcu_assign_pointer(files->fdt, new_fdt);
169 rcu_assign_pointer(files->fdt, new_fdt); 172 if (cur_fdt != &files->fdtab)
170 if (cur_fdt != &files->fdtab) 173 call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
171 call_rcu(&cur_fdt->rcu, free_fdtable_rcu); 174 /* coupled with smp_rmb() in __fd_install() */
172 } else { 175 smp_wmb();
173 /* Somebody else expanded, so undo our attempt */
174 __free_fdtable(new_fdt);
175 }
176 return 1; 176 return 1;
177} 177}
178 178
@@ -185,21 +185,38 @@ static int expand_fdtable(struct files_struct *files, int nr)
185 * The files->file_lock should be held on entry, and will be held on exit. 185 * The files->file_lock should be held on entry, and will be held on exit.
186 */ 186 */
187static int expand_files(struct files_struct *files, int nr) 187static int expand_files(struct files_struct *files, int nr)
188 __releases(files->file_lock)
189 __acquires(files->file_lock)
188{ 190{
189 struct fdtable *fdt; 191 struct fdtable *fdt;
192 int expanded = 0;
190 193
194repeat:
191 fdt = files_fdtable(files); 195 fdt = files_fdtable(files);
192 196
193 /* Do we need to expand? */ 197 /* Do we need to expand? */
194 if (nr < fdt->max_fds) 198 if (nr < fdt->max_fds)
195 return 0; 199 return expanded;
196 200
197 /* Can we expand? */ 201 /* Can we expand? */
198 if (nr >= sysctl_nr_open) 202 if (nr >= sysctl_nr_open)
199 return -EMFILE; 203 return -EMFILE;
200 204
205 if (unlikely(files->resize_in_progress)) {
206 spin_unlock(&files->file_lock);
207 expanded = 1;
208 wait_event(files->resize_wait, !files->resize_in_progress);
209 spin_lock(&files->file_lock);
210 goto repeat;
211 }
212
201 /* All good, so we try */ 213 /* All good, so we try */
202 return expand_fdtable(files, nr); 214 files->resize_in_progress = true;
215 expanded = expand_fdtable(files, nr);
216 files->resize_in_progress = false;
217
218 wake_up_all(&files->resize_wait);
219 return expanded;
203} 220}
204 221
205static inline void __set_close_on_exec(int fd, struct fdtable *fdt) 222static inline void __set_close_on_exec(int fd, struct fdtable *fdt)
@@ -256,6 +273,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
256 atomic_set(&newf->count, 1); 273 atomic_set(&newf->count, 1);
257 274
258 spin_lock_init(&newf->file_lock); 275 spin_lock_init(&newf->file_lock);
276 newf->resize_in_progress = false;
277 init_waitqueue_head(&newf->resize_wait);
259 newf->next_fd = 0; 278 newf->next_fd = 0;
260 new_fdt = &newf->fdtab; 279 new_fdt = &newf->fdtab;
261 new_fdt->max_fds = NR_OPEN_DEFAULT; 280 new_fdt->max_fds = NR_OPEN_DEFAULT;
@@ -553,11 +572,21 @@ void __fd_install(struct files_struct *files, unsigned int fd,
553 struct file *file) 572 struct file *file)
554{ 573{
555 struct fdtable *fdt; 574 struct fdtable *fdt;
556 spin_lock(&files->file_lock); 575
557 fdt = files_fdtable(files); 576 might_sleep();
577 rcu_read_lock_sched();
578
579 while (unlikely(files->resize_in_progress)) {
580 rcu_read_unlock_sched();
581 wait_event(files->resize_wait, !files->resize_in_progress);
582 rcu_read_lock_sched();
583 }
584 /* coupled with smp_wmb() in expand_fdtable() */
585 smp_rmb();
586 fdt = rcu_dereference_sched(files->fdt);
558 BUG_ON(fdt->fd[fd] != NULL); 587 BUG_ON(fdt->fd[fd] != NULL);
559 rcu_assign_pointer(fdt->fd[fd], file); 588 rcu_assign_pointer(fdt->fd[fd], file);
560 spin_unlock(&files->file_lock); 589 rcu_read_unlock_sched();
561} 590}
562 591
563void fd_install(unsigned int fd, struct file *file) 592void fd_install(unsigned int fd, struct file *file)