diff options
Diffstat (limited to 'fs/file.c')
-rw-r--r-- | fs/file.c | 77 |
1 files changed, 56 insertions, 21 deletions
@@ -147,6 +147,13 @@ static int expand_fdtable(struct files_struct *files, int nr) | |||
147 | 147 | ||
148 | spin_unlock(&files->file_lock); | 148 | spin_unlock(&files->file_lock); |
149 | new_fdt = alloc_fdtable(nr); | 149 | new_fdt = alloc_fdtable(nr); |
150 | |||
151 | /* make sure all __fd_install() have seen resize_in_progress | ||
152 | * or have finished their rcu_read_lock_sched() section. | ||
153 | */ | ||
154 | if (atomic_read(&files->count) > 1) | ||
155 | synchronize_sched(); | ||
156 | |||
150 | spin_lock(&files->file_lock); | 157 | spin_lock(&files->file_lock); |
151 | if (!new_fdt) | 158 | if (!new_fdt) |
152 | return -ENOMEM; | 159 | return -ENOMEM; |
@@ -158,21 +165,14 @@ static int expand_fdtable(struct files_struct *files, int nr) | |||
158 | __free_fdtable(new_fdt); | 165 | __free_fdtable(new_fdt); |
159 | return -EMFILE; | 166 | return -EMFILE; |
160 | } | 167 | } |
161 | /* | ||
162 | * Check again since another task may have expanded the fd table while | ||
163 | * we dropped the lock | ||
164 | */ | ||
165 | cur_fdt = files_fdtable(files); | 168 | cur_fdt = files_fdtable(files); |
166 | if (nr >= cur_fdt->max_fds) { | 169 | BUG_ON(nr < cur_fdt->max_fds); |
167 | /* Continue as planned */ | 170 | copy_fdtable(new_fdt, cur_fdt); |
168 | copy_fdtable(new_fdt, cur_fdt); | 171 | rcu_assign_pointer(files->fdt, new_fdt); |
169 | rcu_assign_pointer(files->fdt, new_fdt); | 172 | if (cur_fdt != &files->fdtab) |
170 | if (cur_fdt != &files->fdtab) | 173 | call_rcu(&cur_fdt->rcu, free_fdtable_rcu); |
171 | call_rcu(&cur_fdt->rcu, free_fdtable_rcu); | 174 | /* coupled with smp_rmb() in __fd_install() */ |
172 | } else { | 175 | smp_wmb(); |
173 | /* Somebody else expanded, so undo our attempt */ | ||
174 | __free_fdtable(new_fdt); | ||
175 | } | ||
176 | return 1; | 176 | return 1; |
177 | } | 177 | } |
178 | 178 | ||
@@ -185,21 +185,38 @@ static int expand_fdtable(struct files_struct *files, int nr) | |||
185 | * The files->file_lock should be held on entry, and will be held on exit. | 185 | * The files->file_lock should be held on entry, and will be held on exit. |
186 | */ | 186 | */ |
187 | static int expand_files(struct files_struct *files, int nr) | 187 | static int expand_files(struct files_struct *files, int nr) |
188 | __releases(files->file_lock) | ||
189 | __acquires(files->file_lock) | ||
188 | { | 190 | { |
189 | struct fdtable *fdt; | 191 | struct fdtable *fdt; |
192 | int expanded = 0; | ||
190 | 193 | ||
194 | repeat: | ||
191 | fdt = files_fdtable(files); | 195 | fdt = files_fdtable(files); |
192 | 196 | ||
193 | /* Do we need to expand? */ | 197 | /* Do we need to expand? */ |
194 | if (nr < fdt->max_fds) | 198 | if (nr < fdt->max_fds) |
195 | return 0; | 199 | return expanded; |
196 | 200 | ||
197 | /* Can we expand? */ | 201 | /* Can we expand? */ |
198 | if (nr >= sysctl_nr_open) | 202 | if (nr >= sysctl_nr_open) |
199 | return -EMFILE; | 203 | return -EMFILE; |
200 | 204 | ||
205 | if (unlikely(files->resize_in_progress)) { | ||
206 | spin_unlock(&files->file_lock); | ||
207 | expanded = 1; | ||
208 | wait_event(files->resize_wait, !files->resize_in_progress); | ||
209 | spin_lock(&files->file_lock); | ||
210 | goto repeat; | ||
211 | } | ||
212 | |||
201 | /* All good, so we try */ | 213 | /* All good, so we try */ |
202 | return expand_fdtable(files, nr); | 214 | files->resize_in_progress = true; |
215 | expanded = expand_fdtable(files, nr); | ||
216 | files->resize_in_progress = false; | ||
217 | |||
218 | wake_up_all(&files->resize_wait); | ||
219 | return expanded; | ||
203 | } | 220 | } |
204 | 221 | ||
205 | static inline void __set_close_on_exec(int fd, struct fdtable *fdt) | 222 | static inline void __set_close_on_exec(int fd, struct fdtable *fdt) |
@@ -256,6 +273,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
256 | atomic_set(&newf->count, 1); | 273 | atomic_set(&newf->count, 1); |
257 | 274 | ||
258 | spin_lock_init(&newf->file_lock); | 275 | spin_lock_init(&newf->file_lock); |
276 | newf->resize_in_progress = false; | ||
277 | init_waitqueue_head(&newf->resize_wait); | ||
259 | newf->next_fd = 0; | 278 | newf->next_fd = 0; |
260 | new_fdt = &newf->fdtab; | 279 | new_fdt = &newf->fdtab; |
261 | new_fdt->max_fds = NR_OPEN_DEFAULT; | 280 | new_fdt->max_fds = NR_OPEN_DEFAULT; |
@@ -553,11 +572,21 @@ void __fd_install(struct files_struct *files, unsigned int fd, | |||
553 | struct file *file) | 572 | struct file *file) |
554 | { | 573 | { |
555 | struct fdtable *fdt; | 574 | struct fdtable *fdt; |
556 | spin_lock(&files->file_lock); | 575 | |
557 | fdt = files_fdtable(files); | 576 | might_sleep(); |
577 | rcu_read_lock_sched(); | ||
578 | |||
579 | while (unlikely(files->resize_in_progress)) { | ||
580 | rcu_read_unlock_sched(); | ||
581 | wait_event(files->resize_wait, !files->resize_in_progress); | ||
582 | rcu_read_lock_sched(); | ||
583 | } | ||
584 | /* coupled with smp_wmb() in expand_fdtable() */ | ||
585 | smp_rmb(); | ||
586 | fdt = rcu_dereference_sched(files->fdt); | ||
558 | BUG_ON(fdt->fd[fd] != NULL); | 587 | BUG_ON(fdt->fd[fd] != NULL); |
559 | rcu_assign_pointer(fdt->fd[fd], file); | 588 | rcu_assign_pointer(fdt->fd[fd], file); |
560 | spin_unlock(&files->file_lock); | 589 | rcu_read_unlock_sched(); |
561 | } | 590 | } |
562 | 591 | ||
563 | void fd_install(unsigned int fd, struct file *file) | 592 | void fd_install(unsigned int fd, struct file *file) |
@@ -635,11 +664,17 @@ static struct file *__fget(unsigned int fd, fmode_t mask) | |||
635 | struct file *file; | 664 | struct file *file; |
636 | 665 | ||
637 | rcu_read_lock(); | 666 | rcu_read_lock(); |
667 | loop: | ||
638 | file = fcheck_files(files, fd); | 668 | file = fcheck_files(files, fd); |
639 | if (file) { | 669 | if (file) { |
640 | /* File object ref couldn't be taken */ | 670 | /* File object ref couldn't be taken. |
641 | if ((file->f_mode & mask) || !get_file_rcu(file)) | 671 | * dup2() atomicity guarantee is the reason |
672 | * we loop to catch the new file (or NULL pointer) | ||
673 | */ | ||
674 | if (file->f_mode & mask) | ||
642 | file = NULL; | 675 | file = NULL; |
676 | else if (!get_file_rcu(file)) | ||
677 | goto loop; | ||
643 | } | 678 | } |
644 | rcu_read_unlock(); | 679 | rcu_read_unlock(); |
645 | 680 | ||