diff options
author | Al Viro <viro@zeniv.linux.org.uk> | 2012-06-24 01:56:45 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2012-07-22 15:57:58 -0400 |
commit | 4a9d4b024a3102fc083c925c242d98ac27b1c5f6 (patch) | |
tree | 1a4e9d1e86a2c1b70f8352c13da8a7eee97bc5f1 | |
parent | a2d4c71d1559426155e5da8db3265bfa0d8d398d (diff) |
switch fput to task_work_add
... and schedule_work() for interrupt/kernel_thread callers
(and yes, now it *is* OK to call from interrupt).
We are guaranteed that __fput() will be done before we return
to userland (or exit). Note that for fput() from a kernel
thread we get an async behaviour; it's almost always OK, but
sometimes you might need to have __fput() completed before
you do anything else. There are two mechanisms for that -
a general barrier (flush_delayed_fput()) and explicit
__fput_sync(). Both should be used with care (as was the
case for fput() from kernel threads all along). See comments
in fs/file_table.c for details.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r-- | fs/file_table.c | 72 | ||||
-rw-r--r-- | include/linux/file.h | 3 | ||||
-rw-r--r-- | init/main.c | 3 |
3 files changed, 75 insertions, 3 deletions
diff --git a/fs/file_table.c b/fs/file_table.c index 9ace2781931e..b3fc4d67a26b 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -23,6 +23,8 @@ | |||
23 | #include <linux/lglock.h> | 23 | #include <linux/lglock.h> |
24 | #include <linux/percpu_counter.h> | 24 | #include <linux/percpu_counter.h> |
25 | #include <linux/percpu.h> | 25 | #include <linux/percpu.h> |
26 | #include <linux/hardirq.h> | ||
27 | #include <linux/task_work.h> | ||
26 | #include <linux/ima.h> | 28 | #include <linux/ima.h> |
27 | 29 | ||
28 | #include <linux/atomic.h> | 30 | #include <linux/atomic.h> |
@@ -251,7 +253,6 @@ static void __fput(struct file *file) | |||
251 | } | 253 | } |
252 | fops_put(file->f_op); | 254 | fops_put(file->f_op); |
253 | put_pid(file->f_owner.pid); | 255 | put_pid(file->f_owner.pid); |
254 | file_sb_list_del(file); | ||
255 | if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) | 256 | if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) |
256 | i_readcount_dec(inode); | 257 | i_readcount_dec(inode); |
257 | if (file->f_mode & FMODE_WRITE) | 258 | if (file->f_mode & FMODE_WRITE) |
@@ -263,10 +264,77 @@ static void __fput(struct file *file) | |||
263 | mntput(mnt); | 264 | mntput(mnt); |
264 | } | 265 | } |
265 | 266 | ||
267 | static DEFINE_SPINLOCK(delayed_fput_lock); | ||
268 | static LIST_HEAD(delayed_fput_list); | ||
269 | static void delayed_fput(struct work_struct *unused) | ||
270 | { | ||
271 | LIST_HEAD(head); | ||
272 | spin_lock_irq(&delayed_fput_lock); | ||
273 | list_splice_init(&delayed_fput_list, &head); | ||
274 | spin_unlock_irq(&delayed_fput_lock); | ||
275 | while (!list_empty(&head)) { | ||
276 | struct file *f = list_first_entry(&head, struct file, f_u.fu_list); | ||
277 | list_del_init(&f->f_u.fu_list); | ||
278 | __fput(f); | ||
279 | } | ||
280 | } | ||
281 | |||
282 | static void ____fput(struct callback_head *work) | ||
283 | { | ||
284 | __fput(container_of(work, struct file, f_u.fu_rcuhead)); | ||
285 | } | ||
286 | |||
287 | /* | ||
288 | * If kernel thread really needs to have the final fput() it has done | ||
289 | * to complete, call this. The only user right now is the boot - we | ||
290 | * *do* need to make sure our writes to binaries on initramfs have | ||
291 | * not left us with opened struct file waiting for __fput() - execve() | ||
292 | * won't work without that. Please, don't add more callers without | ||
293 | * very good reasons; in particular, never call that with locks | ||
294 | * held and never call that from a thread that might need to do | ||
295 | * some work on any kind of umount. | ||
296 | */ | ||
297 | void flush_delayed_fput(void) | ||
298 | { | ||
299 | delayed_fput(NULL); | ||
300 | } | ||
301 | |||
302 | static DECLARE_WORK(delayed_fput_work, delayed_fput); | ||
303 | |||
266 | void fput(struct file *file) | 304 | void fput(struct file *file) |
267 | { | 305 | { |
268 | if (atomic_long_dec_and_test(&file->f_count)) | 306 | if (atomic_long_dec_and_test(&file->f_count)) { |
307 | struct task_struct *task = current; | ||
308 | file_sb_list_del(file); | ||
309 | if (unlikely(in_interrupt() || task->flags & PF_KTHREAD)) { | ||
310 | unsigned long flags; | ||
311 | spin_lock_irqsave(&delayed_fput_lock, flags); | ||
312 | list_add(&file->f_u.fu_list, &delayed_fput_list); | ||
313 | schedule_work(&delayed_fput_work); | ||
314 | spin_unlock_irqrestore(&delayed_fput_lock, flags); | ||
315 | return; | ||
316 | } | ||
317 | init_task_work(&file->f_u.fu_rcuhead, ____fput); | ||
318 | task_work_add(task, &file->f_u.fu_rcuhead, true); | ||
319 | } | ||
320 | } | ||
321 | |||
322 | /* | ||
323 | * synchronous analog of fput(); for kernel threads that might be needed | ||
324 | * in some umount() (and thus can't use flush_delayed_fput() without | ||
325 | * risking deadlocks), need to wait for completion of __fput() and know | ||
326 | * for this specific struct file it won't involve anything that would | ||
327 | * need them. Use only if you really need it - at the very least, | ||
328 | * don't blindly convert fput() by kernel thread to that. | ||
329 | */ | ||
330 | void __fput_sync(struct file *file) | ||
331 | { | ||
332 | if (atomic_long_dec_and_test(&file->f_count)) { | ||
333 | struct task_struct *task = current; | ||
334 | file_sb_list_del(file); | ||
335 | BUG_ON(!(task->flags & PF_KTHREAD)); | ||
269 | __fput(file); | 336 | __fput(file); |
337 | } | ||
270 | } | 338 | } |
271 | 339 | ||
272 | EXPORT_SYMBOL(fput); | 340 | EXPORT_SYMBOL(fput); |
diff --git a/include/linux/file.h b/include/linux/file.h index 58bf158c53d9..a22408bac0d0 100644 --- a/include/linux/file.h +++ b/include/linux/file.h | |||
@@ -39,4 +39,7 @@ extern void put_unused_fd(unsigned int fd); | |||
39 | 39 | ||
40 | extern void fd_install(unsigned int fd, struct file *file); | 40 | extern void fd_install(unsigned int fd, struct file *file); |
41 | 41 | ||
42 | extern void flush_delayed_fput(void); | ||
43 | extern void __fput_sync(struct file *); | ||
44 | |||
42 | #endif /* __LINUX_FILE_H */ | 45 | #endif /* __LINUX_FILE_H */ |
diff --git a/init/main.c b/init/main.c index b5cc0a7c4708..3f151f6c6da7 100644 --- a/init/main.c +++ b/init/main.c | |||
@@ -68,6 +68,7 @@ | |||
68 | #include <linux/shmem_fs.h> | 68 | #include <linux/shmem_fs.h> |
69 | #include <linux/slab.h> | 69 | #include <linux/slab.h> |
70 | #include <linux/perf_event.h> | 70 | #include <linux/perf_event.h> |
71 | #include <linux/file.h> | ||
71 | 72 | ||
72 | #include <asm/io.h> | 73 | #include <asm/io.h> |
73 | #include <asm/bugs.h> | 74 | #include <asm/bugs.h> |
@@ -804,8 +805,8 @@ static noinline int init_post(void) | |||
804 | system_state = SYSTEM_RUNNING; | 805 | system_state = SYSTEM_RUNNING; |
805 | numa_default_policy(); | 806 | numa_default_policy(); |
806 | 807 | ||
807 | |||
808 | current->signal->flags |= SIGNAL_UNKILLABLE; | 808 | current->signal->flags |= SIGNAL_UNKILLABLE; |
809 | flush_delayed_fput(); | ||
809 | 810 | ||
810 | if (ramdisk_execute_command) { | 811 | if (ramdisk_execute_command) { |
811 | run_init_process(ramdisk_execute_command); | 812 | run_init_process(ramdisk_execute_command); |