aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAl Viro <viro@zeniv.linux.org.uk>2012-06-24 01:56:45 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2012-07-22 15:57:58 -0400
commit4a9d4b024a3102fc083c925c242d98ac27b1c5f6 (patch)
tree1a4e9d1e86a2c1b70f8352c13da8a7eee97bc5f1
parenta2d4c71d1559426155e5da8db3265bfa0d8d398d (diff)
switch fput to task_work_add
... and schedule_work() for interrupt/kernel_thread callers (and yes, now it *is* OK to call from interrupt). We are guaranteed that __fput() will be done before we return to userland (or exit). Note that for fput() from a kernel thread we get an async behaviour; it's almost always OK, but sometimes you might need to have __fput() completed before you do anything else. There are two mechanisms for that - a general barrier (flush_delayed_fput()) and explicit __fput_sync(). Both should be used with care (as was the case for fput() from kernel threads all along). See comments in fs/file_table.c for details. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r--fs/file_table.c72
-rw-r--r--include/linux/file.h3
-rw-r--r--init/main.c3
3 files changed, 75 insertions, 3 deletions
diff --git a/fs/file_table.c b/fs/file_table.c
index 9ace2781931e..b3fc4d67a26b 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -23,6 +23,8 @@
23#include <linux/lglock.h> 23#include <linux/lglock.h>
24#include <linux/percpu_counter.h> 24#include <linux/percpu_counter.h>
25#include <linux/percpu.h> 25#include <linux/percpu.h>
26#include <linux/hardirq.h>
27#include <linux/task_work.h>
26#include <linux/ima.h> 28#include <linux/ima.h>
27 29
28#include <linux/atomic.h> 30#include <linux/atomic.h>
@@ -251,7 +253,6 @@ static void __fput(struct file *file)
251 } 253 }
252 fops_put(file->f_op); 254 fops_put(file->f_op);
253 put_pid(file->f_owner.pid); 255 put_pid(file->f_owner.pid);
254 file_sb_list_del(file);
255 if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) 256 if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
256 i_readcount_dec(inode); 257 i_readcount_dec(inode);
257 if (file->f_mode & FMODE_WRITE) 258 if (file->f_mode & FMODE_WRITE)
@@ -263,10 +264,77 @@ static void __fput(struct file *file)
263 mntput(mnt); 264 mntput(mnt);
264} 265}
265 266
267static DEFINE_SPINLOCK(delayed_fput_lock);
268static LIST_HEAD(delayed_fput_list);
269static void delayed_fput(struct work_struct *unused)
270{
271 LIST_HEAD(head);
272 spin_lock_irq(&delayed_fput_lock);
273 list_splice_init(&delayed_fput_list, &head);
274 spin_unlock_irq(&delayed_fput_lock);
275 while (!list_empty(&head)) {
276 struct file *f = list_first_entry(&head, struct file, f_u.fu_list);
277 list_del_init(&f->f_u.fu_list);
278 __fput(f);
279 }
280}
281
282static void ____fput(struct callback_head *work)
283{
284 __fput(container_of(work, struct file, f_u.fu_rcuhead));
285}
286
287/*
288 * If a kernel thread really needs to have the final fput() it has done
289 * to complete, call this. The only user right now is the boot - we
290 * *do* need to make sure our writes to binaries on initramfs have
291 * not left us with opened struct file waiting for __fput() - execve()
292 * won't work without that. Please, don't add more callers without
293 * very good reasons; in particular, never call that with locks
294 * held and never call that from a thread that might need to do
295 * some work on any kind of umount.
296 */
297void flush_delayed_fput(void)
298{
299 delayed_fput(NULL);
300}
301
302static DECLARE_WORK(delayed_fput_work, delayed_fput);
303
266void fput(struct file *file) 304void fput(struct file *file)
267{ 305{
268 if (atomic_long_dec_and_test(&file->f_count)) 306 if (atomic_long_dec_and_test(&file->f_count)) {
307 struct task_struct *task = current;
308 file_sb_list_del(file);
309 if (unlikely(in_interrupt() || task->flags & PF_KTHREAD)) {
310 unsigned long flags;
311 spin_lock_irqsave(&delayed_fput_lock, flags);
312 list_add(&file->f_u.fu_list, &delayed_fput_list);
313 schedule_work(&delayed_fput_work);
314 spin_unlock_irqrestore(&delayed_fput_lock, flags);
315 return;
316 }
317 init_task_work(&file->f_u.fu_rcuhead, ____fput);
318 task_work_add(task, &file->f_u.fu_rcuhead, true);
319 }
320}
321
322/*
323 * Synchronous analog of fput(); for kernel threads that might be needed
324 * in some umount() (and thus can't use flush_delayed_fput() without
325 * risking deadlocks), need to wait for completion of __fput() and know
326 * that for this specific struct file it won't involve anything that would
327 * need them. Use only if you really need it - at the very least,
328 * don't blindly convert fput() by a kernel thread to that.
329 */
330void __fput_sync(struct file *file)
331{
332 if (atomic_long_dec_and_test(&file->f_count)) {
333 struct task_struct *task = current;
334 file_sb_list_del(file);
335 BUG_ON(!(task->flags & PF_KTHREAD));
269 __fput(file); 336 __fput(file);
337 }
270} 338}
271 339
272EXPORT_SYMBOL(fput); 340EXPORT_SYMBOL(fput);
diff --git a/include/linux/file.h b/include/linux/file.h
index 58bf158c53d9..a22408bac0d0 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -39,4 +39,7 @@ extern void put_unused_fd(unsigned int fd);
39 39
40extern void fd_install(unsigned int fd, struct file *file); 40extern void fd_install(unsigned int fd, struct file *file);
41 41
42extern void flush_delayed_fput(void);
43extern void __fput_sync(struct file *);
44
42#endif /* __LINUX_FILE_H */ 45#endif /* __LINUX_FILE_H */
diff --git a/init/main.c b/init/main.c
index b5cc0a7c4708..3f151f6c6da7 100644
--- a/init/main.c
+++ b/init/main.c
@@ -68,6 +68,7 @@
68#include <linux/shmem_fs.h> 68#include <linux/shmem_fs.h>
69#include <linux/slab.h> 69#include <linux/slab.h>
70#include <linux/perf_event.h> 70#include <linux/perf_event.h>
71#include <linux/file.h>
71 72
72#include <asm/io.h> 73#include <asm/io.h>
73#include <asm/bugs.h> 74#include <asm/bugs.h>
@@ -804,8 +805,8 @@ static noinline int init_post(void)
804 system_state = SYSTEM_RUNNING; 805 system_state = SYSTEM_RUNNING;
805 numa_default_policy(); 806 numa_default_policy();
806 807
807
808 current->signal->flags |= SIGNAL_UNKILLABLE; 808 current->signal->flags |= SIGNAL_UNKILLABLE;
809 flush_delayed_fput();
809 810
810 if (ramdisk_execute_command) { 811 if (ramdisk_execute_command) {
811 run_init_process(ramdisk_execute_command); 812 run_init_process(ramdisk_execute_command);