aboutsummaryrefslogtreecommitdiffstats
path: root/fs/namespace.c
diff options
context:
space:
mode:
author: Al Viro <viro@zeniv.linux.org.uk> 2014-08-08 13:08:20 -0400
committer: Al Viro <viro@zeniv.linux.org.uk> 2014-10-09 02:38:53 -0400
commit: 9ea459e110df32e60a762f311f7939eaa879601d (patch)
tree: 3c25c8c4dbe1c21d92ab8e5a52b02c95453490d4 /fs/namespace.c
parent: b3ca406f2755c20cea1cc1169672c56dd03c266c (diff)
delayed mntput
On final mntput() we want fs shutdown to happen before return to userland; however, the only case where we want it to happen right there (i.e. where task_work_add() won't do) is an MNT_INTERNAL victim. Those have to be fully synchronous - failure halfway through module init might count on having the vfsmount killed right there. Fortunately, final mntput on MNT_INTERNAL vfsmounts happens on a shallow stack. So we handle those synchronously and do an analog of the delayed fput logic for everything else. As a result, we are guaranteed that fs shutdown will always happen on a shallow stack. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/namespace.c')
-rw-r--r-- fs/namespace.c 71
1 files changed, 53 insertions, 18 deletions
diff --git a/fs/namespace.c b/fs/namespace.c
index ef42d9bee212..044134315f93 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -23,6 +23,7 @@
23#include <linux/proc_ns.h> 23#include <linux/proc_ns.h>
24#include <linux/magic.h> 24#include <linux/magic.h>
25#include <linux/bootmem.h> 25#include <linux/bootmem.h>
26#include <linux/task_work.h>
26#include "pnode.h" 27#include "pnode.h"
27#include "internal.h" 28#include "internal.h"
28 29
@@ -957,6 +958,46 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
957 return ERR_PTR(err); 958 return ERR_PTR(err);
958} 959}
959 960
961static void cleanup_mnt(struct mount *mnt)
962{
963 /*
964 * This probably indicates that somebody messed
965 * up a mnt_want/drop_write() pair. If this
966 * happens, the filesystem was probably unable
967 * to make r/w->r/o transitions.
968 */
969 /*
970 * The locking used to deal with mnt_count decrement provides barriers,
971 * so mnt_get_writers() below is safe.
972 */
973 WARN_ON(mnt_get_writers(mnt));
974 if (unlikely(mnt->mnt_pins.first))
975 mnt_pin_kill(mnt);
976 fsnotify_vfsmount_delete(&mnt->mnt);
977 dput(mnt->mnt.mnt_root);
978 deactivate_super(mnt->mnt.mnt_sb);
979 mnt_free_id(mnt);
980 call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
981}
982
983static void __cleanup_mnt(struct rcu_head *head)
984{
985 cleanup_mnt(container_of(head, struct mount, mnt_rcu));
986}
987
988static LLIST_HEAD(delayed_mntput_list);
989static void delayed_mntput(struct work_struct *unused)
990{
991 struct llist_node *node = llist_del_all(&delayed_mntput_list);
992 struct llist_node *next;
993
994 for (; node; node = next) {
995 next = llist_next(node);
996 cleanup_mnt(llist_entry(node, struct mount, mnt_llist));
997 }
998}
999static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
1000
960static void mntput_no_expire(struct mount *mnt) 1001static void mntput_no_expire(struct mount *mnt)
961{ 1002{
962 rcu_read_lock(); 1003 rcu_read_lock();
@@ -982,24 +1023,18 @@ static void mntput_no_expire(struct mount *mnt)
982 list_del(&mnt->mnt_instance); 1023 list_del(&mnt->mnt_instance);
983 unlock_mount_hash(); 1024 unlock_mount_hash();
984 1025
985 /* 1026 if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
986 * This probably indicates that somebody messed 1027 struct task_struct *task = current;
987 * up a mnt_want/drop_write() pair. If this 1028 if (likely(!(task->flags & PF_KTHREAD))) {
988 * happens, the filesystem was probably unable 1029 init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
989 * to make r/w->r/o transitions. 1030 if (!task_work_add(task, &mnt->mnt_rcu, true))
990 */ 1031 return;
991 /* 1032 }
992 * The locking used to deal with mnt_count decrement provides barriers, 1033 if (llist_add(&mnt->mnt_llist, &delayed_mntput_list))
993 * so mnt_get_writers() below is safe. 1034 schedule_delayed_work(&delayed_mntput_work, 1);
994 */ 1035 return;
995 WARN_ON(mnt_get_writers(mnt)); 1036 }
996 if (unlikely(mnt->mnt_pins.first)) 1037 cleanup_mnt(mnt);
997 mnt_pin_kill(mnt);
998 fsnotify_vfsmount_delete(&mnt->mnt);
999 dput(mnt->mnt.mnt_root);
1000 deactivate_super(mnt->mnt.mnt_sb);
1001 mnt_free_id(mnt);
1002 call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
1003} 1038}
1004 1039
1005void mntput(struct vfsmount *mnt) 1040void mntput(struct vfsmount *mnt)