diff options
author | Ram Pai <linuxram@us.ibm.com> | 2005-11-07 17:19:50 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-11-07 21:18:11 -0500 |
commit | b90fa9ae8f51f098ee480bbaabd6867992e9fc58 (patch) | |
tree | 2ad583b3a7399face7a78730b001928413c8269e | |
parent | 03e06e68ff76294e53ffa898cb844d2a997b043e (diff) |
[PATCH] shared mount handling: bind and rbind
Implement handling of MS_BIND in the presence of shared mounts (see
Documentation/sharedsubtree.txt at the end of the patch series for a detailed
description).
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | fs/namespace.c | 126 | ||||
-rw-r--r-- | fs/pnode.c | 81 | ||||
-rw-r--r-- | fs/pnode.h | 14 | ||||
-rw-r--r-- | include/linux/fs.h | 5 |
4 files changed, 204 insertions, 22 deletions
diff --git a/fs/namespace.c b/fs/namespace.c index f6861a5487df..9f5a084b239f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -28,8 +28,6 @@ | |||
28 | 28 | ||
29 | extern int __init init_rootfs(void); | 29 | extern int __init init_rootfs(void); |
30 | 30 | ||
31 | #define CL_EXPIRE 0x01 | ||
32 | |||
33 | #ifdef CONFIG_SYSFS | 31 | #ifdef CONFIG_SYSFS |
34 | extern int __init sysfs_init(void); | 32 | extern int __init sysfs_init(void); |
35 | #else | 33 | #else |
@@ -145,13 +143,43 @@ static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd) | |||
145 | old_nd->dentry->d_mounted--; | 143 | old_nd->dentry->d_mounted--; |
146 | } | 144 | } |
147 | 145 | ||
146 | void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, | ||
147 | struct vfsmount *child_mnt) | ||
148 | { | ||
149 | child_mnt->mnt_parent = mntget(mnt); | ||
150 | child_mnt->mnt_mountpoint = dget(dentry); | ||
151 | dentry->d_mounted++; | ||
152 | } | ||
153 | |||
148 | static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd) | 154 | static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd) |
149 | { | 155 | { |
150 | mnt->mnt_parent = mntget(nd->mnt); | 156 | mnt_set_mountpoint(nd->mnt, nd->dentry, mnt); |
151 | mnt->mnt_mountpoint = dget(nd->dentry); | 157 | list_add_tail(&mnt->mnt_hash, mount_hashtable + |
152 | list_add(&mnt->mnt_hash, mount_hashtable + hash(nd->mnt, nd->dentry)); | 158 | hash(nd->mnt, nd->dentry)); |
153 | list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts); | 159 | list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts); |
154 | nd->dentry->d_mounted++; | 160 | } |
161 | |||
162 | /* | ||
163 | * the caller must hold vfsmount_lock | ||
164 | */ | ||
165 | static void commit_tree(struct vfsmount *mnt) | ||
166 | { | ||
167 | struct vfsmount *parent = mnt->mnt_parent; | ||
168 | struct vfsmount *m; | ||
169 | LIST_HEAD(head); | ||
170 | struct namespace *n = parent->mnt_namespace; | ||
171 | |||
172 | BUG_ON(parent == mnt); | ||
173 | |||
174 | list_add_tail(&head, &mnt->mnt_list); | ||
175 | list_for_each_entry(m, &head, mnt_list) | ||
176 | m->mnt_namespace = n; | ||
177 | list_splice(&head, n->list.prev); | ||
178 | |||
179 | list_add_tail(&mnt->mnt_hash, mount_hashtable + | ||
180 | hash(parent, mnt->mnt_mountpoint)); | ||
181 | list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); | ||
182 | touch_namespace(n); | ||
155 | } | 183 | } |
156 | 184 | ||
157 | static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root) | 185 | static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root) |
@@ -183,7 +211,11 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, | |||
183 | mnt->mnt_root = dget(root); | 211 | mnt->mnt_root = dget(root); |
184 | mnt->mnt_mountpoint = mnt->mnt_root; | 212 | mnt->mnt_mountpoint = mnt->mnt_root; |
185 | mnt->mnt_parent = mnt; | 213 | mnt->mnt_parent = mnt; |
186 | mnt->mnt_namespace = current->namespace; | 214 | |
215 | if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old)) | ||
216 | list_add(&mnt->mnt_share, &old->mnt_share); | ||
217 | if (flag & CL_MAKE_SHARED) | ||
218 | set_mnt_shared(mnt); | ||
187 | 219 | ||
188 | /* stick the duplicate mount on the same expiry list | 220 | /* stick the duplicate mount on the same expiry list |
189 | * as the original if that was on one */ | 221 | * as the original if that was on one */ |
@@ -379,7 +411,7 @@ int may_umount(struct vfsmount *mnt) | |||
379 | 411 | ||
380 | EXPORT_SYMBOL(may_umount); | 412 | EXPORT_SYMBOL(may_umount); |
381 | 413 | ||
382 | static void release_mounts(struct list_head *head) | 414 | void release_mounts(struct list_head *head) |
383 | { | 415 | { |
384 | struct vfsmount *mnt; | 416 | struct vfsmount *mnt; |
385 | while(!list_empty(head)) { | 417 | while(!list_empty(head)) { |
@@ -401,7 +433,7 @@ static void release_mounts(struct list_head *head) | |||
401 | } | 433 | } |
402 | } | 434 | } |
403 | 435 | ||
404 | static void umount_tree(struct vfsmount *mnt, struct list_head *kill) | 436 | void umount_tree(struct vfsmount *mnt, struct list_head *kill) |
405 | { | 437 | { |
406 | struct vfsmount *p; | 438 | struct vfsmount *p; |
407 | 439 | ||
@@ -581,7 +613,7 @@ static int lives_below_in_same_fs(struct dentry *d, struct dentry *dentry) | |||
581 | } | 613 | } |
582 | } | 614 | } |
583 | 615 | ||
584 | static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, | 616 | struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, |
585 | int flag) | 617 | int flag) |
586 | { | 618 | { |
587 | struct vfsmount *res, *p, *q, *r, *s; | 619 | struct vfsmount *res, *p, *q, *r, *s; |
@@ -626,6 +658,67 @@ Enomem: | |||
626 | return NULL; | 658 | return NULL; |
627 | } | 659 | } |
628 | 660 | ||
661 | /* | ||
662 | * @source_mnt : mount tree to be attached | ||
663 | * @nd : place the mount tree @source_mnt is attached | ||
664 | * | ||
665 | * NOTE: in the table below explains the semantics when a source mount | ||
666 | * of a given type is attached to a destination mount of a given type. | ||
667 | * --------------------------------------------- | ||
668 | * | BIND MOUNT OPERATION | | ||
669 | * |******************************************** | ||
670 | * | source-->| shared | private | | ||
671 | * | dest | | | | ||
672 | * | | | | | | ||
673 | * | v | | | | ||
674 | * |******************************************** | ||
675 | * | shared | shared (++) | shared (+) | | ||
676 | * | | | | | ||
677 | * |non-shared| shared (+) | private | | ||
678 | * ********************************************* | ||
679 | * A bind operation clones the source mount and mounts the clone on the | ||
680 | * destination mount. | ||
681 | * | ||
682 | * (++) the cloned mount is propagated to all the mounts in the propagation | ||
683 | * tree of the destination mount and the cloned mount is added to | ||
684 | * the peer group of the source mount. | ||
685 | * (+) the cloned mount is created under the destination mount and is marked | ||
686 | * as shared. The cloned mount is added to the peer group of the source | ||
687 | * mount. | ||
688 | * | ||
689 | * if the source mount is a tree, the operations explained above is | ||
690 | * applied to each mount in the tree. | ||
691 | * Must be called without spinlocks held, since this function can sleep | ||
692 | * in allocations. | ||
693 | */ | ||
694 | static int attach_recursive_mnt(struct vfsmount *source_mnt, | ||
695 | struct nameidata *nd) | ||
696 | { | ||
697 | LIST_HEAD(tree_list); | ||
698 | struct vfsmount *dest_mnt = nd->mnt; | ||
699 | struct dentry *dest_dentry = nd->dentry; | ||
700 | struct vfsmount *child, *p; | ||
701 | |||
702 | if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list)) | ||
703 | return -EINVAL; | ||
704 | |||
705 | if (IS_MNT_SHARED(dest_mnt)) { | ||
706 | for (p = source_mnt; p; p = next_mnt(p, source_mnt)) | ||
707 | set_mnt_shared(p); | ||
708 | } | ||
709 | |||
710 | spin_lock(&vfsmount_lock); | ||
711 | mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); | ||
712 | commit_tree(source_mnt); | ||
713 | |||
714 | list_for_each_entry_safe(child, p, &tree_list, mnt_hash) { | ||
715 | list_del_init(&child->mnt_hash); | ||
716 | commit_tree(child); | ||
717 | } | ||
718 | spin_unlock(&vfsmount_lock); | ||
719 | return 0; | ||
720 | } | ||
721 | |||
629 | static int graft_tree(struct vfsmount *mnt, struct nameidata *nd) | 722 | static int graft_tree(struct vfsmount *mnt, struct nameidata *nd) |
630 | { | 723 | { |
631 | int err; | 724 | int err; |
@@ -646,17 +739,8 @@ static int graft_tree(struct vfsmount *mnt, struct nameidata *nd) | |||
646 | goto out_unlock; | 739 | goto out_unlock; |
647 | 740 | ||
648 | err = -ENOENT; | 741 | err = -ENOENT; |
649 | spin_lock(&vfsmount_lock); | 742 | if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry)) |
650 | if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry)) { | 743 | err = attach_recursive_mnt(mnt, nd); |
651 | struct list_head head; | ||
652 | |||
653 | attach_mnt(mnt, nd); | ||
654 | list_add_tail(&head, &mnt->mnt_list); | ||
655 | list_splice(&head, current->namespace->list.prev); | ||
656 | err = 0; | ||
657 | touch_namespace(current->namespace); | ||
658 | } | ||
659 | spin_unlock(&vfsmount_lock); | ||
660 | out_unlock: | 744 | out_unlock: |
661 | up(&nd->dentry->d_inode->i_sem); | 745 | up(&nd->dentry->d_inode->i_sem); |
662 | if (!err) | 746 | if (!err) |
diff --git a/fs/pnode.c b/fs/pnode.c index 1e22165ea41f..2d572b88e6f6 100644 --- a/fs/pnode.c +++ b/fs/pnode.c | |||
@@ -20,9 +20,88 @@ static inline struct vfsmount *next_peer(struct vfsmount *p) | |||
20 | void change_mnt_propagation(struct vfsmount *mnt, int type) | 20 | void change_mnt_propagation(struct vfsmount *mnt, int type) |
21 | { | 21 | { |
22 | if (type == MS_SHARED) { | 22 | if (type == MS_SHARED) { |
23 | mnt->mnt_flags |= MNT_SHARED; | 23 | set_mnt_shared(mnt); |
24 | } else { | 24 | } else { |
25 | list_del_init(&mnt->mnt_share); | 25 | list_del_init(&mnt->mnt_share); |
26 | mnt->mnt_flags &= ~MNT_PNODE_MASK; | 26 | mnt->mnt_flags &= ~MNT_PNODE_MASK; |
27 | } | 27 | } |
28 | } | 28 | } |
29 | |||
30 | /* | ||
31 | * get the next mount in the propagation tree. | ||
32 | * @m: the mount seen last | ||
33 | * @origin: the original mount from where the tree walk initiated | ||
34 | */ | ||
35 | static struct vfsmount *propagation_next(struct vfsmount *m, | ||
36 | struct vfsmount *origin) | ||
37 | { | ||
38 | m = next_peer(m); | ||
39 | if (m == origin) | ||
40 | return NULL; | ||
41 | return m; | ||
42 | } | ||
43 | |||
44 | /* | ||
45 | * mount 'source_mnt' under the destination 'dest_mnt' at | ||
46 | * dentry 'dest_dentry'. And propagate that mount to | ||
47 | * all the peer and slave mounts of 'dest_mnt'. | ||
48 | * Link all the new mounts into a propagation tree headed at | ||
49 | * source_mnt. Also link all the new mounts using ->mnt_list | ||
50 | * headed at source_mnt's ->mnt_list | ||
51 | * | ||
52 | * @dest_mnt: destination mount. | ||
53 | * @dest_dentry: destination dentry. | ||
54 | * @source_mnt: source mount. | ||
55 | * @tree_list : list of heads of trees to be attached. | ||
56 | */ | ||
57 | int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry, | ||
58 | struct vfsmount *source_mnt, struct list_head *tree_list) | ||
59 | { | ||
60 | struct vfsmount *m, *child; | ||
61 | int ret = 0; | ||
62 | struct vfsmount *prev_dest_mnt = dest_mnt; | ||
63 | struct vfsmount *prev_src_mnt = source_mnt; | ||
64 | LIST_HEAD(tmp_list); | ||
65 | LIST_HEAD(umount_list); | ||
66 | |||
67 | for (m = propagation_next(dest_mnt, dest_mnt); m; | ||
68 | m = propagation_next(m, dest_mnt)) { | ||
69 | int type = CL_PROPAGATION; | ||
70 | |||
71 | if (IS_MNT_NEW(m)) | ||
72 | continue; | ||
73 | |||
74 | if (IS_MNT_SHARED(m)) | ||
75 | type |= CL_MAKE_SHARED; | ||
76 | |||
77 | if (!(child = copy_tree(source_mnt, source_mnt->mnt_root, | ||
78 | type))) { | ||
79 | ret = -ENOMEM; | ||
80 | list_splice(tree_list, tmp_list.prev); | ||
81 | goto out; | ||
82 | } | ||
83 | |||
84 | if (is_subdir(dest_dentry, m->mnt_root)) { | ||
85 | mnt_set_mountpoint(m, dest_dentry, child); | ||
86 | list_add_tail(&child->mnt_hash, tree_list); | ||
87 | } else { | ||
88 | /* | ||
89 | * This can happen if the parent mount was bind mounted | ||
90 | * on some subdirectory of a shared/slave mount. | ||
91 | */ | ||
92 | list_add_tail(&child->mnt_hash, &tmp_list); | ||
93 | } | ||
94 | prev_dest_mnt = m; | ||
95 | prev_src_mnt = child; | ||
96 | } | ||
97 | out: | ||
98 | spin_lock(&vfsmount_lock); | ||
99 | while (!list_empty(&tmp_list)) { | ||
100 | child = list_entry(tmp_list.next, struct vfsmount, mnt_hash); | ||
101 | list_del_init(&child->mnt_hash); | ||
102 | umount_tree(child, &umount_list); | ||
103 | } | ||
104 | spin_unlock(&vfsmount_lock); | ||
105 | release_mounts(&umount_list); | ||
106 | return ret; | ||
107 | } | ||
diff --git a/fs/pnode.h b/fs/pnode.h index ab1bdaee4e08..c62c72fad212 100644 --- a/fs/pnode.h +++ b/fs/pnode.h | |||
@@ -12,7 +12,21 @@ | |||
12 | #include <linux/mount.h> | 12 | #include <linux/mount.h> |
13 | 13 | ||
14 | #define IS_MNT_SHARED(mnt) (mnt->mnt_flags & MNT_SHARED) | 14 | #define IS_MNT_SHARED(mnt) (mnt->mnt_flags & MNT_SHARED) |
15 | #define IS_MNT_NEW(mnt) (!mnt->mnt_namespace) | ||
15 | #define CLEAR_MNT_SHARED(mnt) (mnt->mnt_flags &= ~MNT_SHARED) | 16 | #define CLEAR_MNT_SHARED(mnt) (mnt->mnt_flags &= ~MNT_SHARED) |
16 | 17 | ||
18 | #define CL_EXPIRE 0x01 | ||
19 | #define CL_COPY_ALL 0x04 | ||
20 | #define CL_MAKE_SHARED 0x08 | ||
21 | #define CL_PROPAGATION 0x10 | ||
22 | |||
23 | static inline void set_mnt_shared(struct vfsmount *mnt) | ||
24 | { | ||
25 | mnt->mnt_flags &= ~MNT_PNODE_MASK; | ||
26 | mnt->mnt_flags |= MNT_SHARED; | ||
27 | } | ||
28 | |||
17 | void change_mnt_propagation(struct vfsmount *, int); | 29 | void change_mnt_propagation(struct vfsmount *, int); |
30 | int propagate_mnt(struct vfsmount *, struct dentry *, struct vfsmount *, | ||
31 | struct list_head *); | ||
18 | #endif /* _LINUX_PNODE_H */ | 32 | #endif /* _LINUX_PNODE_H */ |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 551fba303cf8..5e188b773e9c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -1251,7 +1251,12 @@ extern int unregister_filesystem(struct file_system_type *); | |||
1251 | extern struct vfsmount *kern_mount(struct file_system_type *); | 1251 | extern struct vfsmount *kern_mount(struct file_system_type *); |
1252 | extern int may_umount_tree(struct vfsmount *); | 1252 | extern int may_umount_tree(struct vfsmount *); |
1253 | extern int may_umount(struct vfsmount *); | 1253 | extern int may_umount(struct vfsmount *); |
1254 | extern void umount_tree(struct vfsmount *, struct list_head *); | ||
1255 | extern void release_mounts(struct list_head *); | ||
1254 | extern long do_mount(char *, char *, char *, unsigned long, void *); | 1256 | extern long do_mount(char *, char *, char *, unsigned long, void *); |
1257 | extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); | ||
1258 | extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, | ||
1259 | struct vfsmount *); | ||
1255 | 1260 | ||
1256 | extern int vfs_statfs(struct super_block *, struct kstatfs *); | 1261 | extern int vfs_statfs(struct super_block *, struct kstatfs *); |
1257 | 1262 | ||