aboutsummaryrefslogtreecommitdiffstats
path: root/fs/namespace.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namespace.c')
-rw-r--r--fs/namespace.c217
1 files changed, 141 insertions, 76 deletions
diff --git a/fs/namespace.c b/fs/namespace.c
index 88058de59c7c..8a415c9c5e55 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -11,6 +11,8 @@
11#include <linux/syscalls.h> 11#include <linux/syscalls.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/spinlock.h>
15#include <linux/percpu.h>
14#include <linux/smp_lock.h> 16#include <linux/smp_lock.h>
15#include <linux/init.h> 17#include <linux/init.h>
16#include <linux/kernel.h> 18#include <linux/kernel.h>
@@ -29,6 +31,7 @@
29#include <linux/log2.h> 31#include <linux/log2.h>
30#include <linux/idr.h> 32#include <linux/idr.h>
31#include <linux/fs_struct.h> 33#include <linux/fs_struct.h>
34#include <linux/fsnotify.h>
32#include <asm/uaccess.h> 35#include <asm/uaccess.h>
33#include <asm/unistd.h> 36#include <asm/unistd.h>
34#include "pnode.h" 37#include "pnode.h"
@@ -37,12 +40,10 @@
37#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head)) 40#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head))
38#define HASH_SIZE (1UL << HASH_SHIFT) 41#define HASH_SIZE (1UL << HASH_SHIFT)
39 42
40/* spinlock for vfsmount related operations, inplace of dcache_lock */
41__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
42
43static int event; 43static int event;
44static DEFINE_IDA(mnt_id_ida); 44static DEFINE_IDA(mnt_id_ida);
45static DEFINE_IDA(mnt_group_ida); 45static DEFINE_IDA(mnt_group_ida);
46static DEFINE_SPINLOCK(mnt_id_lock);
46static int mnt_id_start = 0; 47static int mnt_id_start = 0;
47static int mnt_group_start = 1; 48static int mnt_group_start = 1;
48 49
@@ -54,6 +55,16 @@ static struct rw_semaphore namespace_sem;
54struct kobject *fs_kobj; 55struct kobject *fs_kobj;
55EXPORT_SYMBOL_GPL(fs_kobj); 56EXPORT_SYMBOL_GPL(fs_kobj);
56 57
58/*
59 * vfsmount lock may be taken for read to prevent changes to the
60 * vfsmount hash, ie. during mountpoint lookups or walking back
61 * up the tree.
62 *
63 * It should be taken for write in all cases where the vfsmount
64 * tree or hash is modified or when a vfsmount structure is modified.
65 */
66DEFINE_BRLOCK(vfsmount_lock);
67
57static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) 68static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
58{ 69{
59 unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); 70 unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
@@ -64,18 +75,21 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
64 75
65#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16) 76#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)
66 77
67/* allocation is serialized by namespace_sem */ 78/*
79 * allocation is serialized by namespace_sem, but we need the spinlock to
80 * serialize with freeing.
81 */
68static int mnt_alloc_id(struct vfsmount *mnt) 82static int mnt_alloc_id(struct vfsmount *mnt)
69{ 83{
70 int res; 84 int res;
71 85
72retry: 86retry:
73 ida_pre_get(&mnt_id_ida, GFP_KERNEL); 87 ida_pre_get(&mnt_id_ida, GFP_KERNEL);
74 spin_lock(&vfsmount_lock); 88 spin_lock(&mnt_id_lock);
75 res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id); 89 res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
76 if (!res) 90 if (!res)
77 mnt_id_start = mnt->mnt_id + 1; 91 mnt_id_start = mnt->mnt_id + 1;
78 spin_unlock(&vfsmount_lock); 92 spin_unlock(&mnt_id_lock);
79 if (res == -EAGAIN) 93 if (res == -EAGAIN)
80 goto retry; 94 goto retry;
81 95
@@ -85,11 +99,11 @@ retry:
85static void mnt_free_id(struct vfsmount *mnt) 99static void mnt_free_id(struct vfsmount *mnt)
86{ 100{
87 int id = mnt->mnt_id; 101 int id = mnt->mnt_id;
88 spin_lock(&vfsmount_lock); 102 spin_lock(&mnt_id_lock);
89 ida_remove(&mnt_id_ida, id); 103 ida_remove(&mnt_id_ida, id);
90 if (mnt_id_start > id) 104 if (mnt_id_start > id)
91 mnt_id_start = id; 105 mnt_id_start = id;
92 spin_unlock(&vfsmount_lock); 106 spin_unlock(&mnt_id_lock);
93} 107}
94 108
95/* 109/*
@@ -150,6 +164,9 @@ struct vfsmount *alloc_vfsmnt(const char *name)
150 INIT_LIST_HEAD(&mnt->mnt_share); 164 INIT_LIST_HEAD(&mnt->mnt_share);
151 INIT_LIST_HEAD(&mnt->mnt_slave_list); 165 INIT_LIST_HEAD(&mnt->mnt_slave_list);
152 INIT_LIST_HEAD(&mnt->mnt_slave); 166 INIT_LIST_HEAD(&mnt->mnt_slave);
167#ifdef CONFIG_FSNOTIFY
168 INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
169#endif
153#ifdef CONFIG_SMP 170#ifdef CONFIG_SMP
154 mnt->mnt_writers = alloc_percpu(int); 171 mnt->mnt_writers = alloc_percpu(int);
155 if (!mnt->mnt_writers) 172 if (!mnt->mnt_writers)
@@ -344,7 +361,7 @@ static int mnt_make_readonly(struct vfsmount *mnt)
344{ 361{
345 int ret = 0; 362 int ret = 0;
346 363
347 spin_lock(&vfsmount_lock); 364 br_write_lock(vfsmount_lock);
348 mnt->mnt_flags |= MNT_WRITE_HOLD; 365 mnt->mnt_flags |= MNT_WRITE_HOLD;
349 /* 366 /*
350 * After storing MNT_WRITE_HOLD, we'll read the counters. This store 367 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
@@ -378,15 +395,15 @@ static int mnt_make_readonly(struct vfsmount *mnt)
378 */ 395 */
379 smp_wmb(); 396 smp_wmb();
380 mnt->mnt_flags &= ~MNT_WRITE_HOLD; 397 mnt->mnt_flags &= ~MNT_WRITE_HOLD;
381 spin_unlock(&vfsmount_lock); 398 br_write_unlock(vfsmount_lock);
382 return ret; 399 return ret;
383} 400}
384 401
385static void __mnt_unmake_readonly(struct vfsmount *mnt) 402static void __mnt_unmake_readonly(struct vfsmount *mnt)
386{ 403{
387 spin_lock(&vfsmount_lock); 404 br_write_lock(vfsmount_lock);
388 mnt->mnt_flags &= ~MNT_READONLY; 405 mnt->mnt_flags &= ~MNT_READONLY;
389 spin_unlock(&vfsmount_lock); 406 br_write_unlock(vfsmount_lock);
390} 407}
391 408
392void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) 409void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
@@ -410,6 +427,7 @@ void free_vfsmnt(struct vfsmount *mnt)
410/* 427/*
411 * find the first or last mount at @dentry on vfsmount @mnt depending on 428 * find the first or last mount at @dentry on vfsmount @mnt depending on
412 * @dir. If @dir is set return the first mount else return the last mount. 429 * @dir. If @dir is set return the first mount else return the last mount.
430 * vfsmount_lock must be held for read or write.
413 */ 431 */
414struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, 432struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
415 int dir) 433 int dir)
@@ -439,10 +457,11 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
439struct vfsmount *lookup_mnt(struct path *path) 457struct vfsmount *lookup_mnt(struct path *path)
440{ 458{
441 struct vfsmount *child_mnt; 459 struct vfsmount *child_mnt;
442 spin_lock(&vfsmount_lock); 460
461 br_read_lock(vfsmount_lock);
443 if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1))) 462 if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1)))
444 mntget(child_mnt); 463 mntget(child_mnt);
445 spin_unlock(&vfsmount_lock); 464 br_read_unlock(vfsmount_lock);
446 return child_mnt; 465 return child_mnt;
447} 466}
448 467
@@ -451,6 +470,9 @@ static inline int check_mnt(struct vfsmount *mnt)
451 return mnt->mnt_ns == current->nsproxy->mnt_ns; 470 return mnt->mnt_ns == current->nsproxy->mnt_ns;
452} 471}
453 472
473/*
474 * vfsmount lock must be held for write
475 */
454static void touch_mnt_namespace(struct mnt_namespace *ns) 476static void touch_mnt_namespace(struct mnt_namespace *ns)
455{ 477{
456 if (ns) { 478 if (ns) {
@@ -459,6 +481,9 @@ static void touch_mnt_namespace(struct mnt_namespace *ns)
459 } 481 }
460} 482}
461 483
484/*
485 * vfsmount lock must be held for write
486 */
462static void __touch_mnt_namespace(struct mnt_namespace *ns) 487static void __touch_mnt_namespace(struct mnt_namespace *ns)
463{ 488{
464 if (ns && ns->event != event) { 489 if (ns && ns->event != event) {
@@ -467,6 +492,9 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
467 } 492 }
468} 493}
469 494
495/*
496 * vfsmount lock must be held for write
497 */
470static void detach_mnt(struct vfsmount *mnt, struct path *old_path) 498static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
471{ 499{
472 old_path->dentry = mnt->mnt_mountpoint; 500 old_path->dentry = mnt->mnt_mountpoint;
@@ -478,6 +506,9 @@ static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
478 old_path->dentry->d_mounted--; 506 old_path->dentry->d_mounted--;
479} 507}
480 508
509/*
510 * vfsmount lock must be held for write
511 */
481void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, 512void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
482 struct vfsmount *child_mnt) 513 struct vfsmount *child_mnt)
483{ 514{
@@ -486,6 +517,9 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
486 dentry->d_mounted++; 517 dentry->d_mounted++;
487} 518}
488 519
520/*
521 * vfsmount lock must be held for write
522 */
489static void attach_mnt(struct vfsmount *mnt, struct path *path) 523static void attach_mnt(struct vfsmount *mnt, struct path *path)
490{ 524{
491 mnt_set_mountpoint(path->mnt, path->dentry, mnt); 525 mnt_set_mountpoint(path->mnt, path->dentry, mnt);
@@ -495,7 +529,7 @@ static void attach_mnt(struct vfsmount *mnt, struct path *path)
495} 529}
496 530
497/* 531/*
498 * the caller must hold vfsmount_lock 532 * vfsmount lock must be held for write
499 */ 533 */
500static void commit_tree(struct vfsmount *mnt) 534static void commit_tree(struct vfsmount *mnt)
501{ 535{
@@ -561,7 +595,7 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
561 goto out_free; 595 goto out_free;
562 } 596 }
563 597
564 mnt->mnt_flags = old->mnt_flags; 598 mnt->mnt_flags = old->mnt_flags & ~MNT_WRITE_HOLD;
565 atomic_inc(&sb->s_active); 599 atomic_inc(&sb->s_active);
566 mnt->mnt_sb = sb; 600 mnt->mnt_sb = sb;
567 mnt->mnt_root = dget(root); 601 mnt->mnt_root = dget(root);
@@ -610,6 +644,7 @@ static inline void __mntput(struct vfsmount *mnt)
610 * provides barriers, so count_mnt_writers() below is safe. AV 644 * provides barriers, so count_mnt_writers() below is safe. AV
611 */ 645 */
612 WARN_ON(count_mnt_writers(mnt)); 646 WARN_ON(count_mnt_writers(mnt));
647 fsnotify_vfsmount_delete(mnt);
613 dput(mnt->mnt_root); 648 dput(mnt->mnt_root);
614 free_vfsmnt(mnt); 649 free_vfsmnt(mnt);
615 deactivate_super(sb); 650 deactivate_super(sb);
@@ -618,39 +653,43 @@ static inline void __mntput(struct vfsmount *mnt)
618void mntput_no_expire(struct vfsmount *mnt) 653void mntput_no_expire(struct vfsmount *mnt)
619{ 654{
620repeat: 655repeat:
621 if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) { 656 if (atomic_add_unless(&mnt->mnt_count, -1, 1))
622 if (likely(!mnt->mnt_pinned)) { 657 return;
623 spin_unlock(&vfsmount_lock); 658 br_write_lock(vfsmount_lock);
624 __mntput(mnt); 659 if (!atomic_dec_and_test(&mnt->mnt_count)) {
625 return; 660 br_write_unlock(vfsmount_lock);
626 } 661 return;
627 atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
628 mnt->mnt_pinned = 0;
629 spin_unlock(&vfsmount_lock);
630 acct_auto_close_mnt(mnt);
631 goto repeat;
632 } 662 }
663 if (likely(!mnt->mnt_pinned)) {
664 br_write_unlock(vfsmount_lock);
665 __mntput(mnt);
666 return;
667 }
668 atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
669 mnt->mnt_pinned = 0;
670 br_write_unlock(vfsmount_lock);
671 acct_auto_close_mnt(mnt);
672 goto repeat;
633} 673}
634
635EXPORT_SYMBOL(mntput_no_expire); 674EXPORT_SYMBOL(mntput_no_expire);
636 675
637void mnt_pin(struct vfsmount *mnt) 676void mnt_pin(struct vfsmount *mnt)
638{ 677{
639 spin_lock(&vfsmount_lock); 678 br_write_lock(vfsmount_lock);
640 mnt->mnt_pinned++; 679 mnt->mnt_pinned++;
641 spin_unlock(&vfsmount_lock); 680 br_write_unlock(vfsmount_lock);
642} 681}
643 682
644EXPORT_SYMBOL(mnt_pin); 683EXPORT_SYMBOL(mnt_pin);
645 684
646void mnt_unpin(struct vfsmount *mnt) 685void mnt_unpin(struct vfsmount *mnt)
647{ 686{
648 spin_lock(&vfsmount_lock); 687 br_write_lock(vfsmount_lock);
649 if (mnt->mnt_pinned) { 688 if (mnt->mnt_pinned) {
650 atomic_inc(&mnt->mnt_count); 689 atomic_inc(&mnt->mnt_count);
651 mnt->mnt_pinned--; 690 mnt->mnt_pinned--;
652 } 691 }
653 spin_unlock(&vfsmount_lock); 692 br_write_unlock(vfsmount_lock);
654} 693}
655 694
656EXPORT_SYMBOL(mnt_unpin); 695EXPORT_SYMBOL(mnt_unpin);
@@ -741,12 +780,12 @@ int mnt_had_events(struct proc_mounts *p)
741 struct mnt_namespace *ns = p->ns; 780 struct mnt_namespace *ns = p->ns;
742 int res = 0; 781 int res = 0;
743 782
744 spin_lock(&vfsmount_lock); 783 br_read_lock(vfsmount_lock);
745 if (p->event != ns->event) { 784 if (p->event != ns->event) {
746 p->event = ns->event; 785 p->event = ns->event;
747 res = 1; 786 res = 1;
748 } 787 }
749 spin_unlock(&vfsmount_lock); 788 br_read_unlock(vfsmount_lock);
750 789
751 return res; 790 return res;
752} 791}
@@ -783,7 +822,6 @@ static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
783 { MNT_NOATIME, ",noatime" }, 822 { MNT_NOATIME, ",noatime" },
784 { MNT_NODIRATIME, ",nodiratime" }, 823 { MNT_NODIRATIME, ",nodiratime" },
785 { MNT_RELATIME, ",relatime" }, 824 { MNT_RELATIME, ",relatime" },
786 { MNT_STRICTATIME, ",strictatime" },
787 { 0, NULL } 825 { 0, NULL }
788 }; 826 };
789 const struct proc_fs_info *fs_infop; 827 const struct proc_fs_info *fs_infop;
@@ -948,12 +986,12 @@ int may_umount_tree(struct vfsmount *mnt)
948 int minimum_refs = 0; 986 int minimum_refs = 0;
949 struct vfsmount *p; 987 struct vfsmount *p;
950 988
951 spin_lock(&vfsmount_lock); 989 br_read_lock(vfsmount_lock);
952 for (p = mnt; p; p = next_mnt(p, mnt)) { 990 for (p = mnt; p; p = next_mnt(p, mnt)) {
953 actual_refs += atomic_read(&p->mnt_count); 991 actual_refs += atomic_read(&p->mnt_count);
954 minimum_refs += 2; 992 minimum_refs += 2;
955 } 993 }
956 spin_unlock(&vfsmount_lock); 994 br_read_unlock(vfsmount_lock);
957 995
958 if (actual_refs > minimum_refs) 996 if (actual_refs > minimum_refs)
959 return 0; 997 return 0;
@@ -980,10 +1018,10 @@ int may_umount(struct vfsmount *mnt)
980{ 1018{
981 int ret = 1; 1019 int ret = 1;
982 down_read(&namespace_sem); 1020 down_read(&namespace_sem);
983 spin_lock(&vfsmount_lock); 1021 br_read_lock(vfsmount_lock);
984 if (propagate_mount_busy(mnt, 2)) 1022 if (propagate_mount_busy(mnt, 2))
985 ret = 0; 1023 ret = 0;
986 spin_unlock(&vfsmount_lock); 1024 br_read_unlock(vfsmount_lock);
987 up_read(&namespace_sem); 1025 up_read(&namespace_sem);
988 return ret; 1026 return ret;
989} 1027}
@@ -999,13 +1037,14 @@ void release_mounts(struct list_head *head)
999 if (mnt->mnt_parent != mnt) { 1037 if (mnt->mnt_parent != mnt) {
1000 struct dentry *dentry; 1038 struct dentry *dentry;
1001 struct vfsmount *m; 1039 struct vfsmount *m;
1002 spin_lock(&vfsmount_lock); 1040
1041 br_write_lock(vfsmount_lock);
1003 dentry = mnt->mnt_mountpoint; 1042 dentry = mnt->mnt_mountpoint;
1004 m = mnt->mnt_parent; 1043 m = mnt->mnt_parent;
1005 mnt->mnt_mountpoint = mnt->mnt_root; 1044 mnt->mnt_mountpoint = mnt->mnt_root;
1006 mnt->mnt_parent = mnt; 1045 mnt->mnt_parent = mnt;
1007 m->mnt_ghosts--; 1046 m->mnt_ghosts--;
1008 spin_unlock(&vfsmount_lock); 1047 br_write_unlock(vfsmount_lock);
1009 dput(dentry); 1048 dput(dentry);
1010 mntput(m); 1049 mntput(m);
1011 } 1050 }
@@ -1013,6 +1052,10 @@ void release_mounts(struct list_head *head)
1013 } 1052 }
1014} 1053}
1015 1054
1055/*
1056 * vfsmount lock must be held for write
1057 * namespace_sem must be held for write
1058 */
1016void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) 1059void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
1017{ 1060{
1018 struct vfsmount *p; 1061 struct vfsmount *p;
@@ -1103,7 +1146,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
1103 } 1146 }
1104 1147
1105 down_write(&namespace_sem); 1148 down_write(&namespace_sem);
1106 spin_lock(&vfsmount_lock); 1149 br_write_lock(vfsmount_lock);
1107 event++; 1150 event++;
1108 1151
1109 if (!(flags & MNT_DETACH)) 1152 if (!(flags & MNT_DETACH))
@@ -1115,7 +1158,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
1115 umount_tree(mnt, 1, &umount_list); 1158 umount_tree(mnt, 1, &umount_list);
1116 retval = 0; 1159 retval = 0;
1117 } 1160 }
1118 spin_unlock(&vfsmount_lock); 1161 br_write_unlock(vfsmount_lock);
1119 up_write(&namespace_sem); 1162 up_write(&namespace_sem);
1120 release_mounts(&umount_list); 1163 release_mounts(&umount_list);
1121 return retval; 1164 return retval;
@@ -1227,19 +1270,19 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
1227 q = clone_mnt(p, p->mnt_root, flag); 1270 q = clone_mnt(p, p->mnt_root, flag);
1228 if (!q) 1271 if (!q)
1229 goto Enomem; 1272 goto Enomem;
1230 spin_lock(&vfsmount_lock); 1273 br_write_lock(vfsmount_lock);
1231 list_add_tail(&q->mnt_list, &res->mnt_list); 1274 list_add_tail(&q->mnt_list, &res->mnt_list);
1232 attach_mnt(q, &path); 1275 attach_mnt(q, &path);
1233 spin_unlock(&vfsmount_lock); 1276 br_write_unlock(vfsmount_lock);
1234 } 1277 }
1235 } 1278 }
1236 return res; 1279 return res;
1237Enomem: 1280Enomem:
1238 if (res) { 1281 if (res) {
1239 LIST_HEAD(umount_list); 1282 LIST_HEAD(umount_list);
1240 spin_lock(&vfsmount_lock); 1283 br_write_lock(vfsmount_lock);
1241 umount_tree(res, 0, &umount_list); 1284 umount_tree(res, 0, &umount_list);
1242 spin_unlock(&vfsmount_lock); 1285 br_write_unlock(vfsmount_lock);
1243 release_mounts(&umount_list); 1286 release_mounts(&umount_list);
1244 } 1287 }
1245 return NULL; 1288 return NULL;
@@ -1258,9 +1301,9 @@ void drop_collected_mounts(struct vfsmount *mnt)
1258{ 1301{
1259 LIST_HEAD(umount_list); 1302 LIST_HEAD(umount_list);
1260 down_write(&namespace_sem); 1303 down_write(&namespace_sem);
1261 spin_lock(&vfsmount_lock); 1304 br_write_lock(vfsmount_lock);
1262 umount_tree(mnt, 0, &umount_list); 1305 umount_tree(mnt, 0, &umount_list);
1263 spin_unlock(&vfsmount_lock); 1306 br_write_unlock(vfsmount_lock);
1264 up_write(&namespace_sem); 1307 up_write(&namespace_sem);
1265 release_mounts(&umount_list); 1308 release_mounts(&umount_list);
1266} 1309}
@@ -1388,7 +1431,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
1388 if (err) 1431 if (err)
1389 goto out_cleanup_ids; 1432 goto out_cleanup_ids;
1390 1433
1391 spin_lock(&vfsmount_lock); 1434 br_write_lock(vfsmount_lock);
1392 1435
1393 if (IS_MNT_SHARED(dest_mnt)) { 1436 if (IS_MNT_SHARED(dest_mnt)) {
1394 for (p = source_mnt; p; p = next_mnt(p, source_mnt)) 1437 for (p = source_mnt; p; p = next_mnt(p, source_mnt))
@@ -1407,7 +1450,8 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
1407 list_del_init(&child->mnt_hash); 1450 list_del_init(&child->mnt_hash);
1408 commit_tree(child); 1451 commit_tree(child);
1409 } 1452 }
1410 spin_unlock(&vfsmount_lock); 1453 br_write_unlock(vfsmount_lock);
1454
1411 return 0; 1455 return 0;
1412 1456
1413 out_cleanup_ids: 1457 out_cleanup_ids:
@@ -1440,13 +1484,30 @@ out_unlock:
1440} 1484}
1441 1485
1442/* 1486/*
1487 * Sanity check the flags to change_mnt_propagation.
1488 */
1489
1490static int flags_to_propagation_type(int flags)
1491{
1492 int type = flags & ~MS_REC;
1493
1494 /* Fail if any non-propagation flags are set */
1495 if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
1496 return 0;
1497 /* Only one propagation flag should be set */
1498 if (!is_power_of_2(type))
1499 return 0;
1500 return type;
1501}
1502
1503/*
1443 * recursively change the type of the mountpoint. 1504 * recursively change the type of the mountpoint.
1444 */ 1505 */
1445static int do_change_type(struct path *path, int flag) 1506static int do_change_type(struct path *path, int flag)
1446{ 1507{
1447 struct vfsmount *m, *mnt = path->mnt; 1508 struct vfsmount *m, *mnt = path->mnt;
1448 int recurse = flag & MS_REC; 1509 int recurse = flag & MS_REC;
1449 int type = flag & ~MS_REC; 1510 int type;
1450 int err = 0; 1511 int err = 0;
1451 1512
1452 if (!capable(CAP_SYS_ADMIN)) 1513 if (!capable(CAP_SYS_ADMIN))
@@ -1455,6 +1516,10 @@ static int do_change_type(struct path *path, int flag)
1455 if (path->dentry != path->mnt->mnt_root) 1516 if (path->dentry != path->mnt->mnt_root)
1456 return -EINVAL; 1517 return -EINVAL;
1457 1518
1519 type = flags_to_propagation_type(flag);
1520 if (!type)
1521 return -EINVAL;
1522
1458 down_write(&namespace_sem); 1523 down_write(&namespace_sem);
1459 if (type == MS_SHARED) { 1524 if (type == MS_SHARED) {
1460 err = invent_group_ids(mnt, recurse); 1525 err = invent_group_ids(mnt, recurse);
@@ -1462,10 +1527,10 @@ static int do_change_type(struct path *path, int flag)
1462 goto out_unlock; 1527 goto out_unlock;
1463 } 1528 }
1464 1529
1465 spin_lock(&vfsmount_lock); 1530 br_write_lock(vfsmount_lock);
1466 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) 1531 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
1467 change_mnt_propagation(m, type); 1532 change_mnt_propagation(m, type);
1468 spin_unlock(&vfsmount_lock); 1533 br_write_unlock(vfsmount_lock);
1469 1534
1470 out_unlock: 1535 out_unlock:
1471 up_write(&namespace_sem); 1536 up_write(&namespace_sem);
@@ -1509,9 +1574,10 @@ static int do_loopback(struct path *path, char *old_name,
1509 err = graft_tree(mnt, path); 1574 err = graft_tree(mnt, path);
1510 if (err) { 1575 if (err) {
1511 LIST_HEAD(umount_list); 1576 LIST_HEAD(umount_list);
1512 spin_lock(&vfsmount_lock); 1577
1578 br_write_lock(vfsmount_lock);
1513 umount_tree(mnt, 0, &umount_list); 1579 umount_tree(mnt, 0, &umount_list);
1514 spin_unlock(&vfsmount_lock); 1580 br_write_unlock(vfsmount_lock);
1515 release_mounts(&umount_list); 1581 release_mounts(&umount_list);
1516 } 1582 }
1517 1583
@@ -1564,16 +1630,16 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1564 else 1630 else
1565 err = do_remount_sb(sb, flags, data, 0); 1631 err = do_remount_sb(sb, flags, data, 0);
1566 if (!err) { 1632 if (!err) {
1567 spin_lock(&vfsmount_lock); 1633 br_write_lock(vfsmount_lock);
1568 mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK; 1634 mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK;
1569 path->mnt->mnt_flags = mnt_flags; 1635 path->mnt->mnt_flags = mnt_flags;
1570 spin_unlock(&vfsmount_lock); 1636 br_write_unlock(vfsmount_lock);
1571 } 1637 }
1572 up_write(&sb->s_umount); 1638 up_write(&sb->s_umount);
1573 if (!err) { 1639 if (!err) {
1574 spin_lock(&vfsmount_lock); 1640 br_write_lock(vfsmount_lock);
1575 touch_mnt_namespace(path->mnt->mnt_ns); 1641 touch_mnt_namespace(path->mnt->mnt_ns);
1576 spin_unlock(&vfsmount_lock); 1642 br_write_unlock(vfsmount_lock);
1577 } 1643 }
1578 return err; 1644 return err;
1579} 1645}
@@ -1678,9 +1744,7 @@ static int do_new_mount(struct path *path, char *type, int flags,
1678 if (!capable(CAP_SYS_ADMIN)) 1744 if (!capable(CAP_SYS_ADMIN))
1679 return -EPERM; 1745 return -EPERM;
1680 1746
1681 lock_kernel();
1682 mnt = do_kern_mount(type, flags, name, data); 1747 mnt = do_kern_mount(type, flags, name, data);
1683 unlock_kernel();
1684 if (IS_ERR(mnt)) 1748 if (IS_ERR(mnt))
1685 return PTR_ERR(mnt); 1749 return PTR_ERR(mnt);
1686 1750
@@ -1750,7 +1814,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
1750 return; 1814 return;
1751 1815
1752 down_write(&namespace_sem); 1816 down_write(&namespace_sem);
1753 spin_lock(&vfsmount_lock); 1817 br_write_lock(vfsmount_lock);
1754 1818
1755 /* extract from the expiration list every vfsmount that matches the 1819 /* extract from the expiration list every vfsmount that matches the
1756 * following criteria: 1820 * following criteria:
@@ -1769,7 +1833,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
1769 touch_mnt_namespace(mnt->mnt_ns); 1833 touch_mnt_namespace(mnt->mnt_ns);
1770 umount_tree(mnt, 1, &umounts); 1834 umount_tree(mnt, 1, &umounts);
1771 } 1835 }
1772 spin_unlock(&vfsmount_lock); 1836 br_write_unlock(vfsmount_lock);
1773 up_write(&namespace_sem); 1837 up_write(&namespace_sem);
1774 1838
1775 release_mounts(&umounts); 1839 release_mounts(&umounts);
@@ -1826,6 +1890,8 @@ resume:
1826/* 1890/*
1827 * process a list of expirable mountpoints with the intent of discarding any 1891 * process a list of expirable mountpoints with the intent of discarding any
1828 * submounts of a specific parent mountpoint 1892 * submounts of a specific parent mountpoint
1893 *
1894 * vfsmount_lock must be held for write
1829 */ 1895 */
1830static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts) 1896static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts)
1831{ 1897{
@@ -1984,7 +2050,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1984 if (flags & MS_RDONLY) 2050 if (flags & MS_RDONLY)
1985 mnt_flags |= MNT_READONLY; 2051 mnt_flags |= MNT_READONLY;
1986 2052
1987 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | 2053 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
1988 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | 2054 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
1989 MS_STRICTATIME); 2055 MS_STRICTATIME);
1990 2056
@@ -2044,9 +2110,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2044 kfree(new_ns); 2110 kfree(new_ns);
2045 return ERR_PTR(-ENOMEM); 2111 return ERR_PTR(-ENOMEM);
2046 } 2112 }
2047 spin_lock(&vfsmount_lock); 2113 br_write_lock(vfsmount_lock);
2048 list_add_tail(&new_ns->list, &new_ns->root->mnt_list); 2114 list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
2049 spin_unlock(&vfsmount_lock); 2115 br_write_unlock(vfsmount_lock);
2050 2116
2051 /* 2117 /*
2052 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts 2118 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
@@ -2208,10 +2274,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2208 goto out1; 2274 goto out1;
2209 } 2275 }
2210 2276
2211 read_lock(&current->fs->lock); 2277 get_fs_root(current->fs, &root);
2212 root = current->fs->root;
2213 path_get(&current->fs->root);
2214 read_unlock(&current->fs->lock);
2215 down_write(&namespace_sem); 2278 down_write(&namespace_sem);
2216 mutex_lock(&old.dentry->d_inode->i_mutex); 2279 mutex_lock(&old.dentry->d_inode->i_mutex);
2217 error = -EINVAL; 2280 error = -EINVAL;
@@ -2243,7 +2306,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2243 goto out2; /* not attached */ 2306 goto out2; /* not attached */
2244 /* make sure we can reach put_old from new_root */ 2307 /* make sure we can reach put_old from new_root */
2245 tmp = old.mnt; 2308 tmp = old.mnt;
2246 spin_lock(&vfsmount_lock); 2309 br_write_lock(vfsmount_lock);
2247 if (tmp != new.mnt) { 2310 if (tmp != new.mnt) {
2248 for (;;) { 2311 for (;;) {
2249 if (tmp->mnt_parent == tmp) 2312 if (tmp->mnt_parent == tmp)
@@ -2263,7 +2326,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2263 /* mount new_root on / */ 2326 /* mount new_root on / */
2264 attach_mnt(new.mnt, &root_parent); 2327 attach_mnt(new.mnt, &root_parent);
2265 touch_mnt_namespace(current->nsproxy->mnt_ns); 2328 touch_mnt_namespace(current->nsproxy->mnt_ns);
2266 spin_unlock(&vfsmount_lock); 2329 br_write_unlock(vfsmount_lock);
2267 chroot_fs_refs(&root, &new); 2330 chroot_fs_refs(&root, &new);
2268 error = 0; 2331 error = 0;
2269 path_put(&root_parent); 2332 path_put(&root_parent);
@@ -2278,7 +2341,7 @@ out1:
2278out0: 2341out0:
2279 return error; 2342 return error;
2280out3: 2343out3:
2281 spin_unlock(&vfsmount_lock); 2344 br_write_unlock(vfsmount_lock);
2282 goto out2; 2345 goto out2;
2283} 2346}
2284 2347
@@ -2325,6 +2388,8 @@ void __init mnt_init(void)
2325 for (u = 0; u < HASH_SIZE; u++) 2388 for (u = 0; u < HASH_SIZE; u++)
2326 INIT_LIST_HEAD(&mount_hashtable[u]); 2389 INIT_LIST_HEAD(&mount_hashtable[u]);
2327 2390
2391 br_lock_init(vfsmount_lock);
2392
2328 err = sysfs_init(); 2393 err = sysfs_init();
2329 if (err) 2394 if (err)
2330 printk(KERN_WARNING "%s: sysfs_init error: %d\n", 2395 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
@@ -2343,9 +2408,9 @@ void put_mnt_ns(struct mnt_namespace *ns)
2343 if (!atomic_dec_and_test(&ns->count)) 2408 if (!atomic_dec_and_test(&ns->count))
2344 return; 2409 return;
2345 down_write(&namespace_sem); 2410 down_write(&namespace_sem);
2346 spin_lock(&vfsmount_lock); 2411 br_write_lock(vfsmount_lock);
2347 umount_tree(ns->root, 0, &umount_list); 2412 umount_tree(ns->root, 0, &umount_list);
2348 spin_unlock(&vfsmount_lock); 2413 br_write_unlock(vfsmount_lock);
2349 up_write(&namespace_sem); 2414 up_write(&namespace_sem);
2350 release_mounts(&umount_list); 2415 release_mounts(&umount_list);
2351 kfree(ns); 2416 kfree(ns);