aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/dcache.c11
-rw-r--r--fs/internal.h5
-rw-r--r--fs/namei.c7
-rw-r--r--fs/namespace.c177
-rw-r--r--fs/pnode.c11
5 files changed, 134 insertions, 77 deletions
diff --git a/fs/dcache.c b/fs/dcache.c
index d56a40b5a577..83293be48149 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1935,7 +1935,7 @@ static int prepend_path(const struct path *path, struct path *root,
1935 bool slash = false; 1935 bool slash = false;
1936 int error = 0; 1936 int error = 0;
1937 1937
1938 spin_lock(&vfsmount_lock); 1938 br_read_lock(vfsmount_lock);
1939 while (dentry != root->dentry || vfsmnt != root->mnt) { 1939 while (dentry != root->dentry || vfsmnt != root->mnt) {
1940 struct dentry * parent; 1940 struct dentry * parent;
1941 1941
@@ -1964,7 +1964,7 @@ out:
1964 if (!error && !slash) 1964 if (!error && !slash)
1965 error = prepend(buffer, buflen, "/", 1); 1965 error = prepend(buffer, buflen, "/", 1);
1966 1966
1967 spin_unlock(&vfsmount_lock); 1967 br_read_unlock(vfsmount_lock);
1968 return error; 1968 return error;
1969 1969
1970global_root: 1970global_root:
@@ -2302,11 +2302,12 @@ int path_is_under(struct path *path1, struct path *path2)
2302 struct vfsmount *mnt = path1->mnt; 2302 struct vfsmount *mnt = path1->mnt;
2303 struct dentry *dentry = path1->dentry; 2303 struct dentry *dentry = path1->dentry;
2304 int res; 2304 int res;
2305 spin_lock(&vfsmount_lock); 2305
2306 br_read_lock(vfsmount_lock);
2306 if (mnt != path2->mnt) { 2307 if (mnt != path2->mnt) {
2307 for (;;) { 2308 for (;;) {
2308 if (mnt->mnt_parent == mnt) { 2309 if (mnt->mnt_parent == mnt) {
2309 spin_unlock(&vfsmount_lock); 2310 br_read_unlock(vfsmount_lock);
2310 return 0; 2311 return 0;
2311 } 2312 }
2312 if (mnt->mnt_parent == path2->mnt) 2313 if (mnt->mnt_parent == path2->mnt)
@@ -2316,7 +2317,7 @@ int path_is_under(struct path *path1, struct path *path2)
2316 dentry = mnt->mnt_mountpoint; 2317 dentry = mnt->mnt_mountpoint;
2317 } 2318 }
2318 res = is_subdir(dentry, path2->dentry); 2319 res = is_subdir(dentry, path2->dentry);
2319 spin_unlock(&vfsmount_lock); 2320 br_read_unlock(vfsmount_lock);
2320 return res; 2321 return res;
2321} 2322}
2322EXPORT_SYMBOL(path_is_under); 2323EXPORT_SYMBOL(path_is_under);
diff --git a/fs/internal.h b/fs/internal.h
index 6a5c13a80660..a6910e91cee8 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -9,6 +9,8 @@
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12#include <linux/lglock.h>
13
12struct super_block; 14struct super_block;
13struct linux_binprm; 15struct linux_binprm;
14struct path; 16struct path;
@@ -70,7 +72,8 @@ extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
70 72
71extern void __init mnt_init(void); 73extern void __init mnt_init(void);
72 74
73extern spinlock_t vfsmount_lock; 75DECLARE_BRLOCK(vfsmount_lock);
76
74 77
75/* 78/*
76 * fs_struct.c 79 * fs_struct.c
diff --git a/fs/namei.c b/fs/namei.c
index 11de7c39ff76..24896e833565 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -595,15 +595,16 @@ int follow_up(struct path *path)
595{ 595{
596 struct vfsmount *parent; 596 struct vfsmount *parent;
597 struct dentry *mountpoint; 597 struct dentry *mountpoint;
598 spin_lock(&vfsmount_lock); 598
599 br_read_lock(vfsmount_lock);
599 parent = path->mnt->mnt_parent; 600 parent = path->mnt->mnt_parent;
600 if (parent == path->mnt) { 601 if (parent == path->mnt) {
601 spin_unlock(&vfsmount_lock); 602 br_read_unlock(vfsmount_lock);
602 return 0; 603 return 0;
603 } 604 }
604 mntget(parent); 605 mntget(parent);
605 mountpoint = dget(path->mnt->mnt_mountpoint); 606 mountpoint = dget(path->mnt->mnt_mountpoint);
606 spin_unlock(&vfsmount_lock); 607 br_read_unlock(vfsmount_lock);
607 dput(path->dentry); 608 dput(path->dentry);
608 path->dentry = mountpoint; 609 path->dentry = mountpoint;
609 mntput(path->mnt); 610 mntput(path->mnt);
diff --git a/fs/namespace.c b/fs/namespace.c
index 2e10cb19c5b0..de402eb6eafb 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -11,6 +11,8 @@
11#include <linux/syscalls.h> 11#include <linux/syscalls.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/spinlock.h>
15#include <linux/percpu.h>
14#include <linux/smp_lock.h> 16#include <linux/smp_lock.h>
15#include <linux/init.h> 17#include <linux/init.h>
16#include <linux/kernel.h> 18#include <linux/kernel.h>
@@ -38,12 +40,10 @@
38#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head)) 40#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head))
39#define HASH_SIZE (1UL << HASH_SHIFT) 41#define HASH_SIZE (1UL << HASH_SHIFT)
40 42
41/* spinlock for vfsmount related operations, inplace of dcache_lock */
42__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
43
44static int event; 43static int event;
45static DEFINE_IDA(mnt_id_ida); 44static DEFINE_IDA(mnt_id_ida);
46static DEFINE_IDA(mnt_group_ida); 45static DEFINE_IDA(mnt_group_ida);
46static DEFINE_SPINLOCK(mnt_id_lock);
47static int mnt_id_start = 0; 47static int mnt_id_start = 0;
48static int mnt_group_start = 1; 48static int mnt_group_start = 1;
49 49
@@ -55,6 +55,16 @@ static struct rw_semaphore namespace_sem;
55struct kobject *fs_kobj; 55struct kobject *fs_kobj;
56EXPORT_SYMBOL_GPL(fs_kobj); 56EXPORT_SYMBOL_GPL(fs_kobj);
57 57
58/*
59 * vfsmount lock may be taken for read to prevent changes to the
60 * vfsmount hash, ie. during mountpoint lookups or walking back
61 * up the tree.
62 *
63 * It should be taken for write in all cases where the vfsmount
64 * tree or hash is modified or when a vfsmount structure is modified.
65 */
66DEFINE_BRLOCK(vfsmount_lock);
67
58static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) 68static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
59{ 69{
60 unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); 70 unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
@@ -65,18 +75,21 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
65 75
66#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16) 76#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)
67 77
68/* allocation is serialized by namespace_sem */ 78/*
79 * allocation is serialized by namespace_sem, but we need the spinlock to
80 * serialize with freeing.
81 */
69static int mnt_alloc_id(struct vfsmount *mnt) 82static int mnt_alloc_id(struct vfsmount *mnt)
70{ 83{
71 int res; 84 int res;
72 85
73retry: 86retry:
74 ida_pre_get(&mnt_id_ida, GFP_KERNEL); 87 ida_pre_get(&mnt_id_ida, GFP_KERNEL);
75 spin_lock(&vfsmount_lock); 88 spin_lock(&mnt_id_lock);
76 res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id); 89 res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
77 if (!res) 90 if (!res)
78 mnt_id_start = mnt->mnt_id + 1; 91 mnt_id_start = mnt->mnt_id + 1;
79 spin_unlock(&vfsmount_lock); 92 spin_unlock(&mnt_id_lock);
80 if (res == -EAGAIN) 93 if (res == -EAGAIN)
81 goto retry; 94 goto retry;
82 95
@@ -86,11 +99,11 @@ retry:
86static void mnt_free_id(struct vfsmount *mnt) 99static void mnt_free_id(struct vfsmount *mnt)
87{ 100{
88 int id = mnt->mnt_id; 101 int id = mnt->mnt_id;
89 spin_lock(&vfsmount_lock); 102 spin_lock(&mnt_id_lock);
90 ida_remove(&mnt_id_ida, id); 103 ida_remove(&mnt_id_ida, id);
91 if (mnt_id_start > id) 104 if (mnt_id_start > id)
92 mnt_id_start = id; 105 mnt_id_start = id;
93 spin_unlock(&vfsmount_lock); 106 spin_unlock(&mnt_id_lock);
94} 107}
95 108
96/* 109/*
@@ -348,7 +361,7 @@ static int mnt_make_readonly(struct vfsmount *mnt)
348{ 361{
349 int ret = 0; 362 int ret = 0;
350 363
351 spin_lock(&vfsmount_lock); 364 br_write_lock(vfsmount_lock);
352 mnt->mnt_flags |= MNT_WRITE_HOLD; 365 mnt->mnt_flags |= MNT_WRITE_HOLD;
353 /* 366 /*
354 * After storing MNT_WRITE_HOLD, we'll read the counters. This store 367 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
@@ -382,15 +395,15 @@ static int mnt_make_readonly(struct vfsmount *mnt)
382 */ 395 */
383 smp_wmb(); 396 smp_wmb();
384 mnt->mnt_flags &= ~MNT_WRITE_HOLD; 397 mnt->mnt_flags &= ~MNT_WRITE_HOLD;
385 spin_unlock(&vfsmount_lock); 398 br_write_unlock(vfsmount_lock);
386 return ret; 399 return ret;
387} 400}
388 401
389static void __mnt_unmake_readonly(struct vfsmount *mnt) 402static void __mnt_unmake_readonly(struct vfsmount *mnt)
390{ 403{
391 spin_lock(&vfsmount_lock); 404 br_write_lock(vfsmount_lock);
392 mnt->mnt_flags &= ~MNT_READONLY; 405 mnt->mnt_flags &= ~MNT_READONLY;
393 spin_unlock(&vfsmount_lock); 406 br_write_unlock(vfsmount_lock);
394} 407}
395 408
396void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) 409void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
@@ -414,6 +427,7 @@ void free_vfsmnt(struct vfsmount *mnt)
414/* 427/*
415 * find the first or last mount at @dentry on vfsmount @mnt depending on 428 * find the first or last mount at @dentry on vfsmount @mnt depending on
416 * @dir. If @dir is set return the first mount else return the last mount. 429 * @dir. If @dir is set return the first mount else return the last mount.
430 * vfsmount_lock must be held for read or write.
417 */ 431 */
418struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, 432struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
419 int dir) 433 int dir)
@@ -443,10 +457,11 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
443struct vfsmount *lookup_mnt(struct path *path) 457struct vfsmount *lookup_mnt(struct path *path)
444{ 458{
445 struct vfsmount *child_mnt; 459 struct vfsmount *child_mnt;
446 spin_lock(&vfsmount_lock); 460
461 br_read_lock(vfsmount_lock);
447 if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1))) 462 if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1)))
448 mntget(child_mnt); 463 mntget(child_mnt);
449 spin_unlock(&vfsmount_lock); 464 br_read_unlock(vfsmount_lock);
450 return child_mnt; 465 return child_mnt;
451} 466}
452 467
@@ -455,6 +470,9 @@ static inline int check_mnt(struct vfsmount *mnt)
455 return mnt->mnt_ns == current->nsproxy->mnt_ns; 470 return mnt->mnt_ns == current->nsproxy->mnt_ns;
456} 471}
457 472
473/*
474 * vfsmount lock must be held for write
475 */
458static void touch_mnt_namespace(struct mnt_namespace *ns) 476static void touch_mnt_namespace(struct mnt_namespace *ns)
459{ 477{
460 if (ns) { 478 if (ns) {
@@ -463,6 +481,9 @@ static void touch_mnt_namespace(struct mnt_namespace *ns)
463 } 481 }
464} 482}
465 483
484/*
485 * vfsmount lock must be held for write
486 */
466static void __touch_mnt_namespace(struct mnt_namespace *ns) 487static void __touch_mnt_namespace(struct mnt_namespace *ns)
467{ 488{
468 if (ns && ns->event != event) { 489 if (ns && ns->event != event) {
@@ -471,6 +492,9 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
471 } 492 }
472} 493}
473 494
495/*
496 * vfsmount lock must be held for write
497 */
474static void detach_mnt(struct vfsmount *mnt, struct path *old_path) 498static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
475{ 499{
476 old_path->dentry = mnt->mnt_mountpoint; 500 old_path->dentry = mnt->mnt_mountpoint;
@@ -482,6 +506,9 @@ static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
482 old_path->dentry->d_mounted--; 506 old_path->dentry->d_mounted--;
483} 507}
484 508
509/*
510 * vfsmount lock must be held for write
511 */
485void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, 512void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
486 struct vfsmount *child_mnt) 513 struct vfsmount *child_mnt)
487{ 514{
@@ -490,6 +517,9 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
490 dentry->d_mounted++; 517 dentry->d_mounted++;
491} 518}
492 519
520/*
521 * vfsmount lock must be held for write
522 */
493static void attach_mnt(struct vfsmount *mnt, struct path *path) 523static void attach_mnt(struct vfsmount *mnt, struct path *path)
494{ 524{
495 mnt_set_mountpoint(path->mnt, path->dentry, mnt); 525 mnt_set_mountpoint(path->mnt, path->dentry, mnt);
@@ -499,7 +529,7 @@ static void attach_mnt(struct vfsmount *mnt, struct path *path)
499} 529}
500 530
501/* 531/*
502 * the caller must hold vfsmount_lock 532 * vfsmount lock must be held for write
503 */ 533 */
504static void commit_tree(struct vfsmount *mnt) 534static void commit_tree(struct vfsmount *mnt)
505{ 535{
@@ -623,39 +653,43 @@ static inline void __mntput(struct vfsmount *mnt)
623void mntput_no_expire(struct vfsmount *mnt) 653void mntput_no_expire(struct vfsmount *mnt)
624{ 654{
625repeat: 655repeat:
626 if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) { 656 if (atomic_add_unless(&mnt->mnt_count, -1, 1))
627 if (likely(!mnt->mnt_pinned)) { 657 return;
628 spin_unlock(&vfsmount_lock); 658 br_write_lock(vfsmount_lock);
629 __mntput(mnt); 659 if (!atomic_dec_and_test(&mnt->mnt_count)) {
630 return; 660 br_write_unlock(vfsmount_lock);
631 } 661 return;
632 atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); 662 }
633 mnt->mnt_pinned = 0; 663 if (likely(!mnt->mnt_pinned)) {
634 spin_unlock(&vfsmount_lock); 664 br_write_unlock(vfsmount_lock);
635 acct_auto_close_mnt(mnt); 665 __mntput(mnt);
636 goto repeat; 666 return;
637 } 667 }
668 atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
669 mnt->mnt_pinned = 0;
670 br_write_unlock(vfsmount_lock);
671 acct_auto_close_mnt(mnt);
672 goto repeat;
638} 673}
639
640EXPORT_SYMBOL(mntput_no_expire); 674EXPORT_SYMBOL(mntput_no_expire);
641 675
642void mnt_pin(struct vfsmount *mnt) 676void mnt_pin(struct vfsmount *mnt)
643{ 677{
644 spin_lock(&vfsmount_lock); 678 br_write_lock(vfsmount_lock);
645 mnt->mnt_pinned++; 679 mnt->mnt_pinned++;
646 spin_unlock(&vfsmount_lock); 680 br_write_unlock(vfsmount_lock);
647} 681}
648 682
649EXPORT_SYMBOL(mnt_pin); 683EXPORT_SYMBOL(mnt_pin);
650 684
651void mnt_unpin(struct vfsmount *mnt) 685void mnt_unpin(struct vfsmount *mnt)
652{ 686{
653 spin_lock(&vfsmount_lock); 687 br_write_lock(vfsmount_lock);
654 if (mnt->mnt_pinned) { 688 if (mnt->mnt_pinned) {
655 atomic_inc(&mnt->mnt_count); 689 atomic_inc(&mnt->mnt_count);
656 mnt->mnt_pinned--; 690 mnt->mnt_pinned--;
657 } 691 }
658 spin_unlock(&vfsmount_lock); 692 br_write_unlock(vfsmount_lock);
659} 693}
660 694
661EXPORT_SYMBOL(mnt_unpin); 695EXPORT_SYMBOL(mnt_unpin);
@@ -746,12 +780,12 @@ int mnt_had_events(struct proc_mounts *p)
746 struct mnt_namespace *ns = p->ns; 780 struct mnt_namespace *ns = p->ns;
747 int res = 0; 781 int res = 0;
748 782
749 spin_lock(&vfsmount_lock); 783 br_read_lock(vfsmount_lock);
750 if (p->event != ns->event) { 784 if (p->event != ns->event) {
751 p->event = ns->event; 785 p->event = ns->event;
752 res = 1; 786 res = 1;
753 } 787 }
754 spin_unlock(&vfsmount_lock); 788 br_read_unlock(vfsmount_lock);
755 789
756 return res; 790 return res;
757} 791}
@@ -952,12 +986,12 @@ int may_umount_tree(struct vfsmount *mnt)
952 int minimum_refs = 0; 986 int minimum_refs = 0;
953 struct vfsmount *p; 987 struct vfsmount *p;
954 988
955 spin_lock(&vfsmount_lock); 989 br_read_lock(vfsmount_lock);
956 for (p = mnt; p; p = next_mnt(p, mnt)) { 990 for (p = mnt; p; p = next_mnt(p, mnt)) {
957 actual_refs += atomic_read(&p->mnt_count); 991 actual_refs += atomic_read(&p->mnt_count);
958 minimum_refs += 2; 992 minimum_refs += 2;
959 } 993 }
960 spin_unlock(&vfsmount_lock); 994 br_read_unlock(vfsmount_lock);
961 995
962 if (actual_refs > minimum_refs) 996 if (actual_refs > minimum_refs)
963 return 0; 997 return 0;
@@ -984,10 +1018,10 @@ int may_umount(struct vfsmount *mnt)
984{ 1018{
985 int ret = 1; 1019 int ret = 1;
986 down_read(&namespace_sem); 1020 down_read(&namespace_sem);
987 spin_lock(&vfsmount_lock); 1021 br_read_lock(vfsmount_lock);
988 if (propagate_mount_busy(mnt, 2)) 1022 if (propagate_mount_busy(mnt, 2))
989 ret = 0; 1023 ret = 0;
990 spin_unlock(&vfsmount_lock); 1024 br_read_unlock(vfsmount_lock);
991 up_read(&namespace_sem); 1025 up_read(&namespace_sem);
992 return ret; 1026 return ret;
993} 1027}
@@ -1003,13 +1037,14 @@ void release_mounts(struct list_head *head)
1003 if (mnt->mnt_parent != mnt) { 1037 if (mnt->mnt_parent != mnt) {
1004 struct dentry *dentry; 1038 struct dentry *dentry;
1005 struct vfsmount *m; 1039 struct vfsmount *m;
1006 spin_lock(&vfsmount_lock); 1040
1041 br_write_lock(vfsmount_lock);
1007 dentry = mnt->mnt_mountpoint; 1042 dentry = mnt->mnt_mountpoint;
1008 m = mnt->mnt_parent; 1043 m = mnt->mnt_parent;
1009 mnt->mnt_mountpoint = mnt->mnt_root; 1044 mnt->mnt_mountpoint = mnt->mnt_root;
1010 mnt->mnt_parent = mnt; 1045 mnt->mnt_parent = mnt;
1011 m->mnt_ghosts--; 1046 m->mnt_ghosts--;
1012 spin_unlock(&vfsmount_lock); 1047 br_write_unlock(vfsmount_lock);
1013 dput(dentry); 1048 dput(dentry);
1014 mntput(m); 1049 mntput(m);
1015 } 1050 }
@@ -1017,6 +1052,10 @@ void release_mounts(struct list_head *head)
1017 } 1052 }
1018} 1053}
1019 1054
1055/*
1056 * vfsmount lock must be held for write
1057 * namespace_sem must be held for write
1058 */
1020void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) 1059void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
1021{ 1060{
1022 struct vfsmount *p; 1061 struct vfsmount *p;
@@ -1107,7 +1146,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
1107 } 1146 }
1108 1147
1109 down_write(&namespace_sem); 1148 down_write(&namespace_sem);
1110 spin_lock(&vfsmount_lock); 1149 br_write_lock(vfsmount_lock);
1111 event++; 1150 event++;
1112 1151
1113 if (!(flags & MNT_DETACH)) 1152 if (!(flags & MNT_DETACH))
@@ -1119,7 +1158,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
1119 umount_tree(mnt, 1, &umount_list); 1158 umount_tree(mnt, 1, &umount_list);
1120 retval = 0; 1159 retval = 0;
1121 } 1160 }
1122 spin_unlock(&vfsmount_lock); 1161 br_write_unlock(vfsmount_lock);
1123 up_write(&namespace_sem); 1162 up_write(&namespace_sem);
1124 release_mounts(&umount_list); 1163 release_mounts(&umount_list);
1125 return retval; 1164 return retval;
@@ -1231,19 +1270,19 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
1231 q = clone_mnt(p, p->mnt_root, flag); 1270 q = clone_mnt(p, p->mnt_root, flag);
1232 if (!q) 1271 if (!q)
1233 goto Enomem; 1272 goto Enomem;
1234 spin_lock(&vfsmount_lock); 1273 br_write_lock(vfsmount_lock);
1235 list_add_tail(&q->mnt_list, &res->mnt_list); 1274 list_add_tail(&q->mnt_list, &res->mnt_list);
1236 attach_mnt(q, &path); 1275 attach_mnt(q, &path);
1237 spin_unlock(&vfsmount_lock); 1276 br_write_unlock(vfsmount_lock);
1238 } 1277 }
1239 } 1278 }
1240 return res; 1279 return res;
1241Enomem: 1280Enomem:
1242 if (res) { 1281 if (res) {
1243 LIST_HEAD(umount_list); 1282 LIST_HEAD(umount_list);
1244 spin_lock(&vfsmount_lock); 1283 br_write_lock(vfsmount_lock);
1245 umount_tree(res, 0, &umount_list); 1284 umount_tree(res, 0, &umount_list);
1246 spin_unlock(&vfsmount_lock); 1285 br_write_unlock(vfsmount_lock);
1247 release_mounts(&umount_list); 1286 release_mounts(&umount_list);
1248 } 1287 }
1249 return NULL; 1288 return NULL;
@@ -1262,9 +1301,9 @@ void drop_collected_mounts(struct vfsmount *mnt)
1262{ 1301{
1263 LIST_HEAD(umount_list); 1302 LIST_HEAD(umount_list);
1264 down_write(&namespace_sem); 1303 down_write(&namespace_sem);
1265 spin_lock(&vfsmount_lock); 1304 br_write_lock(vfsmount_lock);
1266 umount_tree(mnt, 0, &umount_list); 1305 umount_tree(mnt, 0, &umount_list);
1267 spin_unlock(&vfsmount_lock); 1306 br_write_unlock(vfsmount_lock);
1268 up_write(&namespace_sem); 1307 up_write(&namespace_sem);
1269 release_mounts(&umount_list); 1308 release_mounts(&umount_list);
1270} 1309}
@@ -1392,7 +1431,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
1392 if (err) 1431 if (err)
1393 goto out_cleanup_ids; 1432 goto out_cleanup_ids;
1394 1433
1395 spin_lock(&vfsmount_lock); 1434 br_write_lock(vfsmount_lock);
1396 1435
1397 if (IS_MNT_SHARED(dest_mnt)) { 1436 if (IS_MNT_SHARED(dest_mnt)) {
1398 for (p = source_mnt; p; p = next_mnt(p, source_mnt)) 1437 for (p = source_mnt; p; p = next_mnt(p, source_mnt))
@@ -1411,7 +1450,8 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
1411 list_del_init(&child->mnt_hash); 1450 list_del_init(&child->mnt_hash);
1412 commit_tree(child); 1451 commit_tree(child);
1413 } 1452 }
1414 spin_unlock(&vfsmount_lock); 1453 br_write_unlock(vfsmount_lock);
1454
1415 return 0; 1455 return 0;
1416 1456
1417 out_cleanup_ids: 1457 out_cleanup_ids:
@@ -1466,10 +1506,10 @@ static int do_change_type(struct path *path, int flag)
1466 goto out_unlock; 1506 goto out_unlock;
1467 } 1507 }
1468 1508
1469 spin_lock(&vfsmount_lock); 1509 br_write_lock(vfsmount_lock);
1470 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) 1510 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
1471 change_mnt_propagation(m, type); 1511 change_mnt_propagation(m, type);
1472 spin_unlock(&vfsmount_lock); 1512 br_write_unlock(vfsmount_lock);
1473 1513
1474 out_unlock: 1514 out_unlock:
1475 up_write(&namespace_sem); 1515 up_write(&namespace_sem);
@@ -1513,9 +1553,10 @@ static int do_loopback(struct path *path, char *old_name,
1513 err = graft_tree(mnt, path); 1553 err = graft_tree(mnt, path);
1514 if (err) { 1554 if (err) {
1515 LIST_HEAD(umount_list); 1555 LIST_HEAD(umount_list);
1516 spin_lock(&vfsmount_lock); 1556
1557 br_write_lock(vfsmount_lock);
1517 umount_tree(mnt, 0, &umount_list); 1558 umount_tree(mnt, 0, &umount_list);
1518 spin_unlock(&vfsmount_lock); 1559 br_write_unlock(vfsmount_lock);
1519 release_mounts(&umount_list); 1560 release_mounts(&umount_list);
1520 } 1561 }
1521 1562
@@ -1568,16 +1609,16 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1568 else 1609 else
1569 err = do_remount_sb(sb, flags, data, 0); 1610 err = do_remount_sb(sb, flags, data, 0);
1570 if (!err) { 1611 if (!err) {
1571 spin_lock(&vfsmount_lock); 1612 br_write_lock(vfsmount_lock);
1572 mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK; 1613 mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK;
1573 path->mnt->mnt_flags = mnt_flags; 1614 path->mnt->mnt_flags = mnt_flags;
1574 spin_unlock(&vfsmount_lock); 1615 br_write_unlock(vfsmount_lock);
1575 } 1616 }
1576 up_write(&sb->s_umount); 1617 up_write(&sb->s_umount);
1577 if (!err) { 1618 if (!err) {
1578 spin_lock(&vfsmount_lock); 1619 br_write_lock(vfsmount_lock);
1579 touch_mnt_namespace(path->mnt->mnt_ns); 1620 touch_mnt_namespace(path->mnt->mnt_ns);
1580 spin_unlock(&vfsmount_lock); 1621 br_write_unlock(vfsmount_lock);
1581 } 1622 }
1582 return err; 1623 return err;
1583} 1624}
@@ -1754,7 +1795,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
1754 return; 1795 return;
1755 1796
1756 down_write(&namespace_sem); 1797 down_write(&namespace_sem);
1757 spin_lock(&vfsmount_lock); 1798 br_write_lock(vfsmount_lock);
1758 1799
1759 /* extract from the expiration list every vfsmount that matches the 1800 /* extract from the expiration list every vfsmount that matches the
1760 * following criteria: 1801 * following criteria:
@@ -1773,7 +1814,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
1773 touch_mnt_namespace(mnt->mnt_ns); 1814 touch_mnt_namespace(mnt->mnt_ns);
1774 umount_tree(mnt, 1, &umounts); 1815 umount_tree(mnt, 1, &umounts);
1775 } 1816 }
1776 spin_unlock(&vfsmount_lock); 1817 br_write_unlock(vfsmount_lock);
1777 up_write(&namespace_sem); 1818 up_write(&namespace_sem);
1778 1819
1779 release_mounts(&umounts); 1820 release_mounts(&umounts);
@@ -1830,6 +1871,8 @@ resume:
1830/* 1871/*
1831 * process a list of expirable mountpoints with the intent of discarding any 1872 * process a list of expirable mountpoints with the intent of discarding any
1832 * submounts of a specific parent mountpoint 1873 * submounts of a specific parent mountpoint
1874 *
1875 * vfsmount_lock must be held for write
1833 */ 1876 */
1834static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts) 1877static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts)
1835{ 1878{
@@ -2048,9 +2091,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2048 kfree(new_ns); 2091 kfree(new_ns);
2049 return ERR_PTR(-ENOMEM); 2092 return ERR_PTR(-ENOMEM);
2050 } 2093 }
2051 spin_lock(&vfsmount_lock); 2094 br_write_lock(vfsmount_lock);
2052 list_add_tail(&new_ns->list, &new_ns->root->mnt_list); 2095 list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
2053 spin_unlock(&vfsmount_lock); 2096 br_write_unlock(vfsmount_lock);
2054 2097
2055 /* 2098 /*
2056 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts 2099 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
@@ -2244,7 +2287,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2244 goto out2; /* not attached */ 2287 goto out2; /* not attached */
2245 /* make sure we can reach put_old from new_root */ 2288 /* make sure we can reach put_old from new_root */
2246 tmp = old.mnt; 2289 tmp = old.mnt;
2247 spin_lock(&vfsmount_lock); 2290 br_write_lock(vfsmount_lock);
2248 if (tmp != new.mnt) { 2291 if (tmp != new.mnt) {
2249 for (;;) { 2292 for (;;) {
2250 if (tmp->mnt_parent == tmp) 2293 if (tmp->mnt_parent == tmp)
@@ -2264,7 +2307,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2264 /* mount new_root on / */ 2307 /* mount new_root on / */
2265 attach_mnt(new.mnt, &root_parent); 2308 attach_mnt(new.mnt, &root_parent);
2266 touch_mnt_namespace(current->nsproxy->mnt_ns); 2309 touch_mnt_namespace(current->nsproxy->mnt_ns);
2267 spin_unlock(&vfsmount_lock); 2310 br_write_unlock(vfsmount_lock);
2268 chroot_fs_refs(&root, &new); 2311 chroot_fs_refs(&root, &new);
2269 error = 0; 2312 error = 0;
2270 path_put(&root_parent); 2313 path_put(&root_parent);
@@ -2279,7 +2322,7 @@ out1:
2279out0: 2322out0:
2280 return error; 2323 return error;
2281out3: 2324out3:
2282 spin_unlock(&vfsmount_lock); 2325 br_write_unlock(vfsmount_lock);
2283 goto out2; 2326 goto out2;
2284} 2327}
2285 2328
@@ -2326,6 +2369,8 @@ void __init mnt_init(void)
2326 for (u = 0; u < HASH_SIZE; u++) 2369 for (u = 0; u < HASH_SIZE; u++)
2327 INIT_LIST_HEAD(&mount_hashtable[u]); 2370 INIT_LIST_HEAD(&mount_hashtable[u]);
2328 2371
2372 br_lock_init(vfsmount_lock);
2373
2329 err = sysfs_init(); 2374 err = sysfs_init();
2330 if (err) 2375 if (err)
2331 printk(KERN_WARNING "%s: sysfs_init error: %d\n", 2376 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
@@ -2344,9 +2389,9 @@ void put_mnt_ns(struct mnt_namespace *ns)
2344 if (!atomic_dec_and_test(&ns->count)) 2389 if (!atomic_dec_and_test(&ns->count))
2345 return; 2390 return;
2346 down_write(&namespace_sem); 2391 down_write(&namespace_sem);
2347 spin_lock(&vfsmount_lock); 2392 br_write_lock(vfsmount_lock);
2348 umount_tree(ns->root, 0, &umount_list); 2393 umount_tree(ns->root, 0, &umount_list);
2349 spin_unlock(&vfsmount_lock); 2394 br_write_unlock(vfsmount_lock);
2350 up_write(&namespace_sem); 2395 up_write(&namespace_sem);
2351 release_mounts(&umount_list); 2396 release_mounts(&umount_list);
2352 kfree(ns); 2397 kfree(ns);
diff --git a/fs/pnode.c b/fs/pnode.c
index 5cc564a83149..8066b8dd748f 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -126,6 +126,9 @@ static int do_make_slave(struct vfsmount *mnt)
126 return 0; 126 return 0;
127} 127}
128 128
129/*
130 * vfsmount lock must be held for write
131 */
129void change_mnt_propagation(struct vfsmount *mnt, int type) 132void change_mnt_propagation(struct vfsmount *mnt, int type)
130{ 133{
131 if (type == MS_SHARED) { 134 if (type == MS_SHARED) {
@@ -270,12 +273,12 @@ int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry,
270 prev_src_mnt = child; 273 prev_src_mnt = child;
271 } 274 }
272out: 275out:
273 spin_lock(&vfsmount_lock); 276 br_write_lock(vfsmount_lock);
274 while (!list_empty(&tmp_list)) { 277 while (!list_empty(&tmp_list)) {
275 child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash); 278 child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash);
276 umount_tree(child, 0, &umount_list); 279 umount_tree(child, 0, &umount_list);
277 } 280 }
278 spin_unlock(&vfsmount_lock); 281 br_write_unlock(vfsmount_lock);
279 release_mounts(&umount_list); 282 release_mounts(&umount_list);
280 return ret; 283 return ret;
281} 284}
@@ -296,6 +299,8 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count)
296 * other mounts its parent propagates to. 299 * other mounts its parent propagates to.
297 * Check if any of these mounts that **do not have submounts** 300 * Check if any of these mounts that **do not have submounts**
298 * have more references than 'refcnt'. If so return busy. 301 * have more references than 'refcnt'. If so return busy.
302 *
303 * vfsmount lock must be held for read or write
299 */ 304 */
300int propagate_mount_busy(struct vfsmount *mnt, int refcnt) 305int propagate_mount_busy(struct vfsmount *mnt, int refcnt)
301{ 306{
@@ -353,6 +358,8 @@ static void __propagate_umount(struct vfsmount *mnt)
353 * collect all mounts that receive propagation from the mount in @list, 358 * collect all mounts that receive propagation from the mount in @list,
354 * and return these additional mounts in the same list. 359 * and return these additional mounts in the same list.
355 * @list: the list of mounts to be unmounted. 360 * @list: the list of mounts to be unmounted.
361 *
362 * vfsmount lock must be held for write
356 */ 363 */
357int propagate_umount(struct list_head *list) 364int propagate_umount(struct list_head *list)
358{ 365{