Diffstat (limited to 'fs/namespace.c')
-rw-r--r--  fs/namespace.c  384
1 file changed, 302 insertions(+), 82 deletions(-)
diff --git a/fs/namespace.c b/fs/namespace.c
index c768f733c8d6..35c56c2af61b 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -11,6 +11,8 @@
 #include <linux/syscalls.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
 #include <linux/smp_lock.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -37,12 +39,16 @@
 #define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head))
 #define HASH_SIZE (1UL << HASH_SHIFT)
 
-/* spinlock for vfsmount related operations, inplace of dcache_lock */
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
+/*
+ * vfsmount "brlock" style spinlock for vfsmount related operations, use
+ * vfsmount_read_lock/vfsmount_write_lock functions.
+ */
+static DEFINE_PER_CPU(spinlock_t, vfsmount_lock);
 
 static int event;
 static DEFINE_IDA(mnt_id_ida);
 static DEFINE_IDA(mnt_group_ida);
+static DEFINE_SPINLOCK(mnt_id_lock);
 static int mnt_id_start = 0;
 static int mnt_group_start = 1;
 
@@ -54,6 +60,48 @@ static struct rw_semaphore namespace_sem;
 struct kobject *fs_kobj;
 EXPORT_SYMBOL_GPL(fs_kobj);
 
+void vfsmount_read_lock(int cpu)
+{
+	spinlock_t *lock;
+
+	lock = &per_cpu(vfsmount_lock, cpu);
+	spin_lock(lock);
+}
+
+void vfsmount_read_unlock(int cpu)
+{
+	spinlock_t *lock;
+
+	lock = &per_cpu(vfsmount_lock, cpu);
+	spin_unlock(lock);
+}
+
+void vfsmount_write_lock(void)
+{
+	int i;
+	int nr = 0;
+
+	for_each_possible_cpu(i) {
+		spinlock_t *lock;
+
+		lock = &per_cpu(vfsmount_lock, i);
+		spin_lock_nested(lock, nr);
+		nr++;
+	}
+}
+
+void vfsmount_write_unlock(void)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		spinlock_t *lock;
+
+		lock = &per_cpu(vfsmount_lock, i);
+		spin_unlock(lock);
+	}
+}
+
 static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
 {
 	unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
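The helpers above are a "brlock" (big-reader lock): vfsmount_read_lock() takes only the caller's per-CPU spinlock, so readers on different CPUs never contend, while vfsmount_write_lock() has to take every CPU's lock in a fixed order (spin_lock_nested() is only there to tell lockdep the nested acquisitions are intentional). A minimal userspace sketch of the same idea, with pthread spinlocks standing in for the kernel's per-CPU locks (NCPU and the brlock_* names are illustrative, not part of the patch):

#include <pthread.h>

#define NCPU 4	/* stand-in for the set of possible CPUs */

static pthread_spinlock_t brlock[NCPU];

static void brlock_init(void)
{
	for (int i = 0; i < NCPU; i++)
		pthread_spin_init(&brlock[i], PTHREAD_PROCESS_PRIVATE);
}

/* reader: lock only this thread's slot, cheap and uncontended */
static void brlock_read_lock(int cpu)
{
	pthread_spin_lock(&brlock[cpu]);
}

static void brlock_read_unlock(int cpu)
{
	pthread_spin_unlock(&brlock[cpu]);
}

/* writer: take every slot in a fixed order, excluding all readers */
static void brlock_write_lock(void)
{
	for (int i = 0; i < NCPU; i++)
		pthread_spin_lock(&brlock[i]);
}

static void brlock_write_unlock(void)
{
	for (int i = 0; i < NCPU; i++)
		pthread_spin_unlock(&brlock[i]);
}

The trade-off is deliberate: reads (path walking) vastly outnumber writes (mount/umount), so making the write side O(nr_cpus) in exchange for contention-free reads is a net win.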
@@ -64,18 +112,21 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
 
 #define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)
 
-/* allocation is serialized by namespace_sem */
+/*
+ * allocation is serialized by namespace_sem, but we need the spinlock to
+ * serialise with freeing.
+ */
 static int mnt_alloc_id(struct vfsmount *mnt)
 {
 	int res;
 
 retry:
 	ida_pre_get(&mnt_id_ida, GFP_KERNEL);
-	spin_lock(&vfsmount_lock);
+	spin_lock(&mnt_id_lock);
 	res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
 	if (!res)
 		mnt_id_start = mnt->mnt_id + 1;
-	spin_unlock(&vfsmount_lock);
+	spin_unlock(&mnt_id_lock);
 	if (res == -EAGAIN)
 		goto retry;
 
@@ -85,11 +136,11 @@ retry:
 static void mnt_free_id(struct vfsmount *mnt)
 {
 	int id = mnt->mnt_id;
-	spin_lock(&vfsmount_lock);
+	spin_lock(&mnt_id_lock);
 	ida_remove(&mnt_id_ida, id);
 	if (mnt_id_start > id)
 		mnt_id_start = id;
-	spin_unlock(&vfsmount_lock);
+	spin_unlock(&mnt_id_lock);
 }
 
 /*
@@ -125,6 +176,49 @@ void mnt_release_group_id(struct vfsmount *mnt)
 	mnt->mnt_group_id = 0;
 }
 
+static inline void add_mnt_count(struct vfsmount *mnt, int n)
+{
+#ifdef CONFIG_SMP
+	(*per_cpu_ptr(mnt->mnt_count, smp_processor_id())) += n;
+#else
+	mnt->mnt_count += n;
+#endif
+}
+
+static inline void inc_mnt_count(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+	(*per_cpu_ptr(mnt->mnt_count, smp_processor_id()))++;
+#else
+	mnt->mnt_count++;
+#endif
+}
+
+static inline void dec_mnt_count(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+	(*per_cpu_ptr(mnt->mnt_count, smp_processor_id()))--;
+#else
+	mnt->mnt_count--;
+#endif
+}
+
+unsigned int count_mnt_count(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+	unsigned int count = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		count += *per_cpu_ptr(mnt->mnt_count, cpu);
+	}
+
+	return count;
+#else
+	return mnt->mnt_count;
+#endif
+}
+
 struct vfsmount *alloc_vfsmnt(const char *name)
 {
 	struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
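These helpers replace the old atomic_t mnt_count with a per-CPU counter: mntget()/mntput() touch only the local CPU's slot, so the shared-cacheline ping-pong of atomic_inc/atomic_dec disappears. An individual slot may go negative (a mount grabbed on one CPU can be dropped on another); only the sum across all CPUs is meaningful, which is why count_mnt_count() is only used with writers excluded. A small self-contained analogue (NCPU and the names are illustrative):

#include <stdio.h>

#define NCPU 4

struct counter {
	int percpu[NCPU];	/* each CPU updates only its own slot */
};

static void counter_add(struct counter *c, int cpu, int n)
{
	c->percpu[cpu] += n;	/* purely local, no shared cacheline */
}

/* slow path: sum every slot; only the total is meaningful */
static int counter_sum(const struct counter *c)
{
	int sum = 0;

	for (int i = 0; i < NCPU; i++)
		sum += c->percpu[i];
	return sum;
}

int main(void)
{
	struct counter c = { { 0 } };

	counter_add(&c, 0, +1);	/* "mntget" on CPU 0 */
	counter_add(&c, 2, -1);	/* "mntput" on CPU 2: slot 2 is now -1 */
	printf("count = %d\n", counter_sum(&c));	/* prints 0 */
	return 0;
}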
@@ -141,7 +235,13 @@ struct vfsmount *alloc_vfsmnt(const char *name)
 				goto out_free_id;
 		}
 
-		atomic_set(&mnt->mnt_count, 1);
+#ifdef CONFIG_SMP
+		mnt->mnt_count = alloc_percpu(int);
+		if (!mnt->mnt_count)
+			goto out_free_devname;
+#else
+		mnt->mnt_count = 0;
+#endif
 		INIT_LIST_HEAD(&mnt->mnt_hash);
 		INIT_LIST_HEAD(&mnt->mnt_child);
 		INIT_LIST_HEAD(&mnt->mnt_mounts);
@@ -153,14 +253,19 @@ struct vfsmount *alloc_vfsmnt(const char *name)
 #ifdef CONFIG_SMP
 		mnt->mnt_writers = alloc_percpu(int);
 		if (!mnt->mnt_writers)
-			goto out_free_devname;
+			goto out_free_mntcount;
 #else
 		mnt->mnt_writers = 0;
 #endif
+		preempt_disable();
+		inc_mnt_count(mnt);
+		preempt_enable();
 	}
 	return mnt;
 
 #ifdef CONFIG_SMP
+out_free_mntcount:
+	free_percpu(mnt->mnt_count);
 out_free_devname:
 	kfree(mnt->mnt_devname);
 #endif
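The new out_free_mntcount label extends alloc_vfsmnt()'s usual unwind ladder: each allocation that can fail jumps to the label that releases everything allocated before it, so resources are freed in reverse order of allocation. A generic sketch of the idiom (obj_alloc() and its fields are hypothetical, not from this patch):

#include <stdlib.h>

struct obj {
	int *a;
	int *b;
};

static struct obj *obj_alloc(void)
{
	struct obj *o = calloc(1, sizeof(*o));
	if (!o)
		return NULL;
	o->a = malloc(sizeof(*o->a));
	if (!o->a)
		goto out_free_obj;
	o->b = malloc(sizeof(*o->b));
	if (!o->b)
		goto out_free_a;	/* unwinds a, then the object */
	return o;

out_free_a:
	free(o->a);
out_free_obj:
	free(o);
	return NULL;
}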
@@ -264,8 +369,16 @@ int mnt_want_write(struct vfsmount *mnt)
 	 * incremented count after it has set MNT_WRITE_HOLD.
 	 */
 	smp_mb();
-	while (mnt->mnt_flags & MNT_WRITE_HOLD)
-		cpu_relax();
+	preempt_enable();
+	/*
+	 * HACK ALERT. on RT we can not spin here with cpu_relax() and
+	 * preemption disabled so we block on the vfsmount lock which is
+	 * held by mnt_make_readonly(). Works on !RT as well.
+	 */
+	while (mnt->mnt_flags & MNT_WRITE_HOLD) {
+		vfsmount_write_lock();
+		vfsmount_write_unlock();
+	}
 	/*
 	 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
 	 * be set to match its requirements. So we must not load that until
@@ -273,12 +386,11 @@ int mnt_want_write(struct vfsmount *mnt)
 	 */
 	smp_rmb();
 	if (__mnt_is_readonly(mnt)) {
+		preempt_disable();
 		dec_mnt_writers(mnt);
+		preempt_enable();
 		ret = -EROFS;
-		goto out;
 	}
-out:
-	preempt_enable();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(mnt_want_write);
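The rewritten wait loop trades busy-waiting for blocking: mnt_make_readonly() holds the vfsmount write lock for as long as MNT_WRITE_HOLD is set, so acquiring and immediately releasing that lock cannot complete until the holder is done. A hedged sketch of the idiom, with a pthread mutex standing in for the write lock and memory-ordering details (the kernel's smp_mb/smp_rmb) ignored; all names are illustrative:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t hold_lock = PTHREAD_MUTEX_INITIALIZER;
static volatile bool write_hold;	/* stands in for MNT_WRITE_HOLD */

/* slow path: the flag is only ever set while the lock is held */
static void make_readonly(void)
{
	pthread_mutex_lock(&hold_lock);
	write_hold = true;
	/* ... inspect writer counts, decide the outcome ... */
	write_hold = false;
	pthread_mutex_unlock(&hold_lock);
}

/* fast path: block on the lock instead of spinning on the flag */
static void wait_for_hold_clear(void)
{
	while (write_hold) {
		pthread_mutex_lock(&hold_lock);
		pthread_mutex_unlock(&hold_lock);
	}
}

Blocking on the lock sleeps on RT (where spinlocks become mutexes) instead of spinning with preemption disabled, and degenerates to roughly the old cpu_relax() behaviour on !RT.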
@@ -344,7 +456,7 @@ static int mnt_make_readonly(struct vfsmount *mnt)
 {
 	int ret = 0;
 
-	spin_lock(&vfsmount_lock);
+	vfsmount_write_lock();
 	mnt->mnt_flags |= MNT_WRITE_HOLD;
 	/*
 	 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
@@ -378,15 +490,15 @@ static int mnt_make_readonly(struct vfsmount *mnt)
 	 */
 	smp_wmb();
 	mnt->mnt_flags &= ~MNT_WRITE_HOLD;
-	spin_unlock(&vfsmount_lock);
+	vfsmount_write_unlock();
 	return ret;
 }
 
 static void __mnt_unmake_readonly(struct vfsmount *mnt)
 {
-	spin_lock(&vfsmount_lock);
+	vfsmount_write_lock();
 	mnt->mnt_flags &= ~MNT_READONLY;
-	spin_unlock(&vfsmount_lock);
+	vfsmount_write_unlock();
 }
 
 void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
@@ -439,10 +551,13 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
 struct vfsmount *lookup_mnt(struct path *path)
 {
 	struct vfsmount *child_mnt;
-	spin_lock(&vfsmount_lock);
+	int cpu = get_cpu();
+	put_cpu();
+
+	vfsmount_read_lock(cpu);
 	if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1)))
 		mntget(child_mnt);
-	spin_unlock(&vfsmount_lock);
+	vfsmount_read_unlock(cpu);
 	return child_mnt;
 }
 
@@ -467,6 +582,16 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
 	}
 }
 
+static void dentry_reset_mounted(struct vfsmount *mnt, struct dentry *dentry)
+{
+	if (!__lookup_mnt(mnt, dentry, 0)) {
+		spin_lock(&dentry->d_lock);
+		WARN_ON(dentry->d_mounted == 0);
+		dentry->d_mounted--;
+		spin_unlock(&dentry->d_lock);
+	}
+}
+
 static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
 {
 	old_path->dentry = mnt->mnt_mountpoint;
@@ -475,15 +600,19 @@ static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
 	mnt->mnt_mountpoint = mnt->mnt_root;
 	list_del_init(&mnt->mnt_child);
 	list_del_init(&mnt->mnt_hash);
-	old_path->dentry->d_mounted--;
+	dentry_reset_mounted(old_path->mnt, old_path->dentry);
+	WARN_ON(!(mnt->mnt_flags & MNT_MOUNTED));
+	mnt->mnt_flags &= ~MNT_MOUNTED;
 }
 
 void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
 			struct vfsmount *child_mnt)
 {
 	child_mnt->mnt_parent = mntget(mnt);
-	child_mnt->mnt_mountpoint = dget(dentry);
+	spin_lock(&dentry->d_lock);
+	child_mnt->mnt_mountpoint = dget_dlock(dentry);
 	dentry->d_mounted++;
+	spin_unlock(&dentry->d_lock);
 }
 
 static void attach_mnt(struct vfsmount *mnt, struct path *path)
@@ -492,6 +621,8 @@ static void attach_mnt(struct vfsmount *mnt, struct path *path)
 	list_add_tail(&mnt->mnt_hash, mount_hashtable +
 			hash(path->mnt, path->dentry));
 	list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts);
+	WARN_ON(mnt->mnt_flags & MNT_MOUNTED);
+	mnt->mnt_flags |= MNT_MOUNTED;
 }
 
 /*
@@ -514,6 +645,8 @@ static void commit_tree(struct vfsmount *mnt)
 	list_add_tail(&mnt->mnt_hash, mount_hashtable +
 			hash(parent, mnt->mnt_mountpoint));
 	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+	WARN_ON(mnt->mnt_flags & MNT_MOUNTED);
+	mnt->mnt_flags |= MNT_MOUNTED;
 	touch_mnt_namespace(n);
 }
 
@@ -561,7 +694,7 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
 		goto out_free;
 	}
 
-	mnt->mnt_flags = old->mnt_flags;
+	mnt->mnt_flags = (old->mnt_flags & ~MNT_MOUNTED);
 	atomic_inc(&sb->s_active);
 	mnt->mnt_sb = sb;
 	mnt->mnt_root = dget(root);
@@ -617,43 +750,87 @@ static inline void __mntput(struct vfsmount *mnt)
 
 void mntput_no_expire(struct vfsmount *mnt)
 {
-repeat:
-	if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) {
-		if (likely(!mnt->mnt_pinned)) {
-			spin_unlock(&vfsmount_lock);
-			__mntput(mnt);
-			return;
+	int cpu = get_cpu();
+	put_cpu();
+	if (likely(mnt->mnt_flags & MNT_MOUNTED)) {
+		vfsmount_read_lock(cpu);
+		if (unlikely(!(mnt->mnt_flags & MNT_MOUNTED))) {
+			vfsmount_read_unlock(cpu);
+			goto repeat;
 		}
-		atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
-		mnt->mnt_pinned = 0;
-		spin_unlock(&vfsmount_lock);
-		acct_auto_close_mnt(mnt);
-		security_sb_umount_close(mnt);
-		goto repeat;
+		preempt_disable();
+		dec_mnt_count(mnt);
+		preempt_enable();
+		vfsmount_read_unlock(cpu);
+
+		return;
 	}
-}
 
+repeat:
+	vfsmount_write_lock();
+	BUG_ON(mnt->mnt_flags & MNT_MOUNTED);
+	preempt_disable();
+	dec_mnt_count(mnt);
+	preempt_enable();
+	if (count_mnt_count(mnt)) {
+		vfsmount_write_unlock();
+		return;
+	}
+	if (likely(!mnt->mnt_pinned)) {
+		vfsmount_write_unlock();
+		__mntput(mnt);
+		return;
+	}
+	add_mnt_count(mnt, mnt->mnt_pinned + 1);
+	mnt->mnt_pinned = 0;
+	vfsmount_write_unlock();
+	acct_auto_close_mnt(mnt);
+	security_sb_umount_close(mnt);
+	goto repeat;
+}
 EXPORT_SYMBOL(mntput_no_expire);
 
+void mntput(struct vfsmount *mnt)
+{
+	if (mnt) {
+		/* avoid cacheline pingpong */
+		if (unlikely(mnt->mnt_expiry_mark))
+			mnt->mnt_expiry_mark = 0;
+		mntput_no_expire(mnt);
+	}
+}
+EXPORT_SYMBOL(mntput);
+
+struct vfsmount *mntget(struct vfsmount *mnt)
+{
+	if (mnt) {
+		preempt_disable();
+		inc_mnt_count(mnt);
+		preempt_enable();
+	}
+	return mnt;
+}
+EXPORT_SYMBOL(mntget);
+
 void mnt_pin(struct vfsmount *mnt)
 {
-	spin_lock(&vfsmount_lock);
+	vfsmount_write_lock();
 	mnt->mnt_pinned++;
-	spin_unlock(&vfsmount_lock);
+	vfsmount_write_unlock();
 }
-
 EXPORT_SYMBOL(mnt_pin);
 
 void mnt_unpin(struct vfsmount *mnt)
 {
-	spin_lock(&vfsmount_lock);
+	vfsmount_write_lock();
 	if (mnt->mnt_pinned) {
-		atomic_inc(&mnt->mnt_count);
+		preempt_disable();
+		inc_mnt_count(mnt);
+		preempt_enable();
 		mnt->mnt_pinned--;
 	}
-	spin_unlock(&vfsmount_lock);
+	vfsmount_write_unlock();
 }
-
 EXPORT_SYMBOL(mnt_unpin);
 
 static inline void mangle(struct seq_file *m, const char *s)
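mntput_no_expire() now has two tiers. While MNT_MOUNTED is set, the mount tree itself holds a reference, so a put can never be the final one and a per-CPU decrement under the cheap read lock suffices; once the mount has been detached, final puts go through the write lock, where summing the per-CPU count is safe. A sketch of the shape of that fast/slow split, with a pthread rwlock standing in for the per-CPU lock pair (illustrative only, not the kernel code):

#include <pthread.h>
#include <stdbool.h>

static pthread_rwlock_t mount_lock = PTHREAD_RWLOCK_INITIALIZER;

struct mnt {
	bool mounted;	/* like MNT_MOUNTED: cleared under the write lock */
	int count;
};

static void mnt_put(struct mnt *m)
{
	if (m->mounted) {		/* optimistic, unlocked check */
		pthread_rwlock_rdlock(&mount_lock);
		if (m->mounted) {	/* recheck: unmount is now excluded */
			m->count--;	/* cannot hit zero, tree holds a ref */
			pthread_rwlock_unlock(&mount_lock);
			return;
		}
		pthread_rwlock_unlock(&mount_lock);
	}
	pthread_rwlock_wrlock(&mount_lock);	/* rare: mount is detached */
	if (--m->count == 0) {
		/* ... __mntput(): tear the mount down ... */
	}
	pthread_rwlock_unlock(&mount_lock);
}

The unlocked test can race with umount, which is why the flag is tested again once the read lock is held; the kernel version falls back to the slow path in that case.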
@@ -934,12 +1111,13 @@ int may_umount_tree(struct vfsmount *mnt)
 	int minimum_refs = 0;
 	struct vfsmount *p;
 
-	spin_lock(&vfsmount_lock);
+	/* write lock needed for count_mnt_count */
+	vfsmount_write_lock();
 	for (p = mnt; p; p = next_mnt(p, mnt)) {
-		actual_refs += atomic_read(&p->mnt_count);
+		actual_refs += count_mnt_count(p);
 		minimum_refs += 2;
 	}
-	spin_unlock(&vfsmount_lock);
+	vfsmount_write_unlock();
 
 	if (actual_refs > minimum_refs)
 		return 0;
@@ -966,11 +1144,12 @@ int may_umount(struct vfsmount *mnt)
 {
 	int ret = 1;
 	down_read(&namespace_sem);
-	spin_lock(&vfsmount_lock);
+	vfsmount_write_lock();
 	if (propagate_mount_busy(mnt, 2))
 		ret = 0;
-	spin_unlock(&vfsmount_lock);
+	vfsmount_write_unlock();
 	up_read(&namespace_sem);
+
 	return ret;
 }
 
@@ -985,13 +1164,14 @@ void release_mounts(struct list_head *head)
 		if (mnt->mnt_parent != mnt) {
 			struct dentry *dentry;
 			struct vfsmount *m;
-			spin_lock(&vfsmount_lock);
+
+			vfsmount_write_lock();
 			dentry = mnt->mnt_mountpoint;
 			m = mnt->mnt_parent;
 			mnt->mnt_mountpoint = mnt->mnt_root;
 			mnt->mnt_parent = mnt;
 			m->mnt_ghosts--;
-			spin_unlock(&vfsmount_lock);
+			vfsmount_write_unlock();
 			dput(dentry);
 			mntput(m);
 		}
@@ -1015,9 +1195,11 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
 		__touch_mnt_namespace(p->mnt_ns);
 		p->mnt_ns = NULL;
 		list_del_init(&p->mnt_child);
+		WARN_ON(!(p->mnt_flags & MNT_MOUNTED));
+		p->mnt_flags &= ~MNT_MOUNTED;
 		if (p->mnt_parent != p) {
 			p->mnt_parent->mnt_ghosts++;
-			p->mnt_mountpoint->d_mounted--;
+			dentry_reset_mounted(p->mnt_parent, p->mnt_mountpoint);
 		}
 		change_mnt_propagation(p, MS_PRIVATE);
 	}
@@ -1046,8 +1228,16 @@ static int do_umount(struct vfsmount *mnt, int flags)
 	    flags & (MNT_FORCE | MNT_DETACH))
 		return -EINVAL;
 
-	if (atomic_read(&mnt->mnt_count) != 2)
+	/*
+	 * probably don't strictly need the lock here if we examined
+	 * all race cases, but it's a slowpath.
+	 */
+	vfsmount_write_lock();
+	if (count_mnt_count(mnt) != 2) {
+		vfsmount_write_unlock();
 		return -EBUSY;
+	}
+	vfsmount_write_unlock();
 
 	if (!xchg(&mnt->mnt_expiry_mark, 1))
 		return -EAGAIN;
@@ -1089,7 +1279,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
 	}
 
 	down_write(&namespace_sem);
-	spin_lock(&vfsmount_lock);
+	vfsmount_write_lock();
 	event++;
 
 	if (!(flags & MNT_DETACH))
@@ -1101,7 +1291,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
 		umount_tree(mnt, 1, &umount_list);
 		retval = 0;
 	}
-	spin_unlock(&vfsmount_lock);
+	vfsmount_write_unlock();
 	if (retval)
 		security_sb_umount_busy(mnt);
 	up_write(&namespace_sem);
@@ -1188,6 +1378,13 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
 		goto Enomem;
 	q->mnt_mountpoint = mnt->mnt_mountpoint;
 
+	/*
+	 * We don't call attach_mnt on a cloned rootfs, so set it as
+	 * mounted here.
+	 */
+	WARN_ON(q->mnt_flags & MNT_MOUNTED);
+	q->mnt_flags |= MNT_MOUNTED;
+
 	p = mnt;
 	list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
 		if (!is_subdir(r->mnt_mountpoint, dentry))
@@ -1208,19 +1405,19 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
 			q = clone_mnt(p, p->mnt_root, flag);
 			if (!q)
 				goto Enomem;
-			spin_lock(&vfsmount_lock);
+			vfsmount_write_lock();
 			list_add_tail(&q->mnt_list, &res->mnt_list);
 			attach_mnt(q, &path);
-			spin_unlock(&vfsmount_lock);
+			vfsmount_write_unlock();
 		}
 	}
 	return res;
 Enomem:
 	if (res) {
 		LIST_HEAD(umount_list);
-		spin_lock(&vfsmount_lock);
+		vfsmount_write_lock();
 		umount_tree(res, 0, &umount_list);
-		spin_unlock(&vfsmount_lock);
+		vfsmount_write_unlock();
 		release_mounts(&umount_list);
 	}
 	return NULL;
@@ -1239,9 +1436,9 @@ void drop_collected_mounts(struct vfsmount *mnt)
 {
 	LIST_HEAD(umount_list);
 	down_write(&namespace_sem);
-	spin_lock(&vfsmount_lock);
+	vfsmount_write_lock();
 	umount_tree(mnt, 0, &umount_list);
-	spin_unlock(&vfsmount_lock);
+	vfsmount_write_unlock();
 	up_write(&namespace_sem);
 	release_mounts(&umount_list);
 }
@@ -1354,12 +1551,13 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
 	if (err)
 		goto out_cleanup_ids;
 
-	spin_lock(&vfsmount_lock);
+	vfsmount_write_lock();
 
 	if (IS_MNT_SHARED(dest_mnt)) {
 		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
 			set_mnt_shared(p);
 	}
+
 	if (parent_path) {
 		detach_mnt(source_mnt, parent_path);
 		attach_mnt(source_mnt, path);
@@ -1373,7 +1571,8 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
 		list_del_init(&child->mnt_hash);
 		commit_tree(child);
 	}
-	spin_unlock(&vfsmount_lock);
+	vfsmount_write_unlock();
+
 	return 0;
 
  out_cleanup_ids:
@@ -1435,10 +1634,10 @@ static int do_change_type(struct path *path, int flag)
 		goto out_unlock;
 	}
 
-	spin_lock(&vfsmount_lock);
+	vfsmount_write_lock();
 	for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
 		change_mnt_propagation(m, type);
-	spin_unlock(&vfsmount_lock);
+	vfsmount_write_unlock();
 
  out_unlock:
 	up_write(&namespace_sem);
@@ -1482,9 +1681,10 @@ static int do_loopback(struct path *path, char *old_name,
 	err = graft_tree(mnt, path);
 	if (err) {
 		LIST_HEAD(umount_list);
-		spin_lock(&vfsmount_lock);
+
+		vfsmount_write_lock();
 		umount_tree(mnt, 0, &umount_list);
-		spin_unlock(&vfsmount_lock);
+		vfsmount_write_unlock();
 		release_mounts(&umount_list);
 	}
 
@@ -1537,18 +1737,19 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
 	else
 		err = do_remount_sb(sb, flags, data, 0);
 	if (!err) {
-		spin_lock(&vfsmount_lock);
+		vfsmount_write_lock();
 		mnt_flags |= path->mnt->mnt_flags & MNT_PNODE_MASK;
+		mnt_flags |= path->mnt->mnt_flags & MNT_MOUNTED;
 		path->mnt->mnt_flags = mnt_flags;
-		spin_unlock(&vfsmount_lock);
+		vfsmount_write_unlock();
 	}
 	up_write(&sb->s_umount);
 	if (!err) {
 		security_sb_post_remount(path->mnt, flags, data);
 
-		spin_lock(&vfsmount_lock);
+		vfsmount_write_lock();
 		touch_mnt_namespace(path->mnt->mnt_ns);
-		spin_unlock(&vfsmount_lock);
+		vfsmount_write_unlock();
 	}
 	return err;
 }
@@ -1725,7 +1926,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
 		return;
 
 	down_write(&namespace_sem);
-	spin_lock(&vfsmount_lock);
+	vfsmount_write_lock();
 
 	/* extract from the expiration list every vfsmount that matches the
 	 * following criteria:
@@ -1744,7 +1945,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
 		touch_mnt_namespace(mnt->mnt_ns);
 		umount_tree(mnt, 1, &umounts);
 	}
-	spin_unlock(&vfsmount_lock);
+	vfsmount_write_unlock();
 	up_write(&namespace_sem);
 
 	release_mounts(&umounts);
@@ -2019,9 +2220,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 		kfree(new_ns);
 		return ERR_PTR(-ENOMEM);
 	}
-	spin_lock(&vfsmount_lock);
+	vfsmount_write_lock();
 	list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
-	spin_unlock(&vfsmount_lock);
+	vfsmount_write_unlock();
 
 	/*
 	 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
@@ -2218,7 +2419,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 		goto out2; /* not attached */
 	/* make sure we can reach put_old from new_root */
 	tmp = old.mnt;
-	spin_lock(&vfsmount_lock);
+	vfsmount_write_lock();
 	if (tmp != new.mnt) {
 		for (;;) {
 			if (tmp->mnt_parent == tmp)
@@ -2238,7 +2439,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	/* mount new_root on / */
 	attach_mnt(new.mnt, &root_parent);
 	touch_mnt_namespace(current->nsproxy->mnt_ns);
-	spin_unlock(&vfsmount_lock);
+	vfsmount_write_unlock();
 	chroot_fs_refs(&root, &new);
 	security_sb_post_pivotroot(&root, &new);
 	error = 0;
@@ -2254,7 +2455,7 @@ out1:
 out0:
 	return error;
 out3:
-	spin_unlock(&vfsmount_lock);
+	vfsmount_write_unlock();
 	goto out2;
 }
 
@@ -2284,6 +2485,7 @@ static void __init init_mount_tree(void)
 void __init mnt_init(void)
 {
 	unsigned u;
+	int i;
 	int err;
 
 	init_rwsem(&namespace_sem);
@@ -2301,6 +2503,9 @@ void __init mnt_init(void)
 	for (u = 0; u < HASH_SIZE; u++)
 		INIT_LIST_HEAD(&mount_hashtable[u]);
 
+	for_each_possible_cpu(i)
+		spin_lock_init(&per_cpu(vfsmount_lock, i));
+
 	err = sysfs_init();
 	if (err)
 		printk(KERN_WARNING "%s: sysfs_init error: %d\n",
@@ -2317,15 +2522,30 @@ void put_mnt_ns(struct mnt_namespace *ns)
 	struct vfsmount *root;
 	LIST_HEAD(umount_list);
 
-	if (!atomic_dec_and_lock(&ns->count, &vfsmount_lock))
+	/*
+	 * We open code this to avoid vfsmount_write_lock() in case of
+	 * ns->count > 1
+	 */
+	if (atomic_add_unless(&ns->count, -1, 1))
 		return;
+
+	/*
+	 * Do the full locking here as it's likely that ns->count will
+	 * drop to zero and we have to take namespace_sem and all vfs
+	 * mount locks anyway for umount_tree().
+	 */
+	down_write(&namespace_sem);
+	vfsmount_write_lock();
+	if (!atomic_dec_and_test(&ns->count)) {
+		vfsmount_write_unlock();
+		up_write(&namespace_sem);
+		return;
+	}
 	root = ns->root;
 	ns->root = NULL;
-	spin_unlock(&vfsmount_lock);
-	down_write(&namespace_sem);
-	spin_lock(&vfsmount_lock);
+
 	umount_tree(root, 0, &umount_list);
-	spin_unlock(&vfsmount_lock);
+	vfsmount_write_unlock();
 	up_write(&namespace_sem);
 	release_mounts(&umount_list);
 	kfree(ns);
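put_mnt_ns() open codes the old atomic_dec_and_lock(): atomic_add_unless(&ns->count, -1, 1) decrements only when the count is not 1, so every put except the last stays lock-free, and only the final put pays for namespace_sem plus all the per-CPU locks. A C11 sketch of that primitive (dec_unless_one() is a made-up name, not a kernel API):

#include <stdatomic.h>
#include <stdbool.h>

/* decrement *v unless it is 1; return true if we decremented */
static bool dec_unless_one(atomic_int *v)
{
	int old = atomic_load(v);

	while (old != 1) {
		/* on failure, the CAS reloads the current value into old */
		if (atomic_compare_exchange_weak(v, &old, old - 1))
			return true;
	}
	return false;
}

If dec_unless_one() returns false, the caller knows the count may drop to zero, takes the heavy locks, and only then performs the final atomic_dec_and_test().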