aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/ia64/kernel/perfmon.c2
-rw-r--r--drivers/mtd/mtdchar.c2
-rw-r--r--fs/anon_inodes.c2
-rw-r--r--fs/fs_struct.c26
-rw-r--r--fs/internal.h1
-rw-r--r--fs/namei.c24
-rw-r--r--fs/namespace.c242
-rw-r--r--fs/pipe.c2
-rw-r--r--fs/pnode.c4
-rw-r--r--fs/super.c2
-rw-r--r--include/linux/mount.h53
-rw-r--r--include/linux/path.h2
-rw-r--r--net/socket.c19
13 files changed, 283 insertions, 98 deletions
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 5a24f40bb48e..f099b82703d8 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -1542,7 +1542,7 @@ pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt)
1542 * any operations on the root directory. However, we need a non-trivial 1542 * any operations on the root directory. However, we need a non-trivial
1543 * d_name - pfm: will go nicely and kill the special-casing in procfs. 1543 * d_name - pfm: will go nicely and kill the special-casing in procfs.
1544 */ 1544 */
1545static struct vfsmount *pfmfs_mnt; 1545static struct vfsmount *pfmfs_mnt __read_mostly;
1546 1546
1547static int __init 1547static int __init
1548init_pfm_fs(void) 1548init_pfm_fs(void)
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 4759d827e8c7..f511dd15fd31 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -1201,7 +1201,7 @@ err_unregister_chdev:
1201static void __exit cleanup_mtdchar(void) 1201static void __exit cleanup_mtdchar(void)
1202{ 1202{
1203 unregister_mtd_user(&mtdchar_notifier); 1203 unregister_mtd_user(&mtdchar_notifier);
1204 mntput(mtd_inode_mnt); 1204 mntput_long(mtd_inode_mnt);
1205 unregister_filesystem(&mtd_inodefs_type); 1205 unregister_filesystem(&mtd_inodefs_type);
1206 __unregister_chrdev(MTD_CHAR_MAJOR, 0, 1 << MINORBITS, "mtd"); 1206 __unregister_chrdev(MTD_CHAR_MAJOR, 0, 1 << MINORBITS, "mtd");
1207} 1207}
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 9d92b33da8a0..5fd38112a6ca 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -232,7 +232,7 @@ static int __init anon_inode_init(void)
232 return 0; 232 return 0;
233 233
234err_mntput: 234err_mntput:
235 mntput(anon_inode_mnt); 235 mntput_long(anon_inode_mnt);
236err_unregister_filesystem: 236err_unregister_filesystem:
237 unregister_filesystem(&anon_inode_fs_type); 237 unregister_filesystem(&anon_inode_fs_type);
238err_exit: 238err_exit:
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 60b8531f41c5..68ca487bedb1 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -17,11 +17,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
17 write_seqcount_begin(&fs->seq); 17 write_seqcount_begin(&fs->seq);
18 old_root = fs->root; 18 old_root = fs->root;
19 fs->root = *path; 19 fs->root = *path;
20 path_get(path); 20 path_get_long(path);
21 write_seqcount_end(&fs->seq); 21 write_seqcount_end(&fs->seq);
22 spin_unlock(&fs->lock); 22 spin_unlock(&fs->lock);
23 if (old_root.dentry) 23 if (old_root.dentry)
24 path_put(&old_root); 24 path_put_long(&old_root);
25} 25}
26 26
27/* 27/*
@@ -36,12 +36,12 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
36 write_seqcount_begin(&fs->seq); 36 write_seqcount_begin(&fs->seq);
37 old_pwd = fs->pwd; 37 old_pwd = fs->pwd;
38 fs->pwd = *path; 38 fs->pwd = *path;
39 path_get(path); 39 path_get_long(path);
40 write_seqcount_end(&fs->seq); 40 write_seqcount_end(&fs->seq);
41 spin_unlock(&fs->lock); 41 spin_unlock(&fs->lock);
42 42
43 if (old_pwd.dentry) 43 if (old_pwd.dentry)
44 path_put(&old_pwd); 44 path_put_long(&old_pwd);
45} 45}
46 46
47void chroot_fs_refs(struct path *old_root, struct path *new_root) 47void chroot_fs_refs(struct path *old_root, struct path *new_root)
@@ -59,13 +59,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
59 write_seqcount_begin(&fs->seq); 59 write_seqcount_begin(&fs->seq);
60 if (fs->root.dentry == old_root->dentry 60 if (fs->root.dentry == old_root->dentry
61 && fs->root.mnt == old_root->mnt) { 61 && fs->root.mnt == old_root->mnt) {
62 path_get(new_root); 62 path_get_long(new_root);
63 fs->root = *new_root; 63 fs->root = *new_root;
64 count++; 64 count++;
65 } 65 }
66 if (fs->pwd.dentry == old_root->dentry 66 if (fs->pwd.dentry == old_root->dentry
67 && fs->pwd.mnt == old_root->mnt) { 67 && fs->pwd.mnt == old_root->mnt) {
68 path_get(new_root); 68 path_get_long(new_root);
69 fs->pwd = *new_root; 69 fs->pwd = *new_root;
70 count++; 70 count++;
71 } 71 }
@@ -76,13 +76,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
76 } while_each_thread(g, p); 76 } while_each_thread(g, p);
77 read_unlock(&tasklist_lock); 77 read_unlock(&tasklist_lock);
78 while (count--) 78 while (count--)
79 path_put(old_root); 79 path_put_long(old_root);
80} 80}
81 81
82void free_fs_struct(struct fs_struct *fs) 82void free_fs_struct(struct fs_struct *fs)
83{ 83{
84 path_put(&fs->root); 84 path_put_long(&fs->root);
85 path_put(&fs->pwd); 85 path_put_long(&fs->pwd);
86 kmem_cache_free(fs_cachep, fs); 86 kmem_cache_free(fs_cachep, fs);
87} 87}
88 88
@@ -115,7 +115,13 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
115 spin_lock_init(&fs->lock); 115 spin_lock_init(&fs->lock);
116 seqcount_init(&fs->seq); 116 seqcount_init(&fs->seq);
117 fs->umask = old->umask; 117 fs->umask = old->umask;
118 get_fs_root_and_pwd(old, &fs->root, &fs->pwd); 118
119 spin_lock(&old->lock);
120 fs->root = old->root;
121 path_get_long(&fs->root);
122 fs->pwd = old->pwd;
123 path_get_long(&fs->pwd);
124 spin_unlock(&old->lock);
119 } 125 }
120 return fs; 126 return fs;
121} 127}
diff --git a/fs/internal.h b/fs/internal.h
index e43b9a4dbf4e..9687c2ee2735 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -63,6 +63,7 @@ extern int copy_mount_string(const void __user *, char **);
63 63
64extern void free_vfsmnt(struct vfsmount *); 64extern void free_vfsmnt(struct vfsmount *);
65extern struct vfsmount *alloc_vfsmnt(const char *); 65extern struct vfsmount *alloc_vfsmnt(const char *);
66extern unsigned int mnt_get_count(struct vfsmount *mnt);
66extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int); 67extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
67extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, 68extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *,
68 struct vfsmount *); 69 struct vfsmount *);
diff --git a/fs/namei.c b/fs/namei.c
index 4e957bf744ae..19433cdba011 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -368,6 +368,18 @@ void path_get(struct path *path)
368EXPORT_SYMBOL(path_get); 368EXPORT_SYMBOL(path_get);
369 369
370/** 370/**
371 * path_get_long - get a long reference to a path
372 * @path: path to get the reference to
373 *
374 * Given a path increment the reference count to the dentry and the vfsmount.
375 */
376void path_get_long(struct path *path)
377{
378 mntget_long(path->mnt);
379 dget(path->dentry);
380}
381
382/**
371 * path_put - put a reference to a path 383 * path_put - put a reference to a path
372 * @path: path to put the reference to 384 * @path: path to put the reference to
373 * 385 *
@@ -381,6 +393,18 @@ void path_put(struct path *path)
381EXPORT_SYMBOL(path_put); 393EXPORT_SYMBOL(path_put);
382 394
383/** 395/**
396 * path_put_long - put a long reference to a path
397 * @path: path to put the reference to
398 *
399 * Given a path decrement the reference count to the dentry and the vfsmount.
400 */
401void path_put_long(struct path *path)
402{
403 dput(path->dentry);
404 mntput_long(path->mnt);
405}
406
407/**
384 * nameidata_drop_rcu - drop this nameidata out of rcu-walk 408 * nameidata_drop_rcu - drop this nameidata out of rcu-walk
385 * @nd: nameidata pathwalk data to drop 409 * @nd: nameidata pathwalk data to drop
386 * @Returns: 0 on success, -ECHILD on failure 410 * @Returns: 0 on success, -ECHILD on failure
diff --git a/fs/namespace.c b/fs/namespace.c
index 03b82350f020..3ddfd9046c44 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -138,6 +138,64 @@ void mnt_release_group_id(struct vfsmount *mnt)
138 mnt->mnt_group_id = 0; 138 mnt->mnt_group_id = 0;
139} 139}
140 140
141/*
142 * vfsmount lock must be held for read
143 */
144static inline void mnt_add_count(struct vfsmount *mnt, int n)
145{
146#ifdef CONFIG_SMP
147 this_cpu_add(mnt->mnt_pcp->mnt_count, n);
148#else
149 preempt_disable();
150 mnt->mnt_count += n;
151 preempt_enable();
152#endif
153}
154
155static inline void mnt_set_count(struct vfsmount *mnt, int n)
156{
157#ifdef CONFIG_SMP
158 this_cpu_write(mnt->mnt_pcp->mnt_count, n);
159#else
160 mnt->mnt_count = n;
161#endif
162}
163
164/*
165 * vfsmount lock must be held for read
166 */
167static inline void mnt_inc_count(struct vfsmount *mnt)
168{
169 mnt_add_count(mnt, 1);
170}
171
172/*
173 * vfsmount lock must be held for read
174 */
175static inline void mnt_dec_count(struct vfsmount *mnt)
176{
177 mnt_add_count(mnt, -1);
178}
179
180/*
181 * vfsmount lock must be held for write
182 */
183unsigned int mnt_get_count(struct vfsmount *mnt)
184{
185#ifdef CONFIG_SMP
186 unsigned int count = atomic_read(&mnt->mnt_longrefs);
187 int cpu;
188
189 for_each_possible_cpu(cpu) {
190 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
191 }
192
193 return count;
194#else
195 return mnt->mnt_count;
196#endif
197}
198
141struct vfsmount *alloc_vfsmnt(const char *name) 199struct vfsmount *alloc_vfsmnt(const char *name)
142{ 200{
143 struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); 201 struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
@@ -154,7 +212,17 @@ struct vfsmount *alloc_vfsmnt(const char *name)
154 goto out_free_id; 212 goto out_free_id;
155 } 213 }
156 214
157 atomic_set(&mnt->mnt_count, 1); 215#ifdef CONFIG_SMP
216 mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
217 if (!mnt->mnt_pcp)
218 goto out_free_devname;
219
220 atomic_set(&mnt->mnt_longrefs, 1);
221#else
222 mnt->mnt_count = 1;
223 mnt->mnt_writers = 0;
224#endif
225
158 INIT_LIST_HEAD(&mnt->mnt_hash); 226 INIT_LIST_HEAD(&mnt->mnt_hash);
159 INIT_LIST_HEAD(&mnt->mnt_child); 227 INIT_LIST_HEAD(&mnt->mnt_child);
160 INIT_LIST_HEAD(&mnt->mnt_mounts); 228 INIT_LIST_HEAD(&mnt->mnt_mounts);
@@ -166,13 +234,6 @@ struct vfsmount *alloc_vfsmnt(const char *name)
166#ifdef CONFIG_FSNOTIFY 234#ifdef CONFIG_FSNOTIFY
167 INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); 235 INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
168#endif 236#endif
169#ifdef CONFIG_SMP
170 mnt->mnt_writers = alloc_percpu(int);
171 if (!mnt->mnt_writers)
172 goto out_free_devname;
173#else
174 mnt->mnt_writers = 0;
175#endif
176 } 237 }
177 return mnt; 238 return mnt;
178 239
@@ -219,7 +280,7 @@ EXPORT_SYMBOL_GPL(__mnt_is_readonly);
219static inline void mnt_inc_writers(struct vfsmount *mnt) 280static inline void mnt_inc_writers(struct vfsmount *mnt)
220{ 281{
221#ifdef CONFIG_SMP 282#ifdef CONFIG_SMP
222 (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++; 283 this_cpu_inc(mnt->mnt_pcp->mnt_writers);
223#else 284#else
224 mnt->mnt_writers++; 285 mnt->mnt_writers++;
225#endif 286#endif
@@ -228,7 +289,7 @@ static inline void mnt_inc_writers(struct vfsmount *mnt)
228static inline void mnt_dec_writers(struct vfsmount *mnt) 289static inline void mnt_dec_writers(struct vfsmount *mnt)
229{ 290{
230#ifdef CONFIG_SMP 291#ifdef CONFIG_SMP
231 (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--; 292 this_cpu_dec(mnt->mnt_pcp->mnt_writers);
232#else 293#else
233 mnt->mnt_writers--; 294 mnt->mnt_writers--;
234#endif 295#endif
@@ -241,7 +302,7 @@ static unsigned int mnt_get_writers(struct vfsmount *mnt)
241 int cpu; 302 int cpu;
242 303
243 for_each_possible_cpu(cpu) { 304 for_each_possible_cpu(cpu) {
244 count += *per_cpu_ptr(mnt->mnt_writers, cpu); 305 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
245 } 306 }
246 307
247 return count; 308 return count;
@@ -418,7 +479,7 @@ void free_vfsmnt(struct vfsmount *mnt)
418 kfree(mnt->mnt_devname); 479 kfree(mnt->mnt_devname);
419 mnt_free_id(mnt); 480 mnt_free_id(mnt);
420#ifdef CONFIG_SMP 481#ifdef CONFIG_SMP
421 free_percpu(mnt->mnt_writers); 482 free_percpu(mnt->mnt_pcp);
422#endif 483#endif
423 kmem_cache_free(mnt_cache, mnt); 484 kmem_cache_free(mnt_cache, mnt);
424} 485}
@@ -652,9 +713,10 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
652 return NULL; 713 return NULL;
653} 714}
654 715
655static inline void __mntput(struct vfsmount *mnt) 716static inline void mntfree(struct vfsmount *mnt)
656{ 717{
657 struct super_block *sb = mnt->mnt_sb; 718 struct super_block *sb = mnt->mnt_sb;
719
658 /* 720 /*
659 * This probably indicates that somebody messed 721 * This probably indicates that somebody messed
660 * up a mnt_want/drop_write() pair. If this 722 * up a mnt_want/drop_write() pair. If this
@@ -662,8 +724,8 @@ static inline void __mntput(struct vfsmount *mnt)
662 * to make r/w->r/o transitions. 724 * to make r/w->r/o transitions.
663 */ 725 */
664 /* 726 /*
665 * atomic_dec_and_lock() used to deal with ->mnt_count decrements 727 * The locking used to deal with mnt_count decrement provides barriers,
666 * provides barriers, so mnt_get_writers() below is safe. AV 728 * so mnt_get_writers() below is safe.
667 */ 729 */
668 WARN_ON(mnt_get_writers(mnt)); 730 WARN_ON(mnt_get_writers(mnt));
669 fsnotify_vfsmount_delete(mnt); 731 fsnotify_vfsmount_delete(mnt);
@@ -672,28 +734,113 @@ static inline void __mntput(struct vfsmount *mnt)
672 deactivate_super(sb); 734 deactivate_super(sb);
673} 735}
674 736
675void mntput_no_expire(struct vfsmount *mnt) 737#ifdef CONFIG_SMP
676{ 738static inline void __mntput(struct vfsmount *mnt, int longrefs)
677repeat: 739{
678 if (atomic_add_unless(&mnt->mnt_count, -1, 1)) 740 if (!longrefs) {
679 return; 741put_again:
742 br_read_lock(vfsmount_lock);
743 if (likely(atomic_read(&mnt->mnt_longrefs))) {
744 mnt_dec_count(mnt);
745 br_read_unlock(vfsmount_lock);
746 return;
747 }
748 br_read_unlock(vfsmount_lock);
749 } else {
750 BUG_ON(!atomic_read(&mnt->mnt_longrefs));
751 if (atomic_add_unless(&mnt->mnt_longrefs, -1, 1))
752 return;
753 }
754
680 br_write_lock(vfsmount_lock); 755 br_write_lock(vfsmount_lock);
681 if (!atomic_dec_and_test(&mnt->mnt_count)) { 756 if (!longrefs)
757 mnt_dec_count(mnt);
758 else
759 atomic_dec(&mnt->mnt_longrefs);
760 if (mnt_get_count(mnt)) {
682 br_write_unlock(vfsmount_lock); 761 br_write_unlock(vfsmount_lock);
683 return; 762 return;
684 } 763 }
685 if (likely(!mnt->mnt_pinned)) { 764 if (unlikely(mnt->mnt_pinned)) {
765 mnt_add_count(mnt, mnt->mnt_pinned + 1);
766 mnt->mnt_pinned = 0;
686 br_write_unlock(vfsmount_lock); 767 br_write_unlock(vfsmount_lock);
687 __mntput(mnt); 768 acct_auto_close_mnt(mnt);
769 goto put_again;
770 }
771 br_write_unlock(vfsmount_lock);
772 mntfree(mnt);
773}
774#else
775static inline void __mntput(struct vfsmount *mnt, int longrefs)
776{
777put_again:
778 mnt_dec_count(mnt);
779 if (likely(mnt_get_count(mnt)))
688 return; 780 return;
781 br_write_lock(vfsmount_lock);
782 if (unlikely(mnt->mnt_pinned)) {
783 mnt_add_count(mnt, mnt->mnt_pinned + 1);
784 mnt->mnt_pinned = 0;
785 br_write_unlock(vfsmount_lock);
786 acct_auto_close_mnt(mnt);
787 goto put_again;
689 } 788 }
690 atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
691 mnt->mnt_pinned = 0;
692 br_write_unlock(vfsmount_lock); 789 br_write_unlock(vfsmount_lock);
693 acct_auto_close_mnt(mnt); 790 mntfree(mnt);
694 goto repeat; 791}
792#endif
793
794static void mntput_no_expire(struct vfsmount *mnt)
795{
796 __mntput(mnt, 0);
797}
798
799void mntput(struct vfsmount *mnt)
800{
801 if (mnt) {
802 /* avoid cacheline pingpong, hope gcc doesn't get "smart" */
803 if (unlikely(mnt->mnt_expiry_mark))
804 mnt->mnt_expiry_mark = 0;
805 __mntput(mnt, 0);
806 }
807}
808EXPORT_SYMBOL(mntput);
809
810struct vfsmount *mntget(struct vfsmount *mnt)
811{
812 if (mnt)
813 mnt_inc_count(mnt);
814 return mnt;
815}
816EXPORT_SYMBOL(mntget);
817
818void mntput_long(struct vfsmount *mnt)
819{
820#ifdef CONFIG_SMP
821 if (mnt) {
822 /* avoid cacheline pingpong, hope gcc doesn't get "smart" */
823 if (unlikely(mnt->mnt_expiry_mark))
824 mnt->mnt_expiry_mark = 0;
825 __mntput(mnt, 1);
826 }
827#else
828 mntput(mnt);
829#endif
695} 830}
696EXPORT_SYMBOL(mntput_no_expire); 831EXPORT_SYMBOL(mntput_long);
832
833struct vfsmount *mntget_long(struct vfsmount *mnt)
834{
835#ifdef CONFIG_SMP
836 if (mnt)
837 atomic_inc(&mnt->mnt_longrefs);
838 return mnt;
839#else
840 return mntget(mnt);
841#endif
842}
843EXPORT_SYMBOL(mntget_long);
697 844
698void mnt_pin(struct vfsmount *mnt) 845void mnt_pin(struct vfsmount *mnt)
699{ 846{
@@ -701,19 +848,17 @@ void mnt_pin(struct vfsmount *mnt)
701 mnt->mnt_pinned++; 848 mnt->mnt_pinned++;
702 br_write_unlock(vfsmount_lock); 849 br_write_unlock(vfsmount_lock);
703} 850}
704
705EXPORT_SYMBOL(mnt_pin); 851EXPORT_SYMBOL(mnt_pin);
706 852
707void mnt_unpin(struct vfsmount *mnt) 853void mnt_unpin(struct vfsmount *mnt)
708{ 854{
709 br_write_lock(vfsmount_lock); 855 br_write_lock(vfsmount_lock);
710 if (mnt->mnt_pinned) { 856 if (mnt->mnt_pinned) {
711 atomic_inc(&mnt->mnt_count); 857 mnt_inc_count(mnt);
712 mnt->mnt_pinned--; 858 mnt->mnt_pinned--;
713 } 859 }
714 br_write_unlock(vfsmount_lock); 860 br_write_unlock(vfsmount_lock);
715} 861}
716
717EXPORT_SYMBOL(mnt_unpin); 862EXPORT_SYMBOL(mnt_unpin);
718 863
719static inline void mangle(struct seq_file *m, const char *s) 864static inline void mangle(struct seq_file *m, const char *s)
@@ -1008,12 +1153,13 @@ int may_umount_tree(struct vfsmount *mnt)
1008 int minimum_refs = 0; 1153 int minimum_refs = 0;
1009 struct vfsmount *p; 1154 struct vfsmount *p;
1010 1155
1011 br_read_lock(vfsmount_lock); 1156 /* write lock needed for mnt_get_count */
1157 br_write_lock(vfsmount_lock);
1012 for (p = mnt; p; p = next_mnt(p, mnt)) { 1158 for (p = mnt; p; p = next_mnt(p, mnt)) {
1013 actual_refs += atomic_read(&p->mnt_count); 1159 actual_refs += mnt_get_count(p);
1014 minimum_refs += 2; 1160 minimum_refs += 2;
1015 } 1161 }
1016 br_read_unlock(vfsmount_lock); 1162 br_write_unlock(vfsmount_lock);
1017 1163
1018 if (actual_refs > minimum_refs) 1164 if (actual_refs > minimum_refs)
1019 return 0; 1165 return 0;
@@ -1040,10 +1186,10 @@ int may_umount(struct vfsmount *mnt)
1040{ 1186{
1041 int ret = 1; 1187 int ret = 1;
1042 down_read(&namespace_sem); 1188 down_read(&namespace_sem);
1043 br_read_lock(vfsmount_lock); 1189 br_write_lock(vfsmount_lock);
1044 if (propagate_mount_busy(mnt, 2)) 1190 if (propagate_mount_busy(mnt, 2))
1045 ret = 0; 1191 ret = 0;
1046 br_read_unlock(vfsmount_lock); 1192 br_write_unlock(vfsmount_lock);
1047 up_read(&namespace_sem); 1193 up_read(&namespace_sem);
1048 return ret; 1194 return ret;
1049} 1195}
@@ -1070,7 +1216,7 @@ void release_mounts(struct list_head *head)
1070 dput(dentry); 1216 dput(dentry);
1071 mntput(m); 1217 mntput(m);
1072 } 1218 }
1073 mntput(mnt); 1219 mntput_long(mnt);
1074 } 1220 }
1075} 1221}
1076 1222
@@ -1125,8 +1271,16 @@ static int do_umount(struct vfsmount *mnt, int flags)
1125 flags & (MNT_FORCE | MNT_DETACH)) 1271 flags & (MNT_FORCE | MNT_DETACH))
1126 return -EINVAL; 1272 return -EINVAL;
1127 1273
1128 if (atomic_read(&mnt->mnt_count) != 2) 1274 /*
1275 * probably don't strictly need the lock here if we examined
1276 * all race cases, but it's a slowpath.
1277 */
1278 br_write_lock(vfsmount_lock);
1279 if (mnt_get_count(mnt) != 2) {
1280 br_write_unlock(vfsmount_lock);
1129 return -EBUSY; 1281 return -EBUSY;
1282 }
1283 br_write_unlock(vfsmount_lock);
1130 1284
1131 if (!xchg(&mnt->mnt_expiry_mark, 1)) 1285 if (!xchg(&mnt->mnt_expiry_mark, 1))
1132 return -EAGAIN; 1286 return -EAGAIN;
@@ -1815,7 +1969,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
1815 1969
1816unlock: 1970unlock:
1817 up_write(&namespace_sem); 1971 up_write(&namespace_sem);
1818 mntput(newmnt); 1972 mntput_long(newmnt);
1819 return err; 1973 return err;
1820} 1974}
1821 1975
@@ -2148,11 +2302,11 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2148 if (fs) { 2302 if (fs) {
2149 if (p == fs->root.mnt) { 2303 if (p == fs->root.mnt) {
2150 rootmnt = p; 2304 rootmnt = p;
2151 fs->root.mnt = mntget(q); 2305 fs->root.mnt = mntget_long(q);
2152 } 2306 }
2153 if (p == fs->pwd.mnt) { 2307 if (p == fs->pwd.mnt) {
2154 pwdmnt = p; 2308 pwdmnt = p;
2155 fs->pwd.mnt = mntget(q); 2309 fs->pwd.mnt = mntget_long(q);
2156 } 2310 }
2157 } 2311 }
2158 p = next_mnt(p, mnt_ns->root); 2312 p = next_mnt(p, mnt_ns->root);
@@ -2161,9 +2315,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2161 up_write(&namespace_sem); 2315 up_write(&namespace_sem);
2162 2316
2163 if (rootmnt) 2317 if (rootmnt)
2164 mntput(rootmnt); 2318 mntput_long(rootmnt);
2165 if (pwdmnt) 2319 if (pwdmnt)
2166 mntput(pwdmnt); 2320 mntput_long(pwdmnt);
2167 2321
2168 return new_ns; 2322 return new_ns;
2169} 2323}
@@ -2350,6 +2504,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2350 touch_mnt_namespace(current->nsproxy->mnt_ns); 2504 touch_mnt_namespace(current->nsproxy->mnt_ns);
2351 br_write_unlock(vfsmount_lock); 2505 br_write_unlock(vfsmount_lock);
2352 chroot_fs_refs(&root, &new); 2506 chroot_fs_refs(&root, &new);
2507
2353 error = 0; 2508 error = 0;
2354 path_put(&root_parent); 2509 path_put(&root_parent);
2355 path_put(&parent_path); 2510 path_put(&parent_path);
@@ -2376,6 +2531,7 @@ static void __init init_mount_tree(void)
2376 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); 2531 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
2377 if (IS_ERR(mnt)) 2532 if (IS_ERR(mnt))
2378 panic("Can't create rootfs"); 2533 panic("Can't create rootfs");
2534
2379 ns = create_mnt_ns(mnt); 2535 ns = create_mnt_ns(mnt);
2380 if (IS_ERR(ns)) 2536 if (IS_ERR(ns))
2381 panic("Can't allocate initial namespace"); 2537 panic("Can't allocate initial namespace");
diff --git a/fs/pipe.c b/fs/pipe.c
index cfe3a7f2ee21..68f1f8e4e23b 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1292,7 +1292,7 @@ static int __init init_pipe_fs(void)
1292static void __exit exit_pipe_fs(void) 1292static void __exit exit_pipe_fs(void)
1293{ 1293{
1294 unregister_filesystem(&pipe_fs_type); 1294 unregister_filesystem(&pipe_fs_type);
1295 mntput(pipe_mnt); 1295 mntput_long(pipe_mnt);
1296} 1296}
1297 1297
1298fs_initcall(init_pipe_fs); 1298fs_initcall(init_pipe_fs);
diff --git a/fs/pnode.c b/fs/pnode.c
index 8066b8dd748f..d42514e32380 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -288,7 +288,7 @@ out:
288 */ 288 */
289static inline int do_refcount_check(struct vfsmount *mnt, int count) 289static inline int do_refcount_check(struct vfsmount *mnt, int count)
290{ 290{
291 int mycount = atomic_read(&mnt->mnt_count) - mnt->mnt_ghosts; 291 int mycount = mnt_get_count(mnt) - mnt->mnt_ghosts;
292 return (mycount > count); 292 return (mycount > count);
293} 293}
294 294
@@ -300,7 +300,7 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count)
300 * Check if any of these mounts that **do not have submounts** 300 * Check if any of these mounts that **do not have submounts**
301 * have more references than 'refcnt'. If so return busy. 301 * have more references than 'refcnt'. If so return busy.
302 * 302 *
303 * vfsmount lock must be held for read or write 303 * vfsmount lock must be held for write
304 */ 304 */
305int propagate_mount_busy(struct vfsmount *mnt, int refcnt) 305int propagate_mount_busy(struct vfsmount *mnt, int refcnt)
306{ 306{
diff --git a/fs/super.c b/fs/super.c
index 968ba013011a..823e061faa87 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1140,7 +1140,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
1140 return mnt; 1140 return mnt;
1141 1141
1142 err: 1142 err:
1143 mntput(mnt); 1143 mntput_long(mnt);
1144 return ERR_PTR(err); 1144 return ERR_PTR(err);
1145} 1145}
1146 1146
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 5e7a59408dd4..1869ea24a739 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -13,6 +13,7 @@
13#include <linux/list.h> 13#include <linux/list.h>
14#include <linux/nodemask.h> 14#include <linux/nodemask.h>
15#include <linux/spinlock.h> 15#include <linux/spinlock.h>
16#include <linux/seqlock.h>
16#include <asm/atomic.h> 17#include <asm/atomic.h>
17 18
18struct super_block; 19struct super_block;
@@ -46,12 +47,24 @@ struct mnt_namespace;
46 47
47#define MNT_INTERNAL 0x4000 48#define MNT_INTERNAL 0x4000
48 49
50struct mnt_pcp {
51 int mnt_count;
52 int mnt_writers;
53};
54
49struct vfsmount { 55struct vfsmount {
50 struct list_head mnt_hash; 56 struct list_head mnt_hash;
51 struct vfsmount *mnt_parent; /* fs we are mounted on */ 57 struct vfsmount *mnt_parent; /* fs we are mounted on */
52 struct dentry *mnt_mountpoint; /* dentry of mountpoint */ 58 struct dentry *mnt_mountpoint; /* dentry of mountpoint */
53 struct dentry *mnt_root; /* root of the mounted tree */ 59 struct dentry *mnt_root; /* root of the mounted tree */
54 struct super_block *mnt_sb; /* pointer to superblock */ 60 struct super_block *mnt_sb; /* pointer to superblock */
61#ifdef CONFIG_SMP
62 struct mnt_pcp __percpu *mnt_pcp;
63 atomic_t mnt_longrefs;
64#else
65 int mnt_count;
66 int mnt_writers;
67#endif
55 struct list_head mnt_mounts; /* list of children, anchored here */ 68 struct list_head mnt_mounts; /* list of children, anchored here */
56 struct list_head mnt_child; /* and going through their mnt_child */ 69 struct list_head mnt_child; /* and going through their mnt_child */
57 int mnt_flags; 70 int mnt_flags;
@@ -70,57 +83,25 @@ struct vfsmount {
70 struct mnt_namespace *mnt_ns; /* containing namespace */ 83 struct mnt_namespace *mnt_ns; /* containing namespace */
71 int mnt_id; /* mount identifier */ 84 int mnt_id; /* mount identifier */
72 int mnt_group_id; /* peer group identifier */ 85 int mnt_group_id; /* peer group identifier */
73 /*
74 * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount
75 * to let these frequently modified fields in a separate cache line
76 * (so that reads of mnt_flags wont ping-pong on SMP machines)
77 */
78 atomic_t mnt_count;
79 int mnt_expiry_mark; /* true if marked for expiry */ 86 int mnt_expiry_mark; /* true if marked for expiry */
80 int mnt_pinned; 87 int mnt_pinned;
81 int mnt_ghosts; 88 int mnt_ghosts;
82#ifdef CONFIG_SMP
83 int __percpu *mnt_writers;
84#else
85 int mnt_writers;
86#endif
87}; 89};
88 90
89static inline int *get_mnt_writers_ptr(struct vfsmount *mnt)
90{
91#ifdef CONFIG_SMP
92 return mnt->mnt_writers;
93#else
94 return &mnt->mnt_writers;
95#endif
96}
97
98static inline struct vfsmount *mntget(struct vfsmount *mnt)
99{
100 if (mnt)
101 atomic_inc(&mnt->mnt_count);
102 return mnt;
103}
104
105struct file; /* forward dec */ 91struct file; /* forward dec */
106 92
107extern int mnt_want_write(struct vfsmount *mnt); 93extern int mnt_want_write(struct vfsmount *mnt);
108extern int mnt_want_write_file(struct file *file); 94extern int mnt_want_write_file(struct file *file);
109extern int mnt_clone_write(struct vfsmount *mnt); 95extern int mnt_clone_write(struct vfsmount *mnt);
110extern void mnt_drop_write(struct vfsmount *mnt); 96extern void mnt_drop_write(struct vfsmount *mnt);
111extern void mntput_no_expire(struct vfsmount *mnt); 97extern void mntput(struct vfsmount *mnt);
98extern struct vfsmount *mntget(struct vfsmount *mnt);
99extern void mntput_long(struct vfsmount *mnt);
100extern struct vfsmount *mntget_long(struct vfsmount *mnt);
112extern void mnt_pin(struct vfsmount *mnt); 101extern void mnt_pin(struct vfsmount *mnt);
113extern void mnt_unpin(struct vfsmount *mnt); 102extern void mnt_unpin(struct vfsmount *mnt);
114extern int __mnt_is_readonly(struct vfsmount *mnt); 103extern int __mnt_is_readonly(struct vfsmount *mnt);
115 104
116static inline void mntput(struct vfsmount *mnt)
117{
118 if (mnt) {
119 mnt->mnt_expiry_mark = 0;
120 mntput_no_expire(mnt);
121 }
122}
123
124extern struct vfsmount *do_kern_mount(const char *fstype, int flags, 105extern struct vfsmount *do_kern_mount(const char *fstype, int flags,
125 const char *name, void *data); 106 const char *name, void *data);
126 107
diff --git a/include/linux/path.h b/include/linux/path.h
index edc98dec6266..a581e8c06533 100644
--- a/include/linux/path.h
+++ b/include/linux/path.h
@@ -10,7 +10,9 @@ struct path {
10}; 10};
11 11
12extern void path_get(struct path *); 12extern void path_get(struct path *);
13extern void path_get_long(struct path *);
13extern void path_put(struct path *); 14extern void path_put(struct path *);
15extern void path_put_long(struct path *);
14 16
15static inline int path_equal(const struct path *path1, const struct path *path2) 17static inline int path_equal(const struct path *path1, const struct path *path2)
16{ 18{
diff --git a/net/socket.c b/net/socket.c
index 0ee74c325320..815bba3d2fe0 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2390,6 +2390,8 @@ EXPORT_SYMBOL(sock_unregister);
2390 2390
2391static int __init sock_init(void) 2391static int __init sock_init(void)
2392{ 2392{
2393 int err;
2394
2393 /* 2395 /*
2394 * Initialize sock SLAB cache. 2396 * Initialize sock SLAB cache.
2395 */ 2397 */
@@ -2406,8 +2408,15 @@ static int __init sock_init(void)
2406 */ 2408 */
2407 2409
2408 init_inodecache(); 2410 init_inodecache();
2409 register_filesystem(&sock_fs_type); 2411
2412 err = register_filesystem(&sock_fs_type);
2413 if (err)
2414 goto out_fs;
2410 sock_mnt = kern_mount(&sock_fs_type); 2415 sock_mnt = kern_mount(&sock_fs_type);
2416 if (IS_ERR(sock_mnt)) {
2417 err = PTR_ERR(sock_mnt);
2418 goto out_mount;
2419 }
2411 2420
2412 /* The real protocol initialization is performed in later initcalls. 2421 /* The real protocol initialization is performed in later initcalls.
2413 */ 2422 */
@@ -2420,7 +2429,13 @@ static int __init sock_init(void)
2420 skb_timestamping_init(); 2429 skb_timestamping_init();
2421#endif 2430#endif
2422 2431
2423 return 0; 2432out:
2433 return err;
2434
2435out_mount:
2436 unregister_filesystem(&sock_fs_type);
2437out_fs:
2438 goto out;
2424} 2439}
2425 2440
2426core_initcall(sock_init); /* early initcall */ 2441core_initcall(sock_init); /* early initcall */