aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Al Viro <viro@zeniv.linux.org.uk> 2011-01-14 22:30:21 -0500
committer: Al Viro <viro@zeniv.linux.org.uk> 2011-01-16 13:47:07 -0500
commit: f03c65993b98eeb909a4012ce7833c5857d74755 (patch)
tree: a6dd5e353889b7fe4ab87c54170d09443d788fec /fs
parent: 7b8a53fd815deb39542085897743fa0063f9fe06 (diff)
sanitize vfsmount refcounting changes
Instead of splitting refcount between (per-cpu) mnt_count and (SMP-only) mnt_longrefs, make all references contribute to mnt_count again and keep track of how many are longterm ones.

Accounting rules for longterm count:
  * 1 for each fs_struct.root.mnt
  * 1 for each fs_struct.pwd.mnt
  * 1 for having non-NULL ->mnt_ns
  * decrement to 0 happens only under vfsmount lock exclusive

That allows nice common case for mntput() - since we can't drop the final reference until after mnt_longterm has reached 0 due to the rules above, mntput() can grab vfsmount lock shared and check mnt_longterm. If it turns out to be non-zero (which is the common case), we know that this is not the final mntput() and can just blindly decrement percpu mnt_count. Otherwise we grab vfsmount lock exclusive and do usual decrement-and-check of percpu mnt_count.

For fs_struct.c we have mnt_make_longterm() and mnt_make_shortterm(); namespace.c uses the latter in places where we don't already hold vfsmount lock exclusive and opencodes a few remaining spots where we need to manipulate mnt_longterm.

Note that we mostly revert the code outside of fs/namespace.c back to what we used to have; in particular, normal code doesn't need to care about two kinds of references, etc. And we get to keep the optimization Nick's variant had bought us...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs')
-rw-r--r--  fs/anon_inodes.c    2
-rw-r--r--  fs/fs_struct.c     35
-rw-r--r--  fs/internal.h       3
-rw-r--r--  fs/namei.c         24
-rw-r--r--  fs/namespace.c    116
-rw-r--r--  fs/pipe.c           2
-rw-r--r--  fs/super.c          2
7 files changed, 73 insertions, 111 deletions
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index cbe57f3c4d89..c5567cb78432 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -233,7 +233,7 @@ static int __init anon_inode_init(void)
233 return 0; 233 return 0;
234 234
235err_mntput: 235err_mntput:
236 mntput_long(anon_inode_mnt); 236 mntput(anon_inode_mnt);
237err_unregister_filesystem: 237err_unregister_filesystem:
238 unregister_filesystem(&anon_inode_fs_type); 238 unregister_filesystem(&anon_inode_fs_type);
239err_exit: 239err_exit:
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 68ca487bedb1..78b519c13536 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -4,6 +4,19 @@
4#include <linux/path.h> 4#include <linux/path.h>
5#include <linux/slab.h> 5#include <linux/slab.h>
6#include <linux/fs_struct.h> 6#include <linux/fs_struct.h>
7#include "internal.h"
8
9static inline void path_get_longterm(struct path *path)
10{
11 path_get(path);
12 mnt_make_longterm(path->mnt);
13}
14
15static inline void path_put_longterm(struct path *path)
16{
17 mnt_make_shortterm(path->mnt);
18 path_put(path);
19}
7 20
8/* 21/*
9 * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. 22 * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
@@ -17,11 +30,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
17 write_seqcount_begin(&fs->seq); 30 write_seqcount_begin(&fs->seq);
18 old_root = fs->root; 31 old_root = fs->root;
19 fs->root = *path; 32 fs->root = *path;
20 path_get_long(path); 33 path_get_longterm(path);
21 write_seqcount_end(&fs->seq); 34 write_seqcount_end(&fs->seq);
22 spin_unlock(&fs->lock); 35 spin_unlock(&fs->lock);
23 if (old_root.dentry) 36 if (old_root.dentry)
24 path_put_long(&old_root); 37 path_put_longterm(&old_root);
25} 38}
26 39
27/* 40/*
@@ -36,12 +49,12 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
36 write_seqcount_begin(&fs->seq); 49 write_seqcount_begin(&fs->seq);
37 old_pwd = fs->pwd; 50 old_pwd = fs->pwd;
38 fs->pwd = *path; 51 fs->pwd = *path;
39 path_get_long(path); 52 path_get_longterm(path);
40 write_seqcount_end(&fs->seq); 53 write_seqcount_end(&fs->seq);
41 spin_unlock(&fs->lock); 54 spin_unlock(&fs->lock);
42 55
43 if (old_pwd.dentry) 56 if (old_pwd.dentry)
44 path_put_long(&old_pwd); 57 path_put_longterm(&old_pwd);
45} 58}
46 59
47void chroot_fs_refs(struct path *old_root, struct path *new_root) 60void chroot_fs_refs(struct path *old_root, struct path *new_root)
@@ -59,13 +72,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
59 write_seqcount_begin(&fs->seq); 72 write_seqcount_begin(&fs->seq);
60 if (fs->root.dentry == old_root->dentry 73 if (fs->root.dentry == old_root->dentry
61 && fs->root.mnt == old_root->mnt) { 74 && fs->root.mnt == old_root->mnt) {
62 path_get_long(new_root); 75 path_get_longterm(new_root);
63 fs->root = *new_root; 76 fs->root = *new_root;
64 count++; 77 count++;
65 } 78 }
66 if (fs->pwd.dentry == old_root->dentry 79 if (fs->pwd.dentry == old_root->dentry
67 && fs->pwd.mnt == old_root->mnt) { 80 && fs->pwd.mnt == old_root->mnt) {
68 path_get_long(new_root); 81 path_get_longterm(new_root);
69 fs->pwd = *new_root; 82 fs->pwd = *new_root;
70 count++; 83 count++;
71 } 84 }
@@ -76,13 +89,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
76 } while_each_thread(g, p); 89 } while_each_thread(g, p);
77 read_unlock(&tasklist_lock); 90 read_unlock(&tasklist_lock);
78 while (count--) 91 while (count--)
79 path_put_long(old_root); 92 path_put_longterm(old_root);
80} 93}
81 94
82void free_fs_struct(struct fs_struct *fs) 95void free_fs_struct(struct fs_struct *fs)
83{ 96{
84 path_put_long(&fs->root); 97 path_put_longterm(&fs->root);
85 path_put_long(&fs->pwd); 98 path_put_longterm(&fs->pwd);
86 kmem_cache_free(fs_cachep, fs); 99 kmem_cache_free(fs_cachep, fs);
87} 100}
88 101
@@ -118,9 +131,9 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
118 131
119 spin_lock(&old->lock); 132 spin_lock(&old->lock);
120 fs->root = old->root; 133 fs->root = old->root;
121 path_get_long(&fs->root); 134 path_get_longterm(&fs->root);
122 fs->pwd = old->pwd; 135 fs->pwd = old->pwd;
123 path_get_long(&fs->pwd); 136 path_get_longterm(&fs->pwd);
124 spin_unlock(&old->lock); 137 spin_unlock(&old->lock);
125 } 138 }
126 return fs; 139 return fs;
diff --git a/fs/internal.h b/fs/internal.h
index 4931060fd089..12ccb86edef7 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -73,6 +73,9 @@ extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
73extern int do_add_mount(struct vfsmount *, struct path *, int); 73extern int do_add_mount(struct vfsmount *, struct path *, int);
74extern void mnt_clear_expiry(struct vfsmount *); 74extern void mnt_clear_expiry(struct vfsmount *);
75 75
76extern void mnt_make_longterm(struct vfsmount *);
77extern void mnt_make_shortterm(struct vfsmount *);
78
76extern void __init mnt_init(void); 79extern void __init mnt_init(void);
77 80
78DECLARE_BRLOCK(vfsmount_lock); 81DECLARE_BRLOCK(vfsmount_lock);
diff --git a/fs/namei.c b/fs/namei.c
index c2e37727e3ab..8f7b41a14882 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -368,18 +368,6 @@ void path_get(struct path *path)
368EXPORT_SYMBOL(path_get); 368EXPORT_SYMBOL(path_get);
369 369
370/** 370/**
371 * path_get_long - get a long reference to a path
372 * @path: path to get the reference to
373 *
374 * Given a path increment the reference count to the dentry and the vfsmount.
375 */
376void path_get_long(struct path *path)
377{
378 mntget_long(path->mnt);
379 dget(path->dentry);
380}
381
382/**
383 * path_put - put a reference to a path 371 * path_put - put a reference to a path
384 * @path: path to put the reference to 372 * @path: path to put the reference to
385 * 373 *
@@ -393,18 +381,6 @@ void path_put(struct path *path)
393EXPORT_SYMBOL(path_put); 381EXPORT_SYMBOL(path_put);
394 382
395/** 383/**
396 * path_put_long - put a long reference to a path
397 * @path: path to put the reference to
398 *
399 * Given a path decrement the reference count to the dentry and the vfsmount.
400 */
401void path_put_long(struct path *path)
402{
403 dput(path->dentry);
404 mntput_long(path->mnt);
405}
406
407/**
408 * nameidata_drop_rcu - drop this nameidata out of rcu-walk 384 * nameidata_drop_rcu - drop this nameidata out of rcu-walk
409 * @nd: nameidata pathwalk data to drop 385 * @nd: nameidata pathwalk data to drop
410 * Returns: 0 on success, -ECHILD on failure 386 * Returns: 0 on success, -ECHILD on failure
diff --git a/fs/namespace.c b/fs/namespace.c
index d7fc05fac753..48809e21f270 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -183,7 +183,7 @@ static inline void mnt_dec_count(struct vfsmount *mnt)
183unsigned int mnt_get_count(struct vfsmount *mnt) 183unsigned int mnt_get_count(struct vfsmount *mnt)
184{ 184{
185#ifdef CONFIG_SMP 185#ifdef CONFIG_SMP
186 unsigned int count = atomic_read(&mnt->mnt_longrefs); 186 unsigned int count = 0;
187 int cpu; 187 int cpu;
188 188
189 for_each_possible_cpu(cpu) { 189 for_each_possible_cpu(cpu) {
@@ -217,7 +217,7 @@ struct vfsmount *alloc_vfsmnt(const char *name)
217 if (!mnt->mnt_pcp) 217 if (!mnt->mnt_pcp)
218 goto out_free_devname; 218 goto out_free_devname;
219 219
220 atomic_set(&mnt->mnt_longrefs, 1); 220 this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
221#else 221#else
222 mnt->mnt_count = 1; 222 mnt->mnt_count = 1;
223 mnt->mnt_writers = 0; 223 mnt->mnt_writers = 0;
@@ -624,8 +624,11 @@ static void commit_tree(struct vfsmount *mnt)
624 BUG_ON(parent == mnt); 624 BUG_ON(parent == mnt);
625 625
626 list_add_tail(&head, &mnt->mnt_list); 626 list_add_tail(&head, &mnt->mnt_list);
627 list_for_each_entry(m, &head, mnt_list) 627 list_for_each_entry(m, &head, mnt_list) {
628 m->mnt_ns = n; 628 m->mnt_ns = n;
629 atomic_inc(&m->mnt_longterm);
630 }
631
629 list_splice(&head, n->list.prev); 632 list_splice(&head, n->list.prev);
630 633
631 list_add_tail(&mnt->mnt_hash, mount_hashtable + 634 list_add_tail(&mnt->mnt_hash, mount_hashtable +
@@ -734,51 +737,30 @@ static inline void mntfree(struct vfsmount *mnt)
734 deactivate_super(sb); 737 deactivate_super(sb);
735} 738}
736 739
737#ifdef CONFIG_SMP 740static void mntput_no_expire(struct vfsmount *mnt)
738static inline void __mntput(struct vfsmount *mnt, int longrefs)
739{ 741{
740 if (!longrefs) {
741put_again: 742put_again:
742 br_read_lock(vfsmount_lock); 743#ifdef CONFIG_SMP
743 if (likely(atomic_read(&mnt->mnt_longrefs))) { 744 br_read_lock(vfsmount_lock);
744 mnt_dec_count(mnt); 745 if (likely(atomic_read(&mnt->mnt_longterm))) {
745 br_read_unlock(vfsmount_lock); 746 mnt_dec_count(mnt);
746 return;
747 }
748 br_read_unlock(vfsmount_lock); 747 br_read_unlock(vfsmount_lock);
749 } else { 748 return;
750 BUG_ON(!atomic_read(&mnt->mnt_longrefs));
751 if (atomic_add_unless(&mnt->mnt_longrefs, -1, 1))
752 return;
753 } 749 }
750 br_read_unlock(vfsmount_lock);
754 751
755 br_write_lock(vfsmount_lock); 752 br_write_lock(vfsmount_lock);
756 if (!longrefs) 753 mnt_dec_count(mnt);
757 mnt_dec_count(mnt);
758 else
759 atomic_dec(&mnt->mnt_longrefs);
760 if (mnt_get_count(mnt)) { 754 if (mnt_get_count(mnt)) {
761 br_write_unlock(vfsmount_lock); 755 br_write_unlock(vfsmount_lock);
762 return; 756 return;
763 } 757 }
764 if (unlikely(mnt->mnt_pinned)) {
765 mnt_add_count(mnt, mnt->mnt_pinned + 1);
766 mnt->mnt_pinned = 0;
767 br_write_unlock(vfsmount_lock);
768 acct_auto_close_mnt(mnt);
769 goto put_again;
770 }
771 br_write_unlock(vfsmount_lock);
772 mntfree(mnt);
773}
774#else 758#else
775static inline void __mntput(struct vfsmount *mnt, int longrefs)
776{
777put_again:
778 mnt_dec_count(mnt); 759 mnt_dec_count(mnt);
779 if (likely(mnt_get_count(mnt))) 760 if (likely(mnt_get_count(mnt)))
780 return; 761 return;
781 br_write_lock(vfsmount_lock); 762 br_write_lock(vfsmount_lock);
763#endif
782 if (unlikely(mnt->mnt_pinned)) { 764 if (unlikely(mnt->mnt_pinned)) {
783 mnt_add_count(mnt, mnt->mnt_pinned + 1); 765 mnt_add_count(mnt, mnt->mnt_pinned + 1);
784 mnt->mnt_pinned = 0; 766 mnt->mnt_pinned = 0;
@@ -789,12 +771,6 @@ put_again:
789 br_write_unlock(vfsmount_lock); 771 br_write_unlock(vfsmount_lock);
790 mntfree(mnt); 772 mntfree(mnt);
791} 773}
792#endif
793
794static void mntput_no_expire(struct vfsmount *mnt)
795{
796 __mntput(mnt, 0);
797}
798 774
799void mntput(struct vfsmount *mnt) 775void mntput(struct vfsmount *mnt)
800{ 776{
@@ -802,7 +778,7 @@ void mntput(struct vfsmount *mnt)
802 /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ 778 /* avoid cacheline pingpong, hope gcc doesn't get "smart" */
803 if (unlikely(mnt->mnt_expiry_mark)) 779 if (unlikely(mnt->mnt_expiry_mark))
804 mnt->mnt_expiry_mark = 0; 780 mnt->mnt_expiry_mark = 0;
805 __mntput(mnt, 0); 781 mntput_no_expire(mnt);
806 } 782 }
807} 783}
808EXPORT_SYMBOL(mntput); 784EXPORT_SYMBOL(mntput);
@@ -815,33 +791,6 @@ struct vfsmount *mntget(struct vfsmount *mnt)
815} 791}
816EXPORT_SYMBOL(mntget); 792EXPORT_SYMBOL(mntget);
817 793
818void mntput_long(struct vfsmount *mnt)
819{
820#ifdef CONFIG_SMP
821 if (mnt) {
822 /* avoid cacheline pingpong, hope gcc doesn't get "smart" */
823 if (unlikely(mnt->mnt_expiry_mark))
824 mnt->mnt_expiry_mark = 0;
825 __mntput(mnt, 1);
826 }
827#else
828 mntput(mnt);
829#endif
830}
831EXPORT_SYMBOL(mntput_long);
832
833struct vfsmount *mntget_long(struct vfsmount *mnt)
834{
835#ifdef CONFIG_SMP
836 if (mnt)
837 atomic_inc(&mnt->mnt_longrefs);
838 return mnt;
839#else
840 return mntget(mnt);
841#endif
842}
843EXPORT_SYMBOL(mntget_long);
844
845void mnt_pin(struct vfsmount *mnt) 794void mnt_pin(struct vfsmount *mnt)
846{ 795{
847 br_write_lock(vfsmount_lock); 796 br_write_lock(vfsmount_lock);
@@ -1216,7 +1165,7 @@ void release_mounts(struct list_head *head)
1216 dput(dentry); 1165 dput(dentry);
1217 mntput(m); 1166 mntput(m);
1218 } 1167 }
1219 mntput_long(mnt); 1168 mntput(mnt);
1220 } 1169 }
1221} 1170}
1222 1171
@@ -1240,6 +1189,7 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
1240 list_del_init(&p->mnt_list); 1189 list_del_init(&p->mnt_list);
1241 __touch_mnt_namespace(p->mnt_ns); 1190 __touch_mnt_namespace(p->mnt_ns);
1242 p->mnt_ns = NULL; 1191 p->mnt_ns = NULL;
1192 atomic_dec(&p->mnt_longterm);
1243 list_del_init(&p->mnt_child); 1193 list_del_init(&p->mnt_child);
1244 if (p->mnt_parent != p) { 1194 if (p->mnt_parent != p) {
1245 p->mnt_parent->mnt_ghosts++; 1195 p->mnt_parent->mnt_ghosts++;
@@ -1969,7 +1919,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags)
1969 1919
1970unlock: 1920unlock:
1971 up_write(&namespace_sem); 1921 up_write(&namespace_sem);
1972 mntput_long(newmnt); 1922 mntput(newmnt);
1973 return err; 1923 return err;
1974} 1924}
1975 1925
@@ -2291,6 +2241,20 @@ static struct mnt_namespace *alloc_mnt_ns(void)
2291 return new_ns; 2241 return new_ns;
2292} 2242}
2293 2243
2244void mnt_make_longterm(struct vfsmount *mnt)
2245{
2246 atomic_inc(&mnt->mnt_longterm);
2247}
2248
2249void mnt_make_shortterm(struct vfsmount *mnt)
2250{
2251 if (atomic_add_unless(&mnt->mnt_longterm, -1, 1))
2252 return;
2253 br_write_lock(vfsmount_lock);
2254 atomic_dec(&mnt->mnt_longterm);
2255 br_write_unlock(vfsmount_lock);
2256}
2257
2294/* 2258/*
2295 * Allocate a new namespace structure and populate it with contents 2259 * Allocate a new namespace structure and populate it with contents
2296 * copied from the namespace of the passed in task structure. 2260 * copied from the namespace of the passed in task structure.
@@ -2328,14 +2292,19 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2328 q = new_ns->root; 2292 q = new_ns->root;
2329 while (p) { 2293 while (p) {
2330 q->mnt_ns = new_ns; 2294 q->mnt_ns = new_ns;
2295 atomic_inc(&q->mnt_longterm);
2331 if (fs) { 2296 if (fs) {
2332 if (p == fs->root.mnt) { 2297 if (p == fs->root.mnt) {
2298 fs->root.mnt = mntget(q);
2299 atomic_inc(&q->mnt_longterm);
2300 mnt_make_shortterm(p);
2333 rootmnt = p; 2301 rootmnt = p;
2334 fs->root.mnt = mntget_long(q);
2335 } 2302 }
2336 if (p == fs->pwd.mnt) { 2303 if (p == fs->pwd.mnt) {
2304 fs->pwd.mnt = mntget(q);
2305 atomic_inc(&q->mnt_longterm);
2306 mnt_make_shortterm(p);
2337 pwdmnt = p; 2307 pwdmnt = p;
2338 fs->pwd.mnt = mntget_long(q);
2339 } 2308 }
2340 } 2309 }
2341 p = next_mnt(p, mnt_ns->root); 2310 p = next_mnt(p, mnt_ns->root);
@@ -2344,9 +2313,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2344 up_write(&namespace_sem); 2313 up_write(&namespace_sem);
2345 2314
2346 if (rootmnt) 2315 if (rootmnt)
2347 mntput_long(rootmnt); 2316 mntput(rootmnt);
2348 if (pwdmnt) 2317 if (pwdmnt)
2349 mntput_long(pwdmnt); 2318 mntput(pwdmnt);
2350 2319
2351 return new_ns; 2320 return new_ns;
2352} 2321}
@@ -2379,6 +2348,7 @@ struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
2379 new_ns = alloc_mnt_ns(); 2348 new_ns = alloc_mnt_ns();
2380 if (!IS_ERR(new_ns)) { 2349 if (!IS_ERR(new_ns)) {
2381 mnt->mnt_ns = new_ns; 2350 mnt->mnt_ns = new_ns;
2351 atomic_inc(&mnt->mnt_longterm);
2382 new_ns->root = mnt; 2352 new_ns->root = mnt;
2383 list_add(&new_ns->list, &new_ns->root->mnt_list); 2353 list_add(&new_ns->list, &new_ns->root->mnt_list);
2384 } 2354 }
diff --git a/fs/pipe.c b/fs/pipe.c
index e2e95fb46a1e..89e9e19b1b2e 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1292,7 +1292,7 @@ static int __init init_pipe_fs(void)
1292static void __exit exit_pipe_fs(void) 1292static void __exit exit_pipe_fs(void)
1293{ 1293{
1294 unregister_filesystem(&pipe_fs_type); 1294 unregister_filesystem(&pipe_fs_type);
1295 mntput_long(pipe_mnt); 1295 mntput(pipe_mnt);
1296} 1296}
1297 1297
1298fs_initcall(init_pipe_fs); 1298fs_initcall(init_pipe_fs);
diff --git a/fs/super.c b/fs/super.c
index 4f6a3571a634..74e149efed81 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1141,7 +1141,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
1141 return mnt; 1141 return mnt;
1142 1142
1143 err: 1143 err:
1144 mntput_long(mnt); 1144 mntput(mnt);
1145 return ERR_PTR(err); 1145 return ERR_PTR(err);
1146} 1146}
1147 1147