summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org>, 2019-07-20 12:15:51 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org>, 2019-07-20 12:15:51 -0400
commit: 18253e034d2aeee140f82fc9fe89c4bce5c81799 (patch)
tree: 0a01610fbe16b24800977d2c7a3b93721ca5639c
parent: abdfd52a295fb5731ab07b5c9013e2e39f4d1cbe (diff)
parent: 56cbb429d911991170fe867b4bba14f0efed5829 (diff)
Merge branch 'work.dcache2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull dcache and mountpoint updates from Al Viro:
 "Saner handling of refcounts to mountpoints.

  Transfer the counting reference from struct mount ->mnt_mountpoint
  over to struct mountpoint ->m_dentry. That allows us to get rid of the
  convoluted games with ordering of mount shutdowns.

  The cost is in teaching shrink_dcache_{parent,for_umount} to cope with
  mixed-filesystem shrink lists, which we'll also need for the Slab
  Movable Objects patchset"

* 'work.dcache2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  switch the remnants of releasing the mountpoint away from fs_pin
  get rid of detach_mnt()
  make struct mountpoint bear the dentry reference to mountpoint, not struct mount
  Teach shrink_dcache_parent() to cope with mixed-filesystem shrink lists
  fs/namespace.c: shift put_mountpoint() to callers of unhash_mnt()
  __detach_mounts(): lookup_mountpoint() can't return ERR_PTR() anymore
  nfs: dget_parent() never returns NULL
  ceph: don't open-code the check for dead lockref
-rw-r--r--  fs/ceph/dir.c           |   2
-rw-r--r--  fs/dcache.c             | 100
-rw-r--r--  fs/fs_pin.c             |  10
-rw-r--r--  fs/internal.h           |   2
-rw-r--r--  fs/mount.h              |   8
-rw-r--r--  fs/namespace.c          | 159
-rw-r--r--  fs/nfs/super.c          |   6
-rw-r--r--  include/linux/fs_pin.h  |   1
8 files changed, 172 insertions, 116 deletions
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index aab29f48c62d..4ca0b8ff9a72 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1267,7 +1267,7 @@ __dentry_leases_walk(struct ceph_mds_client *mdsc,
1267 if (!spin_trylock(&dentry->d_lock)) 1267 if (!spin_trylock(&dentry->d_lock))
1268 continue; 1268 continue;
1269 1269
1270 if (dentry->d_lockref.count < 0) { 1270 if (__lockref_is_dead(&dentry->d_lockref)) {
1271 list_del_init(&di->lease_list); 1271 list_del_init(&di->lease_list);
1272 goto next; 1272 goto next;
1273 } 1273 }
diff --git a/fs/dcache.c b/fs/dcache.c
index f41121e5d1ec..e88cf0554e65 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -861,6 +861,32 @@ void dput(struct dentry *dentry)
861} 861}
862EXPORT_SYMBOL(dput); 862EXPORT_SYMBOL(dput);
863 863
864static void __dput_to_list(struct dentry *dentry, struct list_head *list)
865__must_hold(&dentry->d_lock)
866{
867 if (dentry->d_flags & DCACHE_SHRINK_LIST) {
868 /* let the owner of the list it's on deal with it */
869 --dentry->d_lockref.count;
870 } else {
871 if (dentry->d_flags & DCACHE_LRU_LIST)
872 d_lru_del(dentry);
873 if (!--dentry->d_lockref.count)
874 d_shrink_add(dentry, list);
875 }
876}
877
878void dput_to_list(struct dentry *dentry, struct list_head *list)
879{
880 rcu_read_lock();
881 if (likely(fast_dput(dentry))) {
882 rcu_read_unlock();
883 return;
884 }
885 rcu_read_unlock();
886 if (!retain_dentry(dentry))
887 __dput_to_list(dentry, list);
888 spin_unlock(&dentry->d_lock);
889}
864 890
865/* This must be called with d_lock held */ 891/* This must be called with d_lock held */
866static inline void __dget_dlock(struct dentry *dentry) 892static inline void __dget_dlock(struct dentry *dentry)
@@ -1067,7 +1093,7 @@ out:
1067 return false; 1093 return false;
1068} 1094}
1069 1095
1070static void shrink_dentry_list(struct list_head *list) 1096void shrink_dentry_list(struct list_head *list)
1071{ 1097{
1072 while (!list_empty(list)) { 1098 while (!list_empty(list)) {
1073 struct dentry *dentry, *parent; 1099 struct dentry *dentry, *parent;
@@ -1089,18 +1115,9 @@ static void shrink_dentry_list(struct list_head *list)
1089 rcu_read_unlock(); 1115 rcu_read_unlock();
1090 d_shrink_del(dentry); 1116 d_shrink_del(dentry);
1091 parent = dentry->d_parent; 1117 parent = dentry->d_parent;
1118 if (parent != dentry)
1119 __dput_to_list(parent, list);
1092 __dentry_kill(dentry); 1120 __dentry_kill(dentry);
1093 if (parent == dentry)
1094 continue;
1095 /*
1096 * We need to prune ancestors too. This is necessary to prevent
1097 * quadratic behavior of shrink_dcache_parent(), but is also
1098 * expected to be beneficial in reducing dentry cache
1099 * fragmentation.
1100 */
1101 dentry = parent;
1102 while (dentry && !lockref_put_or_lock(&dentry->d_lockref))
1103 dentry = dentry_kill(dentry);
1104 } 1121 }
1105} 1122}
1106 1123
@@ -1445,8 +1462,11 @@ out:
1445 1462
1446struct select_data { 1463struct select_data {
1447 struct dentry *start; 1464 struct dentry *start;
1465 union {
1466 long found;
1467 struct dentry *victim;
1468 };
1448 struct list_head dispose; 1469 struct list_head dispose;
1449 int found;
1450}; 1470};
1451 1471
1452static enum d_walk_ret select_collect(void *_data, struct dentry *dentry) 1472static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
@@ -1478,6 +1498,37 @@ out:
1478 return ret; 1498 return ret;
1479} 1499}
1480 1500
1501static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry)
1502{
1503 struct select_data *data = _data;
1504 enum d_walk_ret ret = D_WALK_CONTINUE;
1505
1506 if (data->start == dentry)
1507 goto out;
1508
1509 if (dentry->d_flags & DCACHE_SHRINK_LIST) {
1510 if (!dentry->d_lockref.count) {
1511 rcu_read_lock();
1512 data->victim = dentry;
1513 return D_WALK_QUIT;
1514 }
1515 } else {
1516 if (dentry->d_flags & DCACHE_LRU_LIST)
1517 d_lru_del(dentry);
1518 if (!dentry->d_lockref.count)
1519 d_shrink_add(dentry, &data->dispose);
1520 }
1521 /*
1522 * We can return to the caller if we have found some (this
1523 * ensures forward progress). We'll be coming back to find
1524 * the rest.
1525 */
1526 if (!list_empty(&data->dispose))
1527 ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY;
1528out:
1529 return ret;
1530}
1531
1481/** 1532/**
1482 * shrink_dcache_parent - prune dcache 1533 * shrink_dcache_parent - prune dcache
1483 * @parent: parent of entries to prune 1534 * @parent: parent of entries to prune
@@ -1487,12 +1538,9 @@ out:
1487void shrink_dcache_parent(struct dentry *parent) 1538void shrink_dcache_parent(struct dentry *parent)
1488{ 1539{
1489 for (;;) { 1540 for (;;) {
1490 struct select_data data; 1541 struct select_data data = {.start = parent};
1491 1542
1492 INIT_LIST_HEAD(&data.dispose); 1543 INIT_LIST_HEAD(&data.dispose);
1493 data.start = parent;
1494 data.found = 0;
1495
1496 d_walk(parent, &data, select_collect); 1544 d_walk(parent, &data, select_collect);
1497 1545
1498 if (!list_empty(&data.dispose)) { 1546 if (!list_empty(&data.dispose)) {
@@ -1503,6 +1551,24 @@ void shrink_dcache_parent(struct dentry *parent)
1503 cond_resched(); 1551 cond_resched();
1504 if (!data.found) 1552 if (!data.found)
1505 break; 1553 break;
1554 data.victim = NULL;
1555 d_walk(parent, &data, select_collect2);
1556 if (data.victim) {
1557 struct dentry *parent;
1558 spin_lock(&data.victim->d_lock);
1559 if (!shrink_lock_dentry(data.victim)) {
1560 spin_unlock(&data.victim->d_lock);
1561 rcu_read_unlock();
1562 } else {
1563 rcu_read_unlock();
1564 parent = data.victim->d_parent;
1565 if (parent != data.victim)
1566 __dput_to_list(parent, &data.dispose);
1567 __dentry_kill(data.victim);
1568 }
1569 }
1570 if (!list_empty(&data.dispose))
1571 shrink_dentry_list(&data.dispose);
1506 } 1572 }
1507} 1573}
1508EXPORT_SYMBOL(shrink_dcache_parent); 1574EXPORT_SYMBOL(shrink_dcache_parent);
diff --git a/fs/fs_pin.c b/fs/fs_pin.c
index a6497cf8ae53..47ef3c71ce90 100644
--- a/fs/fs_pin.c
+++ b/fs/fs_pin.c
@@ -19,20 +19,14 @@ void pin_remove(struct fs_pin *pin)
19 spin_unlock_irq(&pin->wait.lock); 19 spin_unlock_irq(&pin->wait.lock);
20} 20}
21 21
22void pin_insert_group(struct fs_pin *pin, struct vfsmount *m, struct hlist_head *p) 22void pin_insert(struct fs_pin *pin, struct vfsmount *m)
23{ 23{
24 spin_lock(&pin_lock); 24 spin_lock(&pin_lock);
25 if (p) 25 hlist_add_head(&pin->s_list, &m->mnt_sb->s_pins);
26 hlist_add_head(&pin->s_list, p);
27 hlist_add_head(&pin->m_list, &real_mount(m)->mnt_pins); 26 hlist_add_head(&pin->m_list, &real_mount(m)->mnt_pins);
28 spin_unlock(&pin_lock); 27 spin_unlock(&pin_lock);
29} 28}
30 29
31void pin_insert(struct fs_pin *pin, struct vfsmount *m)
32{
33 pin_insert_group(pin, m, &m->mnt_sb->s_pins);
34}
35
36void pin_kill(struct fs_pin *p) 30void pin_kill(struct fs_pin *p)
37{ 31{
38 wait_queue_entry_t wait; 32 wait_queue_entry_t wait;
diff --git a/fs/internal.h b/fs/internal.h
index ff5173212803..315fcd8d237c 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -157,6 +157,8 @@ extern long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc);
157extern struct dentry *d_alloc_cursor(struct dentry *); 157extern struct dentry *d_alloc_cursor(struct dentry *);
158extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); 158extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *);
159extern char *simple_dname(struct dentry *, char *, int); 159extern char *simple_dname(struct dentry *, char *, int);
160extern void dput_to_list(struct dentry *, struct list_head *);
161extern void shrink_dentry_list(struct list_head *);
160 162
161/* 163/*
162 * read_write.c 164 * read_write.c
diff --git a/fs/mount.h b/fs/mount.h
index 6250de544760..711a4093e475 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -58,7 +58,10 @@ struct mount {
58 struct mount *mnt_master; /* slave is on master->mnt_slave_list */ 58 struct mount *mnt_master; /* slave is on master->mnt_slave_list */
59 struct mnt_namespace *mnt_ns; /* containing namespace */ 59 struct mnt_namespace *mnt_ns; /* containing namespace */
60 struct mountpoint *mnt_mp; /* where is it mounted */ 60 struct mountpoint *mnt_mp; /* where is it mounted */
61 struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */ 61 union {
62 struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */
63 struct hlist_node mnt_umount;
64 };
62 struct list_head mnt_umounting; /* list entry for umount propagation */ 65 struct list_head mnt_umounting; /* list entry for umount propagation */
63#ifdef CONFIG_FSNOTIFY 66#ifdef CONFIG_FSNOTIFY
64 struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks; 67 struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks;
@@ -68,8 +71,7 @@ struct mount {
68 int mnt_group_id; /* peer group identifier */ 71 int mnt_group_id; /* peer group identifier */
69 int mnt_expiry_mark; /* true if marked for expiry */ 72 int mnt_expiry_mark; /* true if marked for expiry */
70 struct hlist_head mnt_pins; 73 struct hlist_head mnt_pins;
71 struct fs_pin mnt_umount; 74 struct hlist_head mnt_stuck_children;
72 struct dentry *mnt_ex_mountpoint;
73} __randomize_layout; 75} __randomize_layout;
74 76
75#define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */ 77#define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
diff --git a/fs/namespace.c b/fs/namespace.c
index f0d664adb9ba..6464ea4acba9 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -70,6 +70,8 @@ static struct hlist_head *mount_hashtable __read_mostly;
70static struct hlist_head *mountpoint_hashtable __read_mostly; 70static struct hlist_head *mountpoint_hashtable __read_mostly;
71static struct kmem_cache *mnt_cache __read_mostly; 71static struct kmem_cache *mnt_cache __read_mostly;
72static DECLARE_RWSEM(namespace_sem); 72static DECLARE_RWSEM(namespace_sem);
73static HLIST_HEAD(unmounted); /* protected by namespace_sem */
74static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
73 75
74/* /sys/fs */ 76/* /sys/fs */
75struct kobject *fs_kobj; 77struct kobject *fs_kobj;
@@ -170,14 +172,6 @@ unsigned int mnt_get_count(struct mount *mnt)
170#endif 172#endif
171} 173}
172 174
173static void drop_mountpoint(struct fs_pin *p)
174{
175 struct mount *m = container_of(p, struct mount, mnt_umount);
176 dput(m->mnt_ex_mountpoint);
177 pin_remove(p);
178 mntput(&m->mnt);
179}
180
181static struct mount *alloc_vfsmnt(const char *name) 175static struct mount *alloc_vfsmnt(const char *name)
182{ 176{
183 struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); 177 struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
@@ -215,7 +209,7 @@ static struct mount *alloc_vfsmnt(const char *name)
215 INIT_LIST_HEAD(&mnt->mnt_slave); 209 INIT_LIST_HEAD(&mnt->mnt_slave);
216 INIT_HLIST_NODE(&mnt->mnt_mp_list); 210 INIT_HLIST_NODE(&mnt->mnt_mp_list);
217 INIT_LIST_HEAD(&mnt->mnt_umounting); 211 INIT_LIST_HEAD(&mnt->mnt_umounting);
218 init_fs_pin(&mnt->mnt_umount, drop_mountpoint); 212 INIT_HLIST_HEAD(&mnt->mnt_stuck_children);
219 } 213 }
220 return mnt; 214 return mnt;
221 215
@@ -740,7 +734,7 @@ mountpoint:
740 734
741 /* Add the new mountpoint to the hash table */ 735 /* Add the new mountpoint to the hash table */
742 read_seqlock_excl(&mount_lock); 736 read_seqlock_excl(&mount_lock);
743 new->m_dentry = dentry; 737 new->m_dentry = dget(dentry);
744 new->m_count = 1; 738 new->m_count = 1;
745 hlist_add_head(&new->m_hash, mp_hash(dentry)); 739 hlist_add_head(&new->m_hash, mp_hash(dentry));
746 INIT_HLIST_HEAD(&new->m_list); 740 INIT_HLIST_HEAD(&new->m_list);
@@ -753,7 +747,11 @@ done:
753 return mp; 747 return mp;
754} 748}
755 749
756static void put_mountpoint(struct mountpoint *mp) 750/*
751 * vfsmount lock must be held. Additionally, the caller is responsible
752 * for serializing calls for given disposal list.
753 */
754static void __put_mountpoint(struct mountpoint *mp, struct list_head *list)
757{ 755{
758 if (!--mp->m_count) { 756 if (!--mp->m_count) {
759 struct dentry *dentry = mp->m_dentry; 757 struct dentry *dentry = mp->m_dentry;
@@ -761,11 +759,18 @@ static void put_mountpoint(struct mountpoint *mp)
761 spin_lock(&dentry->d_lock); 759 spin_lock(&dentry->d_lock);
762 dentry->d_flags &= ~DCACHE_MOUNTED; 760 dentry->d_flags &= ~DCACHE_MOUNTED;
763 spin_unlock(&dentry->d_lock); 761 spin_unlock(&dentry->d_lock);
762 dput_to_list(dentry, list);
764 hlist_del(&mp->m_hash); 763 hlist_del(&mp->m_hash);
765 kfree(mp); 764 kfree(mp);
766 } 765 }
767} 766}
768 767
768/* called with namespace_lock and vfsmount lock */
769static void put_mountpoint(struct mountpoint *mp)
770{
771 __put_mountpoint(mp, &ex_mountpoints);
772}
773
769static inline int check_mnt(struct mount *mnt) 774static inline int check_mnt(struct mount *mnt)
770{ 775{
771 return mnt->mnt_ns == current->nsproxy->mnt_ns; 776 return mnt->mnt_ns == current->nsproxy->mnt_ns;
@@ -796,25 +801,17 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
796/* 801/*
797 * vfsmount lock must be held for write 802 * vfsmount lock must be held for write
798 */ 803 */
799static void unhash_mnt(struct mount *mnt) 804static struct mountpoint *unhash_mnt(struct mount *mnt)
800{ 805{
806 struct mountpoint *mp;
801 mnt->mnt_parent = mnt; 807 mnt->mnt_parent = mnt;
802 mnt->mnt_mountpoint = mnt->mnt.mnt_root; 808 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
803 list_del_init(&mnt->mnt_child); 809 list_del_init(&mnt->mnt_child);
804 hlist_del_init_rcu(&mnt->mnt_hash); 810 hlist_del_init_rcu(&mnt->mnt_hash);
805 hlist_del_init(&mnt->mnt_mp_list); 811 hlist_del_init(&mnt->mnt_mp_list);
806 put_mountpoint(mnt->mnt_mp); 812 mp = mnt->mnt_mp;
807 mnt->mnt_mp = NULL; 813 mnt->mnt_mp = NULL;
808} 814 return mp;
809
810/*
811 * vfsmount lock must be held for write
812 */
813static void detach_mnt(struct mount *mnt, struct path *old_path)
814{
815 old_path->dentry = mnt->mnt_mountpoint;
816 old_path->mnt = &mnt->mnt_parent->mnt;
817 unhash_mnt(mnt);
818} 815}
819 816
820/* 817/*
@@ -822,9 +819,7 @@ static void detach_mnt(struct mount *mnt, struct path *old_path)
822 */ 819 */
823static void umount_mnt(struct mount *mnt) 820static void umount_mnt(struct mount *mnt)
824{ 821{
825 /* old mountpoint will be dropped when we can do that */ 822 put_mountpoint(unhash_mnt(mnt));
826 mnt->mnt_ex_mountpoint = mnt->mnt_mountpoint;
827 unhash_mnt(mnt);
828} 823}
829 824
830/* 825/*
@@ -836,7 +831,7 @@ void mnt_set_mountpoint(struct mount *mnt,
836{ 831{
837 mp->m_count++; 832 mp->m_count++;
838 mnt_add_count(mnt, 1); /* essentially, that's mntget */ 833 mnt_add_count(mnt, 1); /* essentially, that's mntget */
839 child_mnt->mnt_mountpoint = dget(mp->m_dentry); 834 child_mnt->mnt_mountpoint = mp->m_dentry;
840 child_mnt->mnt_parent = mnt; 835 child_mnt->mnt_parent = mnt;
841 child_mnt->mnt_mp = mp; 836 child_mnt->mnt_mp = mp;
842 hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list); 837 hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
@@ -863,7 +858,6 @@ static void attach_mnt(struct mount *mnt,
863void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt) 858void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
864{ 859{
865 struct mountpoint *old_mp = mnt->mnt_mp; 860 struct mountpoint *old_mp = mnt->mnt_mp;
866 struct dentry *old_mountpoint = mnt->mnt_mountpoint;
867 struct mount *old_parent = mnt->mnt_parent; 861 struct mount *old_parent = mnt->mnt_parent;
868 862
869 list_del_init(&mnt->mnt_child); 863 list_del_init(&mnt->mnt_child);
@@ -873,22 +867,6 @@ void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct m
873 attach_mnt(mnt, parent, mp); 867 attach_mnt(mnt, parent, mp);
874 868
875 put_mountpoint(old_mp); 869 put_mountpoint(old_mp);
876
877 /*
878 * Safely avoid even the suggestion this code might sleep or
879 * lock the mount hash by taking advantage of the knowledge that
880 * mnt_change_mountpoint will not release the final reference
881 * to a mountpoint.
882 *
883 * During mounting, the mount passed in as the parent mount will
884 * continue to use the old mountpoint and during unmounting, the
885 * old mountpoint will continue to exist until namespace_unlock,
886 * which happens well after mnt_change_mountpoint.
887 */
888 spin_lock(&old_mountpoint->d_lock);
889 old_mountpoint->d_lockref.count--;
890 spin_unlock(&old_mountpoint->d_lock);
891
892 mnt_add_count(old_parent, -1); 870 mnt_add_count(old_parent, -1);
893} 871}
894 872
@@ -1103,19 +1081,22 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
1103 1081
1104static void cleanup_mnt(struct mount *mnt) 1082static void cleanup_mnt(struct mount *mnt)
1105{ 1083{
1084 struct hlist_node *p;
1085 struct mount *m;
1106 /* 1086 /*
1107 * This probably indicates that somebody messed 1087 * The warning here probably indicates that somebody messed
1108 * up a mnt_want/drop_write() pair. If this 1088 * up a mnt_want/drop_write() pair. If this happens, the
1109 * happens, the filesystem was probably unable 1089 * filesystem was probably unable to make r/w->r/o transitions.
1110 * to make r/w->r/o transitions.
1111 */
1112 /*
1113 * The locking used to deal with mnt_count decrement provides barriers, 1090 * The locking used to deal with mnt_count decrement provides barriers,
1114 * so mnt_get_writers() below is safe. 1091 * so mnt_get_writers() below is safe.
1115 */ 1092 */
1116 WARN_ON(mnt_get_writers(mnt)); 1093 WARN_ON(mnt_get_writers(mnt));
1117 if (unlikely(mnt->mnt_pins.first)) 1094 if (unlikely(mnt->mnt_pins.first))
1118 mnt_pin_kill(mnt); 1095 mnt_pin_kill(mnt);
1096 hlist_for_each_entry_safe(m, p, &mnt->mnt_stuck_children, mnt_umount) {
1097 hlist_del(&m->mnt_umount);
1098 mntput(&m->mnt);
1099 }
1119 fsnotify_vfsmount_delete(&mnt->mnt); 1100 fsnotify_vfsmount_delete(&mnt->mnt);
1120 dput(mnt->mnt.mnt_root); 1101 dput(mnt->mnt.mnt_root);
1121 deactivate_super(mnt->mnt.mnt_sb); 1102 deactivate_super(mnt->mnt.mnt_sb);
@@ -1141,6 +1122,8 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
1141 1122
1142static void mntput_no_expire(struct mount *mnt) 1123static void mntput_no_expire(struct mount *mnt)
1143{ 1124{
1125 LIST_HEAD(list);
1126
1144 rcu_read_lock(); 1127 rcu_read_lock();
1145 if (likely(READ_ONCE(mnt->mnt_ns))) { 1128 if (likely(READ_ONCE(mnt->mnt_ns))) {
1146 /* 1129 /*
@@ -1181,10 +1164,12 @@ static void mntput_no_expire(struct mount *mnt)
1181 if (unlikely(!list_empty(&mnt->mnt_mounts))) { 1164 if (unlikely(!list_empty(&mnt->mnt_mounts))) {
1182 struct mount *p, *tmp; 1165 struct mount *p, *tmp;
1183 list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) { 1166 list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) {
1184 umount_mnt(p); 1167 __put_mountpoint(unhash_mnt(p), &list);
1168 hlist_add_head(&p->mnt_umount, &mnt->mnt_stuck_children);
1185 } 1169 }
1186 } 1170 }
1187 unlock_mount_hash(); 1171 unlock_mount_hash();
1172 shrink_dentry_list(&list);
1188 1173
1189 if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) { 1174 if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
1190 struct task_struct *task = current; 1175 struct task_struct *task = current;
@@ -1370,22 +1355,29 @@ int may_umount(struct vfsmount *mnt)
1370 1355
1371EXPORT_SYMBOL(may_umount); 1356EXPORT_SYMBOL(may_umount);
1372 1357
1373static HLIST_HEAD(unmounted); /* protected by namespace_sem */
1374
1375static void namespace_unlock(void) 1358static void namespace_unlock(void)
1376{ 1359{
1377 struct hlist_head head; 1360 struct hlist_head head;
1361 struct hlist_node *p;
1362 struct mount *m;
1363 LIST_HEAD(list);
1378 1364
1379 hlist_move_list(&unmounted, &head); 1365 hlist_move_list(&unmounted, &head);
1366 list_splice_init(&ex_mountpoints, &list);
1380 1367
1381 up_write(&namespace_sem); 1368 up_write(&namespace_sem);
1382 1369
1370 shrink_dentry_list(&list);
1371
1383 if (likely(hlist_empty(&head))) 1372 if (likely(hlist_empty(&head)))
1384 return; 1373 return;
1385 1374
1386 synchronize_rcu_expedited(); 1375 synchronize_rcu_expedited();
1387 1376
1388 group_pin_kill(&head); 1377 hlist_for_each_entry_safe(m, p, &head, mnt_umount) {
1378 hlist_del(&m->mnt_umount);
1379 mntput(&m->mnt);
1380 }
1389} 1381}
1390 1382
1391static inline void namespace_lock(void) 1383static inline void namespace_lock(void)
@@ -1472,8 +1464,6 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
1472 1464
1473 disconnect = disconnect_mount(p, how); 1465 disconnect = disconnect_mount(p, how);
1474 1466
1475 pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt,
1476 disconnect ? &unmounted : NULL);
1477 if (mnt_has_parent(p)) { 1467 if (mnt_has_parent(p)) {
1478 mnt_add_count(p->mnt_parent, -1); 1468 mnt_add_count(p->mnt_parent, -1);
1479 if (!disconnect) { 1469 if (!disconnect) {
@@ -1481,6 +1471,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
1481 list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts); 1471 list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts);
1482 } else { 1472 } else {
1483 umount_mnt(p); 1473 umount_mnt(p);
1474 hlist_add_head(&p->mnt_umount, &unmounted);
1484 } 1475 }
1485 } 1476 }
1486 change_mnt_propagation(p, MS_PRIVATE); 1477 change_mnt_propagation(p, MS_PRIVATE);
@@ -1626,15 +1617,15 @@ void __detach_mounts(struct dentry *dentry)
1626 namespace_lock(); 1617 namespace_lock();
1627 lock_mount_hash(); 1618 lock_mount_hash();
1628 mp = lookup_mountpoint(dentry); 1619 mp = lookup_mountpoint(dentry);
1629 if (IS_ERR_OR_NULL(mp)) 1620 if (!mp)
1630 goto out_unlock; 1621 goto out_unlock;
1631 1622
1632 event++; 1623 event++;
1633 while (!hlist_empty(&mp->m_list)) { 1624 while (!hlist_empty(&mp->m_list)) {
1634 mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); 1625 mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
1635 if (mnt->mnt.mnt_flags & MNT_UMOUNT) { 1626 if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
1636 hlist_add_head(&mnt->mnt_umount.s_list, &unmounted);
1637 umount_mnt(mnt); 1627 umount_mnt(mnt);
1628 hlist_add_head(&mnt->mnt_umount, &unmounted);
1638 } 1629 }
1639 else umount_tree(mnt, UMOUNT_CONNECTED); 1630 else umount_tree(mnt, UMOUNT_CONNECTED);
1640 } 1631 }
@@ -2046,7 +2037,7 @@ int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
2046static int attach_recursive_mnt(struct mount *source_mnt, 2037static int attach_recursive_mnt(struct mount *source_mnt,
2047 struct mount *dest_mnt, 2038 struct mount *dest_mnt,
2048 struct mountpoint *dest_mp, 2039 struct mountpoint *dest_mp,
2049 struct path *parent_path) 2040 bool moving)
2050{ 2041{
2051 struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; 2042 struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
2052 HLIST_HEAD(tree_list); 2043 HLIST_HEAD(tree_list);
@@ -2064,7 +2055,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
2064 return PTR_ERR(smp); 2055 return PTR_ERR(smp);
2065 2056
2066 /* Is there space to add these mounts to the mount namespace? */ 2057 /* Is there space to add these mounts to the mount namespace? */
2067 if (!parent_path) { 2058 if (!moving) {
2068 err = count_mounts(ns, source_mnt); 2059 err = count_mounts(ns, source_mnt);
2069 if (err) 2060 if (err)
2070 goto out; 2061 goto out;
@@ -2083,8 +2074,8 @@ static int attach_recursive_mnt(struct mount *source_mnt,
2083 } else { 2074 } else {
2084 lock_mount_hash(); 2075 lock_mount_hash();
2085 } 2076 }
2086 if (parent_path) { 2077 if (moving) {
2087 detach_mnt(source_mnt, parent_path); 2078 unhash_mnt(source_mnt);
2088 attach_mnt(source_mnt, dest_mnt, dest_mp); 2079 attach_mnt(source_mnt, dest_mnt, dest_mp);
2089 touch_mnt_namespace(source_mnt->mnt_ns); 2080 touch_mnt_namespace(source_mnt->mnt_ns);
2090 } else { 2081 } else {
@@ -2182,7 +2173,7 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
2182 d_is_dir(mnt->mnt.mnt_root)) 2173 d_is_dir(mnt->mnt.mnt_root))
2183 return -ENOTDIR; 2174 return -ENOTDIR;
2184 2175
2185 return attach_recursive_mnt(mnt, p, mp, NULL); 2176 return attach_recursive_mnt(mnt, p, mp, false);
2186} 2177}
2187 2178
2188/* 2179/*
@@ -2575,11 +2566,11 @@ out:
2575 2566
2576static int do_move_mount(struct path *old_path, struct path *new_path) 2567static int do_move_mount(struct path *old_path, struct path *new_path)
2577{ 2568{
2578 struct path parent_path = {.mnt = NULL, .dentry = NULL};
2579 struct mnt_namespace *ns; 2569 struct mnt_namespace *ns;
2580 struct mount *p; 2570 struct mount *p;
2581 struct mount *old; 2571 struct mount *old;
2582 struct mountpoint *mp; 2572 struct mount *parent;
2573 struct mountpoint *mp, *old_mp;
2583 int err; 2574 int err;
2584 bool attached; 2575 bool attached;
2585 2576
@@ -2589,7 +2580,9 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
2589 2580
2590 old = real_mount(old_path->mnt); 2581 old = real_mount(old_path->mnt);
2591 p = real_mount(new_path->mnt); 2582 p = real_mount(new_path->mnt);
2583 parent = old->mnt_parent;
2592 attached = mnt_has_parent(old); 2584 attached = mnt_has_parent(old);
2585 old_mp = old->mnt_mp;
2593 ns = old->mnt_ns; 2586 ns = old->mnt_ns;
2594 2587
2595 err = -EINVAL; 2588 err = -EINVAL;
@@ -2617,7 +2610,7 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
2617 /* 2610 /*
2618 * Don't move a mount residing in a shared parent. 2611 * Don't move a mount residing in a shared parent.
2619 */ 2612 */
2620 if (attached && IS_MNT_SHARED(old->mnt_parent)) 2613 if (attached && IS_MNT_SHARED(parent))
2621 goto out; 2614 goto out;
2622 /* 2615 /*
2623 * Don't move a mount tree containing unbindable mounts to a destination 2616 * Don't move a mount tree containing unbindable mounts to a destination
@@ -2633,18 +2626,21 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
2633 goto out; 2626 goto out;
2634 2627
2635 err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp, 2628 err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp,
2636 attached ? &parent_path : NULL); 2629 attached);
2637 if (err) 2630 if (err)
2638 goto out; 2631 goto out;
2639 2632
2640 /* if the mount is moved, it should no longer be expire 2633 /* if the mount is moved, it should no longer be expire
2641 * automatically */ 2634 * automatically */
2642 list_del_init(&old->mnt_expire); 2635 list_del_init(&old->mnt_expire);
2636 if (attached)
2637 put_mountpoint(old_mp);
2643out: 2638out:
2644 unlock_mount(mp); 2639 unlock_mount(mp);
2645 if (!err) { 2640 if (!err) {
2646 path_put(&parent_path); 2641 if (attached)
2647 if (!attached) 2642 mntput_no_expire(parent);
2643 else
2648 free_mnt_ns(ns); 2644 free_mnt_ns(ns);
2649 } 2645 }
2650 return err; 2646 return err;
@@ -3589,8 +3585,8 @@ EXPORT_SYMBOL(path_is_under);
3589SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, 3585SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
3590 const char __user *, put_old) 3586 const char __user *, put_old)
3591{ 3587{
3592 struct path new, old, parent_path, root_parent, root; 3588 struct path new, old, root;
3593 struct mount *new_mnt, *root_mnt, *old_mnt; 3589 struct mount *new_mnt, *root_mnt, *old_mnt, *root_parent, *ex_parent;
3594 struct mountpoint *old_mp, *root_mp; 3590 struct mountpoint *old_mp, *root_mp;
3595 int error; 3591 int error;
3596 3592
@@ -3619,9 +3615,11 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
3619 new_mnt = real_mount(new.mnt); 3615 new_mnt = real_mount(new.mnt);
3620 root_mnt = real_mount(root.mnt); 3616 root_mnt = real_mount(root.mnt);
3621 old_mnt = real_mount(old.mnt); 3617 old_mnt = real_mount(old.mnt);
3618 ex_parent = new_mnt->mnt_parent;
3619 root_parent = root_mnt->mnt_parent;
3622 if (IS_MNT_SHARED(old_mnt) || 3620 if (IS_MNT_SHARED(old_mnt) ||
3623 IS_MNT_SHARED(new_mnt->mnt_parent) || 3621 IS_MNT_SHARED(ex_parent) ||
3624 IS_MNT_SHARED(root_mnt->mnt_parent)) 3622 IS_MNT_SHARED(root_parent))
3625 goto out4; 3623 goto out4;
3626 if (!check_mnt(root_mnt) || !check_mnt(new_mnt)) 3624 if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
3627 goto out4; 3625 goto out4;
@@ -3638,7 +3636,6 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
3638 goto out4; /* not a mountpoint */ 3636 goto out4; /* not a mountpoint */
3639 if (!mnt_has_parent(root_mnt)) 3637 if (!mnt_has_parent(root_mnt))
3640 goto out4; /* not attached */ 3638 goto out4; /* not attached */
3641 root_mp = root_mnt->mnt_mp;
3642 if (new.mnt->mnt_root != new.dentry) 3639 if (new.mnt->mnt_root != new.dentry)
3643 goto out4; /* not a mountpoint */ 3640 goto out4; /* not a mountpoint */
3644 if (!mnt_has_parent(new_mnt)) 3641 if (!mnt_has_parent(new_mnt))
@@ -3649,10 +3646,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
3649 /* make certain new is below the root */ 3646 /* make certain new is below the root */
3650 if (!is_path_reachable(new_mnt, new.dentry, &root)) 3647 if (!is_path_reachable(new_mnt, new.dentry, &root))
3651 goto out4; 3648 goto out4;
3652 root_mp->m_count++; /* pin it so it won't go away */
3653 lock_mount_hash(); 3649 lock_mount_hash();
3654 detach_mnt(new_mnt, &parent_path); 3650 umount_mnt(new_mnt);
3655 detach_mnt(root_mnt, &root_parent); 3651 root_mp = unhash_mnt(root_mnt); /* we'll need its mountpoint */
3656 if (root_mnt->mnt.mnt_flags & MNT_LOCKED) { 3652 if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
3657 new_mnt->mnt.mnt_flags |= MNT_LOCKED; 3653 new_mnt->mnt.mnt_flags |= MNT_LOCKED;
3658 root_mnt->mnt.mnt_flags &= ~MNT_LOCKED; 3654 root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
@@ -3660,7 +3656,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
3660 /* mount old root on put_old */ 3656 /* mount old root on put_old */
3661 attach_mnt(root_mnt, old_mnt, old_mp); 3657 attach_mnt(root_mnt, old_mnt, old_mp);
3662 /* mount new_root on / */ 3658 /* mount new_root on / */
3663 attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp); 3659 attach_mnt(new_mnt, root_parent, root_mp);
3660 mnt_add_count(root_parent, -1);
3664 touch_mnt_namespace(current->nsproxy->mnt_ns); 3661 touch_mnt_namespace(current->nsproxy->mnt_ns);
3665 /* A moved mount should not expire automatically */ 3662 /* A moved mount should not expire automatically */
3666 list_del_init(&new_mnt->mnt_expire); 3663 list_del_init(&new_mnt->mnt_expire);
@@ -3670,10 +3667,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
3670 error = 0; 3667 error = 0;
3671out4: 3668out4:
3672 unlock_mount(old_mp); 3669 unlock_mount(old_mp);
3673 if (!error) { 3670 if (!error)
3674 path_put(&root_parent); 3671 mntput_no_expire(ex_parent);
3675 path_put(&parent_path);
3676 }
3677out3: 3672out3:
3678 path_put(&root); 3673 path_put(&root);
3679out2: 3674out2:
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 3683d2b1cc8e..628631e2e34f 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -457,10 +457,8 @@ int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
457 struct dentry *pd_dentry; 457 struct dentry *pd_dentry;
458 458
459 pd_dentry = dget_parent(dentry); 459 pd_dentry = dget_parent(dentry);
460 if (pd_dentry != NULL) { 460 nfs_zap_caches(d_inode(pd_dentry));
461 nfs_zap_caches(d_inode(pd_dentry)); 461 dput(pd_dentry);
462 dput(pd_dentry);
463 }
464 } 462 }
465 nfs_free_fattr(res.fattr); 463 nfs_free_fattr(res.fattr);
466 if (error < 0) 464 if (error < 0)
diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h
index 7cab74d66f85..bdd09fd2520c 100644
--- a/include/linux/fs_pin.h
+++ b/include/linux/fs_pin.h
@@ -20,6 +20,5 @@ static inline void init_fs_pin(struct fs_pin *p, void (*kill)(struct fs_pin *))
20} 20}
21 21
22void pin_remove(struct fs_pin *); 22void pin_remove(struct fs_pin *);
23void pin_insert_group(struct fs_pin *, struct vfsmount *, struct hlist_head *);
24void pin_insert(struct fs_pin *, struct vfsmount *); 23void pin_insert(struct fs_pin *, struct vfsmount *);
25void pin_kill(struct fs_pin *); 24void pin_kill(struct fs_pin *);