mnt: Make propagate_umount less slow for overlapping mount propagation trees

Andrei Vagin pointed out that time to executue propagate_umount can go non-linear (and take a ludicrious amount of time) when the mount propogation trees of the mounts to be unmunted by a lazy unmount overlap. Make the walk of the mount propagation trees nearly linear by remembering which mounts have already been visited, allowing subsequent walks to detect when walking a mount propgation tree or a subtree of a mount propgation tree would be duplicate work and to skip them entirely. Walk the list of mounts whose propgatation trees need to be traversed from the mount highest in the mount tree to mounts lower in the mount tree so that odds are higher that the code will walk the largest trees first, allowing later tree walks to be skipped entirely. Add cleanup_umount_visitation to remover the code's memory of which mounts have been visited. Add the functions last_slave and skip_propagation_subtree to allow skipping appropriate parts of the mount propagation tree without needing to change the logic of the rest of the code. A script to generate overlapping mount propagation trees: $ cat runs.h set -e mount -t tmpfs zdtm /mnt mkdir -p /mnt/1 /mnt/2 mount -t tmpfs zdtm /mnt/1 mount --make-shared /mnt/1 mkdir /mnt/1/1 iteration=10 if [ -n "$1" ] ; then iteration=$1 fi for i in $(seq $iteration); do mount --bind /mnt/1/1 /mnt/1/1 done mount --rbind /mnt/1 /mnt/2 TIMEFORMAT='%Rs' nr=$(( ( 2 ** ( $iteration + 1 ) ) + 1 )) echo -n "umount -l /mnt/1 -> $nr " time umount -l /mnt/1 nr=$(cat /proc/self/mountinfo | grep zdtm | wc -l ) time umount -l /mnt/2 $ for i in $(seq 9 19); do echo $i; unshare -Urm bash ./run.sh $i; done Here are the performance numbers with and without the patch: mhash | 8192 | 8192 | 1048576 | 1048576 mounts | before | after | before | after ------------------------------------------------ 1025 | 0.040s | 0.016s | 0.038s | 0.019s 2049 | 0.094s | 0.017s | 0.080s | 0.018s 4097 | 0.243s | 0.019s | 0.206s | 0.023s 8193 | 1.202s | 0.028s | 1.562s | 0.032s 16385 | 9.635s | 0.036s | 9.952s | 0.041s 32769 | 60.928s | 0.063s | 44.321s | 0.064s 65537 | | 0.097s | | 0.097s 131073 | | 0.233s | | 0.176s 262145 | | 0.653s | | 0.344s 524289 | | 2.305s | | 0.735s 1048577 | | 7.107s | | 2.603s Andrei Vagin reports fixing the performance problem is part of the work to fix CVE-2016-6213. Cc: stable@vger.kernel.org Fixes: a05964f3917c ("[PATCH] shared mounts handling: umount") Reported-by: Andrei Vagin <avagin@openvz.org> Reviewed-by: Andrei Vagin <avagin@virtuozzo.com> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
author: Eric W. Biederman <ebiederm@xmission.com> 2016-10-24 18:25:19 -0400
committer: Eric W. Biederman <ebiederm@xmission.com> 2017-05-23 09:41:17 -0400
commit: 296990deb389c7da21c78030376ba244dc1badf5 (patch)
tree: 1e3b8eac526bf7adda02804f6828b64a6d6fbb04 /fs/pnode.c
parent: 99b19d16471e9c3faa85cad38abc9cbbe04c6d55 (diff)
1 files changed, 62 insertions, 1 deletions
diff --git a/fs/pnode.c b/fs/pnode.c
index fbaca7df2eb0..53d411a371ce 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -24,6 +24,11 @@ static inline struct mount *first_slave(struct mount *p)
        return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave);
 }
+static inline struct mount *last_slave(struct mount *p)
+{
+        return list_entry(p->mnt_slave_list.prev, struct mount, mnt_slave);
+}
 static inline struct mount *next_slave(struct mount *p)
 {
        return list_entry(p->mnt_slave.next, struct mount, mnt_slave);
@@ -162,6 +167,19 @@ static struct mount *propagation_next(struct mount *m,
        }
 }
+static struct mount *skip_propagation_subtree(struct mount *m,
+                                                struct mount *origin)
+{
+        /*
+         * Advance m such that propagation_next will not return
+         * the slaves of m.
+         */
+        if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
+                m = last_slave(m);
+        return m;
+}
 static struct mount *next_group(struct mount *m, struct mount *origin)
 {
        while (1) {
@@ -505,6 +523,15 @@ static void restore_mounts(struct list_head *to_restore)
        }
 }
+static void cleanup_umount_visitations(struct list_head *visited)
+{
+        while (!list_empty(visited)) {
+                struct mount *mnt =
+                        list_first_entry(visited, struct mount, mnt_umounting);
+                list_del_init(&mnt->mnt_umounting);
+        }
+}
 /*
 * collect all mounts that receive propagation from the mount in @list,
 * and return these additional mounts in the same list.
@@ -517,11 +544,23 @@ int propagate_umount(struct list_head *list)
        struct mount *mnt;
        LIST_HEAD(to_restore);
        LIST_HEAD(to_umount);
+        LIST_HEAD(visited);
-        list_for_each_entry(mnt, list, mnt_list) {
+        /* Find candidates for unmounting */
+        list_for_each_entry_reverse(mnt, list, mnt_list) {
                struct mount *parent = mnt->mnt_parent;
                struct mount *m;
+                /*
+                 * If this mount has already been visited it is known that it's
+                 * entire peer group and all of their slaves in the propagation
+                 * tree for the mountpoint has already been visited and there is
+                 * no need to visit them again.
+                 */
+                if (!list_empty(&mnt->mnt_umounting))
+                        continue;
+                list_add_tail(&mnt->mnt_umounting, &visited);
                for (m = propagation_next(parent, parent); m;
                     m = propagation_next(m, parent)) {
                        struct mount *child = __lookup_mnt(&m->mnt,
@@ -529,6 +568,27 @@ int propagate_umount(struct list_head *list)
                        if (!child)
                                continue;
+                        if (!list_empty(&child->mnt_umounting)) {
+                                /*
+                                 * If the child has already been visited it is
+                                 * know that it's entire peer group and all of
+                                 * their slaves in the propgation tree for the
+                                 * mountpoint has already been visited and there
+                                 * is no need to visit this subtree again.
+                                 */
+                                m = skip_propagation_subtree(m, parent);
+                                continue;
+                        } else if (child->mnt.mnt_flags & MNT_UMOUNT) {
+                                /*
+                                 * We have come accross an partially unmounted
+                                 * mount in list that has not been visited yet.
+                                 * Remember it has been visited and continue
+                                 * about our merry way.
+                                 */
+                                list_add_tail(&child->mnt_umounting, &visited);
+                                continue;
+                        }
                        /* Check the child and parents while progress is made */
                        while (__propagate_umount(child,
                                                  &to_umount, &to_restore)) {
@@ -542,6 +602,7 @@ int propagate_umount(struct list_head *list)
        umount_list(&to_umount, &to_restore);
        restore_mounts(&to_restore);
+        cleanup_umount_visitations(&visited);
        list_splice_tail(&to_umount, list);
        return 0;
author	Eric W. Biederman <ebiederm@xmission.com>	2016-10-24 18:25:19 -0400
committer	Eric W. Biederman <ebiederm@xmission.com>	2017-05-23 09:41:17 -0400
commit	296990deb389c7da21c78030376ba244dc1badf5 (patch)
tree	1e3b8eac526bf7adda02804f6828b64a6d6fbb04 /fs/pnode.c
parent	99b19d16471e9c3faa85cad38abc9cbbe04c6d55 (diff)

diff --git a/fs/pnode.c b/fs/pnode.c index fbaca7df2eb0..53d411a371ce 100644 --- a/fs/pnode.c +++ b/fs/pnode.c
@@ -24,6 +24,11 @@ static inline struct mount first_slave(struct mount p)
24	return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave);	24	return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave);
25	}	25	}
26		26
		27	static inline struct mount last_slave(struct mount p)
		28	{
		29	return list_entry(p->mnt_slave_list.prev, struct mount, mnt_slave);
		30	}
		31
27	static inline struct mount next_slave(struct mount p)	32	static inline struct mount next_slave(struct mount p)
28	{	33	{
29	return list_entry(p->mnt_slave.next, struct mount, mnt_slave);	34	return list_entry(p->mnt_slave.next, struct mount, mnt_slave);
@@ -162,6 +167,19 @@ static struct mount propagation_next(struct mount m,
162	}	167	}
163	}	168	}
164		169
		170	static struct mount skip_propagation_subtree(struct mount m,
		171	struct mount *origin)
		172	{
		173	/*
		174	* Advance m such that propagation_next will not return
		175	* the slaves of m.
		176	*/
		177	if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
		178	m = last_slave(m);
		179
		180	return m;
		181	}
		182
165	static struct mount next_group(struct mount m, struct mount *origin)	183	static struct mount next_group(struct mount m, struct mount *origin)
166	{	184	{
167	while (1) {	185	while (1) {
@@ -505,6 +523,15 @@ static void restore_mounts(struct list_head *to_restore)
505	}	523	}
506	}	524	}
507		525
		526	static void cleanup_umount_visitations(struct list_head *visited)
		527	{
		528	while (!list_empty(visited)) {
		529	struct mount *mnt =
		530	list_first_entry(visited, struct mount, mnt_umounting);
		531	list_del_init(&mnt->mnt_umounting);
		532	}
		533	}
		534
508	/*	535	/*
509	* collect all mounts that receive propagation from the mount in @list,	536	* collect all mounts that receive propagation from the mount in @list,
510	* and return these additional mounts in the same list.	537	* and return these additional mounts in the same list.
@@ -517,11 +544,23 @@ int propagate_umount(struct list_head *list)
517	struct mount *mnt;	544	struct mount *mnt;
518	LIST_HEAD(to_restore);	545	LIST_HEAD(to_restore);
519	LIST_HEAD(to_umount);	546	LIST_HEAD(to_umount);
		547	LIST_HEAD(visited);
520		548
521	list_for_each_entry(mnt, list, mnt_list) {	549	/* Find candidates for unmounting */
		550	list_for_each_entry_reverse(mnt, list, mnt_list) {
522	struct mount *parent = mnt->mnt_parent;	551	struct mount *parent = mnt->mnt_parent;
523	struct mount *m;	552	struct mount *m;
524		553
		554	/*
		555	* If this mount has already been visited it is known that it's
		556	* entire peer group and all of their slaves in the propagation
		557	* tree for the mountpoint has already been visited and there is
		558	* no need to visit them again.
		559	*/
		560	if (!list_empty(&mnt->mnt_umounting))
		561	continue;
		562
		563	list_add_tail(&mnt->mnt_umounting, &visited);
525	for (m = propagation_next(parent, parent); m;	564	for (m = propagation_next(parent, parent); m;
526	m = propagation_next(m, parent)) {	565	m = propagation_next(m, parent)) {
527	struct mount *child = __lookup_mnt(&m->mnt,	566	struct mount *child = __lookup_mnt(&m->mnt,
@@ -529,6 +568,27 @@ int propagate_umount(struct list_head *list)
529	if (!child)	568	if (!child)
530	continue;	569	continue;
531		570
		571	if (!list_empty(&child->mnt_umounting)) {
		572	/*
		573	* If the child has already been visited it is
		574	* know that it's entire peer group and all of
		575	* their slaves in the propgation tree for the
		576	* mountpoint has already been visited and there
		577	* is no need to visit this subtree again.
		578	*/
		579	m = skip_propagation_subtree(m, parent);
		580	continue;
		581	} else if (child->mnt.mnt_flags & MNT_UMOUNT) {
		582	/*
		583	* We have come accross an partially unmounted
		584	* mount in list that has not been visited yet.
		585	* Remember it has been visited and continue
		586	* about our merry way.
		587	*/
		588	list_add_tail(&child->mnt_umounting, &visited);
		589	continue;
		590	}
		591
532	/* Check the child and parents while progress is made */	592	/* Check the child and parents while progress is made */
533	while (__propagate_umount(child,	593	while (__propagate_umount(child,
534	&to_umount, &to_restore)) {	594	&to_umount, &to_restore)) {
@@ -542,6 +602,7 @@ int propagate_umount(struct list_head *list)
542		602
543	umount_list(&to_umount, &to_restore);	603	umount_list(&to_umount, &to_restore);
544	restore_mounts(&to_restore);	604	restore_mounts(&to_restore);
		605	cleanup_umount_visitations(&visited);
545	list_splice_tail(&to_umount, list);	606	list_splice_tail(&to_umount, list);
546		607
547	return 0;	608	return 0;