aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Eric W. Biederman <ebiederm@xmission.com> 2016-10-24 17:16:13 -0400
committer: Eric W. Biederman <ebiederm@xmission.com> 2017-05-23 09:41:16 -0400
commit: 99b19d16471e9c3faa85cad38abc9cbbe04c6d55 (patch)
tree: 59e40753b417ab213afb32b14a84e682d3920b5b
parent: 570487d3faf2a1d8a220e6ee10f472163123d7da (diff)
mnt: In propagate_umount handle visiting mounts in any order
While investigating some poor umount performance I realized that in the case of overlapping mount trees where some of the mounts are locked, the code has been failing to unmount all of the mounts it should have been unmounting.

This failure to unmount all of the necessary mounts can be reproduced with:

$ cat locked_mounts_test.sh

mount -t tmpfs test-base /mnt
mount --make-shared /mnt
mkdir -p /mnt/b

mount -t tmpfs test1 /mnt/b
mount --make-shared /mnt/b
mkdir -p /mnt/b/10

mount -t tmpfs test2 /mnt/b/10
mount --make-shared /mnt/b/10
mkdir -p /mnt/b/10/20

mount --rbind /mnt/b /mnt/b/10/20

unshare -Urm --propagation unchanged /bin/sh -c 'sleep 5; if [ $(grep test /proc/self/mountinfo | wc -l) -eq 1 ] ; then echo SUCCESS ; else echo FAILURE ; fi'
sleep 1
umount -l /mnt/b
wait %%

$ unshare -Urm ./locked_mounts_test.sh

This failure is corrected by removing the prepass that marks mounts that may be umounted.

A first pass is added that umounts mounts if possible; if a mount cannot be umounted, the pass sets the mount mark when the mount could be unmounted were it not locked, and adds it to a list of umount possibilities. This first pass reconsiders the mount's parent if it is on the list of umount possibilities, ensuring that information about umountability will pass from child to mount parent.

A second pass then walks through all mounts that are umounted and processes their children, unmounting them or marking them for reparenting.

A last pass cleans up the state on the mounts that could not be umounted and, if applicable, reparents them to their first parent that remained mounted.

While a bit longer than the old code, this code is much more robust as it allows information to flow up from the leaves and down from the trunk, making the order in which mounts are encountered in the umount propagation tree irrelevant.

Cc: stable@vger.kernel.org
Fixes: 0c56fe31420c ("mnt: Don't propagate unmounts to locked mounts")
Reviewed-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
-rw-r--r-- fs/mount.h 2
-rw-r--r-- fs/namespace.c 2
-rw-r--r-- fs/pnode.c 144
3 files changed, 88 insertions, 60 deletions
diff --git a/fs/mount.h b/fs/mount.h
index ede5a1d5cf99..de45d9e76748 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -58,7 +58,7 @@ struct mount {
58 struct mnt_namespace *mnt_ns; /* containing namespace */ 58 struct mnt_namespace *mnt_ns; /* containing namespace */
59 struct mountpoint *mnt_mp; /* where is it mounted */ 59 struct mountpoint *mnt_mp; /* where is it mounted */
60 struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */ 60 struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */
61 struct list_head mnt_reparent; /* reparent list entry */ 61 struct list_head mnt_umounting; /* list entry for umount propagation */
62#ifdef CONFIG_FSNOTIFY 62#ifdef CONFIG_FSNOTIFY
63 struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks; 63 struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks;
64 __u32 mnt_fsnotify_mask; 64 __u32 mnt_fsnotify_mask;
diff --git a/fs/namespace.c b/fs/namespace.c
index 51e49866e1fe..5e3dcbeb1de5 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -236,7 +236,7 @@ static struct mount *alloc_vfsmnt(const char *name)
236 INIT_LIST_HEAD(&mnt->mnt_slave_list); 236 INIT_LIST_HEAD(&mnt->mnt_slave_list);
237 INIT_LIST_HEAD(&mnt->mnt_slave); 237 INIT_LIST_HEAD(&mnt->mnt_slave);
238 INIT_HLIST_NODE(&mnt->mnt_mp_list); 238 INIT_HLIST_NODE(&mnt->mnt_mp_list);
239 INIT_LIST_HEAD(&mnt->mnt_reparent); 239 INIT_LIST_HEAD(&mnt->mnt_umounting);
240 init_fs_pin(&mnt->mnt_umount, drop_mountpoint); 240 init_fs_pin(&mnt->mnt_umount, drop_mountpoint);
241 } 241 }
242 return mnt; 242 return mnt;
diff --git a/fs/pnode.c b/fs/pnode.c
index 52aca0a118ff..fbaca7df2eb0 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -413,86 +413,95 @@ void propagate_mount_unlock(struct mount *mnt)
413 } 413 }
414} 414}
415 415
416/* 416static void umount_one(struct mount *mnt, struct list_head *to_umount)
417 * Mark all mounts that the MNT_LOCKED logic will allow to be unmounted.
418 */
419static void mark_umount_candidates(struct mount *mnt)
420{ 417{
421 struct mount *parent = mnt->mnt_parent; 418 CLEAR_MNT_MARK(mnt);
422 struct mount *m; 419 mnt->mnt.mnt_flags |= MNT_UMOUNT;
423 420 list_del_init(&mnt->mnt_child);
424 BUG_ON(parent == mnt); 421 list_del_init(&mnt->mnt_umounting);
425 422 list_move_tail(&mnt->mnt_list, to_umount);
426 for (m = propagation_next(parent, parent); m;
427 m = propagation_next(m, parent)) {
428 struct mount *child = __lookup_mnt(&m->mnt,
429 mnt->mnt_mountpoint);
430 if (!child || (child->mnt.mnt_flags & MNT_UMOUNT))
431 continue;
432 if (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m)) {
433 SET_MNT_MARK(child);
434 }
435 }
436} 423}
437 424
438/* 425/*
439 * NOTE: unmounting 'mnt' naturally propagates to all other mounts its 426 * NOTE: unmounting 'mnt' naturally propagates to all other mounts its
440 * parent propagates to. 427 * parent propagates to.
441 */ 428 */
442static void __propagate_umount(struct mount *mnt, struct list_head *to_reparent) 429static bool __propagate_umount(struct mount *mnt,
430 struct list_head *to_umount,
431 struct list_head *to_restore)
443{ 432{
444 struct mount *parent = mnt->mnt_parent; 433 bool progress = false;
445 struct mount *m; 434 struct mount *child;
446 435
447 BUG_ON(parent == mnt); 436 /*
437 * The state of the parent won't change if this mount is
438 * already unmounted or marked as without children.
439 */
440 if (mnt->mnt.mnt_flags & (MNT_UMOUNT | MNT_MARKED))
441 goto out;
448 442
449 for (m = propagation_next(parent, parent); m; 443 /* Verify topper is the only grandchild that has not been
450 m = propagation_next(m, parent)) { 444 * speculatively unmounted.
451 struct mount *topper; 445 */
452 struct mount *child = __lookup_mnt(&m->mnt, 446 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
453 mnt->mnt_mountpoint); 447 if (child->mnt_mountpoint == mnt->mnt.mnt_root)
454 /*
455 * umount the child only if the child has no children
456 * and the child is marked safe to unmount.
457 */
458 if (!child || !IS_MNT_MARKED(child))
459 continue; 448 continue;
460 CLEAR_MNT_MARK(child); 449 if (!list_empty(&child->mnt_umounting) && IS_MNT_MARKED(child))
450 continue;
451 /* Found a mounted child */
452 goto children;
453 }
461 454
462 /* If there is exactly one mount covering all of child 455 /* Mark mounts that can be unmounted if not locked */
463 * replace child with that mount. 456 SET_MNT_MARK(mnt);
464 */ 457 progress = true;
465 topper = find_topper(child);
466 if (topper)
467 list_add_tail(&topper->mnt_reparent, to_reparent);
468 458
469 if (topper || list_empty(&child->mnt_mounts)) { 459 /* If a mount is without children and not locked umount it. */
470 list_del_init(&child->mnt_child); 460 if (!IS_MNT_LOCKED(mnt)) {
471 list_del_init(&child->mnt_reparent); 461 umount_one(mnt, to_umount);
472 child->mnt.mnt_flags |= MNT_UMOUNT; 462 } else {
473 list_move_tail(&child->mnt_list, &mnt->mnt_list); 463children:
464 list_move_tail(&mnt->mnt_umounting, to_restore);
465 }
466out:
467 return progress;
468}
469
470static void umount_list(struct list_head *to_umount,
471 struct list_head *to_restore)
472{
473 struct mount *mnt, *child, *tmp;
474 list_for_each_entry(mnt, to_umount, mnt_list) {
475 list_for_each_entry_safe(child, tmp, &mnt->mnt_mounts, mnt_child) {
476 /* topper? */
477 if (child->mnt_mountpoint == mnt->mnt.mnt_root)
478 list_move_tail(&child->mnt_umounting, to_restore);
479 else
480 umount_one(child, to_umount);
474 } 481 }
475 } 482 }
476} 483}
477 484
478static void reparent_mounts(struct list_head *to_reparent) 485static void restore_mounts(struct list_head *to_restore)
479{ 486{
480 while (!list_empty(to_reparent)) { 487 /* Restore mounts to a clean working state */
488 while (!list_empty(to_restore)) {
481 struct mount *mnt, *parent; 489 struct mount *mnt, *parent;
482 struct mountpoint *mp; 490 struct mountpoint *mp;
483 491
484 mnt = list_first_entry(to_reparent, struct mount, mnt_reparent); 492 mnt = list_first_entry(to_restore, struct mount, mnt_umounting);
485 list_del_init(&mnt->mnt_reparent); 493 CLEAR_MNT_MARK(mnt);
494 list_del_init(&mnt->mnt_umounting);
486 495
487 /* Where should this mount be reparented to? */ 496 /* Should this mount be reparented? */
488 mp = mnt->mnt_mp; 497 mp = mnt->mnt_mp;
489 parent = mnt->mnt_parent; 498 parent = mnt->mnt_parent;
490 while (parent->mnt.mnt_flags & MNT_UMOUNT) { 499 while (parent->mnt.mnt_flags & MNT_UMOUNT) {
491 mp = parent->mnt_mp; 500 mp = parent->mnt_mp;
492 parent = parent->mnt_parent; 501 parent = parent->mnt_parent;
493 } 502 }
494 503 if (parent != mnt->mnt_parent)
495 mnt_change_mountpoint(parent, mp, mnt); 504 mnt_change_mountpoint(parent, mp, mnt);
496 } 505 }
497} 506}
498 507
@@ -506,15 +515,34 @@ static void reparent_mounts(struct list_head *to_reparent)
506int propagate_umount(struct list_head *list) 515int propagate_umount(struct list_head *list)
507{ 516{
508 struct mount *mnt; 517 struct mount *mnt;
509 LIST_HEAD(to_reparent); 518 LIST_HEAD(to_restore);
519 LIST_HEAD(to_umount);
510 520
511 list_for_each_entry_reverse(mnt, list, mnt_list) 521 list_for_each_entry(mnt, list, mnt_list) {
512 mark_umount_candidates(mnt); 522 struct mount *parent = mnt->mnt_parent;
523 struct mount *m;
513 524
514 list_for_each_entry(mnt, list, mnt_list) 525 for (m = propagation_next(parent, parent); m;
515 __propagate_umount(mnt, &to_reparent); 526 m = propagation_next(m, parent)) {
527 struct mount *child = __lookup_mnt(&m->mnt,
528 mnt->mnt_mountpoint);
529 if (!child)
530 continue;
531
532 /* Check the child and parents while progress is made */
533 while (__propagate_umount(child,
534 &to_umount, &to_restore)) {
535 /* Is the parent a umount candidate? */
536 child = child->mnt_parent;
537 if (list_empty(&child->mnt_umounting))
538 break;
539 }
540 }
541 }
516 542
517 reparent_mounts(&to_reparent); 543 umount_list(&to_umount, &to_restore);
544 restore_mounts(&to_restore);
545 list_splice_tail(&to_umount, list);
518 546
519 return 0; 547 return 0;
520} 548}