diff options
author | Al Viro <viro@zeniv.linux.org.uk> | 2011-03-18 08:55:38 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2011-03-18 08:55:38 -0400 |
commit | b12cea9198fa99ffd3de1776c323bc7464d26b44 (patch) | |
tree | 3ae5818b2690e45c8a71432ed681751683091287 | |
parent | 27cb1572e3e6bb1f8cf6bb3d74c914a87b131792 (diff) |
change the locking order for namespace_sem
Have it nested inside ->i_mutex. Instead of using follow_down()
under namespace_sem, followed by grabbing i_mutex and checking that
the mountpoint-to-be is not dead, do the following:
    grab i_mutex
    check that it's not dead
    grab namespace_sem
    see if anything is mounted there
    if not, we've won
    otherwise
        drop locks
        path_put() on what we had
        replace with what's mounted
        retry everything with the new mountpoint-to-be
New helper (lock_mount()) does that. do_add_mount(), do_move_mount(),
do_loopback() and pivot_root() are switched to it; in the case of the
last two, that eliminates a race we used to have - the original code
didn't do follow_down().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r-- | fs/namespace.c | 133 |
1 files changed, 73 insertions, 60 deletions
diff --git a/fs/namespace.c b/fs/namespace.c index 46cc26b5aaf2..9263995bf6a1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -1663,9 +1663,35 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, | |||
1663 | return err; | 1663 | return err; |
1664 | } | 1664 | } |
1665 | 1665 | ||
1666 | static int lock_mount(struct path *path) | ||
1667 | { | ||
1668 | struct vfsmount *mnt; | ||
1669 | retry: | ||
1670 | mutex_lock(&path->dentry->d_inode->i_mutex); | ||
1671 | if (unlikely(cant_mount(path->dentry))) { | ||
1672 | mutex_unlock(&path->dentry->d_inode->i_mutex); | ||
1673 | return -ENOENT; | ||
1674 | } | ||
1675 | down_write(&namespace_sem); | ||
1676 | mnt = lookup_mnt(path); | ||
1677 | if (likely(!mnt)) | ||
1678 | return 0; | ||
1679 | up_write(&namespace_sem); | ||
1680 | mutex_unlock(&path->dentry->d_inode->i_mutex); | ||
1681 | path_put(path); | ||
1682 | path->mnt = mnt; | ||
1683 | path->dentry = dget(mnt->mnt_root); | ||
1684 | goto retry; | ||
1685 | } | ||
1686 | |||
1687 | static void unlock_mount(struct path *path) | ||
1688 | { | ||
1689 | up_write(&namespace_sem); | ||
1690 | mutex_unlock(&path->dentry->d_inode->i_mutex); | ||
1691 | } | ||
1692 | |||
1666 | static int graft_tree(struct vfsmount *mnt, struct path *path) | 1693 | static int graft_tree(struct vfsmount *mnt, struct path *path) |
1667 | { | 1694 | { |
1668 | int err; | ||
1669 | if (mnt->mnt_sb->s_flags & MS_NOUSER) | 1695 | if (mnt->mnt_sb->s_flags & MS_NOUSER) |
1670 | return -EINVAL; | 1696 | return -EINVAL; |
1671 | 1697 | ||
@@ -1673,16 +1699,10 @@ static int graft_tree(struct vfsmount *mnt, struct path *path) | |||
1673 | S_ISDIR(mnt->mnt_root->d_inode->i_mode)) | 1699 | S_ISDIR(mnt->mnt_root->d_inode->i_mode)) |
1674 | return -ENOTDIR; | 1700 | return -ENOTDIR; |
1675 | 1701 | ||
1676 | err = -ENOENT; | 1702 | if (d_unlinked(path->dentry)) |
1677 | mutex_lock(&path->dentry->d_inode->i_mutex); | 1703 | return -ENOENT; |
1678 | if (cant_mount(path->dentry)) | ||
1679 | goto out_unlock; | ||
1680 | 1704 | ||
1681 | if (!d_unlinked(path->dentry)) | 1705 | return attach_recursive_mnt(mnt, path, NULL); |
1682 | err = attach_recursive_mnt(mnt, path, NULL); | ||
1683 | out_unlock: | ||
1684 | mutex_unlock(&path->dentry->d_inode->i_mutex); | ||
1685 | return err; | ||
1686 | } | 1706 | } |
1687 | 1707 | ||
1688 | /* | 1708 | /* |
@@ -1745,6 +1765,7 @@ static int do_change_type(struct path *path, int flag) | |||
1745 | static int do_loopback(struct path *path, char *old_name, | 1765 | static int do_loopback(struct path *path, char *old_name, |
1746 | int recurse) | 1766 | int recurse) |
1747 | { | 1767 | { |
1768 | LIST_HEAD(umount_list); | ||
1748 | struct path old_path; | 1769 | struct path old_path; |
1749 | struct vfsmount *mnt = NULL; | 1770 | struct vfsmount *mnt = NULL; |
1750 | int err = mount_is_safe(path); | 1771 | int err = mount_is_safe(path); |
@@ -1756,13 +1777,16 @@ static int do_loopback(struct path *path, char *old_name, | |||
1756 | if (err) | 1777 | if (err) |
1757 | return err; | 1778 | return err; |
1758 | 1779 | ||
1759 | down_write(&namespace_sem); | 1780 | err = lock_mount(path); |
1781 | if (err) | ||
1782 | goto out; | ||
1783 | |||
1760 | err = -EINVAL; | 1784 | err = -EINVAL; |
1761 | if (IS_MNT_UNBINDABLE(old_path.mnt)) | 1785 | if (IS_MNT_UNBINDABLE(old_path.mnt)) |
1762 | goto out; | 1786 | goto out2; |
1763 | 1787 | ||
1764 | if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) | 1788 | if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) |
1765 | goto out; | 1789 | goto out2; |
1766 | 1790 | ||
1767 | err = -ENOMEM; | 1791 | err = -ENOMEM; |
1768 | if (recurse) | 1792 | if (recurse) |
@@ -1771,20 +1795,18 @@ static int do_loopback(struct path *path, char *old_name, | |||
1771 | mnt = clone_mnt(old_path.mnt, old_path.dentry, 0); | 1795 | mnt = clone_mnt(old_path.mnt, old_path.dentry, 0); |
1772 | 1796 | ||
1773 | if (!mnt) | 1797 | if (!mnt) |
1774 | goto out; | 1798 | goto out2; |
1775 | 1799 | ||
1776 | err = graft_tree(mnt, path); | 1800 | err = graft_tree(mnt, path); |
1777 | if (err) { | 1801 | if (err) { |
1778 | LIST_HEAD(umount_list); | ||
1779 | |||
1780 | br_write_lock(vfsmount_lock); | 1802 | br_write_lock(vfsmount_lock); |
1781 | umount_tree(mnt, 0, &umount_list); | 1803 | umount_tree(mnt, 0, &umount_list); |
1782 | br_write_unlock(vfsmount_lock); | 1804 | br_write_unlock(vfsmount_lock); |
1783 | release_mounts(&umount_list); | ||
1784 | } | 1805 | } |
1785 | 1806 | out2: | |
1807 | unlock_mount(path); | ||
1808 | release_mounts(&umount_list); | ||
1786 | out: | 1809 | out: |
1787 | up_write(&namespace_sem); | ||
1788 | path_put(&old_path); | 1810 | path_put(&old_path); |
1789 | return err; | 1811 | return err; |
1790 | } | 1812 | } |
@@ -1873,18 +1895,12 @@ static int do_move_mount(struct path *path, char *old_name) | |||
1873 | if (err) | 1895 | if (err) |
1874 | return err; | 1896 | return err; |
1875 | 1897 | ||
1876 | down_write(&namespace_sem); | 1898 | err = lock_mount(path); |
1877 | err = follow_down(path, true); | ||
1878 | if (err < 0) | 1899 | if (err < 0) |
1879 | goto out; | 1900 | goto out; |
1880 | 1901 | ||
1881 | err = -EINVAL; | 1902 | err = -EINVAL; |
1882 | if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) | 1903 | if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) |
1883 | goto out; | ||
1884 | |||
1885 | err = -ENOENT; | ||
1886 | mutex_lock(&path->dentry->d_inode->i_mutex); | ||
1887 | if (cant_mount(path->dentry)) | ||
1888 | goto out1; | 1904 | goto out1; |
1889 | 1905 | ||
1890 | if (d_unlinked(path->dentry)) | 1906 | if (d_unlinked(path->dentry)) |
@@ -1926,9 +1942,8 @@ static int do_move_mount(struct path *path, char *old_name) | |||
1926 | * automatically */ | 1942 | * automatically */ |
1927 | list_del_init(&old_path.mnt->mnt_expire); | 1943 | list_del_init(&old_path.mnt->mnt_expire); |
1928 | out1: | 1944 | out1: |
1929 | mutex_unlock(&path->dentry->d_inode->i_mutex); | 1945 | unlock_mount(path); |
1930 | out: | 1946 | out: |
1931 | up_write(&namespace_sem); | ||
1932 | if (!err) | 1947 | if (!err) |
1933 | path_put(&parent_path); | 1948 | path_put(&parent_path); |
1934 | path_put(&old_path); | 1949 | path_put(&old_path); |
@@ -1983,11 +1998,9 @@ static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flag | |||
1983 | 1998 | ||
1984 | mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL); | 1999 | mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL); |
1985 | 2000 | ||
1986 | down_write(&namespace_sem); | 2001 | err = lock_mount(path); |
1987 | /* Something was mounted here while we slept */ | 2002 | if (err) |
1988 | err = follow_down(path, true); | 2003 | return err; |
1989 | if (err < 0) | ||
1990 | goto unlock; | ||
1991 | 2004 | ||
1992 | err = -EINVAL; | 2005 | err = -EINVAL; |
1993 | if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt)) | 2006 | if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt)) |
@@ -2007,7 +2020,7 @@ static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flag | |||
2007 | err = graft_tree(newmnt, path); | 2020 | err = graft_tree(newmnt, path); |
2008 | 2021 | ||
2009 | unlock: | 2022 | unlock: |
2010 | up_write(&namespace_sem); | 2023 | unlock_mount(path); |
2011 | return err; | 2024 | return err; |
2012 | } | 2025 | } |
2013 | 2026 | ||
@@ -2575,55 +2588,53 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, | |||
2575 | goto out1; | 2588 | goto out1; |
2576 | 2589 | ||
2577 | error = security_sb_pivotroot(&old, &new); | 2590 | error = security_sb_pivotroot(&old, &new); |
2578 | if (error) { | 2591 | if (error) |
2579 | path_put(&old); | 2592 | goto out2; |
2580 | goto out1; | ||
2581 | } | ||
2582 | 2593 | ||
2583 | get_fs_root(current->fs, &root); | 2594 | get_fs_root(current->fs, &root); |
2584 | down_write(&namespace_sem); | 2595 | error = lock_mount(&old); |
2585 | mutex_lock(&old.dentry->d_inode->i_mutex); | 2596 | if (error) |
2597 | goto out3; | ||
2598 | |||
2586 | error = -EINVAL; | 2599 | error = -EINVAL; |
2587 | if (IS_MNT_SHARED(old.mnt) || | 2600 | if (IS_MNT_SHARED(old.mnt) || |
2588 | IS_MNT_SHARED(new.mnt->mnt_parent) || | 2601 | IS_MNT_SHARED(new.mnt->mnt_parent) || |
2589 | IS_MNT_SHARED(root.mnt->mnt_parent)) | 2602 | IS_MNT_SHARED(root.mnt->mnt_parent)) |
2590 | goto out2; | 2603 | goto out4; |
2591 | if (!check_mnt(root.mnt) || !check_mnt(new.mnt)) | 2604 | if (!check_mnt(root.mnt) || !check_mnt(new.mnt)) |
2592 | goto out2; | 2605 | goto out4; |
2593 | error = -ENOENT; | 2606 | error = -ENOENT; |
2594 | if (cant_mount(old.dentry)) | ||
2595 | goto out2; | ||
2596 | if (d_unlinked(new.dentry)) | 2607 | if (d_unlinked(new.dentry)) |
2597 | goto out2; | 2608 | goto out4; |
2598 | if (d_unlinked(old.dentry)) | 2609 | if (d_unlinked(old.dentry)) |
2599 | goto out2; | 2610 | goto out4; |
2600 | error = -EBUSY; | 2611 | error = -EBUSY; |
2601 | if (new.mnt == root.mnt || | 2612 | if (new.mnt == root.mnt || |
2602 | old.mnt == root.mnt) | 2613 | old.mnt == root.mnt) |
2603 | goto out2; /* loop, on the same file system */ | 2614 | goto out4; /* loop, on the same file system */ |
2604 | error = -EINVAL; | 2615 | error = -EINVAL; |
2605 | if (root.mnt->mnt_root != root.dentry) | 2616 | if (root.mnt->mnt_root != root.dentry) |
2606 | goto out2; /* not a mountpoint */ | 2617 | goto out4; /* not a mountpoint */ |
2607 | if (root.mnt->mnt_parent == root.mnt) | 2618 | if (root.mnt->mnt_parent == root.mnt) |
2608 | goto out2; /* not attached */ | 2619 | goto out4; /* not attached */ |
2609 | if (new.mnt->mnt_root != new.dentry) | 2620 | if (new.mnt->mnt_root != new.dentry) |
2610 | goto out2; /* not a mountpoint */ | 2621 | goto out4; /* not a mountpoint */ |
2611 | if (new.mnt->mnt_parent == new.mnt) | 2622 | if (new.mnt->mnt_parent == new.mnt) |
2612 | goto out2; /* not attached */ | 2623 | goto out4; /* not attached */ |
2613 | /* make sure we can reach put_old from new_root */ | 2624 | /* make sure we can reach put_old from new_root */ |
2614 | tmp = old.mnt; | 2625 | tmp = old.mnt; |
2615 | if (tmp != new.mnt) { | 2626 | if (tmp != new.mnt) { |
2616 | for (;;) { | 2627 | for (;;) { |
2617 | if (tmp->mnt_parent == tmp) | 2628 | if (tmp->mnt_parent == tmp) |
2618 | goto out2; /* already mounted on put_old */ | 2629 | goto out4; /* already mounted on put_old */ |
2619 | if (tmp->mnt_parent == new.mnt) | 2630 | if (tmp->mnt_parent == new.mnt) |
2620 | break; | 2631 | break; |
2621 | tmp = tmp->mnt_parent; | 2632 | tmp = tmp->mnt_parent; |
2622 | } | 2633 | } |
2623 | if (!is_subdir(tmp->mnt_mountpoint, new.dentry)) | 2634 | if (!is_subdir(tmp->mnt_mountpoint, new.dentry)) |
2624 | goto out2; | 2635 | goto out4; |
2625 | } else if (!is_subdir(old.dentry, new.dentry)) | 2636 | } else if (!is_subdir(old.dentry, new.dentry)) |
2626 | goto out2; | 2637 | goto out4; |
2627 | br_write_lock(vfsmount_lock); | 2638 | br_write_lock(vfsmount_lock); |
2628 | detach_mnt(new.mnt, &parent_path); | 2639 | detach_mnt(new.mnt, &parent_path); |
2629 | detach_mnt(root.mnt, &root_parent); | 2640 | detach_mnt(root.mnt, &root_parent); |
@@ -2634,14 +2645,16 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, | |||
2634 | touch_mnt_namespace(current->nsproxy->mnt_ns); | 2645 | touch_mnt_namespace(current->nsproxy->mnt_ns); |
2635 | br_write_unlock(vfsmount_lock); | 2646 | br_write_unlock(vfsmount_lock); |
2636 | chroot_fs_refs(&root, &new); | 2647 | chroot_fs_refs(&root, &new); |
2637 | |||
2638 | error = 0; | 2648 | error = 0; |
2639 | path_put(&root_parent); | 2649 | out4: |
2640 | path_put(&parent_path); | 2650 | unlock_mount(&old); |
2641 | out2: | 2651 | if (!error) { |
2642 | mutex_unlock(&old.dentry->d_inode->i_mutex); | 2652 | path_put(&root_parent); |
2643 | up_write(&namespace_sem); | 2653 | path_put(&parent_path); |
2654 | } | ||
2655 | out3: | ||
2644 | path_put(&root); | 2656 | path_put(&root); |
2657 | out2: | ||
2645 | path_put(&old); | 2658 | path_put(&old); |
2646 | out1: | 2659 | out1: |
2647 | path_put(&new); | 2660 | path_put(&new); |