about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Al Viro <viro@zeniv.linux.org.uk> 2013-03-15 10:53:28 -0400
committer: Al Viro <viro@zeniv.linux.org.uk> 2013-04-09 14:12:52 -0400
commit: 84d17192d2afd52aeba88c71ae4959a015f56a38 (patch)
tree: ccc7359f7b35352fb143eae16dec44d9ce766de6
parent: e9c5d8a562f01b211926d70443378eb14b29a676 (diff)
get rid of full-hash scan on detaching vfsmounts
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r-- fs/mount.h 7
-rw-r--r-- fs/namespace.c 229
-rw-r--r-- fs/pnode.c 6
-rw-r--r-- fs/pnode.h 4
4 files changed, 149 insertions, 97 deletions
diff --git a/fs/mount.h b/fs/mount.h
index cd5007980400..64a858143ff9 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -18,6 +18,12 @@ struct mnt_pcp {
18 int mnt_writers; 18 int mnt_writers;
19}; 19};
20 20
21struct mountpoint {
22 struct list_head m_hash;
23 struct dentry *m_dentry;
24 int m_count;
25};
26
21struct mount { 27struct mount {
22 struct list_head mnt_hash; 28 struct list_head mnt_hash;
23 struct mount *mnt_parent; 29 struct mount *mnt_parent;
@@ -40,6 +46,7 @@ struct mount {
40 struct list_head mnt_slave; /* slave list entry */ 46 struct list_head mnt_slave; /* slave list entry */
41 struct mount *mnt_master; /* slave is on master->mnt_slave_list */ 47 struct mount *mnt_master; /* slave is on master->mnt_slave_list */
42 struct mnt_namespace *mnt_ns; /* containing namespace */ 48 struct mnt_namespace *mnt_ns; /* containing namespace */
49 struct mountpoint *mnt_mp; /* where is it mounted */
43#ifdef CONFIG_FSNOTIFY 50#ifdef CONFIG_FSNOTIFY
44 struct hlist_head mnt_fsnotify_marks; 51 struct hlist_head mnt_fsnotify_marks;
45 __u32 mnt_fsnotify_mask; 52 __u32 mnt_fsnotify_mask;
diff --git a/fs/namespace.c b/fs/namespace.c
index 6c7d31eebba4..d7bb5a55cf36 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -36,6 +36,7 @@ static int mnt_id_start = 0;
36static int mnt_group_start = 1; 36static int mnt_group_start = 1;
37 37
38static struct list_head *mount_hashtable __read_mostly; 38static struct list_head *mount_hashtable __read_mostly;
39static struct list_head *mountpoint_hashtable __read_mostly;
39static struct kmem_cache *mnt_cache __read_mostly; 40static struct kmem_cache *mnt_cache __read_mostly;
40static struct rw_semaphore namespace_sem; 41static struct rw_semaphore namespace_sem;
41 42
@@ -605,6 +606,51 @@ struct vfsmount *lookup_mnt(struct path *path)
605 } 606 }
606} 607}
607 608
609static struct mountpoint *new_mountpoint(struct dentry *dentry)
610{
611 struct list_head *chain = mountpoint_hashtable + hash(NULL, dentry);
612 struct mountpoint *mp;
613
614 list_for_each_entry(mp, chain, m_hash) {
615 if (mp->m_dentry == dentry) {
616 /* might be worth a WARN_ON() */
617 if (d_unlinked(dentry))
618 return ERR_PTR(-ENOENT);
619 mp->m_count++;
620 return mp;
621 }
622 }
623
624 mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
625 if (!mp)
626 return ERR_PTR(-ENOMEM);
627
628 spin_lock(&dentry->d_lock);
629 if (d_unlinked(dentry)) {
630 spin_unlock(&dentry->d_lock);
631 kfree(mp);
632 return ERR_PTR(-ENOENT);
633 }
634 dentry->d_flags |= DCACHE_MOUNTED;
635 spin_unlock(&dentry->d_lock);
636 mp->m_dentry = dentry;
637 mp->m_count = 1;
638 list_add(&mp->m_hash, chain);
639 return mp;
640}
641
642static void put_mountpoint(struct mountpoint *mp)
643{
644 if (!--mp->m_count) {
645 struct dentry *dentry = mp->m_dentry;
646 spin_lock(&dentry->d_lock);
647 dentry->d_flags &= ~DCACHE_MOUNTED;
648 spin_unlock(&dentry->d_lock);
649 list_del(&mp->m_hash);
650 kfree(mp);
651 }
652}
653
608static inline int check_mnt(struct mount *mnt) 654static inline int check_mnt(struct mount *mnt)
609{ 655{
610 return mnt->mnt_ns == current->nsproxy->mnt_ns; 656 return mnt->mnt_ns == current->nsproxy->mnt_ns;
@@ -633,27 +679,6 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
633} 679}
634 680
635/* 681/*
636 * Clear dentry's mounted state if it has no remaining mounts.
637 * vfsmount_lock must be held for write.
638 */
639static void dentry_reset_mounted(struct dentry *dentry)
640{
641 unsigned u;
642
643 for (u = 0; u < HASH_SIZE; u++) {
644 struct mount *p;
645
646 list_for_each_entry(p, &mount_hashtable[u], mnt_hash) {
647 if (p->mnt_mountpoint == dentry)
648 return;
649 }
650 }
651 spin_lock(&dentry->d_lock);
652 dentry->d_flags &= ~DCACHE_MOUNTED;
653 spin_unlock(&dentry->d_lock);
654}
655
656/*
657 * vfsmount lock must be held for write 682 * vfsmount lock must be held for write
658 */ 683 */
659static void detach_mnt(struct mount *mnt, struct path *old_path) 684static void detach_mnt(struct mount *mnt, struct path *old_path)
@@ -664,32 +689,35 @@ static void detach_mnt(struct mount *mnt, struct path *old_path)
664 mnt->mnt_mountpoint = mnt->mnt.mnt_root; 689 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
665 list_del_init(&mnt->mnt_child); 690 list_del_init(&mnt->mnt_child);
666 list_del_init(&mnt->mnt_hash); 691 list_del_init(&mnt->mnt_hash);
667 dentry_reset_mounted(old_path->dentry); 692 put_mountpoint(mnt->mnt_mp);
693 mnt->mnt_mp = NULL;
668} 694}
669 695
670/* 696/*
671 * vfsmount lock must be held for write 697 * vfsmount lock must be held for write
672 */ 698 */
673void mnt_set_mountpoint(struct mount *mnt, struct dentry *dentry, 699void mnt_set_mountpoint(struct mount *mnt,
700 struct mountpoint *mp,
674 struct mount *child_mnt) 701 struct mount *child_mnt)
675{ 702{
703 mp->m_count++;
676 mnt_add_count(mnt, 1); /* essentially, that's mntget */ 704 mnt_add_count(mnt, 1); /* essentially, that's mntget */
677 child_mnt->mnt_mountpoint = dget(dentry); 705 child_mnt->mnt_mountpoint = dget(mp->m_dentry);
678 child_mnt->mnt_parent = mnt; 706 child_mnt->mnt_parent = mnt;
679 spin_lock(&dentry->d_lock); 707 child_mnt->mnt_mp = mp;
680 dentry->d_flags |= DCACHE_MOUNTED;
681 spin_unlock(&dentry->d_lock);
682} 708}
683 709
684/* 710/*
685 * vfsmount lock must be held for write 711 * vfsmount lock must be held for write
686 */ 712 */
687static void attach_mnt(struct mount *mnt, struct path *path) 713static void attach_mnt(struct mount *mnt,
714 struct mount *parent,
715 struct mountpoint *mp)
688{ 716{
689 mnt_set_mountpoint(real_mount(path->mnt), path->dentry, mnt); 717 mnt_set_mountpoint(parent, mp, mnt);
690 list_add_tail(&mnt->mnt_hash, mount_hashtable + 718 list_add_tail(&mnt->mnt_hash, mount_hashtable +
691 hash(path->mnt, path->dentry)); 719 hash(&parent->mnt, mp->m_dentry));
692 list_add_tail(&mnt->mnt_child, &real_mount(path->mnt)->mnt_mounts); 720 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
693} 721}
694 722
695/* 723/*
@@ -1138,7 +1166,8 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill)
1138 list_del_init(&p->mnt_child); 1166 list_del_init(&p->mnt_child);
1139 if (mnt_has_parent(p)) { 1167 if (mnt_has_parent(p)) {
1140 p->mnt_parent->mnt_ghosts++; 1168 p->mnt_parent->mnt_ghosts++;
1141 dentry_reset_mounted(p->mnt_mountpoint); 1169 put_mountpoint(p->mnt_mp);
1170 p->mnt_mp = NULL;
1142 } 1171 }
1143 change_mnt_propagation(p, MS_PRIVATE); 1172 change_mnt_propagation(p, MS_PRIVATE);
1144 } 1173 }
@@ -1323,8 +1352,7 @@ static bool mnt_ns_loop(struct path *path)
1323struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, 1352struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1324 int flag) 1353 int flag)
1325{ 1354{
1326 struct mount *res, *p, *q, *r; 1355 struct mount *res, *p, *q, *r, *parent;
1327 struct path path;
1328 1356
1329 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt)) 1357 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
1330 return ERR_PTR(-EINVAL); 1358 return ERR_PTR(-EINVAL);
@@ -1351,14 +1379,13 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1351 q = q->mnt_parent; 1379 q = q->mnt_parent;
1352 } 1380 }
1353 p = s; 1381 p = s;
1354 path.mnt = &q->mnt; 1382 parent = q;
1355 path.dentry = p->mnt_mountpoint;
1356 q = clone_mnt(p, p->mnt.mnt_root, flag); 1383 q = clone_mnt(p, p->mnt.mnt_root, flag);
1357 if (IS_ERR(q)) 1384 if (IS_ERR(q))
1358 goto out; 1385 goto out;
1359 br_write_lock(&vfsmount_lock); 1386 br_write_lock(&vfsmount_lock);
1360 list_add_tail(&q->mnt_list, &res->mnt_list); 1387 list_add_tail(&q->mnt_list, &res->mnt_list);
1361 attach_mnt(q, &path); 1388 attach_mnt(q, parent, p->mnt_mp);
1362 br_write_unlock(&vfsmount_lock); 1389 br_write_unlock(&vfsmount_lock);
1363 } 1390 }
1364 } 1391 }
@@ -1505,11 +1532,11 @@ static int invent_group_ids(struct mount *mnt, bool recurse)
1505 * in allocations. 1532 * in allocations.
1506 */ 1533 */
1507static int attach_recursive_mnt(struct mount *source_mnt, 1534static int attach_recursive_mnt(struct mount *source_mnt,
1508 struct path *path, struct path *parent_path) 1535 struct mount *dest_mnt,
1536 struct mountpoint *dest_mp,
1537 struct path *parent_path)
1509{ 1538{
1510 LIST_HEAD(tree_list); 1539 LIST_HEAD(tree_list);
1511 struct mount *dest_mnt = real_mount(path->mnt);
1512 struct dentry *dest_dentry = path->dentry;
1513 struct mount *child, *p; 1540 struct mount *child, *p;
1514 int err; 1541 int err;
1515 1542
@@ -1518,7 +1545,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
1518 if (err) 1545 if (err)
1519 goto out; 1546 goto out;
1520 } 1547 }
1521 err = propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list); 1548 err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
1522 if (err) 1549 if (err)
1523 goto out_cleanup_ids; 1550 goto out_cleanup_ids;
1524 1551
@@ -1530,10 +1557,10 @@ static int attach_recursive_mnt(struct mount *source_mnt,
1530 } 1557 }
1531 if (parent_path) { 1558 if (parent_path) {
1532 detach_mnt(source_mnt, parent_path); 1559 detach_mnt(source_mnt, parent_path);
1533 attach_mnt(source_mnt, path); 1560 attach_mnt(source_mnt, dest_mnt, dest_mp);
1534 touch_mnt_namespace(source_mnt->mnt_ns); 1561 touch_mnt_namespace(source_mnt->mnt_ns);
1535 } else { 1562 } else {
1536 mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); 1563 mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
1537 commit_tree(source_mnt); 1564 commit_tree(source_mnt);
1538 } 1565 }
1539 1566
@@ -1552,46 +1579,53 @@ static int attach_recursive_mnt(struct mount *source_mnt,
1552 return err; 1579 return err;
1553} 1580}
1554 1581
1555static int lock_mount(struct path *path) 1582static struct mountpoint *lock_mount(struct path *path)
1556{ 1583{
1557 struct vfsmount *mnt; 1584 struct vfsmount *mnt;
1585 struct dentry *dentry = path->dentry;
1558retry: 1586retry:
1559 mutex_lock(&path->dentry->d_inode->i_mutex); 1587 mutex_lock(&dentry->d_inode->i_mutex);
1560 if (unlikely(cant_mount(path->dentry))) { 1588 if (unlikely(cant_mount(dentry))) {
1561 mutex_unlock(&path->dentry->d_inode->i_mutex); 1589 mutex_unlock(&dentry->d_inode->i_mutex);
1562 return -ENOENT; 1590 return ERR_PTR(-ENOENT);
1563 } 1591 }
1564 down_write(&namespace_sem); 1592 down_write(&namespace_sem);
1565 mnt = lookup_mnt(path); 1593 mnt = lookup_mnt(path);
1566 if (likely(!mnt)) 1594 if (likely(!mnt)) {
1567 return 0; 1595 struct mountpoint *mp = new_mountpoint(dentry);
1596 if (IS_ERR(mp)) {
1597 up_write(&namespace_sem);
1598 mutex_unlock(&dentry->d_inode->i_mutex);
1599 return mp;
1600 }
1601 return mp;
1602 }
1568 up_write(&namespace_sem); 1603 up_write(&namespace_sem);
1569 mutex_unlock(&path->dentry->d_inode->i_mutex); 1604 mutex_unlock(&path->dentry->d_inode->i_mutex);
1570 path_put(path); 1605 path_put(path);
1571 path->mnt = mnt; 1606 path->mnt = mnt;
1572 path->dentry = dget(mnt->mnt_root); 1607 dentry = path->dentry = dget(mnt->mnt_root);
1573 goto retry; 1608 goto retry;
1574} 1609}
1575 1610
1576static void unlock_mount(struct path *path) 1611static void unlock_mount(struct mountpoint *where)
1577{ 1612{
1613 struct dentry *dentry = where->m_dentry;
1614 put_mountpoint(where);
1578 up_write(&namespace_sem); 1615 up_write(&namespace_sem);
1579 mutex_unlock(&path->dentry->d_inode->i_mutex); 1616 mutex_unlock(&dentry->d_inode->i_mutex);
1580} 1617}
1581 1618
1582static int graft_tree(struct mount *mnt, struct path *path) 1619static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
1583{ 1620{
1584 if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER) 1621 if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER)
1585 return -EINVAL; 1622 return -EINVAL;
1586 1623
1587 if (S_ISDIR(path->dentry->d_inode->i_mode) != 1624 if (S_ISDIR(mp->m_dentry->d_inode->i_mode) !=
1588 S_ISDIR(mnt->mnt.mnt_root->d_inode->i_mode)) 1625 S_ISDIR(mnt->mnt.mnt_root->d_inode->i_mode))
1589 return -ENOTDIR; 1626 return -ENOTDIR;
1590 1627
1591 if (d_unlinked(path->dentry)) 1628 return attach_recursive_mnt(mnt, p, mp, NULL);
1592 return -ENOENT;
1593
1594 return attach_recursive_mnt(mnt, path, NULL);
1595} 1629}
1596 1630
1597/* 1631/*
@@ -1654,7 +1688,8 @@ static int do_loopback(struct path *path, const char *old_name,
1654{ 1688{
1655 LIST_HEAD(umount_list); 1689 LIST_HEAD(umount_list);
1656 struct path old_path; 1690 struct path old_path;
1657 struct mount *mnt = NULL, *old; 1691 struct mount *mnt = NULL, *old, *parent;
1692 struct mountpoint *mp;
1658 int err; 1693 int err;
1659 if (!old_name || !*old_name) 1694 if (!old_name || !*old_name)
1660 return -EINVAL; 1695 return -EINVAL;
@@ -1666,17 +1701,19 @@ static int do_loopback(struct path *path, const char *old_name,
1666 if (mnt_ns_loop(&old_path)) 1701 if (mnt_ns_loop(&old_path))
1667 goto out; 1702 goto out;
1668 1703
1669 err = lock_mount(path); 1704 mp = lock_mount(path);
1670 if (err) 1705 err = PTR_ERR(mp);
1706 if (IS_ERR(mp))
1671 goto out; 1707 goto out;
1672 1708
1673 old = real_mount(old_path.mnt); 1709 old = real_mount(old_path.mnt);
1710 parent = real_mount(path->mnt);
1674 1711
1675 err = -EINVAL; 1712 err = -EINVAL;
1676 if (IS_MNT_UNBINDABLE(old)) 1713 if (IS_MNT_UNBINDABLE(old))
1677 goto out2; 1714 goto out2;
1678 1715
1679 if (!check_mnt(real_mount(path->mnt)) || !check_mnt(old)) 1716 if (!check_mnt(parent) || !check_mnt(old))
1680 goto out2; 1717 goto out2;
1681 1718
1682 if (recurse) 1719 if (recurse)
@@ -1689,14 +1726,14 @@ static int do_loopback(struct path *path, const char *old_name,
1689 goto out2; 1726 goto out2;
1690 } 1727 }
1691 1728
1692 err = graft_tree(mnt, path); 1729 err = graft_tree(mnt, parent, mp);
1693 if (err) { 1730 if (err) {
1694 br_write_lock(&vfsmount_lock); 1731 br_write_lock(&vfsmount_lock);
1695 umount_tree(mnt, 0, &umount_list); 1732 umount_tree(mnt, 0, &umount_list);
1696 br_write_unlock(&vfsmount_lock); 1733 br_write_unlock(&vfsmount_lock);
1697 } 1734 }
1698out2: 1735out2:
1699 unlock_mount(path); 1736 unlock_mount(mp);
1700 release_mounts(&umount_list); 1737 release_mounts(&umount_list);
1701out: 1738out:
1702 path_put(&old_path); 1739 path_put(&old_path);
@@ -1779,6 +1816,7 @@ static int do_move_mount(struct path *path, const char *old_name)
1779 struct path old_path, parent_path; 1816 struct path old_path, parent_path;
1780 struct mount *p; 1817 struct mount *p;
1781 struct mount *old; 1818 struct mount *old;
1819 struct mountpoint *mp;
1782 int err; 1820 int err;
1783 if (!old_name || !*old_name) 1821 if (!old_name || !*old_name)
1784 return -EINVAL; 1822 return -EINVAL;
@@ -1786,8 +1824,9 @@ static int do_move_mount(struct path *path, const char *old_name)
1786 if (err) 1824 if (err)
1787 return err; 1825 return err;
1788 1826
1789 err = lock_mount(path); 1827 mp = lock_mount(path);
1790 if (err < 0) 1828 err = PTR_ERR(mp);
1829 if (IS_ERR(mp))
1791 goto out; 1830 goto out;
1792 1831
1793 old = real_mount(old_path.mnt); 1832 old = real_mount(old_path.mnt);
@@ -1797,9 +1836,6 @@ static int do_move_mount(struct path *path, const char *old_name)
1797 if (!check_mnt(p) || !check_mnt(old)) 1836 if (!check_mnt(p) || !check_mnt(old))
1798 goto out1; 1837 goto out1;
1799 1838
1800 if (d_unlinked(path->dentry))
1801 goto out1;
1802
1803 err = -EINVAL; 1839 err = -EINVAL;
1804 if (old_path.dentry != old_path.mnt->mnt_root) 1840 if (old_path.dentry != old_path.mnt->mnt_root)
1805 goto out1; 1841 goto out1;
@@ -1826,7 +1862,7 @@ static int do_move_mount(struct path *path, const char *old_name)
1826 if (p == old) 1862 if (p == old)
1827 goto out1; 1863 goto out1;
1828 1864
1829 err = attach_recursive_mnt(old, path, &parent_path); 1865 err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path);
1830 if (err) 1866 if (err)
1831 goto out1; 1867 goto out1;
1832 1868
@@ -1834,7 +1870,7 @@ static int do_move_mount(struct path *path, const char *old_name)
1834 * automatically */ 1870 * automatically */
1835 list_del_init(&old->mnt_expire); 1871 list_del_init(&old->mnt_expire);
1836out1: 1872out1:
1837 unlock_mount(path); 1873 unlock_mount(mp);
1838out: 1874out:
1839 if (!err) 1875 if (!err)
1840 path_put(&parent_path); 1876 path_put(&parent_path);
@@ -1870,21 +1906,24 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
1870 */ 1906 */
1871static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) 1907static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
1872{ 1908{
1909 struct mountpoint *mp;
1910 struct mount *parent;
1873 int err; 1911 int err;
1874 1912
1875 mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL); 1913 mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);
1876 1914
1877 err = lock_mount(path); 1915 mp = lock_mount(path);
1878 if (err) 1916 if (IS_ERR(mp))
1879 return err; 1917 return PTR_ERR(mp);
1880 1918
1919 parent = real_mount(path->mnt);
1881 err = -EINVAL; 1920 err = -EINVAL;
1882 if (unlikely(!check_mnt(real_mount(path->mnt)))) { 1921 if (unlikely(!check_mnt(parent))) {
1883 /* that's acceptable only for automounts done in private ns */ 1922 /* that's acceptable only for automounts done in private ns */
1884 if (!(mnt_flags & MNT_SHRINKABLE)) 1923 if (!(mnt_flags & MNT_SHRINKABLE))
1885 goto unlock; 1924 goto unlock;
1886 /* ... and for those we'd better have mountpoint still alive */ 1925 /* ... and for those we'd better have mountpoint still alive */
1887 if (!real_mount(path->mnt)->mnt_ns) 1926 if (!parent->mnt_ns)
1888 goto unlock; 1927 goto unlock;
1889 } 1928 }
1890 1929
@@ -1899,10 +1938,10 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
1899 goto unlock; 1938 goto unlock;
1900 1939
1901 newmnt->mnt.mnt_flags = mnt_flags; 1940 newmnt->mnt.mnt_flags = mnt_flags;
1902 err = graft_tree(newmnt, path); 1941 err = graft_tree(newmnt, parent, mp);
1903 1942
1904unlock: 1943unlock:
1905 unlock_mount(path); 1944 unlock_mount(mp);
1906 return err; 1945 return err;
1907} 1946}
1908 1947
@@ -2543,7 +2582,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2543 const char __user *, put_old) 2582 const char __user *, put_old)
2544{ 2583{
2545 struct path new, old, parent_path, root_parent, root; 2584 struct path new, old, parent_path, root_parent, root;
2546 struct mount *new_mnt, *root_mnt; 2585 struct mount *new_mnt, *root_mnt, *old_mnt;
2586 struct mountpoint *old_mp, *root_mp;
2547 int error; 2587 int error;
2548 2588
2549 if (!may_mount()) 2589 if (!may_mount())
@@ -2562,14 +2602,16 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2562 goto out2; 2602 goto out2;
2563 2603
2564 get_fs_root(current->fs, &root); 2604 get_fs_root(current->fs, &root);
2565 error = lock_mount(&old); 2605 old_mp = lock_mount(&old);
2566 if (error) 2606 error = PTR_ERR(old_mp);
2607 if (IS_ERR(old_mp))
2567 goto out3; 2608 goto out3;
2568 2609
2569 error = -EINVAL; 2610 error = -EINVAL;
2570 new_mnt = real_mount(new.mnt); 2611 new_mnt = real_mount(new.mnt);
2571 root_mnt = real_mount(root.mnt); 2612 root_mnt = real_mount(root.mnt);
2572 if (IS_MNT_SHARED(real_mount(old.mnt)) || 2613 old_mnt = real_mount(old.mnt);
2614 if (IS_MNT_SHARED(old_mnt) ||
2573 IS_MNT_SHARED(new_mnt->mnt_parent) || 2615 IS_MNT_SHARED(new_mnt->mnt_parent) ||
2574 IS_MNT_SHARED(root_mnt->mnt_parent)) 2616 IS_MNT_SHARED(root_mnt->mnt_parent))
2575 goto out4; 2617 goto out4;
@@ -2578,37 +2620,37 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2578 error = -ENOENT; 2620 error = -ENOENT;
2579 if (d_unlinked(new.dentry)) 2621 if (d_unlinked(new.dentry))
2580 goto out4; 2622 goto out4;
2581 if (d_unlinked(old.dentry))
2582 goto out4;
2583 error = -EBUSY; 2623 error = -EBUSY;
2584 if (new.mnt == root.mnt || 2624 if (new_mnt == root_mnt || old_mnt == root_mnt)
2585 old.mnt == root.mnt)
2586 goto out4; /* loop, on the same file system */ 2625 goto out4; /* loop, on the same file system */
2587 error = -EINVAL; 2626 error = -EINVAL;
2588 if (root.mnt->mnt_root != root.dentry) 2627 if (root.mnt->mnt_root != root.dentry)
2589 goto out4; /* not a mountpoint */ 2628 goto out4; /* not a mountpoint */
2590 if (!mnt_has_parent(root_mnt)) 2629 if (!mnt_has_parent(root_mnt))
2591 goto out4; /* not attached */ 2630 goto out4; /* not attached */
2631 root_mp = root_mnt->mnt_mp;
2592 if (new.mnt->mnt_root != new.dentry) 2632 if (new.mnt->mnt_root != new.dentry)
2593 goto out4; /* not a mountpoint */ 2633 goto out4; /* not a mountpoint */
2594 if (!mnt_has_parent(new_mnt)) 2634 if (!mnt_has_parent(new_mnt))
2595 goto out4; /* not attached */ 2635 goto out4; /* not attached */
2596 /* make sure we can reach put_old from new_root */ 2636 /* make sure we can reach put_old from new_root */
2597 if (!is_path_reachable(real_mount(old.mnt), old.dentry, &new)) 2637 if (!is_path_reachable(old_mnt, old.dentry, &new))
2598 goto out4; 2638 goto out4;
2639 root_mp->m_count++; /* pin it so it won't go away */
2599 br_write_lock(&vfsmount_lock); 2640 br_write_lock(&vfsmount_lock);
2600 detach_mnt(new_mnt, &parent_path); 2641 detach_mnt(new_mnt, &parent_path);
2601 detach_mnt(root_mnt, &root_parent); 2642 detach_mnt(root_mnt, &root_parent);
2602 /* mount old root on put_old */ 2643 /* mount old root on put_old */
2603 attach_mnt(root_mnt, &old); 2644 attach_mnt(root_mnt, old_mnt, old_mp);
2604 /* mount new_root on / */ 2645 /* mount new_root on / */
2605 attach_mnt(new_mnt, &root_parent); 2646 attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
2606 touch_mnt_namespace(current->nsproxy->mnt_ns); 2647 touch_mnt_namespace(current->nsproxy->mnt_ns);
2607 br_write_unlock(&vfsmount_lock); 2648 br_write_unlock(&vfsmount_lock);
2608 chroot_fs_refs(&root, &new); 2649 chroot_fs_refs(&root, &new);
2650 put_mountpoint(root_mp);
2609 error = 0; 2651 error = 0;
2610out4: 2652out4:
2611 unlock_mount(&old); 2653 unlock_mount(old_mp);
2612 if (!error) { 2654 if (!error) {
2613 path_put(&root_parent); 2655 path_put(&root_parent);
2614 path_put(&parent_path); 2656 path_put(&parent_path);
@@ -2663,14 +2705,17 @@ void __init mnt_init(void)
2663 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); 2705 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
2664 2706
2665 mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC); 2707 mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
2708 mountpoint_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
2666 2709
2667 if (!mount_hashtable) 2710 if (!mount_hashtable || !mountpoint_hashtable)
2668 panic("Failed to allocate mount hash table\n"); 2711 panic("Failed to allocate mount hash table\n");
2669 2712
2670 printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE); 2713 printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE);
2671 2714
2672 for (u = 0; u < HASH_SIZE; u++) 2715 for (u = 0; u < HASH_SIZE; u++)
2673 INIT_LIST_HEAD(&mount_hashtable[u]); 2716 INIT_LIST_HEAD(&mount_hashtable[u]);
2717 for (u = 0; u < HASH_SIZE; u++)
2718 INIT_LIST_HEAD(&mountpoint_hashtable[u]);
2674 2719
2675 br_lock_init(&vfsmount_lock); 2720 br_lock_init(&vfsmount_lock);
2676 2721
diff --git a/fs/pnode.c b/fs/pnode.c
index 3e000a51ac0d..98e0d3a23fac 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -217,7 +217,7 @@ static struct mount *get_source(struct mount *dest,
217 * @source_mnt: source mount. 217 * @source_mnt: source mount.
218 * @tree_list : list of heads of trees to be attached. 218 * @tree_list : list of heads of trees to be attached.
219 */ 219 */
220int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, 220int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
221 struct mount *source_mnt, struct list_head *tree_list) 221 struct mount *source_mnt, struct list_head *tree_list)
222{ 222{
223 struct mount *m, *child; 223 struct mount *m, *child;
@@ -244,8 +244,8 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
244 goto out; 244 goto out;
245 } 245 }
246 246
247 if (is_subdir(dest_dentry, m->mnt.mnt_root)) { 247 if (is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) {
248 mnt_set_mountpoint(m, dest_dentry, child); 248 mnt_set_mountpoint(m, dest_mp, child);
249 list_add_tail(&child->mnt_hash, tree_list); 249 list_add_tail(&child->mnt_hash, tree_list);
250 } else { 250 } else {
251 /* 251 /*
diff --git a/fs/pnode.h b/fs/pnode.h
index 19b853a3445c..f4357d3a0a44 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -31,14 +31,14 @@ static inline void set_mnt_shared(struct mount *mnt)
31} 31}
32 32
33void change_mnt_propagation(struct mount *, int); 33void change_mnt_propagation(struct mount *, int);
34int propagate_mnt(struct mount *, struct dentry *, struct mount *, 34int propagate_mnt(struct mount *, struct mountpoint *, struct mount *,
35 struct list_head *); 35 struct list_head *);
36int propagate_umount(struct list_head *); 36int propagate_umount(struct list_head *);
37int propagate_mount_busy(struct mount *, int); 37int propagate_mount_busy(struct mount *, int);
38void mnt_release_group_id(struct mount *); 38void mnt_release_group_id(struct mount *);
39int get_dominating_id(struct mount *mnt, const struct path *root); 39int get_dominating_id(struct mount *mnt, const struct path *root);
40unsigned int mnt_get_count(struct mount *mnt); 40unsigned int mnt_get_count(struct mount *mnt);
41void mnt_set_mountpoint(struct mount *, struct dentry *, 41void mnt_set_mountpoint(struct mount *, struct mountpoint *,
42 struct mount *); 42 struct mount *);
43void release_mounts(struct list_head *); 43void release_mounts(struct list_head *);
44void umount_tree(struct mount *, int, struct list_head *); 44void umount_tree(struct mount *, int, struct list_head *);