about | summary | refs | log | tree | commit | diff | stats
path: root/fs/namespace.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2013-05-01 20:51:54 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2013-05-01 20:51:54 -0400
commit: 20b4fb485227404329e41ad15588afad3df23050 (patch)
tree: f3e099f0ab3da8a93b447203e294d2bb22f6dc05 /fs/namespace.c
parent: b9394d8a657cd3c064fa432aa0905c1b58b38fe9 (diff)
parent: ac3e3c5b1164397656df81b9e9ab4991184d3236 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull VFS updates from Al Viro:

  Misc cleanups all over the place, mainly wrt /proc interfaces (switch
  create_proc_entry to proc_create(), get rid of the deprecated
  create_proc_read_entry() in favor of using proc_create_data() and
  seq_file etc).

  7kloc removed.

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (204 commits)
  don't bother with deferred freeing of fdtables
  proc: Move non-public stuff from linux/proc_fs.h to fs/proc/internal.h
  proc: Make the PROC_I() and PDE() macros internal to procfs
  proc: Supply a function to remove a proc entry by PDE
  take cgroup_open() and cpuset_open() to fs/proc/base.c
  ppc: Clean up scanlog
  ppc: Clean up rtas_flash driver somewhat
  hostap: proc: Use remove_proc_subtree()
  drm: proc: Use remove_proc_subtree()
  drm: proc: Use minor->index to label things, not PDE->name
  drm: Constify drm_proc_list[]
  zoran: Don't print proc_dir_entry data in debug
  reiserfs: Don't access the proc_dir_entry in r_open(), r_start() r_show()
  proc: Supply an accessor for getting the data from a PDE's parent
  airo: Use remove_proc_subtree()
  rtl8192u: Don't need to save device proc dir PDE
  rtl8187se: Use a dir under /proc/net/r8180/
  proc: Add proc_mkdir_data()
  proc: Move some bits from linux/proc_fs.h to linux/{of.h,signal.h,tty.h}
  proc: Move PDE_NET() to fs/proc/proc_net.c
  ...
Diffstat (limited to 'fs/namespace.c')
-rw-r--r-- fs/namespace.c 341
1 files changed, 195 insertions, 146 deletions
diff --git a/fs/namespace.c b/fs/namespace.c
index 341d3f564082..b4f96a5230a3 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -21,7 +21,8 @@
21#include <linux/fs_struct.h> /* get_fs_root et.al. */ 21#include <linux/fs_struct.h> /* get_fs_root et.al. */
22#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ 22#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */
23#include <linux/uaccess.h> 23#include <linux/uaccess.h>
24#include <linux/proc_fs.h> 24#include <linux/proc_ns.h>
25#include <linux/magic.h>
25#include "pnode.h" 26#include "pnode.h"
26#include "internal.h" 27#include "internal.h"
27 28
@@ -36,6 +37,7 @@ static int mnt_id_start = 0;
36static int mnt_group_start = 1; 37static int mnt_group_start = 1;
37 38
38static struct list_head *mount_hashtable __read_mostly; 39static struct list_head *mount_hashtable __read_mostly;
40static struct list_head *mountpoint_hashtable __read_mostly;
39static struct kmem_cache *mnt_cache __read_mostly; 41static struct kmem_cache *mnt_cache __read_mostly;
40static struct rw_semaphore namespace_sem; 42static struct rw_semaphore namespace_sem;
41 43
@@ -605,6 +607,51 @@ struct vfsmount *lookup_mnt(struct path *path)
605 } 607 }
606} 608}
607 609
610static struct mountpoint *new_mountpoint(struct dentry *dentry)
611{
612 struct list_head *chain = mountpoint_hashtable + hash(NULL, dentry);
613 struct mountpoint *mp;
614
615 list_for_each_entry(mp, chain, m_hash) {
616 if (mp->m_dentry == dentry) {
617 /* might be worth a WARN_ON() */
618 if (d_unlinked(dentry))
619 return ERR_PTR(-ENOENT);
620 mp->m_count++;
621 return mp;
622 }
623 }
624
625 mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
626 if (!mp)
627 return ERR_PTR(-ENOMEM);
628
629 spin_lock(&dentry->d_lock);
630 if (d_unlinked(dentry)) {
631 spin_unlock(&dentry->d_lock);
632 kfree(mp);
633 return ERR_PTR(-ENOENT);
634 }
635 dentry->d_flags |= DCACHE_MOUNTED;
636 spin_unlock(&dentry->d_lock);
637 mp->m_dentry = dentry;
638 mp->m_count = 1;
639 list_add(&mp->m_hash, chain);
640 return mp;
641}
642
643static void put_mountpoint(struct mountpoint *mp)
644{
645 if (!--mp->m_count) {
646 struct dentry *dentry = mp->m_dentry;
647 spin_lock(&dentry->d_lock);
648 dentry->d_flags &= ~DCACHE_MOUNTED;
649 spin_unlock(&dentry->d_lock);
650 list_del(&mp->m_hash);
651 kfree(mp);
652 }
653}
654
608static inline int check_mnt(struct mount *mnt) 655static inline int check_mnt(struct mount *mnt)
609{ 656{
610 return mnt->mnt_ns == current->nsproxy->mnt_ns; 657 return mnt->mnt_ns == current->nsproxy->mnt_ns;
@@ -633,27 +680,6 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
633} 680}
634 681
635/* 682/*
636 * Clear dentry's mounted state if it has no remaining mounts.
637 * vfsmount_lock must be held for write.
638 */
639static void dentry_reset_mounted(struct dentry *dentry)
640{
641 unsigned u;
642
643 for (u = 0; u < HASH_SIZE; u++) {
644 struct mount *p;
645
646 list_for_each_entry(p, &mount_hashtable[u], mnt_hash) {
647 if (p->mnt_mountpoint == dentry)
648 return;
649 }
650 }
651 spin_lock(&dentry->d_lock);
652 dentry->d_flags &= ~DCACHE_MOUNTED;
653 spin_unlock(&dentry->d_lock);
654}
655
656/*
657 * vfsmount lock must be held for write 683 * vfsmount lock must be held for write
658 */ 684 */
659static void detach_mnt(struct mount *mnt, struct path *old_path) 685static void detach_mnt(struct mount *mnt, struct path *old_path)
@@ -664,32 +690,35 @@ static void detach_mnt(struct mount *mnt, struct path *old_path)
664 mnt->mnt_mountpoint = mnt->mnt.mnt_root; 690 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
665 list_del_init(&mnt->mnt_child); 691 list_del_init(&mnt->mnt_child);
666 list_del_init(&mnt->mnt_hash); 692 list_del_init(&mnt->mnt_hash);
667 dentry_reset_mounted(old_path->dentry); 693 put_mountpoint(mnt->mnt_mp);
694 mnt->mnt_mp = NULL;
668} 695}
669 696
670/* 697/*
671 * vfsmount lock must be held for write 698 * vfsmount lock must be held for write
672 */ 699 */
673void mnt_set_mountpoint(struct mount *mnt, struct dentry *dentry, 700void mnt_set_mountpoint(struct mount *mnt,
701 struct mountpoint *mp,
674 struct mount *child_mnt) 702 struct mount *child_mnt)
675{ 703{
704 mp->m_count++;
676 mnt_add_count(mnt, 1); /* essentially, that's mntget */ 705 mnt_add_count(mnt, 1); /* essentially, that's mntget */
677 child_mnt->mnt_mountpoint = dget(dentry); 706 child_mnt->mnt_mountpoint = dget(mp->m_dentry);
678 child_mnt->mnt_parent = mnt; 707 child_mnt->mnt_parent = mnt;
679 spin_lock(&dentry->d_lock); 708 child_mnt->mnt_mp = mp;
680 dentry->d_flags |= DCACHE_MOUNTED;
681 spin_unlock(&dentry->d_lock);
682} 709}
683 710
684/* 711/*
685 * vfsmount lock must be held for write 712 * vfsmount lock must be held for write
686 */ 713 */
687static void attach_mnt(struct mount *mnt, struct path *path) 714static void attach_mnt(struct mount *mnt,
715 struct mount *parent,
716 struct mountpoint *mp)
688{ 717{
689 mnt_set_mountpoint(real_mount(path->mnt), path->dentry, mnt); 718 mnt_set_mountpoint(parent, mp, mnt);
690 list_add_tail(&mnt->mnt_hash, mount_hashtable + 719 list_add_tail(&mnt->mnt_hash, mount_hashtable +
691 hash(path->mnt, path->dentry)); 720 hash(&parent->mnt, mp->m_dentry));
692 list_add_tail(&mnt->mnt_child, &real_mount(path->mnt)->mnt_mounts); 721 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
693} 722}
694 723
695/* 724/*
@@ -1095,11 +1124,23 @@ int may_umount(struct vfsmount *mnt)
1095 1124
1096EXPORT_SYMBOL(may_umount); 1125EXPORT_SYMBOL(may_umount);
1097 1126
1098void release_mounts(struct list_head *head) 1127static LIST_HEAD(unmounted); /* protected by namespace_sem */
1128
1129static void namespace_unlock(void)
1099{ 1130{
1100 struct mount *mnt; 1131 struct mount *mnt;
1101 while (!list_empty(head)) { 1132 LIST_HEAD(head);
1102 mnt = list_first_entry(head, struct mount, mnt_hash); 1133
1134 if (likely(list_empty(&unmounted))) {
1135 up_write(&namespace_sem);
1136 return;
1137 }
1138
1139 list_splice_init(&unmounted, &head);
1140 up_write(&namespace_sem);
1141
1142 while (!list_empty(&head)) {
1143 mnt = list_first_entry(&head, struct mount, mnt_hash);
1103 list_del_init(&mnt->mnt_hash); 1144 list_del_init(&mnt->mnt_hash);
1104 if (mnt_has_parent(mnt)) { 1145 if (mnt_has_parent(mnt)) {
1105 struct dentry *dentry; 1146 struct dentry *dentry;
@@ -1119,11 +1160,16 @@ void release_mounts(struct list_head *head)
1119 } 1160 }
1120} 1161}
1121 1162
1163static inline void namespace_lock(void)
1164{
1165 down_write(&namespace_sem);
1166}
1167
1122/* 1168/*
1123 * vfsmount lock must be held for write 1169 * vfsmount lock must be held for write
1124 * namespace_sem must be held for write 1170 * namespace_sem must be held for write
1125 */ 1171 */
1126void umount_tree(struct mount *mnt, int propagate, struct list_head *kill) 1172void umount_tree(struct mount *mnt, int propagate)
1127{ 1173{
1128 LIST_HEAD(tmp_list); 1174 LIST_HEAD(tmp_list);
1129 struct mount *p; 1175 struct mount *p;
@@ -1142,20 +1188,20 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill)
1142 list_del_init(&p->mnt_child); 1188 list_del_init(&p->mnt_child);
1143 if (mnt_has_parent(p)) { 1189 if (mnt_has_parent(p)) {
1144 p->mnt_parent->mnt_ghosts++; 1190 p->mnt_parent->mnt_ghosts++;
1145 dentry_reset_mounted(p->mnt_mountpoint); 1191 put_mountpoint(p->mnt_mp);
1192 p->mnt_mp = NULL;
1146 } 1193 }
1147 change_mnt_propagation(p, MS_PRIVATE); 1194 change_mnt_propagation(p, MS_PRIVATE);
1148 } 1195 }
1149 list_splice(&tmp_list, kill); 1196 list_splice(&tmp_list, &unmounted);
1150} 1197}
1151 1198
1152static void shrink_submounts(struct mount *mnt, struct list_head *umounts); 1199static void shrink_submounts(struct mount *mnt);
1153 1200
1154static int do_umount(struct mount *mnt, int flags) 1201static int do_umount(struct mount *mnt, int flags)
1155{ 1202{
1156 struct super_block *sb = mnt->mnt.mnt_sb; 1203 struct super_block *sb = mnt->mnt.mnt_sb;
1157 int retval; 1204 int retval;
1158 LIST_HEAD(umount_list);
1159 1205
1160 retval = security_sb_umount(&mnt->mnt, flags); 1206 retval = security_sb_umount(&mnt->mnt, flags);
1161 if (retval) 1207 if (retval)
@@ -1222,22 +1268,21 @@ static int do_umount(struct mount *mnt, int flags)
1222 return retval; 1268 return retval;
1223 } 1269 }
1224 1270
1225 down_write(&namespace_sem); 1271 namespace_lock();
1226 br_write_lock(&vfsmount_lock); 1272 br_write_lock(&vfsmount_lock);
1227 event++; 1273 event++;
1228 1274
1229 if (!(flags & MNT_DETACH)) 1275 if (!(flags & MNT_DETACH))
1230 shrink_submounts(mnt, &umount_list); 1276 shrink_submounts(mnt);
1231 1277
1232 retval = -EBUSY; 1278 retval = -EBUSY;
1233 if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) { 1279 if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
1234 if (!list_empty(&mnt->mnt_list)) 1280 if (!list_empty(&mnt->mnt_list))
1235 umount_tree(mnt, 1, &umount_list); 1281 umount_tree(mnt, 1);
1236 retval = 0; 1282 retval = 0;
1237 } 1283 }
1238 br_write_unlock(&vfsmount_lock); 1284 br_write_unlock(&vfsmount_lock);
1239 up_write(&namespace_sem); 1285 namespace_unlock();
1240 release_mounts(&umount_list);
1241 return retval; 1286 return retval;
1242} 1287}
1243 1288
@@ -1310,13 +1355,13 @@ static bool mnt_ns_loop(struct path *path)
1310 * mount namespace loop? 1355 * mount namespace loop?
1311 */ 1356 */
1312 struct inode *inode = path->dentry->d_inode; 1357 struct inode *inode = path->dentry->d_inode;
1313 struct proc_inode *ei; 1358 struct proc_ns *ei;
1314 struct mnt_namespace *mnt_ns; 1359 struct mnt_namespace *mnt_ns;
1315 1360
1316 if (!proc_ns_inode(inode)) 1361 if (!proc_ns_inode(inode))
1317 return false; 1362 return false;
1318 1363
1319 ei = PROC_I(inode); 1364 ei = get_proc_ns(inode);
1320 if (ei->ns_ops != &mntns_operations) 1365 if (ei->ns_ops != &mntns_operations)
1321 return false; 1366 return false;
1322 1367
@@ -1327,8 +1372,7 @@ static bool mnt_ns_loop(struct path *path)
1327struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, 1372struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1328 int flag) 1373 int flag)
1329{ 1374{
1330 struct mount *res, *p, *q, *r; 1375 struct mount *res, *p, *q, *r, *parent;
1331 struct path path;
1332 1376
1333 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt)) 1377 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
1334 return ERR_PTR(-EINVAL); 1378 return ERR_PTR(-EINVAL);
@@ -1355,25 +1399,22 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1355 q = q->mnt_parent; 1399 q = q->mnt_parent;
1356 } 1400 }
1357 p = s; 1401 p = s;
1358 path.mnt = &q->mnt; 1402 parent = q;
1359 path.dentry = p->mnt_mountpoint;
1360 q = clone_mnt(p, p->mnt.mnt_root, flag); 1403 q = clone_mnt(p, p->mnt.mnt_root, flag);
1361 if (IS_ERR(q)) 1404 if (IS_ERR(q))
1362 goto out; 1405 goto out;
1363 br_write_lock(&vfsmount_lock); 1406 br_write_lock(&vfsmount_lock);
1364 list_add_tail(&q->mnt_list, &res->mnt_list); 1407 list_add_tail(&q->mnt_list, &res->mnt_list);
1365 attach_mnt(q, &path); 1408 attach_mnt(q, parent, p->mnt_mp);
1366 br_write_unlock(&vfsmount_lock); 1409 br_write_unlock(&vfsmount_lock);
1367 } 1410 }
1368 } 1411 }
1369 return res; 1412 return res;
1370out: 1413out:
1371 if (res) { 1414 if (res) {
1372 LIST_HEAD(umount_list);
1373 br_write_lock(&vfsmount_lock); 1415 br_write_lock(&vfsmount_lock);
1374 umount_tree(res, 0, &umount_list); 1416 umount_tree(res, 0);
1375 br_write_unlock(&vfsmount_lock); 1417 br_write_unlock(&vfsmount_lock);
1376 release_mounts(&umount_list);
1377 } 1418 }
1378 return q; 1419 return q;
1379} 1420}
@@ -1383,10 +1424,10 @@ out:
1383struct vfsmount *collect_mounts(struct path *path) 1424struct vfsmount *collect_mounts(struct path *path)
1384{ 1425{
1385 struct mount *tree; 1426 struct mount *tree;
1386 down_write(&namespace_sem); 1427 namespace_lock();
1387 tree = copy_tree(real_mount(path->mnt), path->dentry, 1428 tree = copy_tree(real_mount(path->mnt), path->dentry,
1388 CL_COPY_ALL | CL_PRIVATE); 1429 CL_COPY_ALL | CL_PRIVATE);
1389 up_write(&namespace_sem); 1430 namespace_unlock();
1390 if (IS_ERR(tree)) 1431 if (IS_ERR(tree))
1391 return NULL; 1432 return NULL;
1392 return &tree->mnt; 1433 return &tree->mnt;
@@ -1394,13 +1435,11 @@ struct vfsmount *collect_mounts(struct path *path)
1394 1435
1395void drop_collected_mounts(struct vfsmount *mnt) 1436void drop_collected_mounts(struct vfsmount *mnt)
1396{ 1437{
1397 LIST_HEAD(umount_list); 1438 namespace_lock();
1398 down_write(&namespace_sem);
1399 br_write_lock(&vfsmount_lock); 1439 br_write_lock(&vfsmount_lock);
1400 umount_tree(real_mount(mnt), 0, &umount_list); 1440 umount_tree(real_mount(mnt), 0);
1401 br_write_unlock(&vfsmount_lock); 1441 br_write_unlock(&vfsmount_lock);
1402 up_write(&namespace_sem); 1442 namespace_unlock();
1403 release_mounts(&umount_list);
1404} 1443}
1405 1444
1406int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, 1445int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
@@ -1509,11 +1548,11 @@ static int invent_group_ids(struct mount *mnt, bool recurse)
1509 * in allocations. 1548 * in allocations.
1510 */ 1549 */
1511static int attach_recursive_mnt(struct mount *source_mnt, 1550static int attach_recursive_mnt(struct mount *source_mnt,
1512 struct path *path, struct path *parent_path) 1551 struct mount *dest_mnt,
1552 struct mountpoint *dest_mp,
1553 struct path *parent_path)
1513{ 1554{
1514 LIST_HEAD(tree_list); 1555 LIST_HEAD(tree_list);
1515 struct mount *dest_mnt = real_mount(path->mnt);
1516 struct dentry *dest_dentry = path->dentry;
1517 struct mount *child, *p; 1556 struct mount *child, *p;
1518 int err; 1557 int err;
1519 1558
@@ -1522,7 +1561,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
1522 if (err) 1561 if (err)
1523 goto out; 1562 goto out;
1524 } 1563 }
1525 err = propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list); 1564 err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
1526 if (err) 1565 if (err)
1527 goto out_cleanup_ids; 1566 goto out_cleanup_ids;
1528 1567
@@ -1534,10 +1573,10 @@ static int attach_recursive_mnt(struct mount *source_mnt,
1534 } 1573 }
1535 if (parent_path) { 1574 if (parent_path) {
1536 detach_mnt(source_mnt, parent_path); 1575 detach_mnt(source_mnt, parent_path);
1537 attach_mnt(source_mnt, path); 1576 attach_mnt(source_mnt, dest_mnt, dest_mp);
1538 touch_mnt_namespace(source_mnt->mnt_ns); 1577 touch_mnt_namespace(source_mnt->mnt_ns);
1539 } else { 1578 } else {
1540 mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); 1579 mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
1541 commit_tree(source_mnt); 1580 commit_tree(source_mnt);
1542 } 1581 }
1543 1582
@@ -1556,46 +1595,53 @@ static int attach_recursive_mnt(struct mount *source_mnt,
1556 return err; 1595 return err;
1557} 1596}
1558 1597
1559static int lock_mount(struct path *path) 1598static struct mountpoint *lock_mount(struct path *path)
1560{ 1599{
1561 struct vfsmount *mnt; 1600 struct vfsmount *mnt;
1601 struct dentry *dentry = path->dentry;
1562retry: 1602retry:
1563 mutex_lock(&path->dentry->d_inode->i_mutex); 1603 mutex_lock(&dentry->d_inode->i_mutex);
1564 if (unlikely(cant_mount(path->dentry))) { 1604 if (unlikely(cant_mount(dentry))) {
1565 mutex_unlock(&path->dentry->d_inode->i_mutex); 1605 mutex_unlock(&dentry->d_inode->i_mutex);
1566 return -ENOENT; 1606 return ERR_PTR(-ENOENT);
1567 } 1607 }
1568 down_write(&namespace_sem); 1608 namespace_lock();
1569 mnt = lookup_mnt(path); 1609 mnt = lookup_mnt(path);
1570 if (likely(!mnt)) 1610 if (likely(!mnt)) {
1571 return 0; 1611 struct mountpoint *mp = new_mountpoint(dentry);
1572 up_write(&namespace_sem); 1612 if (IS_ERR(mp)) {
1613 namespace_unlock();
1614 mutex_unlock(&dentry->d_inode->i_mutex);
1615 return mp;
1616 }
1617 return mp;
1618 }
1619 namespace_unlock();
1573 mutex_unlock(&path->dentry->d_inode->i_mutex); 1620 mutex_unlock(&path->dentry->d_inode->i_mutex);
1574 path_put(path); 1621 path_put(path);
1575 path->mnt = mnt; 1622 path->mnt = mnt;
1576 path->dentry = dget(mnt->mnt_root); 1623 dentry = path->dentry = dget(mnt->mnt_root);
1577 goto retry; 1624 goto retry;
1578} 1625}
1579 1626
1580static void unlock_mount(struct path *path) 1627static void unlock_mount(struct mountpoint *where)
1581{ 1628{
1582 up_write(&namespace_sem); 1629 struct dentry *dentry = where->m_dentry;
1583 mutex_unlock(&path->dentry->d_inode->i_mutex); 1630 put_mountpoint(where);
1631 namespace_unlock();
1632 mutex_unlock(&dentry->d_inode->i_mutex);
1584} 1633}
1585 1634
1586static int graft_tree(struct mount *mnt, struct path *path) 1635static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
1587{ 1636{
1588 if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER) 1637 if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER)
1589 return -EINVAL; 1638 return -EINVAL;
1590 1639
1591 if (S_ISDIR(path->dentry->d_inode->i_mode) != 1640 if (S_ISDIR(mp->m_dentry->d_inode->i_mode) !=
1592 S_ISDIR(mnt->mnt.mnt_root->d_inode->i_mode)) 1641 S_ISDIR(mnt->mnt.mnt_root->d_inode->i_mode))
1593 return -ENOTDIR; 1642 return -ENOTDIR;
1594 1643
1595 if (d_unlinked(path->dentry)) 1644 return attach_recursive_mnt(mnt, p, mp, NULL);
1596 return -ENOENT;
1597
1598 return attach_recursive_mnt(mnt, path, NULL);
1599} 1645}
1600 1646
1601/* 1647/*
@@ -1633,7 +1679,7 @@ static int do_change_type(struct path *path, int flag)
1633 if (!type) 1679 if (!type)
1634 return -EINVAL; 1680 return -EINVAL;
1635 1681
1636 down_write(&namespace_sem); 1682 namespace_lock();
1637 if (type == MS_SHARED) { 1683 if (type == MS_SHARED) {
1638 err = invent_group_ids(mnt, recurse); 1684 err = invent_group_ids(mnt, recurse);
1639 if (err) 1685 if (err)
@@ -1646,7 +1692,7 @@ static int do_change_type(struct path *path, int flag)
1646 br_write_unlock(&vfsmount_lock); 1692 br_write_unlock(&vfsmount_lock);
1647 1693
1648 out_unlock: 1694 out_unlock:
1649 up_write(&namespace_sem); 1695 namespace_unlock();
1650 return err; 1696 return err;
1651} 1697}
1652 1698
@@ -1656,9 +1702,9 @@ static int do_change_type(struct path *path, int flag)
1656static int do_loopback(struct path *path, const char *old_name, 1702static int do_loopback(struct path *path, const char *old_name,
1657 int recurse) 1703 int recurse)
1658{ 1704{
1659 LIST_HEAD(umount_list);
1660 struct path old_path; 1705 struct path old_path;
1661 struct mount *mnt = NULL, *old; 1706 struct mount *mnt = NULL, *old, *parent;
1707 struct mountpoint *mp;
1662 int err; 1708 int err;
1663 if (!old_name || !*old_name) 1709 if (!old_name || !*old_name)
1664 return -EINVAL; 1710 return -EINVAL;
@@ -1670,17 +1716,19 @@ static int do_loopback(struct path *path, const char *old_name,
1670 if (mnt_ns_loop(&old_path)) 1716 if (mnt_ns_loop(&old_path))
1671 goto out; 1717 goto out;
1672 1718
1673 err = lock_mount(path); 1719 mp = lock_mount(path);
1674 if (err) 1720 err = PTR_ERR(mp);
1721 if (IS_ERR(mp))
1675 goto out; 1722 goto out;
1676 1723
1677 old = real_mount(old_path.mnt); 1724 old = real_mount(old_path.mnt);
1725 parent = real_mount(path->mnt);
1678 1726
1679 err = -EINVAL; 1727 err = -EINVAL;
1680 if (IS_MNT_UNBINDABLE(old)) 1728 if (IS_MNT_UNBINDABLE(old))
1681 goto out2; 1729 goto out2;
1682 1730
1683 if (!check_mnt(real_mount(path->mnt)) || !check_mnt(old)) 1731 if (!check_mnt(parent) || !check_mnt(old))
1684 goto out2; 1732 goto out2;
1685 1733
1686 if (recurse) 1734 if (recurse)
@@ -1693,15 +1741,14 @@ static int do_loopback(struct path *path, const char *old_name,
1693 goto out2; 1741 goto out2;
1694 } 1742 }
1695 1743
1696 err = graft_tree(mnt, path); 1744 err = graft_tree(mnt, parent, mp);
1697 if (err) { 1745 if (err) {
1698 br_write_lock(&vfsmount_lock); 1746 br_write_lock(&vfsmount_lock);
1699 umount_tree(mnt, 0, &umount_list); 1747 umount_tree(mnt, 0);
1700 br_write_unlock(&vfsmount_lock); 1748 br_write_unlock(&vfsmount_lock);
1701 } 1749 }
1702out2: 1750out2:
1703 unlock_mount(path); 1751 unlock_mount(mp);
1704 release_mounts(&umount_list);
1705out: 1752out:
1706 path_put(&old_path); 1753 path_put(&old_path);
1707 return err; 1754 return err;
@@ -1786,6 +1833,7 @@ static int do_move_mount(struct path *path, const char *old_name)
1786 struct path old_path, parent_path; 1833 struct path old_path, parent_path;
1787 struct mount *p; 1834 struct mount *p;
1788 struct mount *old; 1835 struct mount *old;
1836 struct mountpoint *mp;
1789 int err; 1837 int err;
1790 if (!old_name || !*old_name) 1838 if (!old_name || !*old_name)
1791 return -EINVAL; 1839 return -EINVAL;
@@ -1793,8 +1841,9 @@ static int do_move_mount(struct path *path, const char *old_name)
1793 if (err) 1841 if (err)
1794 return err; 1842 return err;
1795 1843
1796 err = lock_mount(path); 1844 mp = lock_mount(path);
1797 if (err < 0) 1845 err = PTR_ERR(mp);
1846 if (IS_ERR(mp))
1798 goto out; 1847 goto out;
1799 1848
1800 old = real_mount(old_path.mnt); 1849 old = real_mount(old_path.mnt);
@@ -1804,9 +1853,6 @@ static int do_move_mount(struct path *path, const char *old_name)
1804 if (!check_mnt(p) || !check_mnt(old)) 1853 if (!check_mnt(p) || !check_mnt(old))
1805 goto out1; 1854 goto out1;
1806 1855
1807 if (d_unlinked(path->dentry))
1808 goto out1;
1809
1810 err = -EINVAL; 1856 err = -EINVAL;
1811 if (old_path.dentry != old_path.mnt->mnt_root) 1857 if (old_path.dentry != old_path.mnt->mnt_root)
1812 goto out1; 1858 goto out1;
@@ -1833,7 +1879,7 @@ static int do_move_mount(struct path *path, const char *old_name)
1833 if (p == old) 1879 if (p == old)
1834 goto out1; 1880 goto out1;
1835 1881
1836 err = attach_recursive_mnt(old, path, &parent_path); 1882 err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path);
1837 if (err) 1883 if (err)
1838 goto out1; 1884 goto out1;
1839 1885
@@ -1841,7 +1887,7 @@ static int do_move_mount(struct path *path, const char *old_name)
1841 * automatically */ 1887 * automatically */
1842 list_del_init(&old->mnt_expire); 1888 list_del_init(&old->mnt_expire);
1843out1: 1889out1:
1844 unlock_mount(path); 1890 unlock_mount(mp);
1845out: 1891out:
1846 if (!err) 1892 if (!err)
1847 path_put(&parent_path); 1893 path_put(&parent_path);
@@ -1877,21 +1923,24 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
1877 */ 1923 */
1878static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) 1924static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
1879{ 1925{
1926 struct mountpoint *mp;
1927 struct mount *parent;
1880 int err; 1928 int err;
1881 1929
1882 mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL); 1930 mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);
1883 1931
1884 err = lock_mount(path); 1932 mp = lock_mount(path);
1885 if (err) 1933 if (IS_ERR(mp))
1886 return err; 1934 return PTR_ERR(mp);
1887 1935
1936 parent = real_mount(path->mnt);
1888 err = -EINVAL; 1937 err = -EINVAL;
1889 if (unlikely(!check_mnt(real_mount(path->mnt)))) { 1938 if (unlikely(!check_mnt(parent))) {
1890 /* that's acceptable only for automounts done in private ns */ 1939 /* that's acceptable only for automounts done in private ns */
1891 if (!(mnt_flags & MNT_SHRINKABLE)) 1940 if (!(mnt_flags & MNT_SHRINKABLE))
1892 goto unlock; 1941 goto unlock;
1893 /* ... and for those we'd better have mountpoint still alive */ 1942 /* ... and for those we'd better have mountpoint still alive */
1894 if (!real_mount(path->mnt)->mnt_ns) 1943 if (!parent->mnt_ns)
1895 goto unlock; 1944 goto unlock;
1896 } 1945 }
1897 1946
@@ -1906,10 +1955,10 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
1906 goto unlock; 1955 goto unlock;
1907 1956
1908 newmnt->mnt.mnt_flags = mnt_flags; 1957 newmnt->mnt.mnt_flags = mnt_flags;
1909 err = graft_tree(newmnt, path); 1958 err = graft_tree(newmnt, parent, mp);
1910 1959
1911unlock: 1960unlock:
1912 unlock_mount(path); 1961 unlock_mount(mp);
1913 return err; 1962 return err;
1914} 1963}
1915 1964
@@ -1982,11 +2031,11 @@ int finish_automount(struct vfsmount *m, struct path *path)
1982fail: 2031fail:
1983 /* remove m from any expiration list it may be on */ 2032 /* remove m from any expiration list it may be on */
1984 if (!list_empty(&mnt->mnt_expire)) { 2033 if (!list_empty(&mnt->mnt_expire)) {
1985 down_write(&namespace_sem); 2034 namespace_lock();
1986 br_write_lock(&vfsmount_lock); 2035 br_write_lock(&vfsmount_lock);
1987 list_del_init(&mnt->mnt_expire); 2036 list_del_init(&mnt->mnt_expire);
1988 br_write_unlock(&vfsmount_lock); 2037 br_write_unlock(&vfsmount_lock);
1989 up_write(&namespace_sem); 2038 namespace_unlock();
1990 } 2039 }
1991 mntput(m); 2040 mntput(m);
1992 mntput(m); 2041 mntput(m);
@@ -2000,13 +2049,13 @@ fail:
2000 */ 2049 */
2001void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) 2050void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
2002{ 2051{
2003 down_write(&namespace_sem); 2052 namespace_lock();
2004 br_write_lock(&vfsmount_lock); 2053 br_write_lock(&vfsmount_lock);
2005 2054
2006 list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); 2055 list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
2007 2056
2008 br_write_unlock(&vfsmount_lock); 2057 br_write_unlock(&vfsmount_lock);
2009 up_write(&namespace_sem); 2058 namespace_unlock();
2010} 2059}
2011EXPORT_SYMBOL(mnt_set_expiry); 2060EXPORT_SYMBOL(mnt_set_expiry);
2012 2061
@@ -2019,12 +2068,11 @@ void mark_mounts_for_expiry(struct list_head *mounts)
2019{ 2068{
2020 struct mount *mnt, *next; 2069 struct mount *mnt, *next;
2021 LIST_HEAD(graveyard); 2070 LIST_HEAD(graveyard);
2022 LIST_HEAD(umounts);
2023 2071
2024 if (list_empty(mounts)) 2072 if (list_empty(mounts))
2025 return; 2073 return;
2026 2074
2027 down_write(&namespace_sem); 2075 namespace_lock();
2028 br_write_lock(&vfsmount_lock); 2076 br_write_lock(&vfsmount_lock);
2029 2077
2030 /* extract from the expiration list every vfsmount that matches the 2078 /* extract from the expiration list every vfsmount that matches the
@@ -2042,12 +2090,10 @@ void mark_mounts_for_expiry(struct list_head *mounts)
2042 while (!list_empty(&graveyard)) { 2090 while (!list_empty(&graveyard)) {
2043 mnt = list_first_entry(&graveyard, struct mount, mnt_expire); 2091 mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
2044 touch_mnt_namespace(mnt->mnt_ns); 2092 touch_mnt_namespace(mnt->mnt_ns);
2045 umount_tree(mnt, 1, &umounts); 2093 umount_tree(mnt, 1);
2046 } 2094 }
2047 br_write_unlock(&vfsmount_lock); 2095 br_write_unlock(&vfsmount_lock);
2048 up_write(&namespace_sem); 2096 namespace_unlock();
2049
2050 release_mounts(&umounts);
2051} 2097}
2052 2098
2053EXPORT_SYMBOL_GPL(mark_mounts_for_expiry); 2099EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
@@ -2104,7 +2150,7 @@ resume:
2104 * 2150 *
2105 * vfsmount_lock must be held for write 2151 * vfsmount_lock must be held for write
2106 */ 2152 */
2107static void shrink_submounts(struct mount *mnt, struct list_head *umounts) 2153static void shrink_submounts(struct mount *mnt)
2108{ 2154{
2109 LIST_HEAD(graveyard); 2155 LIST_HEAD(graveyard);
2110 struct mount *m; 2156 struct mount *m;
@@ -2115,7 +2161,7 @@ static void shrink_submounts(struct mount *mnt, struct list_head *umounts)
2115 m = list_first_entry(&graveyard, struct mount, 2161 m = list_first_entry(&graveyard, struct mount,
2116 mnt_expire); 2162 mnt_expire);
2117 touch_mnt_namespace(m->mnt_ns); 2163 touch_mnt_namespace(m->mnt_ns);
2118 umount_tree(m, 1, umounts); 2164 umount_tree(m, 1);
2119 } 2165 }
2120 } 2166 }
2121} 2167}
@@ -2342,14 +2388,14 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2342 if (IS_ERR(new_ns)) 2388 if (IS_ERR(new_ns))
2343 return new_ns; 2389 return new_ns;
2344 2390
2345 down_write(&namespace_sem); 2391 namespace_lock();
2346 /* First pass: copy the tree topology */ 2392 /* First pass: copy the tree topology */
2347 copy_flags = CL_COPY_ALL | CL_EXPIRE; 2393 copy_flags = CL_COPY_ALL | CL_EXPIRE;
2348 if (user_ns != mnt_ns->user_ns) 2394 if (user_ns != mnt_ns->user_ns)
2349 copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED; 2395 copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
2350 new = copy_tree(old, old->mnt.mnt_root, copy_flags); 2396 new = copy_tree(old, old->mnt.mnt_root, copy_flags);
2351 if (IS_ERR(new)) { 2397 if (IS_ERR(new)) {
2352 up_write(&namespace_sem); 2398 namespace_unlock();
2353 free_mnt_ns(new_ns); 2399 free_mnt_ns(new_ns);
2354 return ERR_CAST(new); 2400 return ERR_CAST(new);
2355 } 2401 }
@@ -2380,7 +2426,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2380 p = next_mnt(p, old); 2426 p = next_mnt(p, old);
2381 q = next_mnt(q, new); 2427 q = next_mnt(q, new);
2382 } 2428 }
2383 up_write(&namespace_sem); 2429 namespace_unlock();
2384 2430
2385 if (rootmnt) 2431 if (rootmnt)
2386 mntput(rootmnt); 2432 mntput(rootmnt);
@@ -2550,7 +2596,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2550 const char __user *, put_old) 2596 const char __user *, put_old)
2551{ 2597{
2552 struct path new, old, parent_path, root_parent, root; 2598 struct path new, old, parent_path, root_parent, root;
2553 struct mount *new_mnt, *root_mnt; 2599 struct mount *new_mnt, *root_mnt, *old_mnt;
2600 struct mountpoint *old_mp, *root_mp;
2554 int error; 2601 int error;
2555 2602
2556 if (!may_mount()) 2603 if (!may_mount())
@@ -2569,14 +2616,16 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2569 goto out2; 2616 goto out2;
2570 2617
2571 get_fs_root(current->fs, &root); 2618 get_fs_root(current->fs, &root);
2572 error = lock_mount(&old); 2619 old_mp = lock_mount(&old);
2573 if (error) 2620 error = PTR_ERR(old_mp);
2621 if (IS_ERR(old_mp))
2574 goto out3; 2622 goto out3;
2575 2623
2576 error = -EINVAL; 2624 error = -EINVAL;
2577 new_mnt = real_mount(new.mnt); 2625 new_mnt = real_mount(new.mnt);
2578 root_mnt = real_mount(root.mnt); 2626 root_mnt = real_mount(root.mnt);
2579 if (IS_MNT_SHARED(real_mount(old.mnt)) || 2627 old_mnt = real_mount(old.mnt);
2628 if (IS_MNT_SHARED(old_mnt) ||
2580 IS_MNT_SHARED(new_mnt->mnt_parent) || 2629 IS_MNT_SHARED(new_mnt->mnt_parent) ||
2581 IS_MNT_SHARED(root_mnt->mnt_parent)) 2630 IS_MNT_SHARED(root_mnt->mnt_parent))
2582 goto out4; 2631 goto out4;
@@ -2585,37 +2634,37 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2585 error = -ENOENT; 2634 error = -ENOENT;
2586 if (d_unlinked(new.dentry)) 2635 if (d_unlinked(new.dentry))
2587 goto out4; 2636 goto out4;
2588 if (d_unlinked(old.dentry))
2589 goto out4;
2590 error = -EBUSY; 2637 error = -EBUSY;
2591 if (new.mnt == root.mnt || 2638 if (new_mnt == root_mnt || old_mnt == root_mnt)
2592 old.mnt == root.mnt)
2593 goto out4; /* loop, on the same file system */ 2639 goto out4; /* loop, on the same file system */
2594 error = -EINVAL; 2640 error = -EINVAL;
2595 if (root.mnt->mnt_root != root.dentry) 2641 if (root.mnt->mnt_root != root.dentry)
2596 goto out4; /* not a mountpoint */ 2642 goto out4; /* not a mountpoint */
2597 if (!mnt_has_parent(root_mnt)) 2643 if (!mnt_has_parent(root_mnt))
2598 goto out4; /* not attached */ 2644 goto out4; /* not attached */
2645 root_mp = root_mnt->mnt_mp;
2599 if (new.mnt->mnt_root != new.dentry) 2646 if (new.mnt->mnt_root != new.dentry)
2600 goto out4; /* not a mountpoint */ 2647 goto out4; /* not a mountpoint */
2601 if (!mnt_has_parent(new_mnt)) 2648 if (!mnt_has_parent(new_mnt))
2602 goto out4; /* not attached */ 2649 goto out4; /* not attached */
2603 /* make sure we can reach put_old from new_root */ 2650 /* make sure we can reach put_old from new_root */
2604 if (!is_path_reachable(real_mount(old.mnt), old.dentry, &new)) 2651 if (!is_path_reachable(old_mnt, old.dentry, &new))
2605 goto out4; 2652 goto out4;
2653 root_mp->m_count++; /* pin it so it won't go away */
2606 br_write_lock(&vfsmount_lock); 2654 br_write_lock(&vfsmount_lock);
2607 detach_mnt(new_mnt, &parent_path); 2655 detach_mnt(new_mnt, &parent_path);
2608 detach_mnt(root_mnt, &root_parent); 2656 detach_mnt(root_mnt, &root_parent);
2609 /* mount old root on put_old */ 2657 /* mount old root on put_old */
2610 attach_mnt(root_mnt, &old); 2658 attach_mnt(root_mnt, old_mnt, old_mp);
2611 /* mount new_root on / */ 2659 /* mount new_root on / */
2612 attach_mnt(new_mnt, &root_parent); 2660 attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
2613 touch_mnt_namespace(current->nsproxy->mnt_ns); 2661 touch_mnt_namespace(current->nsproxy->mnt_ns);
2614 br_write_unlock(&vfsmount_lock); 2662 br_write_unlock(&vfsmount_lock);
2615 chroot_fs_refs(&root, &new); 2663 chroot_fs_refs(&root, &new);
2664 put_mountpoint(root_mp);
2616 error = 0; 2665 error = 0;
2617out4: 2666out4:
2618 unlock_mount(&old); 2667 unlock_mount(old_mp);
2619 if (!error) { 2668 if (!error) {
2620 path_put(&root_parent); 2669 path_put(&root_parent);
2621 path_put(&parent_path); 2670 path_put(&parent_path);
@@ -2670,14 +2719,17 @@ void __init mnt_init(void)
2670 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); 2719 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
2671 2720
2672 mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC); 2721 mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
2722 mountpoint_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
2673 2723
2674 if (!mount_hashtable) 2724 if (!mount_hashtable || !mountpoint_hashtable)
2675 panic("Failed to allocate mount hash table\n"); 2725 panic("Failed to allocate mount hash table\n");
2676 2726
2677 printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE); 2727 printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE);
2678 2728
2679 for (u = 0; u < HASH_SIZE; u++) 2729 for (u = 0; u < HASH_SIZE; u++)
2680 INIT_LIST_HEAD(&mount_hashtable[u]); 2730 INIT_LIST_HEAD(&mount_hashtable[u]);
2731 for (u = 0; u < HASH_SIZE; u++)
2732 INIT_LIST_HEAD(&mountpoint_hashtable[u]);
2681 2733
2682 br_lock_init(&vfsmount_lock); 2734 br_lock_init(&vfsmount_lock);
2683 2735
@@ -2694,16 +2746,13 @@ void __init mnt_init(void)
2694 2746
2695void put_mnt_ns(struct mnt_namespace *ns) 2747void put_mnt_ns(struct mnt_namespace *ns)
2696{ 2748{
2697 LIST_HEAD(umount_list);
2698
2699 if (!atomic_dec_and_test(&ns->count)) 2749 if (!atomic_dec_and_test(&ns->count))
2700 return; 2750 return;
2701 down_write(&namespace_sem); 2751 namespace_lock();
2702 br_write_lock(&vfsmount_lock); 2752 br_write_lock(&vfsmount_lock);
2703 umount_tree(ns->root, 0, &umount_list); 2753 umount_tree(ns->root, 0);
2704 br_write_unlock(&vfsmount_lock); 2754 br_write_unlock(&vfsmount_lock);
2705 up_write(&namespace_sem); 2755 namespace_unlock();
2706 release_mounts(&umount_list);
2707 free_mnt_ns(ns); 2756 free_mnt_ns(ns);
2708} 2757}
2709 2758