about | summary | refs | log | tree | commit | diff | stats
path: root/fs/namespace.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2013-09-07 17:35:32 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2013-09-07 17:35:32 -0400
commit: c7c4591db64dbd1e504bc4e2806d7ef290a3c81b (patch)
tree: a2fb124f9760eec668d20541383e762822d7cc7b /fs/namespace.c
parent: 11c7b03d42a847db90862d0f9d8be6ce9b2f0553 (diff)
parent: c7b96acf1456ef127fef461fcfedb54b81fecfbb (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull namespace changes from Eric Biederman:

 "This is an assorted mishmash of small cleanups, enhancements and bug fixes.

  The major theme is user namespace mount restrictions. nsown_capable is killed as it encourages not thinking about details that need to be considered. A very hard to hit pid namespace exiting bug was finally tracked and fixed. A couple of cleanups to the basic namespace infrastructure.

  Finally there is an enhancement that makes per user namespace capabilities usable as capabilities, and an enhancement that allows the per userns root to nice other processes in the user namespace"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
  - userns: Kill nsown_capable it makes the wrong thing easy
  - capabilities: allow nice if we are privileged
  - pidns: Don't have unshare(CLONE_NEWPID) imply CLONE_THREAD
  - userns: Allow PR_CAPBSET_DROP in a user namespace.
  - namespaces: Simplify copy_namespaces so it is clear what is going on.
  - pidns: Fix hang in zap_pid_ns_processes by sending a potentially extra wakeup
  - sysfs: Restrict mounting sysfs
  - userns: Better restrictions on when proc and sysfs can be mounted
  - vfs: Don't copy mount bind mounts of /proc/<pid>/ns/mnt between namespaces
  - kernel/nsproxy.c: Improving a snippet of code.
  - proc: Restrict mounting the proc filesystem
  - vfs: Lock in place mounts from more privileged users
Diffstat (limited to 'fs/namespace.c')
-rw-r--r-- fs/namespace.c | 121
1 files changed, 95 insertions, 26 deletions
diff --git a/fs/namespace.c b/fs/namespace.c
index ad8ea9bc2518..ef69fa5d2e5b 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -831,6 +831,10 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
831 if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY)) 831 if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
832 mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; 832 mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
833 833
834 /* Don't allow unprivileged users to reveal what is under a mount */
835 if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire))
836 mnt->mnt.mnt_flags |= MNT_LOCKED;
837
834 atomic_inc(&sb->s_active); 838 atomic_inc(&sb->s_active);
835 mnt->mnt.mnt_sb = sb; 839 mnt->mnt.mnt_sb = sb;
836 mnt->mnt.mnt_root = dget(root); 840 mnt->mnt.mnt_root = dget(root);
@@ -1327,6 +1331,8 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
1327 goto dput_and_out; 1331 goto dput_and_out;
1328 if (!check_mnt(mnt)) 1332 if (!check_mnt(mnt))
1329 goto dput_and_out; 1333 goto dput_and_out;
1334 if (mnt->mnt.mnt_flags & MNT_LOCKED)
1335 goto dput_and_out;
1330 1336
1331 retval = do_umount(mnt, flags); 1337 retval = do_umount(mnt, flags);
1332dput_and_out: 1338dput_and_out:
@@ -1349,14 +1355,11 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)
1349 1355
1350#endif 1356#endif
1351 1357
1352static bool mnt_ns_loop(struct path *path) 1358static bool is_mnt_ns_file(struct dentry *dentry)
1353{ 1359{
1354 /* Could bind mounting the mount namespace inode cause a 1360 /* Is this a proxy for a mount namespace? */
1355 * mount namespace loop? 1361 struct inode *inode = dentry->d_inode;
1356 */
1357 struct inode *inode = path->dentry->d_inode;
1358 struct proc_ns *ei; 1362 struct proc_ns *ei;
1359 struct mnt_namespace *mnt_ns;
1360 1363
1361 if (!proc_ns_inode(inode)) 1364 if (!proc_ns_inode(inode))
1362 return false; 1365 return false;
@@ -1365,7 +1368,19 @@ static bool mnt_ns_loop(struct path *path)
1365 if (ei->ns_ops != &mntns_operations) 1368 if (ei->ns_ops != &mntns_operations)
1366 return false; 1369 return false;
1367 1370
1368 mnt_ns = ei->ns; 1371 return true;
1372}
1373
1374static bool mnt_ns_loop(struct dentry *dentry)
1375{
1376 /* Could bind mounting the mount namespace inode cause a
1377 * mount namespace loop?
1378 */
1379 struct mnt_namespace *mnt_ns;
1380 if (!is_mnt_ns_file(dentry))
1381 return false;
1382
1383 mnt_ns = get_proc_ns(dentry->d_inode)->ns;
1369 return current->nsproxy->mnt_ns->seq >= mnt_ns->seq; 1384 return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
1370} 1385}
1371 1386
@@ -1374,13 +1389,17 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1374{ 1389{
1375 struct mount *res, *p, *q, *r, *parent; 1390 struct mount *res, *p, *q, *r, *parent;
1376 1391
1377 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt)) 1392 if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt))
1393 return ERR_PTR(-EINVAL);
1394
1395 if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry))
1378 return ERR_PTR(-EINVAL); 1396 return ERR_PTR(-EINVAL);
1379 1397
1380 res = q = clone_mnt(mnt, dentry, flag); 1398 res = q = clone_mnt(mnt, dentry, flag);
1381 if (IS_ERR(q)) 1399 if (IS_ERR(q))
1382 return q; 1400 return q;
1383 1401
1402 q->mnt.mnt_flags &= ~MNT_LOCKED;
1384 q->mnt_mountpoint = mnt->mnt_mountpoint; 1403 q->mnt_mountpoint = mnt->mnt_mountpoint;
1385 1404
1386 p = mnt; 1405 p = mnt;
@@ -1390,7 +1409,13 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1390 continue; 1409 continue;
1391 1410
1392 for (s = r; s; s = next_mnt(s, r)) { 1411 for (s = r; s; s = next_mnt(s, r)) {
1393 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(s)) { 1412 if (!(flag & CL_COPY_UNBINDABLE) &&
1413 IS_MNT_UNBINDABLE(s)) {
1414 s = skip_mnt_tree(s);
1415 continue;
1416 }
1417 if (!(flag & CL_COPY_MNT_NS_FILE) &&
1418 is_mnt_ns_file(s->mnt.mnt_root)) {
1394 s = skip_mnt_tree(s); 1419 s = skip_mnt_tree(s);
1395 continue; 1420 continue;
1396 } 1421 }
@@ -1696,6 +1721,19 @@ static int do_change_type(struct path *path, int flag)
1696 return err; 1721 return err;
1697} 1722}
1698 1723
1724static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
1725{
1726 struct mount *child;
1727 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
1728 if (!is_subdir(child->mnt_mountpoint, dentry))
1729 continue;
1730
1731 if (child->mnt.mnt_flags & MNT_LOCKED)
1732 return true;
1733 }
1734 return false;
1735}
1736
1699/* 1737/*
1700 * do loopback mount. 1738 * do loopback mount.
1701 */ 1739 */
@@ -1713,7 +1751,7 @@ static int do_loopback(struct path *path, const char *old_name,
1713 return err; 1751 return err;
1714 1752
1715 err = -EINVAL; 1753 err = -EINVAL;
1716 if (mnt_ns_loop(&old_path)) 1754 if (mnt_ns_loop(old_path.dentry))
1717 goto out; 1755 goto out;
1718 1756
1719 mp = lock_mount(path); 1757 mp = lock_mount(path);
@@ -1731,8 +1769,11 @@ static int do_loopback(struct path *path, const char *old_name,
1731 if (!check_mnt(parent) || !check_mnt(old)) 1769 if (!check_mnt(parent) || !check_mnt(old))
1732 goto out2; 1770 goto out2;
1733 1771
1772 if (!recurse && has_locked_children(old, old_path.dentry))
1773 goto out2;
1774
1734 if (recurse) 1775 if (recurse)
1735 mnt = copy_tree(old, old_path.dentry, 0); 1776 mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE);
1736 else 1777 else
1737 mnt = clone_mnt(old, old_path.dentry, 0); 1778 mnt = clone_mnt(old, old_path.dentry, 0);
1738 1779
@@ -1741,6 +1782,8 @@ static int do_loopback(struct path *path, const char *old_name,
1741 goto out2; 1782 goto out2;
1742 } 1783 }
1743 1784
1785 mnt->mnt.mnt_flags &= ~MNT_LOCKED;
1786
1744 err = graft_tree(mnt, parent, mp); 1787 err = graft_tree(mnt, parent, mp);
1745 if (err) { 1788 if (err) {
1746 br_write_lock(&vfsmount_lock); 1789 br_write_lock(&vfsmount_lock);
@@ -1853,6 +1896,9 @@ static int do_move_mount(struct path *path, const char *old_name)
1853 if (!check_mnt(p) || !check_mnt(old)) 1896 if (!check_mnt(p) || !check_mnt(old))
1854 goto out1; 1897 goto out1;
1855 1898
1899 if (old->mnt.mnt_flags & MNT_LOCKED)
1900 goto out1;
1901
1856 err = -EINVAL; 1902 err = -EINVAL;
1857 if (old_path.dentry != old_path.mnt->mnt_root) 1903 if (old_path.dentry != old_path.mnt->mnt_root)
1858 goto out1; 1904 goto out1;
@@ -2389,7 +2435,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2389 2435
2390 namespace_lock(); 2436 namespace_lock();
2391 /* First pass: copy the tree topology */ 2437 /* First pass: copy the tree topology */
2392 copy_flags = CL_COPY_ALL | CL_EXPIRE; 2438 copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
2393 if (user_ns != mnt_ns->user_ns) 2439 if (user_ns != mnt_ns->user_ns)
2394 copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED; 2440 copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
2395 new = copy_tree(old, old->mnt.mnt_root, copy_flags); 2441 new = copy_tree(old, old->mnt.mnt_root, copy_flags);
@@ -2424,6 +2470,10 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2424 } 2470 }
2425 p = next_mnt(p, old); 2471 p = next_mnt(p, old);
2426 q = next_mnt(q, new); 2472 q = next_mnt(q, new);
2473 if (!q)
2474 break;
2475 while (p->mnt.mnt_root != q->mnt.mnt_root)
2476 p = next_mnt(p, old);
2427 } 2477 }
2428 namespace_unlock(); 2478 namespace_unlock();
2429 2479
@@ -2630,6 +2680,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2630 goto out4; 2680 goto out4;
2631 if (!check_mnt(root_mnt) || !check_mnt(new_mnt)) 2681 if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
2632 goto out4; 2682 goto out4;
2683 if (new_mnt->mnt.mnt_flags & MNT_LOCKED)
2684 goto out4;
2633 error = -ENOENT; 2685 error = -ENOENT;
2634 if (d_unlinked(new.dentry)) 2686 if (d_unlinked(new.dentry))
2635 goto out4; 2687 goto out4;
@@ -2653,6 +2705,10 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2653 br_write_lock(&vfsmount_lock); 2705 br_write_lock(&vfsmount_lock);
2654 detach_mnt(new_mnt, &parent_path); 2706 detach_mnt(new_mnt, &parent_path);
2655 detach_mnt(root_mnt, &root_parent); 2707 detach_mnt(root_mnt, &root_parent);
2708 if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
2709 new_mnt->mnt.mnt_flags |= MNT_LOCKED;
2710 root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
2711 }
2656 /* mount old root on put_old */ 2712 /* mount old root on put_old */
2657 attach_mnt(root_mnt, old_mnt, old_mp); 2713 attach_mnt(root_mnt, old_mnt, old_mp);
2658 /* mount new_root on / */ 2714 /* mount new_root on / */
@@ -2811,25 +2867,38 @@ bool current_chrooted(void)
2811 return chrooted; 2867 return chrooted;
2812} 2868}
2813 2869
2814void update_mnt_policy(struct user_namespace *userns) 2870bool fs_fully_visible(struct file_system_type *type)
2815{ 2871{
2816 struct mnt_namespace *ns = current->nsproxy->mnt_ns; 2872 struct mnt_namespace *ns = current->nsproxy->mnt_ns;
2817 struct mount *mnt; 2873 struct mount *mnt;
2874 bool visible = false;
2818 2875
2819 down_read(&namespace_sem); 2876 if (unlikely(!ns))
2877 return false;
2878
2879 namespace_lock();
2820 list_for_each_entry(mnt, &ns->list, mnt_list) { 2880 list_for_each_entry(mnt, &ns->list, mnt_list) {
2821 switch (mnt->mnt.mnt_sb->s_magic) { 2881 struct mount *child;
2822 case SYSFS_MAGIC: 2882 if (mnt->mnt.mnt_sb->s_type != type)
2823 userns->may_mount_sysfs = true; 2883 continue;
2824 break; 2884
2825 case PROC_SUPER_MAGIC: 2885 /* This mount is not fully visible if there are any child mounts
2826 userns->may_mount_proc = true; 2886 * that cover anything except for empty directories.
2827 break; 2887 */
2888 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
2889 struct inode *inode = child->mnt_mountpoint->d_inode;
2890 if (!S_ISDIR(inode->i_mode))
2891 goto next;
2892 if (inode->i_nlink != 2)
2893 goto next;
2828 } 2894 }
2829 if (userns->may_mount_sysfs && userns->may_mount_proc) 2895 visible = true;
2830 break; 2896 goto found;
2897 next: ;
2831 } 2898 }
2832 up_read(&namespace_sem); 2899found:
2900 namespace_unlock();
2901 return visible;
2833} 2902}
2834 2903
2835static void *mntns_get(struct task_struct *task) 2904static void *mntns_get(struct task_struct *task)
@@ -2860,8 +2929,8 @@ static int mntns_install(struct nsproxy *nsproxy, void *ns)
2860 struct path root; 2929 struct path root;
2861 2930
2862 if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) || 2931 if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
2863 !nsown_capable(CAP_SYS_CHROOT) || 2932 !ns_capable(current_user_ns(), CAP_SYS_CHROOT) ||
2864 !nsown_capable(CAP_SYS_ADMIN)) 2933 !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
2865 return -EPERM; 2934 return -EPERM;
2866 2935
2867 if (fs->users != 1) 2936 if (fs->users != 1)