aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-12-17 15:31:40 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2014-12-17 15:31:40 -0500
commit87c31b39abcb6fb6bd7d111200c9627a594bf6a9 (patch)
treeab2e5331fea9b823cb92719d0954a9141451c931 /fs
parentf045bbb9fa1bf6f507ad4de12d4e3471d8f672f1 (diff)
parentdb86da7cb76f797a1a8b445166a15cb922c6ff85 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull user namespace related fixes from Eric Biederman: "As these are bug fixes almost all of these changes are marked for backporting to stable. The first change (implicitly adding MNT_NODEV on remount) addresses a regression that was created when security issues with unprivileged remount were closed. I go on to update the remount test to make it easy to detect if this issue reoccurs. Then there are a handful of mount and umount related fixes. Then half of the changes deal with a recently discovered design bug in the permission checks of gid_map. Unix since the beginning has allowed setting group permissions on files to less than the user and other permissions (aka ---rwx---rwx). As the unix permission checks stop as soon as a group matches, and setgroups allows setting groups that can not later be dropped, this results in a situation where it is possible to legitimately use a group to assign fewer privileges to a process. Which means dropping a group can increase a process's privileges. The fix I have adopted is that gid_map is now no longer writable without privilege unless the new file /proc/self/setgroups has been set to permanently disable setgroups. The bulk of user namespace using applications even the applications using user namespaces without privilege remain unaffected by this change. Unfortunately this fix breaks a couple user space applications, that were relying on the problematic behavior (one of which was tools/selftests/mount/unprivileged-remount-test.c). To hopefully prevent needing a regression fix on top of my security fix I rounded up folks who work with the container implementations most likely to be affected and encouraged them to test the changes. > So far nothing broke on my libvirt-lxc test bed. :-) > Tested with openSUSE 13.2 and libvirt 1.2.9. > Tested-by: Richard Weinberger <richard@nod.at> > Tested on Fedora20 with libvirt 1.2.11, works fine. 
> Tested-by: Chen Hanxiao <chenhanxiao@cn.fujitsu.com> > Ok, thanks - yes, unprivileged lxc is working fine with your kernels. > Just to be sure I was testing the right thing I also tested using > my unprivileged nsexec testcases, and they failed on setgroup/setgid > as now expected, and succeeded there without your patches. > Tested-by: Serge Hallyn <serge.hallyn@ubuntu.com> > I tested this with Sandstorm. It breaks as is and it works if I add > the setgroups thing. > Tested-by: Andy Lutomirski <luto@amacapital.net> # breaks things as designed :(" * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: userns: Unbreak the unprivileged remount tests userns; Correct the comment in map_write userns: Allow setting gid_maps without privilege when setgroups is disabled userns: Add a knob to disable setgroups on a per user namespace basis userns: Rename id_map_mutex to userns_state_mutex userns: Only allow the creator of the userns unprivileged mappings userns: Check euid no fsuid when establishing an unprivileged uid mapping userns: Don't allow unprivileged creation of gid mappings userns: Don't allow setgroups until a gid mapping has been setablished userns: Document what the invariant required for safe unprivileged mappings. groups: Consolidate the setgroups permission checks mnt: Clear mnt_expire during pivot_root mnt: Carefully set CL_UNPRIVILEGED in clone_mnt mnt: Move the clear of MNT_LOCKED from copy_tree to it's callers. umount: Do not allow unmounting rootfs. umount: Disallow unprivileged mount force mnt: Update unprivileged remount test mnt: Implicitly add MNT_NODEV on remount when it was implicitly added by mount
Diffstat (limited to 'fs')
-rw-r--r--fs/namespace.c18
-rw-r--r--fs/pnode.c1
-rw-r--r--fs/proc/base.c53
3 files changed, 69 insertions, 3 deletions
diff --git a/fs/namespace.c b/fs/namespace.c
index 30df6e7dd807..820af6a1dd6b 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -963,7 +963,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
963 } 963 }
964 964
965 /* Don't allow unprivileged users to reveal what is under a mount */ 965 /* Don't allow unprivileged users to reveal what is under a mount */
966 if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire)) 966 if ((flag & CL_UNPRIVILEGED) &&
967 (!(flag & CL_EXPIRE) || list_empty(&old->mnt_expire)))
967 mnt->mnt.mnt_flags |= MNT_LOCKED; 968 mnt->mnt.mnt_flags |= MNT_LOCKED;
968 969
969 atomic_inc(&sb->s_active); 970 atomic_inc(&sb->s_active);
@@ -1544,6 +1545,9 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
1544 goto dput_and_out; 1545 goto dput_and_out;
1545 if (mnt->mnt.mnt_flags & MNT_LOCKED) 1546 if (mnt->mnt.mnt_flags & MNT_LOCKED)
1546 goto dput_and_out; 1547 goto dput_and_out;
1548 retval = -EPERM;
1549 if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
1550 goto dput_and_out;
1547 1551
1548 retval = do_umount(mnt, flags); 1552 retval = do_umount(mnt, flags);
1549dput_and_out: 1553dput_and_out:
@@ -1606,7 +1610,6 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1606 if (IS_ERR(q)) 1610 if (IS_ERR(q))
1607 return q; 1611 return q;
1608 1612
1609 q->mnt.mnt_flags &= ~MNT_LOCKED;
1610 q->mnt_mountpoint = mnt->mnt_mountpoint; 1613 q->mnt_mountpoint = mnt->mnt_mountpoint;
1611 1614
1612 p = mnt; 1615 p = mnt;
@@ -2097,7 +2100,13 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
2097 } 2100 }
2098 if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) && 2101 if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
2099 !(mnt_flags & MNT_NODEV)) { 2102 !(mnt_flags & MNT_NODEV)) {
2100 return -EPERM; 2103 /* Was the nodev implicitly added in mount? */
2104 if ((mnt->mnt_ns->user_ns != &init_user_ns) &&
2105 !(sb->s_type->fs_flags & FS_USERNS_DEV_MOUNT)) {
2106 mnt_flags |= MNT_NODEV;
2107 } else {
2108 return -EPERM;
2109 }
2101 } 2110 }
2102 if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) && 2111 if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
2103 !(mnt_flags & MNT_NOSUID)) { 2112 !(mnt_flags & MNT_NOSUID)) {
@@ -2958,6 +2967,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2958 /* mount new_root on / */ 2967 /* mount new_root on / */
2959 attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp); 2968 attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
2960 touch_mnt_namespace(current->nsproxy->mnt_ns); 2969 touch_mnt_namespace(current->nsproxy->mnt_ns);
2970 /* A moved mount should not expire automatically */
2971 list_del_init(&new_mnt->mnt_expire);
2961 unlock_mount_hash(); 2972 unlock_mount_hash();
2962 chroot_fs_refs(&root, &new); 2973 chroot_fs_refs(&root, &new);
2963 put_mountpoint(root_mp); 2974 put_mountpoint(root_mp);
@@ -3002,6 +3013,7 @@ static void __init init_mount_tree(void)
3002 3013
3003 root.mnt = mnt; 3014 root.mnt = mnt;
3004 root.dentry = mnt->mnt_root; 3015 root.dentry = mnt->mnt_root;
3016 mnt->mnt_flags |= MNT_LOCKED;
3005 3017
3006 set_fs_pwd(current->fs, &root); 3018 set_fs_pwd(current->fs, &root);
3007 set_fs_root(current->fs, &root); 3019 set_fs_root(current->fs, &root);
diff --git a/fs/pnode.c b/fs/pnode.c
index aae331a5d03b..260ac8f898a4 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -242,6 +242,7 @@ static int propagate_one(struct mount *m)
242 child = copy_tree(last_source, last_source->mnt.mnt_root, type); 242 child = copy_tree(last_source, last_source->mnt.mnt_root, type);
243 if (IS_ERR(child)) 243 if (IS_ERR(child))
244 return PTR_ERR(child); 244 return PTR_ERR(child);
245 child->mnt.mnt_flags &= ~MNT_LOCKED;
245 mnt_set_mountpoint(m, mp, child); 246 mnt_set_mountpoint(m, mp, child);
246 last_dest = m; 247 last_dest = m;
247 last_source = child; 248 last_source = child;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 590aeda5af12..3f3d7aeb0712 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2464,6 +2464,57 @@ static const struct file_operations proc_projid_map_operations = {
2464 .llseek = seq_lseek, 2464 .llseek = seq_lseek,
2465 .release = proc_id_map_release, 2465 .release = proc_id_map_release,
2466}; 2466};
2467
2468static int proc_setgroups_open(struct inode *inode, struct file *file)
2469{
2470 struct user_namespace *ns = NULL;
2471 struct task_struct *task;
2472 int ret;
2473
2474 ret = -ESRCH;
2475 task = get_proc_task(inode);
2476 if (task) {
2477 rcu_read_lock();
2478 ns = get_user_ns(task_cred_xxx(task, user_ns));
2479 rcu_read_unlock();
2480 put_task_struct(task);
2481 }
2482 if (!ns)
2483 goto err;
2484
2485 if (file->f_mode & FMODE_WRITE) {
2486 ret = -EACCES;
2487 if (!ns_capable(ns, CAP_SYS_ADMIN))
2488 goto err_put_ns;
2489 }
2490
2491 ret = single_open(file, &proc_setgroups_show, ns);
2492 if (ret)
2493 goto err_put_ns;
2494
2495 return 0;
2496err_put_ns:
2497 put_user_ns(ns);
2498err:
2499 return ret;
2500}
2501
2502static int proc_setgroups_release(struct inode *inode, struct file *file)
2503{
2504 struct seq_file *seq = file->private_data;
2505 struct user_namespace *ns = seq->private;
2506 int ret = single_release(inode, file);
2507 put_user_ns(ns);
2508 return ret;
2509}
2510
2511static const struct file_operations proc_setgroups_operations = {
2512 .open = proc_setgroups_open,
2513 .write = proc_setgroups_write,
2514 .read = seq_read,
2515 .llseek = seq_lseek,
2516 .release = proc_setgroups_release,
2517};
2467#endif /* CONFIG_USER_NS */ 2518#endif /* CONFIG_USER_NS */
2468 2519
2469static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, 2520static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
@@ -2572,6 +2623,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2572 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), 2623 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
2573 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), 2624 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
2574 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), 2625 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
2626 REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
2575#endif 2627#endif
2576#ifdef CONFIG_CHECKPOINT_RESTORE 2628#ifdef CONFIG_CHECKPOINT_RESTORE
2577 REG("timers", S_IRUGO, proc_timers_operations), 2629 REG("timers", S_IRUGO, proc_timers_operations),
@@ -2916,6 +2968,7 @@ static const struct pid_entry tid_base_stuff[] = {
2916 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), 2968 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
2917 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), 2969 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
2918 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), 2970 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
2971 REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
2919#endif 2972#endif
2920}; 2973};
2921 2974