aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/s390/kernel/compat_linux.c2
-rw-r--r--fs/namespace.c18
-rw-r--r--fs/pnode.c1
-rw-r--r--fs/proc/base.c53
-rw-r--r--include/linux/cred.h1
-rw-r--r--include/linux/user_namespace.h12
-rw-r--r--kernel/groups.c11
-rw-r--r--kernel/uid16.c2
-rw-r--r--kernel/user.c1
-rw-r--r--kernel/user_namespace.c124
-rw-r--r--tools/testing/selftests/mount/unprivileged-remount-test.c204
11 files changed, 374 insertions, 55 deletions
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index ca38139423ae..437e61159279 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -249,7 +249,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis
249 struct group_info *group_info; 249 struct group_info *group_info;
250 int retval; 250 int retval;
251 251
252 if (!capable(CAP_SETGID)) 252 if (!may_setgroups())
253 return -EPERM; 253 return -EPERM;
254 if ((unsigned)gidsetsize > NGROUPS_MAX) 254 if ((unsigned)gidsetsize > NGROUPS_MAX)
255 return -EINVAL; 255 return -EINVAL;
diff --git a/fs/namespace.c b/fs/namespace.c
index 30df6e7dd807..820af6a1dd6b 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -963,7 +963,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
963 } 963 }
964 964
965 /* Don't allow unprivileged users to reveal what is under a mount */ 965 /* Don't allow unprivileged users to reveal what is under a mount */
966 if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire)) 966 if ((flag & CL_UNPRIVILEGED) &&
967 (!(flag & CL_EXPIRE) || list_empty(&old->mnt_expire)))
967 mnt->mnt.mnt_flags |= MNT_LOCKED; 968 mnt->mnt.mnt_flags |= MNT_LOCKED;
968 969
969 atomic_inc(&sb->s_active); 970 atomic_inc(&sb->s_active);
@@ -1544,6 +1545,9 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
1544 goto dput_and_out; 1545 goto dput_and_out;
1545 if (mnt->mnt.mnt_flags & MNT_LOCKED) 1546 if (mnt->mnt.mnt_flags & MNT_LOCKED)
1546 goto dput_and_out; 1547 goto dput_and_out;
1548 retval = -EPERM;
1549 if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
1550 goto dput_and_out;
1547 1551
1548 retval = do_umount(mnt, flags); 1552 retval = do_umount(mnt, flags);
1549dput_and_out: 1553dput_and_out:
@@ -1606,7 +1610,6 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1606 if (IS_ERR(q)) 1610 if (IS_ERR(q))
1607 return q; 1611 return q;
1608 1612
1609 q->mnt.mnt_flags &= ~MNT_LOCKED;
1610 q->mnt_mountpoint = mnt->mnt_mountpoint; 1613 q->mnt_mountpoint = mnt->mnt_mountpoint;
1611 1614
1612 p = mnt; 1615 p = mnt;
@@ -2097,7 +2100,13 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
2097 } 2100 }
2098 if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) && 2101 if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
2099 !(mnt_flags & MNT_NODEV)) { 2102 !(mnt_flags & MNT_NODEV)) {
2100 return -EPERM; 2103 /* Was the nodev implicitly added in mount? */
2104 if ((mnt->mnt_ns->user_ns != &init_user_ns) &&
2105 !(sb->s_type->fs_flags & FS_USERNS_DEV_MOUNT)) {
2106 mnt_flags |= MNT_NODEV;
2107 } else {
2108 return -EPERM;
2109 }
2101 } 2110 }
2102 if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) && 2111 if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
2103 !(mnt_flags & MNT_NOSUID)) { 2112 !(mnt_flags & MNT_NOSUID)) {
@@ -2958,6 +2967,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2958 /* mount new_root on / */ 2967 /* mount new_root on / */
2959 attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp); 2968 attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
2960 touch_mnt_namespace(current->nsproxy->mnt_ns); 2969 touch_mnt_namespace(current->nsproxy->mnt_ns);
2970 /* A moved mount should not expire automatically */
2971 list_del_init(&new_mnt->mnt_expire);
2961 unlock_mount_hash(); 2972 unlock_mount_hash();
2962 chroot_fs_refs(&root, &new); 2973 chroot_fs_refs(&root, &new);
2963 put_mountpoint(root_mp); 2974 put_mountpoint(root_mp);
@@ -3002,6 +3013,7 @@ static void __init init_mount_tree(void)
3002 3013
3003 root.mnt = mnt; 3014 root.mnt = mnt;
3004 root.dentry = mnt->mnt_root; 3015 root.dentry = mnt->mnt_root;
3016 mnt->mnt_flags |= MNT_LOCKED;
3005 3017
3006 set_fs_pwd(current->fs, &root); 3018 set_fs_pwd(current->fs, &root);
3007 set_fs_root(current->fs, &root); 3019 set_fs_root(current->fs, &root);
diff --git a/fs/pnode.c b/fs/pnode.c
index aae331a5d03b..260ac8f898a4 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -242,6 +242,7 @@ static int propagate_one(struct mount *m)
242 child = copy_tree(last_source, last_source->mnt.mnt_root, type); 242 child = copy_tree(last_source, last_source->mnt.mnt_root, type);
243 if (IS_ERR(child)) 243 if (IS_ERR(child))
244 return PTR_ERR(child); 244 return PTR_ERR(child);
245 child->mnt.mnt_flags &= ~MNT_LOCKED;
245 mnt_set_mountpoint(m, mp, child); 246 mnt_set_mountpoint(m, mp, child);
246 last_dest = m; 247 last_dest = m;
247 last_source = child; 248 last_source = child;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 590aeda5af12..3f3d7aeb0712 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2464,6 +2464,57 @@ static const struct file_operations proc_projid_map_operations = {
2464 .llseek = seq_lseek, 2464 .llseek = seq_lseek,
2465 .release = proc_id_map_release, 2465 .release = proc_id_map_release,
2466}; 2466};
2467
2468static int proc_setgroups_open(struct inode *inode, struct file *file)
2469{
2470 struct user_namespace *ns = NULL;
2471 struct task_struct *task;
2472 int ret;
2473
2474 ret = -ESRCH;
2475 task = get_proc_task(inode);
2476 if (task) {
2477 rcu_read_lock();
2478 ns = get_user_ns(task_cred_xxx(task, user_ns));
2479 rcu_read_unlock();
2480 put_task_struct(task);
2481 }
2482 if (!ns)
2483 goto err;
2484
2485 if (file->f_mode & FMODE_WRITE) {
2486 ret = -EACCES;
2487 if (!ns_capable(ns, CAP_SYS_ADMIN))
2488 goto err_put_ns;
2489 }
2490
2491 ret = single_open(file, &proc_setgroups_show, ns);
2492 if (ret)
2493 goto err_put_ns;
2494
2495 return 0;
2496err_put_ns:
2497 put_user_ns(ns);
2498err:
2499 return ret;
2500}
2501
2502static int proc_setgroups_release(struct inode *inode, struct file *file)
2503{
2504 struct seq_file *seq = file->private_data;
2505 struct user_namespace *ns = seq->private;
2506 int ret = single_release(inode, file);
2507 put_user_ns(ns);
2508 return ret;
2509}
2510
2511static const struct file_operations proc_setgroups_operations = {
2512 .open = proc_setgroups_open,
2513 .write = proc_setgroups_write,
2514 .read = seq_read,
2515 .llseek = seq_lseek,
2516 .release = proc_setgroups_release,
2517};
2467#endif /* CONFIG_USER_NS */ 2518#endif /* CONFIG_USER_NS */
2468 2519
2469static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, 2520static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
@@ -2572,6 +2623,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2572 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), 2623 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
2573 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), 2624 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
2574 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), 2625 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
2626 REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
2575#endif 2627#endif
2576#ifdef CONFIG_CHECKPOINT_RESTORE 2628#ifdef CONFIG_CHECKPOINT_RESTORE
2577 REG("timers", S_IRUGO, proc_timers_operations), 2629 REG("timers", S_IRUGO, proc_timers_operations),
@@ -2916,6 +2968,7 @@ static const struct pid_entry tid_base_stuff[] = {
2916 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), 2968 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
2917 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), 2969 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
2918 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), 2970 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
2971 REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
2919#endif 2972#endif
2920}; 2973};
2921 2974
diff --git a/include/linux/cred.h b/include/linux/cred.h
index b2d0820837c4..2fb2ca2127ed 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -68,6 +68,7 @@ extern void groups_free(struct group_info *);
68extern int set_current_groups(struct group_info *); 68extern int set_current_groups(struct group_info *);
69extern void set_groups(struct cred *, struct group_info *); 69extern void set_groups(struct cred *, struct group_info *);
70extern int groups_search(const struct group_info *, kgid_t); 70extern int groups_search(const struct group_info *, kgid_t);
71extern bool may_setgroups(void);
71 72
72/* access the groups "array" with this macro */ 73/* access the groups "array" with this macro */
73#define GROUP_AT(gi, i) \ 74#define GROUP_AT(gi, i) \
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 4cf06c140e21..8297e5b341d8 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -18,6 +18,10 @@ struct uid_gid_map { /* 64 bytes -- 1 cache line */
18 } extent[UID_GID_MAP_MAX_EXTENTS]; 18 } extent[UID_GID_MAP_MAX_EXTENTS];
19}; 19};
20 20
21#define USERNS_SETGROUPS_ALLOWED 1UL
22
23#define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED
24
21struct user_namespace { 25struct user_namespace {
22 struct uid_gid_map uid_map; 26 struct uid_gid_map uid_map;
23 struct uid_gid_map gid_map; 27 struct uid_gid_map gid_map;
@@ -28,6 +32,7 @@ struct user_namespace {
28 kuid_t owner; 32 kuid_t owner;
29 kgid_t group; 33 kgid_t group;
30 struct ns_common ns; 34 struct ns_common ns;
35 unsigned long flags;
31 36
32 /* Register of per-UID persistent keyrings for this namespace */ 37 /* Register of per-UID persistent keyrings for this namespace */
33#ifdef CONFIG_PERSISTENT_KEYRINGS 38#ifdef CONFIG_PERSISTENT_KEYRINGS
@@ -64,6 +69,9 @@ extern const struct seq_operations proc_projid_seq_operations;
64extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *); 69extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *);
65extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *); 70extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *);
66extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *); 71extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *);
72extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *);
73extern int proc_setgroups_show(struct seq_file *m, void *v);
74extern bool userns_may_setgroups(const struct user_namespace *ns);
67#else 75#else
68 76
69static inline struct user_namespace *get_user_ns(struct user_namespace *ns) 77static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
@@ -88,6 +96,10 @@ static inline void put_user_ns(struct user_namespace *ns)
88{ 96{
89} 97}
90 98
99static inline bool userns_may_setgroups(const struct user_namespace *ns)
100{
101 return true;
102}
91#endif 103#endif
92 104
93#endif /* _LINUX_USER_H */ 105#endif /* _LINUX_USER_H */
diff --git a/kernel/groups.c b/kernel/groups.c
index 451698f86cfa..664411f171b5 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -6,6 +6,7 @@
6#include <linux/slab.h> 6#include <linux/slab.h>
7#include <linux/security.h> 7#include <linux/security.h>
8#include <linux/syscalls.h> 8#include <linux/syscalls.h>
9#include <linux/user_namespace.h>
9#include <asm/uaccess.h> 10#include <asm/uaccess.h>
10 11
11/* init to 2 - one for init_task, one to ensure it is never freed */ 12/* init to 2 - one for init_task, one to ensure it is never freed */
@@ -213,6 +214,14 @@ out:
213 return i; 214 return i;
214} 215}
215 216
217bool may_setgroups(void)
218{
219 struct user_namespace *user_ns = current_user_ns();
220
221 return ns_capable(user_ns, CAP_SETGID) &&
222 userns_may_setgroups(user_ns);
223}
224
216/* 225/*
217 * SMP: Our groups are copy-on-write. We can set them safely 226 * SMP: Our groups are copy-on-write. We can set them safely
218 * without another task interfering. 227 * without another task interfering.
@@ -223,7 +232,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
223 struct group_info *group_info; 232 struct group_info *group_info;
224 int retval; 233 int retval;
225 234
226 if (!ns_capable(current_user_ns(), CAP_SETGID)) 235 if (!may_setgroups())
227 return -EPERM; 236 return -EPERM;
228 if ((unsigned)gidsetsize > NGROUPS_MAX) 237 if ((unsigned)gidsetsize > NGROUPS_MAX)
229 return -EINVAL; 238 return -EINVAL;
diff --git a/kernel/uid16.c b/kernel/uid16.c
index 602e5bbbceff..d58cc4d8f0d1 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -176,7 +176,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist)
176 struct group_info *group_info; 176 struct group_info *group_info;
177 int retval; 177 int retval;
178 178
179 if (!ns_capable(current_user_ns(), CAP_SETGID)) 179 if (!may_setgroups())
180 return -EPERM; 180 return -EPERM;
181 if ((unsigned)gidsetsize > NGROUPS_MAX) 181 if ((unsigned)gidsetsize > NGROUPS_MAX)
182 return -EINVAL; 182 return -EINVAL;
diff --git a/kernel/user.c b/kernel/user.c
index 69b800aebf13..b069ccbfb0b0 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -54,6 +54,7 @@ struct user_namespace init_user_ns = {
54#ifdef CONFIG_USER_NS 54#ifdef CONFIG_USER_NS
55 .ns.ops = &userns_operations, 55 .ns.ops = &userns_operations,
56#endif 56#endif
57 .flags = USERNS_INIT_FLAGS,
57#ifdef CONFIG_PERSISTENT_KEYRINGS 58#ifdef CONFIG_PERSISTENT_KEYRINGS
58 .persistent_keyring_register_sem = 59 .persistent_keyring_register_sem =
59 __RWSEM_INITIALIZER(init_user_ns.persistent_keyring_register_sem), 60 __RWSEM_INITIALIZER(init_user_ns.persistent_keyring_register_sem),
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 1491ad00388f..4109f8320684 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -24,6 +24,7 @@
24#include <linux/fs_struct.h> 24#include <linux/fs_struct.h>
25 25
26static struct kmem_cache *user_ns_cachep __read_mostly; 26static struct kmem_cache *user_ns_cachep __read_mostly;
27static DEFINE_MUTEX(userns_state_mutex);
27 28
28static bool new_idmap_permitted(const struct file *file, 29static bool new_idmap_permitted(const struct file *file,
29 struct user_namespace *ns, int cap_setid, 30 struct user_namespace *ns, int cap_setid,
@@ -100,6 +101,11 @@ int create_user_ns(struct cred *new)
100 ns->owner = owner; 101 ns->owner = owner;
101 ns->group = group; 102 ns->group = group;
102 103
104 /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
105 mutex_lock(&userns_state_mutex);
106 ns->flags = parent_ns->flags;
107 mutex_unlock(&userns_state_mutex);
108
103 set_cred_user_ns(new, ns); 109 set_cred_user_ns(new, ns);
104 110
105#ifdef CONFIG_PERSISTENT_KEYRINGS 111#ifdef CONFIG_PERSISTENT_KEYRINGS
@@ -584,9 +590,6 @@ static bool mappings_overlap(struct uid_gid_map *new_map,
584 return false; 590 return false;
585} 591}
586 592
587
588static DEFINE_MUTEX(id_map_mutex);
589
590static ssize_t map_write(struct file *file, const char __user *buf, 593static ssize_t map_write(struct file *file, const char __user *buf,
591 size_t count, loff_t *ppos, 594 size_t count, loff_t *ppos,
592 int cap_setid, 595 int cap_setid,
@@ -603,7 +606,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
603 ssize_t ret = -EINVAL; 606 ssize_t ret = -EINVAL;
604 607
605 /* 608 /*
606 * The id_map_mutex serializes all writes to any given map. 609 * The userns_state_mutex serializes all writes to any given map.
607 * 610 *
608 * Any map is only ever written once. 611 * Any map is only ever written once.
609 * 612 *
@@ -621,7 +624,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
621 * order and smp_rmb() is guaranteed that we don't have crazy 624 * order and smp_rmb() is guaranteed that we don't have crazy
622 * architectures returning stale data. 625 * architectures returning stale data.
623 */ 626 */
624 mutex_lock(&id_map_mutex); 627 mutex_lock(&userns_state_mutex);
625 628
626 ret = -EPERM; 629 ret = -EPERM;
627 /* Only allow one successful write to the map */ 630 /* Only allow one successful write to the map */
@@ -641,7 +644,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
641 if (!page) 644 if (!page)
642 goto out; 645 goto out;
643 646
644 /* Only allow <= page size writes at the beginning of the file */ 647 /* Only allow < page size writes at the beginning of the file */
645 ret = -EINVAL; 648 ret = -EINVAL;
646 if ((*ppos != 0) || (count >= PAGE_SIZE)) 649 if ((*ppos != 0) || (count >= PAGE_SIZE))
647 goto out; 650 goto out;
@@ -751,7 +754,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
751 *ppos = count; 754 *ppos = count;
752 ret = count; 755 ret = count;
753out: 756out:
754 mutex_unlock(&id_map_mutex); 757 mutex_unlock(&userns_state_mutex);
755 if (page) 758 if (page)
756 free_page(page); 759 free_page(page);
757 return ret; 760 return ret;
@@ -813,16 +816,21 @@ static bool new_idmap_permitted(const struct file *file,
813 struct user_namespace *ns, int cap_setid, 816 struct user_namespace *ns, int cap_setid,
814 struct uid_gid_map *new_map) 817 struct uid_gid_map *new_map)
815{ 818{
816 /* Allow mapping to your own filesystem ids */ 819 const struct cred *cred = file->f_cred;
817 if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) { 820 /* Don't allow mappings that would allow anything that wouldn't
821 * be allowed without the establishment of unprivileged mappings.
822 */
823 if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
824 uid_eq(ns->owner, cred->euid)) {
818 u32 id = new_map->extent[0].lower_first; 825 u32 id = new_map->extent[0].lower_first;
819 if (cap_setid == CAP_SETUID) { 826 if (cap_setid == CAP_SETUID) {
820 kuid_t uid = make_kuid(ns->parent, id); 827 kuid_t uid = make_kuid(ns->parent, id);
821 if (uid_eq(uid, file->f_cred->fsuid)) 828 if (uid_eq(uid, cred->euid))
822 return true; 829 return true;
823 } else if (cap_setid == CAP_SETGID) { 830 } else if (cap_setid == CAP_SETGID) {
824 kgid_t gid = make_kgid(ns->parent, id); 831 kgid_t gid = make_kgid(ns->parent, id);
825 if (gid_eq(gid, file->f_cred->fsgid)) 832 if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) &&
833 gid_eq(gid, cred->egid))
826 return true; 834 return true;
827 } 835 }
828 } 836 }
@@ -842,6 +850,100 @@ static bool new_idmap_permitted(const struct file *file,
842 return false; 850 return false;
843} 851}
844 852
853int proc_setgroups_show(struct seq_file *seq, void *v)
854{
855 struct user_namespace *ns = seq->private;
856 unsigned long userns_flags = ACCESS_ONCE(ns->flags);
857
858 seq_printf(seq, "%s\n",
859 (userns_flags & USERNS_SETGROUPS_ALLOWED) ?
860 "allow" : "deny");
861 return 0;
862}
863
864ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
865 size_t count, loff_t *ppos)
866{
867 struct seq_file *seq = file->private_data;
868 struct user_namespace *ns = seq->private;
869 char kbuf[8], *pos;
870 bool setgroups_allowed;
871 ssize_t ret;
872
873 /* Only allow a very narrow range of strings to be written */
874 ret = -EINVAL;
875 if ((*ppos != 0) || (count >= sizeof(kbuf)))
876 goto out;
877
878 /* What was written? */
879 ret = -EFAULT;
880 if (copy_from_user(kbuf, buf, count))
881 goto out;
882 kbuf[count] = '\0';
883 pos = kbuf;
884
885 /* What is being requested? */
886 ret = -EINVAL;
887 if (strncmp(pos, "allow", 5) == 0) {
888 pos += 5;
889 setgroups_allowed = true;
890 }
891 else if (strncmp(pos, "deny", 4) == 0) {
892 pos += 4;
893 setgroups_allowed = false;
894 }
895 else
896 goto out;
897
898 /* Verify there is not trailing junk on the line */
899 pos = skip_spaces(pos);
900 if (*pos != '\0')
901 goto out;
902
903 ret = -EPERM;
904 mutex_lock(&userns_state_mutex);
905 if (setgroups_allowed) {
906 /* Enabling setgroups after setgroups has been disabled
907 * is not allowed.
908 */
909 if (!(ns->flags & USERNS_SETGROUPS_ALLOWED))
910 goto out_unlock;
911 } else {
912 /* Permanently disabling setgroups after setgroups has
913 * been enabled by writing the gid_map is not allowed.
914 */
915 if (ns->gid_map.nr_extents != 0)
916 goto out_unlock;
917 ns->flags &= ~USERNS_SETGROUPS_ALLOWED;
918 }
919 mutex_unlock(&userns_state_mutex);
920
921 /* Report a successful write */
922 *ppos = count;
923 ret = count;
924out:
925 return ret;
926out_unlock:
927 mutex_unlock(&userns_state_mutex);
928 goto out;
929}
930
931bool userns_may_setgroups(const struct user_namespace *ns)
932{
933 bool allowed;
934
935 mutex_lock(&userns_state_mutex);
936 /* It is not safe to use setgroups until a gid mapping in
937 * the user namespace has been established.
938 */
939 allowed = ns->gid_map.nr_extents != 0;
940 /* Is setgroups allowed? */
941 allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED);
942 mutex_unlock(&userns_state_mutex);
943
944 return allowed;
945}
946
845static inline struct user_namespace *to_user_ns(struct ns_common *ns) 947static inline struct user_namespace *to_user_ns(struct ns_common *ns)
846{ 948{
847 return container_of(ns, struct user_namespace, ns); 949 return container_of(ns, struct user_namespace, ns);
diff --git a/tools/testing/selftests/mount/unprivileged-remount-test.c b/tools/testing/selftests/mount/unprivileged-remount-test.c
index 1b3ff2fda4d0..517785052f1c 100644
--- a/tools/testing/selftests/mount/unprivileged-remount-test.c
+++ b/tools/testing/selftests/mount/unprivileged-remount-test.c
@@ -6,6 +6,8 @@
6#include <sys/types.h> 6#include <sys/types.h>
7#include <sys/mount.h> 7#include <sys/mount.h>
8#include <sys/wait.h> 8#include <sys/wait.h>
9#include <sys/vfs.h>
10#include <sys/statvfs.h>
9#include <stdlib.h> 11#include <stdlib.h>
10#include <unistd.h> 12#include <unistd.h>
11#include <fcntl.h> 13#include <fcntl.h>
@@ -32,11 +34,14 @@
32# define CLONE_NEWPID 0x20000000 34# define CLONE_NEWPID 0x20000000
33#endif 35#endif
34 36
37#ifndef MS_REC
38# define MS_REC 16384
39#endif
35#ifndef MS_RELATIME 40#ifndef MS_RELATIME
36#define MS_RELATIME (1 << 21) 41# define MS_RELATIME (1 << 21)
37#endif 42#endif
38#ifndef MS_STRICTATIME 43#ifndef MS_STRICTATIME
39#define MS_STRICTATIME (1 << 24) 44# define MS_STRICTATIME (1 << 24)
40#endif 45#endif
41 46
42static void die(char *fmt, ...) 47static void die(char *fmt, ...)
@@ -48,17 +53,14 @@ static void die(char *fmt, ...)
48 exit(EXIT_FAILURE); 53 exit(EXIT_FAILURE);
49} 54}
50 55
51static void write_file(char *filename, char *fmt, ...) 56static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap)
52{ 57{
53 char buf[4096]; 58 char buf[4096];
54 int fd; 59 int fd;
55 ssize_t written; 60 ssize_t written;
56 int buf_len; 61 int buf_len;
57 va_list ap;
58 62
59 va_start(ap, fmt);
60 buf_len = vsnprintf(buf, sizeof(buf), fmt, ap); 63 buf_len = vsnprintf(buf, sizeof(buf), fmt, ap);
61 va_end(ap);
62 if (buf_len < 0) { 64 if (buf_len < 0) {
63 die("vsnprintf failed: %s\n", 65 die("vsnprintf failed: %s\n",
64 strerror(errno)); 66 strerror(errno));
@@ -69,6 +71,8 @@ static void write_file(char *filename, char *fmt, ...)
69 71
70 fd = open(filename, O_WRONLY); 72 fd = open(filename, O_WRONLY);
71 if (fd < 0) { 73 if (fd < 0) {
74 if ((errno == ENOENT) && enoent_ok)
75 return;
72 die("open of %s failed: %s\n", 76 die("open of %s failed: %s\n",
73 filename, strerror(errno)); 77 filename, strerror(errno));
74 } 78 }
@@ -87,6 +91,65 @@ static void write_file(char *filename, char *fmt, ...)
87 } 91 }
88} 92}
89 93
94static void maybe_write_file(char *filename, char *fmt, ...)
95{
96 va_list ap;
97
98 va_start(ap, fmt);
99 vmaybe_write_file(true, filename, fmt, ap);
100 va_end(ap);
101
102}
103
104static void write_file(char *filename, char *fmt, ...)
105{
106 va_list ap;
107
108 va_start(ap, fmt);
109 vmaybe_write_file(false, filename, fmt, ap);
110 va_end(ap);
111
112}
113
114static int read_mnt_flags(const char *path)
115{
116 int ret;
117 struct statvfs stat;
118 int mnt_flags;
119
120 ret = statvfs(path, &stat);
121 if (ret != 0) {
122 die("statvfs of %s failed: %s\n",
123 path, strerror(errno));
124 }
125 if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | \
126 ST_NOEXEC | ST_NOATIME | ST_NODIRATIME | ST_RELATIME | \
127 ST_SYNCHRONOUS | ST_MANDLOCK)) {
128 die("Unrecognized mount flags\n");
129 }
130 mnt_flags = 0;
131 if (stat.f_flag & ST_RDONLY)
132 mnt_flags |= MS_RDONLY;
133 if (stat.f_flag & ST_NOSUID)
134 mnt_flags |= MS_NOSUID;
135 if (stat.f_flag & ST_NODEV)
136 mnt_flags |= MS_NODEV;
137 if (stat.f_flag & ST_NOEXEC)
138 mnt_flags |= MS_NOEXEC;
139 if (stat.f_flag & ST_NOATIME)
140 mnt_flags |= MS_NOATIME;
141 if (stat.f_flag & ST_NODIRATIME)
142 mnt_flags |= MS_NODIRATIME;
143 if (stat.f_flag & ST_RELATIME)
144 mnt_flags |= MS_RELATIME;
145 if (stat.f_flag & ST_SYNCHRONOUS)
146 mnt_flags |= MS_SYNCHRONOUS;
147 if (stat.f_flag & ST_MANDLOCK)
148 mnt_flags |= ST_MANDLOCK;
149
150 return mnt_flags;
151}
152
90static void create_and_enter_userns(void) 153static void create_and_enter_userns(void)
91{ 154{
92 uid_t uid; 155 uid_t uid;
@@ -100,13 +163,10 @@ static void create_and_enter_userns(void)
100 strerror(errno)); 163 strerror(errno));
101 } 164 }
102 165
166 maybe_write_file("/proc/self/setgroups", "deny");
103 write_file("/proc/self/uid_map", "0 %d 1", uid); 167 write_file("/proc/self/uid_map", "0 %d 1", uid);
104 write_file("/proc/self/gid_map", "0 %d 1", gid); 168 write_file("/proc/self/gid_map", "0 %d 1", gid);
105 169
106 if (setgroups(0, NULL) != 0) {
107 die("setgroups failed: %s\n",
108 strerror(errno));
109 }
110 if (setgid(0) != 0) { 170 if (setgid(0) != 0) {
111 die ("setgid(0) failed %s\n", 171 die ("setgid(0) failed %s\n",
112 strerror(errno)); 172 strerror(errno));
@@ -118,7 +178,8 @@ static void create_and_enter_userns(void)
118} 178}
119 179
120static 180static
121bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags) 181bool test_unpriv_remount(const char *fstype, const char *mount_options,
182 int mount_flags, int remount_flags, int invalid_flags)
122{ 183{
123 pid_t child; 184 pid_t child;
124 185
@@ -151,9 +212,11 @@ bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags)
151 strerror(errno)); 212 strerror(errno));
152 } 213 }
153 214
154 if (mount("testing", "/tmp", "ramfs", mount_flags, NULL) != 0) { 215 if (mount("testing", "/tmp", fstype, mount_flags, mount_options) != 0) {
155 die("mount of /tmp failed: %s\n", 216 die("mount of %s with options '%s' on /tmp failed: %s\n",
156 strerror(errno)); 217 fstype,
218 mount_options? mount_options : "",
219 strerror(errno));
157 } 220 }
158 221
159 create_and_enter_userns(); 222 create_and_enter_userns();
@@ -181,62 +244,127 @@ bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags)
181 244
182static bool test_unpriv_remount_simple(int mount_flags) 245static bool test_unpriv_remount_simple(int mount_flags)
183{ 246{
184 return test_unpriv_remount(mount_flags, mount_flags, 0); 247 return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags, 0);
185} 248}
186 249
187static bool test_unpriv_remount_atime(int mount_flags, int invalid_flags) 250static bool test_unpriv_remount_atime(int mount_flags, int invalid_flags)
188{ 251{
189 return test_unpriv_remount(mount_flags, mount_flags, invalid_flags); 252 return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags,
253 invalid_flags);
254}
255
256static bool test_priv_mount_unpriv_remount(void)
257{
258 pid_t child;
259 int ret;
260 const char *orig_path = "/dev";
261 const char *dest_path = "/tmp";
262 int orig_mnt_flags, remount_mnt_flags;
263
264 child = fork();
265 if (child == -1) {
266 die("fork failed: %s\n",
267 strerror(errno));
268 }
269 if (child != 0) { /* parent */
270 pid_t pid;
271 int status;
272 pid = waitpid(child, &status, 0);
273 if (pid == -1) {
274 die("waitpid failed: %s\n",
275 strerror(errno));
276 }
277 if (pid != child) {
278 die("waited for %d got %d\n",
279 child, pid);
280 }
281 if (!WIFEXITED(status)) {
282 die("child did not terminate cleanly\n");
283 }
284 return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
285 }
286
287 orig_mnt_flags = read_mnt_flags(orig_path);
288
289 create_and_enter_userns();
290 ret = unshare(CLONE_NEWNS);
291 if (ret != 0) {
292 die("unshare(CLONE_NEWNS) failed: %s\n",
293 strerror(errno));
294 }
295
296 ret = mount(orig_path, dest_path, "bind", MS_BIND | MS_REC, NULL);
297 if (ret != 0) {
298 die("recursive bind mount of %s onto %s failed: %s\n",
299 orig_path, dest_path, strerror(errno));
300 }
301
302 ret = mount(dest_path, dest_path, "none",
303 MS_REMOUNT | MS_BIND | orig_mnt_flags , NULL);
304 if (ret != 0) {
305 /* system("cat /proc/self/mounts"); */
306 die("remount of /tmp failed: %s\n",
307 strerror(errno));
308 }
309
310 remount_mnt_flags = read_mnt_flags(dest_path);
311 if (orig_mnt_flags != remount_mnt_flags) {
312 die("Mount flags unexpectedly changed during remount of %s originally mounted on %s\n",
313 dest_path, orig_path);
314 }
315 exit(EXIT_SUCCESS);
190} 316}
191 317
192int main(int argc, char **argv) 318int main(int argc, char **argv)
193{ 319{
194 if (!test_unpriv_remount_simple(MS_RDONLY|MS_NODEV)) { 320 if (!test_unpriv_remount_simple(MS_RDONLY)) {
195 die("MS_RDONLY malfunctions\n"); 321 die("MS_RDONLY malfunctions\n");
196 } 322 }
197 if (!test_unpriv_remount_simple(MS_NODEV)) { 323 if (!test_unpriv_remount("devpts", "newinstance", MS_NODEV, MS_NODEV, 0)) {
198 die("MS_NODEV malfunctions\n"); 324 die("MS_NODEV malfunctions\n");
199 } 325 }
200 if (!test_unpriv_remount_simple(MS_NOSUID|MS_NODEV)) { 326 if (!test_unpriv_remount_simple(MS_NOSUID)) {
201 die("MS_NOSUID malfunctions\n"); 327 die("MS_NOSUID malfunctions\n");
202 } 328 }
203 if (!test_unpriv_remount_simple(MS_NOEXEC|MS_NODEV)) { 329 if (!test_unpriv_remount_simple(MS_NOEXEC)) {
204 die("MS_NOEXEC malfunctions\n"); 330 die("MS_NOEXEC malfunctions\n");
205 } 331 }
206 if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODEV, 332 if (!test_unpriv_remount_atime(MS_RELATIME,
207 MS_NOATIME|MS_NODEV)) 333 MS_NOATIME))
208 { 334 {
209 die("MS_RELATIME malfunctions\n"); 335 die("MS_RELATIME malfunctions\n");
210 } 336 }
211 if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODEV, 337 if (!test_unpriv_remount_atime(MS_STRICTATIME,
212 MS_NOATIME|MS_NODEV)) 338 MS_NOATIME))
213 { 339 {
214 die("MS_STRICTATIME malfunctions\n"); 340 die("MS_STRICTATIME malfunctions\n");
215 } 341 }
216 if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODEV, 342 if (!test_unpriv_remount_atime(MS_NOATIME,
217 MS_STRICTATIME|MS_NODEV)) 343 MS_STRICTATIME))
218 { 344 {
219 die("MS_RELATIME malfunctions\n"); 345 die("MS_NOATIME malfunctions\n");
220 } 346 }
221 if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME|MS_NODEV, 347 if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME,
222 MS_NOATIME|MS_NODEV)) 348 MS_NOATIME))
223 { 349 {
224 die("MS_RELATIME malfunctions\n"); 350 die("MS_RELATIME|MS_NODIRATIME malfunctions\n");
225 } 351 }
226 if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME|MS_NODEV, 352 if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME,
227 MS_NOATIME|MS_NODEV)) 353 MS_NOATIME))
228 { 354 {
229 die("MS_RELATIME malfunctions\n"); 355 die("MS_STRICTATIME|MS_NODIRATIME malfunctions\n");
230 } 356 }
231 if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME|MS_NODEV, 357 if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME,
232 MS_STRICTATIME|MS_NODEV)) 358 MS_STRICTATIME))
233 { 359 {
234 die("MS_RELATIME malfunctions\n"); 360 die("MS_NOATIME|MS_DIRATIME malfunctions\n");
235 } 361 }
236 if (!test_unpriv_remount(MS_STRICTATIME|MS_NODEV, MS_NODEV, 362 if (!test_unpriv_remount("ramfs", NULL, MS_STRICTATIME, 0, MS_NOATIME))
237 MS_NOATIME|MS_NODEV))
238 { 363 {
239 die("Default atime malfunctions\n"); 364 die("Default atime malfunctions\n");
240 } 365 }
366 if (!test_priv_mount_unpriv_remount()) {
367 die("Mount flags unexpectedly changed after remount\n");
368 }
241 return EXIT_SUCCESS; 369 return EXIT_SUCCESS;
242} 370}