aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-03-28 16:43:46 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-03-28 16:43:46 -0400
commit2c3de1c2d7d68c6ba4c1ecd82c68285f34d9609e (patch)
tree6a09ce761173a966718f9009514dcc90bd9947b7
parent9064171268d838b8f283fe111ef086b9479d059a (diff)
parent87a8ebd637dafc255070f503909a053cf0d98d3f (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull userns fixes from Eric W Biederman: "The bulk of the changes are fixing the worst consequences of the user namespace design oversight in not considering what happens when one namespace starts off as a clone of another namespace, as happens with the mount namespace. The rest of the changes are just plain bug fixes. Many thanks to Andy Lutomirski for pointing out many of these issues." * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: userns: Restrict when proc and sysfs can be mounted ipc: Restrict mounting the mqueue filesystem vfs: Carefully propogate mounts across user namespaces vfs: Add a mount flag to lock read only bind mounts userns: Don't allow creation if the user is chrooted yama: Better permission check for ptraceme pid: Handle the exit of a multi-threaded init. scm: Require CAP_SYS_ADMIN over the current pidns to spoof pids.
-rw-r--r--fs/namespace.c54
-rw-r--r--fs/pnode.c6
-rw-r--r--fs/pnode.h1
-rw-r--r--fs/proc/root.c4
-rw-r--r--fs/sysfs/mount.c4
-rw-r--r--include/linux/fs_struct.h2
-rw-r--r--include/linux/mount.h2
-rw-r--r--include/linux/user_namespace.h4
-rw-r--r--ipc/mqueue.c12
-rw-r--r--kernel/pid_namespace.c3
-rw-r--r--kernel/user.c2
-rw-r--r--kernel/user_namespace.c11
-rw-r--r--net/core/scm.c4
-rw-r--r--security/yama/yama_lsm.c4
14 files changed, 105 insertions, 8 deletions
diff --git a/fs/namespace.c b/fs/namespace.c
index 50ca17d3cb45..d581e45c0a9f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -798,6 +798,10 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
798 } 798 }
799 799
800 mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD; 800 mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
801 /* Don't allow unprivileged users to change mount flags */
802 if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
803 mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
804
801 atomic_inc(&sb->s_active); 805 atomic_inc(&sb->s_active);
802 mnt->mnt.mnt_sb = sb; 806 mnt->mnt.mnt_sb = sb;
803 mnt->mnt.mnt_root = dget(root); 807 mnt->mnt.mnt_root = dget(root);
@@ -1713,6 +1717,9 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
1713 if (readonly_request == __mnt_is_readonly(mnt)) 1717 if (readonly_request == __mnt_is_readonly(mnt))
1714 return 0; 1718 return 0;
1715 1719
1720 if (mnt->mnt_flags & MNT_LOCK_READONLY)
1721 return -EPERM;
1722
1716 if (readonly_request) 1723 if (readonly_request)
1717 error = mnt_make_readonly(real_mount(mnt)); 1724 error = mnt_make_readonly(real_mount(mnt));
1718 else 1725 else
@@ -2339,7 +2346,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2339 /* First pass: copy the tree topology */ 2346 /* First pass: copy the tree topology */
2340 copy_flags = CL_COPY_ALL | CL_EXPIRE; 2347 copy_flags = CL_COPY_ALL | CL_EXPIRE;
2341 if (user_ns != mnt_ns->user_ns) 2348 if (user_ns != mnt_ns->user_ns)
2342 copy_flags |= CL_SHARED_TO_SLAVE; 2349 copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
2343 new = copy_tree(old, old->mnt.mnt_root, copy_flags); 2350 new = copy_tree(old, old->mnt.mnt_root, copy_flags);
2344 if (IS_ERR(new)) { 2351 if (IS_ERR(new)) {
2345 up_write(&namespace_sem); 2352 up_write(&namespace_sem);
@@ -2732,6 +2739,51 @@ bool our_mnt(struct vfsmount *mnt)
2732 return check_mnt(real_mount(mnt)); 2739 return check_mnt(real_mount(mnt));
2733} 2740}
2734 2741
2742bool current_chrooted(void)
2743{
2744 /* Does the current process have a non-standard root */
2745 struct path ns_root;
2746 struct path fs_root;
2747 bool chrooted;
2748
2749 /* Find the namespace root */
2750 ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt;
2751 ns_root.dentry = ns_root.mnt->mnt_root;
2752 path_get(&ns_root);
2753 while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
2754 ;
2755
2756 get_fs_root(current->fs, &fs_root);
2757
2758 chrooted = !path_equal(&fs_root, &ns_root);
2759
2760 path_put(&fs_root);
2761 path_put(&ns_root);
2762
2763 return chrooted;
2764}
2765
2766void update_mnt_policy(struct user_namespace *userns)
2767{
2768 struct mnt_namespace *ns = current->nsproxy->mnt_ns;
2769 struct mount *mnt;
2770
2771 down_read(&namespace_sem);
2772 list_for_each_entry(mnt, &ns->list, mnt_list) {
2773 switch (mnt->mnt.mnt_sb->s_magic) {
2774 case SYSFS_MAGIC:
2775 userns->may_mount_sysfs = true;
2776 break;
2777 case PROC_SUPER_MAGIC:
2778 userns->may_mount_proc = true;
2779 break;
2780 }
2781 if (userns->may_mount_sysfs && userns->may_mount_proc)
2782 break;
2783 }
2784 up_read(&namespace_sem);
2785}
2786
2735static void *mntns_get(struct task_struct *task) 2787static void *mntns_get(struct task_struct *task)
2736{ 2788{
2737 struct mnt_namespace *ns = NULL; 2789 struct mnt_namespace *ns = NULL;
diff --git a/fs/pnode.c b/fs/pnode.c
index 3e000a51ac0d..8b29d2164da6 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -9,6 +9,7 @@
9#include <linux/mnt_namespace.h> 9#include <linux/mnt_namespace.h>
10#include <linux/mount.h> 10#include <linux/mount.h>
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/nsproxy.h>
12#include "internal.h" 13#include "internal.h"
13#include "pnode.h" 14#include "pnode.h"
14 15
@@ -220,6 +221,7 @@ static struct mount *get_source(struct mount *dest,
220int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, 221int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
221 struct mount *source_mnt, struct list_head *tree_list) 222 struct mount *source_mnt, struct list_head *tree_list)
222{ 223{
224 struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
223 struct mount *m, *child; 225 struct mount *m, *child;
224 int ret = 0; 226 int ret = 0;
225 struct mount *prev_dest_mnt = dest_mnt; 227 struct mount *prev_dest_mnt = dest_mnt;
@@ -237,6 +239,10 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
237 239
238 source = get_source(m, prev_dest_mnt, prev_src_mnt, &type); 240 source = get_source(m, prev_dest_mnt, prev_src_mnt, &type);
239 241
242 /* Notice when we are propagating across user namespaces */
243 if (m->mnt_ns->user_ns != user_ns)
244 type |= CL_UNPRIVILEGED;
245
240 child = copy_tree(source, source->mnt.mnt_root, type); 246 child = copy_tree(source, source->mnt.mnt_root, type);
241 if (IS_ERR(child)) { 247 if (IS_ERR(child)) {
242 ret = PTR_ERR(child); 248 ret = PTR_ERR(child);
diff --git a/fs/pnode.h b/fs/pnode.h
index 19b853a3445c..a0493d5ebfbf 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -23,6 +23,7 @@
23#define CL_MAKE_SHARED 0x08 23#define CL_MAKE_SHARED 0x08
24#define CL_PRIVATE 0x10 24#define CL_PRIVATE 0x10
25#define CL_SHARED_TO_SLAVE 0x20 25#define CL_SHARED_TO_SLAVE 0x20
26#define CL_UNPRIVILEGED 0x40
26 27
27static inline void set_mnt_shared(struct mount *mnt) 28static inline void set_mnt_shared(struct mount *mnt)
28{ 29{
diff --git a/fs/proc/root.c b/fs/proc/root.c
index c6e9fac26bac..9c7fab1d23f0 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -16,6 +16,7 @@
16#include <linux/sched.h> 16#include <linux/sched.h>
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/bitops.h> 18#include <linux/bitops.h>
19#include <linux/user_namespace.h>
19#include <linux/mount.h> 20#include <linux/mount.h>
20#include <linux/pid_namespace.h> 21#include <linux/pid_namespace.h>
21#include <linux/parser.h> 22#include <linux/parser.h>
@@ -108,6 +109,9 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
108 } else { 109 } else {
109 ns = task_active_pid_ns(current); 110 ns = task_active_pid_ns(current);
110 options = data; 111 options = data;
112
113 if (!current_user_ns()->may_mount_proc)
114 return ERR_PTR(-EPERM);
111 } 115 }
112 116
113 sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns); 117 sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns);
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 8d924b5ec733..afd83273e6ce 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -19,6 +19,7 @@
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/magic.h> 20#include <linux/magic.h>
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <linux/user_namespace.h>
22 23
23#include "sysfs.h" 24#include "sysfs.h"
24 25
@@ -111,6 +112,9 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
111 struct super_block *sb; 112 struct super_block *sb;
112 int error; 113 int error;
113 114
115 if (!(flags & MS_KERNMOUNT) && !current_user_ns()->may_mount_sysfs)
116 return ERR_PTR(-EPERM);
117
114 info = kzalloc(sizeof(*info), GFP_KERNEL); 118 info = kzalloc(sizeof(*info), GFP_KERNEL);
115 if (!info) 119 if (!info)
116 return ERR_PTR(-ENOMEM); 120 return ERR_PTR(-ENOMEM);
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index 729eded4b24f..2b93a9a5a1e6 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -50,4 +50,6 @@ static inline void get_fs_root_and_pwd(struct fs_struct *fs, struct path *root,
50 spin_unlock(&fs->lock); 50 spin_unlock(&fs->lock);
51} 51}
52 52
53extern bool current_chrooted(void);
54
53#endif /* _LINUX_FS_STRUCT_H */ 55#endif /* _LINUX_FS_STRUCT_H */
diff --git a/include/linux/mount.h b/include/linux/mount.h
index d7029f4a191a..73005f9957ea 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -47,6 +47,8 @@ struct mnt_namespace;
47 47
48#define MNT_INTERNAL 0x4000 48#define MNT_INTERNAL 0x4000
49 49
50#define MNT_LOCK_READONLY 0x400000
51
50struct vfsmount { 52struct vfsmount {
51 struct dentry *mnt_root; /* root of the mounted tree */ 53 struct dentry *mnt_root; /* root of the mounted tree */
52 struct super_block *mnt_sb; /* pointer to superblock */ 54 struct super_block *mnt_sb; /* pointer to superblock */
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 4ce009324933..b6b215f13b45 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -26,6 +26,8 @@ struct user_namespace {
26 kuid_t owner; 26 kuid_t owner;
27 kgid_t group; 27 kgid_t group;
28 unsigned int proc_inum; 28 unsigned int proc_inum;
29 bool may_mount_sysfs;
30 bool may_mount_proc;
29}; 31};
30 32
31extern struct user_namespace init_user_ns; 33extern struct user_namespace init_user_ns;
@@ -82,4 +84,6 @@ static inline void put_user_ns(struct user_namespace *ns)
82 84
83#endif 85#endif
84 86
87void update_mnt_policy(struct user_namespace *userns);
88
85#endif /* _LINUX_USER_H */ 89#endif /* _LINUX_USER_H */
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 3953fda2e8bd..e4e47f647446 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -330,8 +330,16 @@ static struct dentry *mqueue_mount(struct file_system_type *fs_type,
330 int flags, const char *dev_name, 330 int flags, const char *dev_name,
331 void *data) 331 void *data)
332{ 332{
333 if (!(flags & MS_KERNMOUNT)) 333 if (!(flags & MS_KERNMOUNT)) {
334 data = current->nsproxy->ipc_ns; 334 struct ipc_namespace *ns = current->nsproxy->ipc_ns;
335 /* Don't allow mounting unless the caller has CAP_SYS_ADMIN
336 * over the ipc namespace.
337 */
338 if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
339 return ERR_PTR(-EPERM);
340
341 data = ns;
342 }
335 return mount_ns(fs_type, flags, data, mqueue_fill_super); 343 return mount_ns(fs_type, flags, data, mqueue_fill_super);
336} 344}
337 345
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index c1c3dc1c6023..bea15bdf82b0 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -181,6 +181,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
181 int nr; 181 int nr;
182 int rc; 182 int rc;
183 struct task_struct *task, *me = current; 183 struct task_struct *task, *me = current;
184 int init_pids = thread_group_leader(me) ? 1 : 2;
184 185
185 /* Don't allow any more processes into the pid namespace */ 186 /* Don't allow any more processes into the pid namespace */
186 disable_pid_allocation(pid_ns); 187 disable_pid_allocation(pid_ns);
@@ -230,7 +231,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
230 */ 231 */
231 for (;;) { 232 for (;;) {
232 set_current_state(TASK_UNINTERRUPTIBLE); 233 set_current_state(TASK_UNINTERRUPTIBLE);
233 if (pid_ns->nr_hashed == 1) 234 if (pid_ns->nr_hashed == init_pids)
234 break; 235 break;
235 schedule(); 236 schedule();
236 } 237 }
diff --git a/kernel/user.c b/kernel/user.c
index e81978e8c03b..8e635a18ab52 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -51,6 +51,8 @@ struct user_namespace init_user_ns = {
51 .owner = GLOBAL_ROOT_UID, 51 .owner = GLOBAL_ROOT_UID,
52 .group = GLOBAL_ROOT_GID, 52 .group = GLOBAL_ROOT_GID,
53 .proc_inum = PROC_USER_INIT_INO, 53 .proc_inum = PROC_USER_INIT_INO,
54 .may_mount_sysfs = true,
55 .may_mount_proc = true,
54}; 56};
55EXPORT_SYMBOL_GPL(init_user_ns); 57EXPORT_SYMBOL_GPL(init_user_ns);
56 58
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index b14f4d342043..a54f26f82eb2 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -61,6 +61,15 @@ int create_user_ns(struct cred *new)
61 kgid_t group = new->egid; 61 kgid_t group = new->egid;
62 int ret; 62 int ret;
63 63
64 /*
65 * Verify that we can not violate the policy of which files
66 * may be accessed that is specified by the root directory,
67 * by verifing that the root directory is at the root of the
68 * mount namespace which allows all files to be accessed.
69 */
70 if (current_chrooted())
71 return -EPERM;
72
64 /* The creator needs a mapping in the parent user namespace 73 /* The creator needs a mapping in the parent user namespace
65 * or else we won't be able to reasonably tell userspace who 74 * or else we won't be able to reasonably tell userspace who
66 * created a user_namespace. 75 * created a user_namespace.
@@ -87,6 +96,8 @@ int create_user_ns(struct cred *new)
87 96
88 set_cred_user_ns(new, ns); 97 set_cred_user_ns(new, ns);
89 98
99 update_mnt_policy(ns);
100
90 return 0; 101 return 0;
91} 102}
92 103
diff --git a/net/core/scm.c b/net/core/scm.c
index 905dcc6ad1e3..2dc6cdaaae8a 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -24,6 +24,7 @@
24#include <linux/interrupt.h> 24#include <linux/interrupt.h>
25#include <linux/netdevice.h> 25#include <linux/netdevice.h>
26#include <linux/security.h> 26#include <linux/security.h>
27#include <linux/pid_namespace.h>
27#include <linux/pid.h> 28#include <linux/pid.h>
28#include <linux/nsproxy.h> 29#include <linux/nsproxy.h>
29#include <linux/slab.h> 30#include <linux/slab.h>
@@ -52,7 +53,8 @@ static __inline__ int scm_check_creds(struct ucred *creds)
52 if (!uid_valid(uid) || !gid_valid(gid)) 53 if (!uid_valid(uid) || !gid_valid(gid))
53 return -EINVAL; 54 return -EINVAL;
54 55
55 if ((creds->pid == task_tgid_vnr(current) || nsown_capable(CAP_SYS_ADMIN)) && 56 if ((creds->pid == task_tgid_vnr(current) ||
57 ns_capable(current->nsproxy->pid_ns->user_ns, CAP_SYS_ADMIN)) &&
56 ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || 58 ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) ||
57 uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) && 59 uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) &&
58 ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || 60 ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) ||
diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c
index 23414b93771f..13c88fbcf037 100644
--- a/security/yama/yama_lsm.c
+++ b/security/yama/yama_lsm.c
@@ -347,10 +347,8 @@ int yama_ptrace_traceme(struct task_struct *parent)
347 /* Only disallow PTRACE_TRACEME on more aggressive settings. */ 347 /* Only disallow PTRACE_TRACEME on more aggressive settings. */
348 switch (ptrace_scope) { 348 switch (ptrace_scope) {
349 case YAMA_SCOPE_CAPABILITY: 349 case YAMA_SCOPE_CAPABILITY:
350 rcu_read_lock(); 350 if (!has_ns_capability(parent, current_user_ns(), CAP_SYS_PTRACE))
351 if (!ns_capable(__task_cred(parent)->user_ns, CAP_SYS_PTRACE))
352 rc = -EPERM; 351 rc = -EPERM;
353 rcu_read_unlock();
354 break; 352 break;
355 case YAMA_SCOPE_NO_ATTACH: 353 case YAMA_SCOPE_NO_ATTACH:
356 rc = -EPERM; 354 rc = -EPERM;