aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/sysctl/README1
-rw-r--r--Documentation/sysctl/user.txt66
-rw-r--r--fs/devpts/inode.c71
-rw-r--r--fs/mount.h1
-rw-r--r--fs/namespace.c22
-rw-r--r--fs/proc/proc_sysctl.c14
-rw-r--r--include/linux/cgroup.h1
-rw-r--r--include/linux/ipc_namespace.h1
-rw-r--r--include/linux/pid_namespace.h1
-rw-r--r--include/linux/sysctl.h3
-rw-r--r--include/linux/user_namespace.h37
-rw-r--r--include/linux/utsname.h1
-rw-r--r--include/net/net_namespace.h1
-rw-r--r--ipc/namespace.c45
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/cgroup.c18
-rw-r--r--kernel/fork.c5
-rw-r--r--kernel/pid_namespace.c25
-rw-r--r--kernel/ucount.c235
-rw-r--r--kernel/user_namespace.c74
-rw-r--r--kernel/utsname.c34
-rw-r--r--net/core/net_namespace.c22
-rw-r--r--net/sysctl_net.c4
23 files changed, 589 insertions, 95 deletions
diff --git a/Documentation/sysctl/README b/Documentation/sysctl/README
index 8c3306e01d52..91f54ffa0077 100644
--- a/Documentation/sysctl/README
+++ b/Documentation/sysctl/README
@@ -69,6 +69,7 @@ proc/ <empty>
69sunrpc/ SUN Remote Procedure Call (NFS) 69sunrpc/ SUN Remote Procedure Call (NFS)
70vm/ memory management tuning 70vm/ memory management tuning
71 buffer and cache management 71 buffer and cache management
72user/ Per user per user namespace limits
72 73
73These are the subdirs I have on my system. There might be more 74These are the subdirs I have on my system. There might be more
74or other subdirs in another setup. If you see another dir, I'd 75or other subdirs in another setup. If you see another dir, I'd
diff --git a/Documentation/sysctl/user.txt b/Documentation/sysctl/user.txt
new file mode 100644
index 000000000000..1291c498f78f
--- /dev/null
+++ b/Documentation/sysctl/user.txt
@@ -0,0 +1,66 @@
1Documentation for /proc/sys/user/* kernel version 4.9.0
2 (c) 2016 Eric Biederman <ebiederm@xmission.com>
3
4==============================================================
5
6This file contains the documentation for the sysctl files in
7/proc/sys/user.
8
9The files in this directory can be used to override the default
10limits on the number of namespaces and other objects that have
11per user per user namespace limits.
12
13The primary purpose of these limits is to stop programs that
14malfunction and attempt to create a ridiculous number of objects,
15before the malfunction becomes a system wide problem. It is the
16intention that the defaults of these limits are set high enough that
17no program in normal operation should run into these limits.
18
19The creation of per user per user namespace objects is charged to
20the user in the user namespace who created the object and
21verified to be below the per user limit in that user namespace.
22
23The creation of objects is also charged to all of the users
24who created user namespaces the creation of the object happens
25in (user namespaces can be nested) and verified to be below the per user
26limits in the user namespaces of those users.
27
28This recursive counting of created objects ensures that creating a
29user namespace does not allow a user to escape their current limits.
30
31Currently, these files are in /proc/sys/user:
32
33- max_cgroup_namespaces
34
35 The maximum number of cgroup namespaces that any user in the current
36 user namespace may create.
37
38- max_ipc_namespaces
39
40 The maximum number of ipc namespaces that any user in the current
41 user namespace may create.
42
43- max_mnt_namespaces
44
45 The maximum number of mount namespaces that any user in the current
46 user namespace may create.
47
48- max_net_namespaces
49
50 The maximum number of network namespaces that any user in the
51 current user namespace may create.
52
53- max_pid_namespaces
54
55 The maximum number of pid namespaces that any user in the current
56 user namespace may create.
57
58- max_user_namespaces
59
60 The maximum number of user namespaces that any user in the current
61 user namespace may create.
62
63- max_uts_namespaces
64
65 The maximum number of uts namespaces that any user in the current
66 user namespace may create.
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index d116453b0276..154cc45c19e8 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -272,13 +272,8 @@ static int mknod_ptmx(struct super_block *sb)
272 struct dentry *root = sb->s_root; 272 struct dentry *root = sb->s_root;
273 struct pts_fs_info *fsi = DEVPTS_SB(sb); 273 struct pts_fs_info *fsi = DEVPTS_SB(sb);
274 struct pts_mount_opts *opts = &fsi->mount_opts; 274 struct pts_mount_opts *opts = &fsi->mount_opts;
275 kuid_t root_uid; 275 kuid_t ptmx_uid = current_fsuid();
276 kgid_t root_gid; 276 kgid_t ptmx_gid = current_fsgid();
277
278 root_uid = make_kuid(current_user_ns(), 0);
279 root_gid = make_kgid(current_user_ns(), 0);
280 if (!uid_valid(root_uid) || !gid_valid(root_gid))
281 return -EINVAL;
282 277
283 inode_lock(d_inode(root)); 278 inode_lock(d_inode(root));
284 279
@@ -309,8 +304,8 @@ static int mknod_ptmx(struct super_block *sb)
309 304
310 mode = S_IFCHR|opts->ptmxmode; 305 mode = S_IFCHR|opts->ptmxmode;
311 init_special_inode(inode, mode, MKDEV(TTYAUX_MAJOR, 2)); 306 init_special_inode(inode, mode, MKDEV(TTYAUX_MAJOR, 2));
312 inode->i_uid = root_uid; 307 inode->i_uid = ptmx_uid;
313 inode->i_gid = root_gid; 308 inode->i_gid = ptmx_gid;
314 309
315 d_add(dentry, inode); 310 d_add(dentry, inode);
316 311
@@ -336,7 +331,6 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data)
336 struct pts_fs_info *fsi = DEVPTS_SB(sb); 331 struct pts_fs_info *fsi = DEVPTS_SB(sb);
337 struct pts_mount_opts *opts = &fsi->mount_opts; 332 struct pts_mount_opts *opts = &fsi->mount_opts;
338 333
339 sync_filesystem(sb);
340 err = parse_mount_options(data, PARSE_REMOUNT, opts); 334 err = parse_mount_options(data, PARSE_REMOUNT, opts);
341 335
342 /* 336 /*
@@ -395,6 +389,7 @@ static int
395devpts_fill_super(struct super_block *s, void *data, int silent) 389devpts_fill_super(struct super_block *s, void *data, int silent)
396{ 390{
397 struct inode *inode; 391 struct inode *inode;
392 int error;
398 393
399 s->s_iflags &= ~SB_I_NODEV; 394 s->s_iflags &= ~SB_I_NODEV;
400 s->s_blocksize = 1024; 395 s->s_blocksize = 1024;
@@ -403,10 +398,16 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
403 s->s_op = &devpts_sops; 398 s->s_op = &devpts_sops;
404 s->s_time_gran = 1; 399 s->s_time_gran = 1;
405 400
401 error = -ENOMEM;
406 s->s_fs_info = new_pts_fs_info(s); 402 s->s_fs_info = new_pts_fs_info(s);
407 if (!s->s_fs_info) 403 if (!s->s_fs_info)
408 goto fail; 404 goto fail;
409 405
406 error = parse_mount_options(data, PARSE_MOUNT, &DEVPTS_SB(s)->mount_opts);
407 if (error)
408 goto fail;
409
410 error = -ENOMEM;
410 inode = new_inode(s); 411 inode = new_inode(s);
411 if (!inode) 412 if (!inode)
412 goto fail; 413 goto fail;
@@ -418,13 +419,21 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
418 set_nlink(inode, 2); 419 set_nlink(inode, 2);
419 420
420 s->s_root = d_make_root(inode); 421 s->s_root = d_make_root(inode);
421 if (s->s_root) 422 if (!s->s_root) {
422 return 0; 423 pr_err("get root dentry failed\n");
424 goto fail;
425 }
423 426
424 pr_err("get root dentry failed\n"); 427 error = mknod_ptmx(s);
428 if (error)
429 goto fail_dput;
425 430
431 return 0;
432fail_dput:
433 dput(s->s_root);
434 s->s_root = NULL;
426fail: 435fail:
427 return -ENOMEM; 436 return error;
428} 437}
429 438
430/* 439/*
@@ -436,43 +445,15 @@ fail:
436static struct dentry *devpts_mount(struct file_system_type *fs_type, 445static struct dentry *devpts_mount(struct file_system_type *fs_type,
437 int flags, const char *dev_name, void *data) 446 int flags, const char *dev_name, void *data)
438{ 447{
439 int error; 448 return mount_nodev(fs_type, flags, data, devpts_fill_super);
440 struct pts_mount_opts opts;
441 struct super_block *s;
442
443 error = parse_mount_options(data, PARSE_MOUNT, &opts);
444 if (error)
445 return ERR_PTR(error);
446
447 s = sget(fs_type, NULL, set_anon_super, flags, NULL);
448 if (IS_ERR(s))
449 return ERR_CAST(s);
450
451 if (!s->s_root) {
452 error = devpts_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
453 if (error)
454 goto out_undo_sget;
455 s->s_flags |= MS_ACTIVE;
456 }
457
458 memcpy(&(DEVPTS_SB(s))->mount_opts, &opts, sizeof(opts));
459
460 error = mknod_ptmx(s);
461 if (error)
462 goto out_undo_sget;
463
464 return dget(s->s_root);
465
466out_undo_sget:
467 deactivate_locked_super(s);
468 return ERR_PTR(error);
469} 449}
470 450
471static void devpts_kill_sb(struct super_block *sb) 451static void devpts_kill_sb(struct super_block *sb)
472{ 452{
473 struct pts_fs_info *fsi = DEVPTS_SB(sb); 453 struct pts_fs_info *fsi = DEVPTS_SB(sb);
474 454
475 ida_destroy(&fsi->allocated_ptys); 455 if (fsi)
456 ida_destroy(&fsi->allocated_ptys);
476 kfree(fsi); 457 kfree(fsi);
477 kill_litter_super(sb); 458 kill_litter_super(sb);
478} 459}
diff --git a/fs/mount.h b/fs/mount.h
index 14db05d424f7..e037981d8351 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -10,6 +10,7 @@ struct mnt_namespace {
10 struct mount * root; 10 struct mount * root;
11 struct list_head list; 11 struct list_head list;
12 struct user_namespace *user_ns; 12 struct user_namespace *user_ns;
13 struct ucounts *ucounts;
13 u64 seq; /* Sequence number to prevent loops */ 14 u64 seq; /* Sequence number to prevent loops */
14 wait_queue_head_t poll; 15 wait_queue_head_t poll;
15 u64 event; 16 u64 event;
diff --git a/fs/namespace.c b/fs/namespace.c
index fea56f310547..8a0e90eb81d3 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2719,9 +2719,20 @@ dput_out:
2719 return retval; 2719 return retval;
2720} 2720}
2721 2721
2722static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns)
2723{
2724 return inc_ucount(ns, current_euid(), UCOUNT_MNT_NAMESPACES);
2725}
2726
2727static void dec_mnt_namespaces(struct ucounts *ucounts)
2728{
2729 dec_ucount(ucounts, UCOUNT_MNT_NAMESPACES);
2730}
2731
2722static void free_mnt_ns(struct mnt_namespace *ns) 2732static void free_mnt_ns(struct mnt_namespace *ns)
2723{ 2733{
2724 ns_free_inum(&ns->ns); 2734 ns_free_inum(&ns->ns);
2735 dec_mnt_namespaces(ns->ucounts);
2725 put_user_ns(ns->user_ns); 2736 put_user_ns(ns->user_ns);
2726 kfree(ns); 2737 kfree(ns);
2727} 2738}
@@ -2738,14 +2749,22 @@ static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
2738static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns) 2749static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
2739{ 2750{
2740 struct mnt_namespace *new_ns; 2751 struct mnt_namespace *new_ns;
2752 struct ucounts *ucounts;
2741 int ret; 2753 int ret;
2742 2754
2755 ucounts = inc_mnt_namespaces(user_ns);
2756 if (!ucounts)
2757 return ERR_PTR(-ENOSPC);
2758
2743 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); 2759 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
2744 if (!new_ns) 2760 if (!new_ns) {
2761 dec_mnt_namespaces(ucounts);
2745 return ERR_PTR(-ENOMEM); 2762 return ERR_PTR(-ENOMEM);
2763 }
2746 ret = ns_alloc_inum(&new_ns->ns); 2764 ret = ns_alloc_inum(&new_ns->ns);
2747 if (ret) { 2765 if (ret) {
2748 kfree(new_ns); 2766 kfree(new_ns);
2767 dec_mnt_namespaces(ucounts);
2749 return ERR_PTR(ret); 2768 return ERR_PTR(ret);
2750 } 2769 }
2751 new_ns->ns.ops = &mntns_operations; 2770 new_ns->ns.ops = &mntns_operations;
@@ -2756,6 +2775,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
2756 init_waitqueue_head(&new_ns->poll); 2775 init_waitqueue_head(&new_ns->poll);
2757 new_ns->event = 0; 2776 new_ns->event = 0;
2758 new_ns->user_ns = get_user_ns(user_ns); 2777 new_ns->user_ns = get_user_ns(user_ns);
2778 new_ns->ucounts = ucounts;
2759 return new_ns; 2779 return new_ns;
2760} 2780}
2761 2781
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 1b93650dda2f..a80acdfbe180 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -72,7 +72,7 @@ static DEFINE_SPINLOCK(sysctl_lock);
72 72
73static void drop_sysctl_table(struct ctl_table_header *header); 73static void drop_sysctl_table(struct ctl_table_header *header);
74static int sysctl_follow_link(struct ctl_table_header **phead, 74static int sysctl_follow_link(struct ctl_table_header **phead,
75 struct ctl_table **pentry, struct nsproxy *namespaces); 75 struct ctl_table **pentry);
76static int insert_links(struct ctl_table_header *head); 76static int insert_links(struct ctl_table_header *head);
77static void put_links(struct ctl_table_header *header); 77static void put_links(struct ctl_table_header *header);
78 78
@@ -319,11 +319,11 @@ static void sysctl_head_finish(struct ctl_table_header *head)
319} 319}
320 320
321static struct ctl_table_set * 321static struct ctl_table_set *
322lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) 322lookup_header_set(struct ctl_table_root *root)
323{ 323{
324 struct ctl_table_set *set = &root->default_set; 324 struct ctl_table_set *set = &root->default_set;
325 if (root->lookup) 325 if (root->lookup)
326 set = root->lookup(root, namespaces); 326 set = root->lookup(root);
327 return set; 327 return set;
328} 328}
329 329
@@ -491,7 +491,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
491 goto out; 491 goto out;
492 492
493 if (S_ISLNK(p->mode)) { 493 if (S_ISLNK(p->mode)) {
494 ret = sysctl_follow_link(&h, &p, current->nsproxy); 494 ret = sysctl_follow_link(&h, &p);
495 err = ERR_PTR(ret); 495 err = ERR_PTR(ret);
496 if (ret) 496 if (ret)
497 goto out; 497 goto out;
@@ -659,7 +659,7 @@ static bool proc_sys_link_fill_cache(struct file *file,
659 659
660 if (S_ISLNK(table->mode)) { 660 if (S_ISLNK(table->mode)) {
661 /* It is not an error if we can not follow the link ignore it */ 661 /* It is not an error if we can not follow the link ignore it */
662 int err = sysctl_follow_link(&head, &table, current->nsproxy); 662 int err = sysctl_follow_link(&head, &table);
663 if (err) 663 if (err)
664 goto out; 664 goto out;
665 } 665 }
@@ -976,7 +976,7 @@ static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir)
976} 976}
977 977
978static int sysctl_follow_link(struct ctl_table_header **phead, 978static int sysctl_follow_link(struct ctl_table_header **phead,
979 struct ctl_table **pentry, struct nsproxy *namespaces) 979 struct ctl_table **pentry)
980{ 980{
981 struct ctl_table_header *head; 981 struct ctl_table_header *head;
982 struct ctl_table_root *root; 982 struct ctl_table_root *root;
@@ -988,7 +988,7 @@ static int sysctl_follow_link(struct ctl_table_header **phead,
988 ret = 0; 988 ret = 0;
989 spin_lock(&sysctl_lock); 989 spin_lock(&sysctl_lock);
990 root = (*pentry)->data; 990 root = (*pentry)->data;
991 set = lookup_header_set(root, namespaces); 991 set = lookup_header_set(root);
992 dir = xlate_dir(set, (*phead)->parent); 992 dir = xlate_dir(set, (*phead)->parent);
993 if (IS_ERR(dir)) 993 if (IS_ERR(dir))
994 ret = PTR_ERR(dir); 994 ret = PTR_ERR(dir);
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 984f73b719a9..1ed92812785a 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -621,6 +621,7 @@ struct cgroup_namespace {
621 atomic_t count; 621 atomic_t count;
622 struct ns_common ns; 622 struct ns_common ns;
623 struct user_namespace *user_ns; 623 struct user_namespace *user_ns;
624 struct ucounts *ucounts;
624 struct css_set *root_cset; 625 struct css_set *root_cset;
625}; 626};
626 627
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index d10e54f03c09..848e5796400e 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -58,6 +58,7 @@ struct ipc_namespace {
58 58
59 /* user_ns which owns the ipc ns */ 59 /* user_ns which owns the ipc ns */
60 struct user_namespace *user_ns; 60 struct user_namespace *user_ns;
61 struct ucounts *ucounts;
61 62
62 struct ns_common ns; 63 struct ns_common ns;
63}; 64};
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 918b117a7cd3..34cce96741bc 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -40,6 +40,7 @@ struct pid_namespace {
40 struct fs_pin *bacct; 40 struct fs_pin *bacct;
41#endif 41#endif
42 struct user_namespace *user_ns; 42 struct user_namespace *user_ns;
43 struct ucounts *ucounts;
43 struct work_struct proc_work; 44 struct work_struct proc_work;
44 kgid_t pid_gid; 45 kgid_t pid_gid;
45 int hide_pid; 46 int hide_pid;
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 697e160c78d0..f166ca0203e2 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -155,8 +155,7 @@ struct ctl_table_set {
155 155
156struct ctl_table_root { 156struct ctl_table_root {
157 struct ctl_table_set default_set; 157 struct ctl_table_set default_set;
158 struct ctl_table_set *(*lookup)(struct ctl_table_root *root, 158 struct ctl_table_set *(*lookup)(struct ctl_table_root *root);
159 struct nsproxy *namespaces);
160 int (*permissions)(struct ctl_table_header *head, struct ctl_table *table); 159 int (*permissions)(struct ctl_table_header *head, struct ctl_table *table);
161}; 160};
162 161
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 190cf0760815..eb209d4523f5 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -22,6 +22,19 @@ struct uid_gid_map { /* 64 bytes -- 1 cache line */
22 22
23#define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED 23#define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED
24 24
25struct ucounts;
26
27enum ucount_type {
28 UCOUNT_USER_NAMESPACES,
29 UCOUNT_PID_NAMESPACES,
30 UCOUNT_UTS_NAMESPACES,
31 UCOUNT_IPC_NAMESPACES,
32 UCOUNT_NET_NAMESPACES,
33 UCOUNT_MNT_NAMESPACES,
34 UCOUNT_CGROUP_NAMESPACES,
35 UCOUNT_COUNTS,
36};
37
25struct user_namespace { 38struct user_namespace {
26 struct uid_gid_map uid_map; 39 struct uid_gid_map uid_map;
27 struct uid_gid_map gid_map; 40 struct uid_gid_map gid_map;
@@ -39,10 +52,30 @@ struct user_namespace {
39 struct key *persistent_keyring_register; 52 struct key *persistent_keyring_register;
40 struct rw_semaphore persistent_keyring_register_sem; 53 struct rw_semaphore persistent_keyring_register_sem;
41#endif 54#endif
55 struct work_struct work;
56#ifdef CONFIG_SYSCTL
57 struct ctl_table_set set;
58 struct ctl_table_header *sysctls;
59#endif
60 struct ucounts *ucounts;
61 int ucount_max[UCOUNT_COUNTS];
62};
63
64struct ucounts {
65 struct hlist_node node;
66 struct user_namespace *ns;
67 kuid_t uid;
68 atomic_t count;
69 atomic_t ucount[UCOUNT_COUNTS];
42}; 70};
43 71
44extern struct user_namespace init_user_ns; 72extern struct user_namespace init_user_ns;
45 73
74bool setup_userns_sysctls(struct user_namespace *ns);
75void retire_userns_sysctls(struct user_namespace *ns);
76struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type);
77void dec_ucount(struct ucounts *ucounts, enum ucount_type type);
78
46#ifdef CONFIG_USER_NS 79#ifdef CONFIG_USER_NS
47 80
48static inline struct user_namespace *get_user_ns(struct user_namespace *ns) 81static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
@@ -54,12 +87,12 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
54 87
55extern int create_user_ns(struct cred *new); 88extern int create_user_ns(struct cred *new);
56extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred); 89extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred);
57extern void free_user_ns(struct user_namespace *ns); 90extern void __put_user_ns(struct user_namespace *ns);
58 91
59static inline void put_user_ns(struct user_namespace *ns) 92static inline void put_user_ns(struct user_namespace *ns)
60{ 93{
61 if (ns && atomic_dec_and_test(&ns->count)) 94 if (ns && atomic_dec_and_test(&ns->count))
62 free_user_ns(ns); 95 __put_user_ns(ns);
63} 96}
64 97
65struct seq_operations; 98struct seq_operations;
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index 5093f58ae192..60f0bb83b313 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -24,6 +24,7 @@ struct uts_namespace {
24 struct kref kref; 24 struct kref kref;
25 struct new_utsname name; 25 struct new_utsname name;
26 struct user_namespace *user_ns; 26 struct user_namespace *user_ns;
27 struct ucounts *ucounts;
27 struct ns_common ns; 28 struct ns_common ns;
28}; 29};
29extern struct uts_namespace init_uts_ns; 30extern struct uts_namespace init_uts_ns;
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 0933c7455a30..fc4f757107df 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -60,6 +60,7 @@ struct net {
60 struct list_head exit_list; /* Use only net_mutex */ 60 struct list_head exit_list; /* Use only net_mutex */
61 61
62 struct user_namespace *user_ns; /* Owning user namespace */ 62 struct user_namespace *user_ns; /* Owning user namespace */
63 struct ucounts *ucounts;
63 spinlock_t nsid_lock; 64 spinlock_t nsid_lock;
64 struct idr netns_ids; 65 struct idr netns_ids;
65 66
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 578d93be619d..0abdea496493 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -16,39 +16,61 @@
16 16
17#include "util.h" 17#include "util.h"
18 18
19static struct ucounts *inc_ipc_namespaces(struct user_namespace *ns)
20{
21 return inc_ucount(ns, current_euid(), UCOUNT_IPC_NAMESPACES);
22}
23
24static void dec_ipc_namespaces(struct ucounts *ucounts)
25{
26 dec_ucount(ucounts, UCOUNT_IPC_NAMESPACES);
27}
28
19static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, 29static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
20 struct ipc_namespace *old_ns) 30 struct ipc_namespace *old_ns)
21{ 31{
22 struct ipc_namespace *ns; 32 struct ipc_namespace *ns;
33 struct ucounts *ucounts;
23 int err; 34 int err;
24 35
36 err = -ENOSPC;
37 ucounts = inc_ipc_namespaces(user_ns);
38 if (!ucounts)
39 goto fail;
40
41 err = -ENOMEM;
25 ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL); 42 ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL);
26 if (ns == NULL) 43 if (ns == NULL)
27 return ERR_PTR(-ENOMEM); 44 goto fail_dec;
28 45
29 err = ns_alloc_inum(&ns->ns); 46 err = ns_alloc_inum(&ns->ns);
30 if (err) { 47 if (err)
31 kfree(ns); 48 goto fail_free;
32 return ERR_PTR(err);
33 }
34 ns->ns.ops = &ipcns_operations; 49 ns->ns.ops = &ipcns_operations;
35 50
36 atomic_set(&ns->count, 1); 51 atomic_set(&ns->count, 1);
37 ns->user_ns = get_user_ns(user_ns); 52 ns->user_ns = get_user_ns(user_ns);
53 ns->ucounts = ucounts;
38 54
39 err = mq_init_ns(ns); 55 err = mq_init_ns(ns);
40 if (err) { 56 if (err)
41 put_user_ns(ns->user_ns); 57 goto fail_put;
42 ns_free_inum(&ns->ns);
43 kfree(ns);
44 return ERR_PTR(err);
45 }
46 58
47 sem_init_ns(ns); 59 sem_init_ns(ns);
48 msg_init_ns(ns); 60 msg_init_ns(ns);
49 shm_init_ns(ns); 61 shm_init_ns(ns);
50 62
51 return ns; 63 return ns;
64
65fail_put:
66 put_user_ns(ns->user_ns);
67 ns_free_inum(&ns->ns);
68fail_free:
69 kfree(ns);
70fail_dec:
71 dec_ipc_namespaces(ucounts);
72fail:
73 return ERR_PTR(err);
52} 74}
53 75
54struct ipc_namespace *copy_ipcs(unsigned long flags, 76struct ipc_namespace *copy_ipcs(unsigned long flags,
@@ -96,6 +118,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
96 msg_exit_ns(ns); 118 msg_exit_ns(ns);
97 shm_exit_ns(ns); 119 shm_exit_ns(ns);
98 120
121 dec_ipc_namespaces(ns->ucounts);
99 put_user_ns(ns->user_ns); 122 put_user_ns(ns->user_ns);
100 ns_free_inum(&ns->ns); 123 ns_free_inum(&ns->ns);
101 kfree(ns); 124 kfree(ns);
diff --git a/kernel/Makefile b/kernel/Makefile
index e2ec54e2b952..eb26e12c6c2a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -9,7 +9,7 @@ obj-y = fork.o exec_domain.o panic.o \
9 extable.o params.o \ 9 extable.o params.o \
10 kthread.o sys_ni.o nsproxy.o \ 10 kthread.o sys_ni.o nsproxy.o \
11 notifier.o ksysfs.o cred.o reboot.o \ 11 notifier.o ksysfs.o cred.o reboot.o \
12 async.o range.o smpboot.o 12 async.o range.o smpboot.o ucount.o
13 13
14obj-$(CONFIG_MULTIUSER) += groups.o 14obj-$(CONFIG_MULTIUSER) += groups.o
15 15
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 86b0e8b16426..d6504338e284 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -6295,6 +6295,16 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)
6295 6295
6296/* cgroup namespaces */ 6296/* cgroup namespaces */
6297 6297
6298static struct ucounts *inc_cgroup_namespaces(struct user_namespace *ns)
6299{
6300 return inc_ucount(ns, current_euid(), UCOUNT_CGROUP_NAMESPACES);
6301}
6302
6303static void dec_cgroup_namespaces(struct ucounts *ucounts)
6304{
6305 dec_ucount(ucounts, UCOUNT_CGROUP_NAMESPACES);
6306}
6307
6298static struct cgroup_namespace *alloc_cgroup_ns(void) 6308static struct cgroup_namespace *alloc_cgroup_ns(void)
6299{ 6309{
6300 struct cgroup_namespace *new_ns; 6310 struct cgroup_namespace *new_ns;
@@ -6316,6 +6326,7 @@ static struct cgroup_namespace *alloc_cgroup_ns(void)
6316void free_cgroup_ns(struct cgroup_namespace *ns) 6326void free_cgroup_ns(struct cgroup_namespace *ns)
6317{ 6327{
6318 put_css_set(ns->root_cset); 6328 put_css_set(ns->root_cset);
6329 dec_cgroup_namespaces(ns->ucounts);
6319 put_user_ns(ns->user_ns); 6330 put_user_ns(ns->user_ns);
6320 ns_free_inum(&ns->ns); 6331 ns_free_inum(&ns->ns);
6321 kfree(ns); 6332 kfree(ns);
@@ -6327,6 +6338,7 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
6327 struct cgroup_namespace *old_ns) 6338 struct cgroup_namespace *old_ns)
6328{ 6339{
6329 struct cgroup_namespace *new_ns; 6340 struct cgroup_namespace *new_ns;
6341 struct ucounts *ucounts;
6330 struct css_set *cset; 6342 struct css_set *cset;
6331 6343
6332 BUG_ON(!old_ns); 6344 BUG_ON(!old_ns);
@@ -6340,6 +6352,10 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
6340 if (!ns_capable(user_ns, CAP_SYS_ADMIN)) 6352 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
6341 return ERR_PTR(-EPERM); 6353 return ERR_PTR(-EPERM);
6342 6354
6355 ucounts = inc_cgroup_namespaces(user_ns);
6356 if (!ucounts)
6357 return ERR_PTR(-ENOSPC);
6358
6343 /* It is not safe to take cgroup_mutex here */ 6359 /* It is not safe to take cgroup_mutex here */
6344 spin_lock_irq(&css_set_lock); 6360 spin_lock_irq(&css_set_lock);
6345 cset = task_css_set(current); 6361 cset = task_css_set(current);
@@ -6349,10 +6365,12 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
6349 new_ns = alloc_cgroup_ns(); 6365 new_ns = alloc_cgroup_ns();
6350 if (IS_ERR(new_ns)) { 6366 if (IS_ERR(new_ns)) {
6351 put_css_set(cset); 6367 put_css_set(cset);
6368 dec_cgroup_namespaces(ucounts);
6352 return new_ns; 6369 return new_ns;
6353 } 6370 }
6354 6371
6355 new_ns->user_ns = get_user_ns(user_ns); 6372 new_ns->user_ns = get_user_ns(user_ns);
6373 new_ns->ucounts = ucounts;
6356 new_ns->root_cset = cset; 6374 new_ns->root_cset = cset;
6357 6375
6358 return new_ns; 6376 return new_ns;
diff --git a/kernel/fork.c b/kernel/fork.c
index 52e725d4a866..3cb4853a59aa 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -302,6 +302,7 @@ int arch_task_struct_size __read_mostly;
302 302
303void __init fork_init(void) 303void __init fork_init(void)
304{ 304{
305 int i;
305#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR 306#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
306#ifndef ARCH_MIN_TASKALIGN 307#ifndef ARCH_MIN_TASKALIGN
307#define ARCH_MIN_TASKALIGN L1_CACHE_BYTES 308#define ARCH_MIN_TASKALIGN L1_CACHE_BYTES
@@ -321,6 +322,10 @@ void __init fork_init(void)
321 init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; 322 init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
322 init_task.signal->rlim[RLIMIT_SIGPENDING] = 323 init_task.signal->rlim[RLIMIT_SIGPENDING] =
323 init_task.signal->rlim[RLIMIT_NPROC]; 324 init_task.signal->rlim[RLIMIT_NPROC];
325
326 for (i = 0; i < UCOUNT_COUNTS; i++) {
327 init_user_ns.ucount_max[i] = max_threads/2;
328 }
324} 329}
325 330
326int __weak arch_dup_task_struct(struct task_struct *dst, 331int __weak arch_dup_task_struct(struct task_struct *dst,
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 4fa2d56a936c..df9e8e9e0be7 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -79,23 +79,36 @@ static void proc_cleanup_work(struct work_struct *work)
79/* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ 79/* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */
80#define MAX_PID_NS_LEVEL 32 80#define MAX_PID_NS_LEVEL 32
81 81
82static struct ucounts *inc_pid_namespaces(struct user_namespace *ns)
83{
84 return inc_ucount(ns, current_euid(), UCOUNT_PID_NAMESPACES);
85}
86
87static void dec_pid_namespaces(struct ucounts *ucounts)
88{
89 dec_ucount(ucounts, UCOUNT_PID_NAMESPACES);
90}
91
82static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns, 92static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns,
83 struct pid_namespace *parent_pid_ns) 93 struct pid_namespace *parent_pid_ns)
84{ 94{
85 struct pid_namespace *ns; 95 struct pid_namespace *ns;
86 unsigned int level = parent_pid_ns->level + 1; 96 unsigned int level = parent_pid_ns->level + 1;
97 struct ucounts *ucounts;
87 int i; 98 int i;
88 int err; 99 int err;
89 100
90 if (level > MAX_PID_NS_LEVEL) { 101 err = -ENOSPC;
91 err = -EINVAL; 102 if (level > MAX_PID_NS_LEVEL)
103 goto out;
104 ucounts = inc_pid_namespaces(user_ns);
105 if (!ucounts)
92 goto out; 106 goto out;
93 }
94 107
95 err = -ENOMEM; 108 err = -ENOMEM;
96 ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL); 109 ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);
97 if (ns == NULL) 110 if (ns == NULL)
98 goto out; 111 goto out_dec;
99 112
100 ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL); 113 ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
101 if (!ns->pidmap[0].page) 114 if (!ns->pidmap[0].page)
@@ -114,6 +127,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
114 ns->level = level; 127 ns->level = level;
115 ns->parent = get_pid_ns(parent_pid_ns); 128 ns->parent = get_pid_ns(parent_pid_ns);
116 ns->user_ns = get_user_ns(user_ns); 129 ns->user_ns = get_user_ns(user_ns);
130 ns->ucounts = ucounts;
117 ns->nr_hashed = PIDNS_HASH_ADDING; 131 ns->nr_hashed = PIDNS_HASH_ADDING;
118 INIT_WORK(&ns->proc_work, proc_cleanup_work); 132 INIT_WORK(&ns->proc_work, proc_cleanup_work);
119 133
@@ -129,6 +143,8 @@ out_free_map:
129 kfree(ns->pidmap[0].page); 143 kfree(ns->pidmap[0].page);
130out_free: 144out_free:
131 kmem_cache_free(pid_ns_cachep, ns); 145 kmem_cache_free(pid_ns_cachep, ns);
146out_dec:
147 dec_pid_namespaces(ucounts);
132out: 148out:
133 return ERR_PTR(err); 149 return ERR_PTR(err);
134} 150}
@@ -146,6 +162,7 @@ static void destroy_pid_namespace(struct pid_namespace *ns)
146 ns_free_inum(&ns->ns); 162 ns_free_inum(&ns->ns);
147 for (i = 0; i < PIDMAP_ENTRIES; i++) 163 for (i = 0; i < PIDMAP_ENTRIES; i++)
148 kfree(ns->pidmap[i].page); 164 kfree(ns->pidmap[i].page);
165 dec_pid_namespaces(ns->ucounts);
149 put_user_ns(ns->user_ns); 166 put_user_ns(ns->user_ns);
150 call_rcu(&ns->rcu, delayed_free_pidns); 167 call_rcu(&ns->rcu, delayed_free_pidns);
151} 168}
diff --git a/kernel/ucount.c b/kernel/ucount.c
new file mode 100644
index 000000000000..9d20d5dd298a
--- /dev/null
+++ b/kernel/ucount.c
@@ -0,0 +1,235 @@
1/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License as
4 * published by the Free Software Foundation, version 2 of the
5 * License.
6 */
7
8#include <linux/stat.h>
9#include <linux/sysctl.h>
10#include <linux/slab.h>
11#include <linux/hash.h>
12#include <linux/user_namespace.h>
13
14#define UCOUNTS_HASHTABLE_BITS 10
15static struct hlist_head ucounts_hashtable[(1 << UCOUNTS_HASHTABLE_BITS)];
16static DEFINE_SPINLOCK(ucounts_lock);
17
18#define ucounts_hashfn(ns, uid) \
19 hash_long((unsigned long)__kuid_val(uid) + (unsigned long)(ns), \
20 UCOUNTS_HASHTABLE_BITS)
21#define ucounts_hashentry(ns, uid) \
22 (ucounts_hashtable + ucounts_hashfn(ns, uid))
23
24
25#ifdef CONFIG_SYSCTL
26static struct ctl_table_set *
27set_lookup(struct ctl_table_root *root)
28{
29 return &current_user_ns()->set;
30}
31
32static int set_is_seen(struct ctl_table_set *set)
33{
34 return &current_user_ns()->set == set;
35}
36
37static int set_permissions(struct ctl_table_header *head,
38 struct ctl_table *table)
39{
40 struct user_namespace *user_ns =
41 container_of(head->set, struct user_namespace, set);
42 int mode;
43
44 /* Allow users with CAP_SYS_RESOURCE unrestrained access */
45 if (ns_capable(user_ns, CAP_SYS_RESOURCE))
46 mode = (table->mode & S_IRWXU) >> 6;
47 else
48 /* Allow all others at most read-only access */
49 mode = table->mode & S_IROTH;
50 return (mode << 6) | (mode << 3) | mode;
51}
52
53static struct ctl_table_root set_root = {
54 .lookup = set_lookup,
55 .permissions = set_permissions,
56};
57
58static int zero = 0;
59static int int_max = INT_MAX;
60#define UCOUNT_ENTRY(name) \
61 { \
62 .procname = name, \
63 .maxlen = sizeof(int), \
64 .mode = 0644, \
65 .proc_handler = proc_dointvec_minmax, \
66 .extra1 = &zero, \
67 .extra2 = &int_max, \
68 }
69static struct ctl_table user_table[] = {
70 UCOUNT_ENTRY("max_user_namespaces"),
71 UCOUNT_ENTRY("max_pid_namespaces"),
72 UCOUNT_ENTRY("max_uts_namespaces"),
73 UCOUNT_ENTRY("max_ipc_namespaces"),
74 UCOUNT_ENTRY("max_net_namespaces"),
75 UCOUNT_ENTRY("max_mnt_namespaces"),
76 UCOUNT_ENTRY("max_cgroup_namespaces"),
77 { }
78};
79#endif /* CONFIG_SYSCTL */
80
81bool setup_userns_sysctls(struct user_namespace *ns)
82{
83#ifdef CONFIG_SYSCTL
84 struct ctl_table *tbl;
85 setup_sysctl_set(&ns->set, &set_root, set_is_seen);
86 tbl = kmemdup(user_table, sizeof(user_table), GFP_KERNEL);
87 if (tbl) {
88 int i;
89 for (i = 0; i < UCOUNT_COUNTS; i++) {
90 tbl[i].data = &ns->ucount_max[i];
91 }
92 ns->sysctls = __register_sysctl_table(&ns->set, "user", tbl);
93 }
94 if (!ns->sysctls) {
95 kfree(tbl);
96 retire_sysctl_set(&ns->set);
97 return false;
98 }
99#endif
100 return true;
101}
102
/*
 * Undo setup_userns_sysctls(): unregister the "user" table of @ns,
 * retire its table set and free the table copy.  Does nothing without
 * CONFIG_SYSCTL.
 */
void retire_userns_sysctls(struct user_namespace *ns)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table_header *header = ns->sysctls;
	/* Fetch the table pointer before the header is unregistered. */
	struct ctl_table *table = header->ctl_table_arg;

	unregister_sysctl_table(header);
	retire_sysctl_set(&ns->set);
	kfree(table);
#endif
}
114
115static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, struct hlist_head *hashent)
116{
117 struct ucounts *ucounts;
118
119 hlist_for_each_entry(ucounts, hashent, node) {
120 if (uid_eq(ucounts->uid, uid) && (ucounts->ns == ns))
121 return ucounts;
122 }
123 return NULL;
124}
125
126static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid)
127{
128 struct hlist_head *hashent = ucounts_hashentry(ns, uid);
129 struct ucounts *ucounts, *new;
130
131 spin_lock(&ucounts_lock);
132 ucounts = find_ucounts(ns, uid, hashent);
133 if (!ucounts) {
134 spin_unlock(&ucounts_lock);
135
136 new = kzalloc(sizeof(*new), GFP_KERNEL);
137 if (!new)
138 return NULL;
139
140 new->ns = ns;
141 new->uid = uid;
142 atomic_set(&new->count, 0);
143
144 spin_lock(&ucounts_lock);
145 ucounts = find_ucounts(ns, uid, hashent);
146 if (ucounts) {
147 kfree(new);
148 } else {
149 hlist_add_head(&new->node, hashent);
150 ucounts = new;
151 }
152 }
153 if (!atomic_add_unless(&ucounts->count, 1, INT_MAX))
154 ucounts = NULL;
155 spin_unlock(&ucounts_lock);
156 return ucounts;
157}
158
159static void put_ucounts(struct ucounts *ucounts)
160{
161 if (atomic_dec_and_test(&ucounts->count)) {
162 spin_lock(&ucounts_lock);
163 hlist_del_init(&ucounts->node);
164 spin_unlock(&ucounts_lock);
165
166 kfree(ucounts);
167 }
168}
169
170static inline bool atomic_inc_below(atomic_t *v, int u)
171{
172 int c, old;
173 c = atomic_read(v);
174 for (;;) {
175 if (unlikely(c >= u))
176 return false;
177 old = atomic_cmpxchg(v, c, c+1);
178 if (likely(old == c))
179 return true;
180 c = old;
181 }
182}
183
184struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid,
185 enum ucount_type type)
186{
187 struct ucounts *ucounts, *iter, *bad;
188 struct user_namespace *tns;
189 ucounts = get_ucounts(ns, uid);
190 for (iter = ucounts; iter; iter = tns->ucounts) {
191 int max;
192 tns = iter->ns;
193 max = READ_ONCE(tns->ucount_max[type]);
194 if (!atomic_inc_below(&iter->ucount[type], max))
195 goto fail;
196 }
197 return ucounts;
198fail:
199 bad = iter;
200 for (iter = ucounts; iter != bad; iter = iter->ns->ucounts)
201 atomic_dec(&iter->ucount[type]);
202
203 put_ucounts(ucounts);
204 return NULL;
205}
206
207void dec_ucount(struct ucounts *ucounts, enum ucount_type type)
208{
209 struct ucounts *iter;
210 for (iter = ucounts; iter; iter = iter->ns->ucounts) {
211 int dec = atomic_dec_if_positive(&iter->ucount[type]);
212 WARN_ON_ONCE(dec < 0);
213 }
214 put_ucounts(ucounts);
215}
216
217static __init int user_namespace_sysctl_init(void)
218{
219#ifdef CONFIG_SYSCTL
220 static struct ctl_table_header *user_header;
221 static struct ctl_table empty[1];
222 /*
223 * It is necessary to register the user directory in the
224 * default set so that registrations in the child sets work
225 * properly.
226 */
227 user_header = register_sysctl("user", empty);
228 BUG_ON(!user_header);
229 BUG_ON(!setup_userns_sysctls(&init_user_ns));
230#endif
231 return 0;
232}
233subsys_initcall(user_namespace_sysctl_init);
234
235
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index a58a219b99c6..86b7854fec8e 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -29,6 +29,17 @@ static DEFINE_MUTEX(userns_state_mutex);
29static bool new_idmap_permitted(const struct file *file, 29static bool new_idmap_permitted(const struct file *file,
30 struct user_namespace *ns, int cap_setid, 30 struct user_namespace *ns, int cap_setid,
31 struct uid_gid_map *map); 31 struct uid_gid_map *map);
32static void free_user_ns(struct work_struct *work);
33
34static struct ucounts *inc_user_namespaces(struct user_namespace *ns, kuid_t uid)
35{
36 return inc_ucount(ns, uid, UCOUNT_USER_NAMESPACES);
37}
38
39static void dec_user_namespaces(struct ucounts *ucounts)
40{
41 return dec_ucount(ucounts, UCOUNT_USER_NAMESPACES);
42}
32 43
33static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) 44static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
34{ 45{
@@ -62,10 +73,16 @@ int create_user_ns(struct cred *new)
62 struct user_namespace *ns, *parent_ns = new->user_ns; 73 struct user_namespace *ns, *parent_ns = new->user_ns;
63 kuid_t owner = new->euid; 74 kuid_t owner = new->euid;
64 kgid_t group = new->egid; 75 kgid_t group = new->egid;
65 int ret; 76 struct ucounts *ucounts;
77 int ret, i;
66 78
79 ret = -ENOSPC;
67 if (parent_ns->level > 32) 80 if (parent_ns->level > 32)
68 return -EUSERS; 81 goto fail;
82
83 ucounts = inc_user_namespaces(parent_ns, owner);
84 if (!ucounts)
85 goto fail;
69 86
70 /* 87 /*
71 * Verify that we can not violate the policy of which files 88 * Verify that we can not violate the policy of which files
@@ -73,26 +90,27 @@ int create_user_ns(struct cred *new)
73 * by verifing that the root directory is at the root of the 90 * by verifing that the root directory is at the root of the
74 * mount namespace which allows all files to be accessed. 91 * mount namespace which allows all files to be accessed.
75 */ 92 */
93 ret = -EPERM;
76 if (current_chrooted()) 94 if (current_chrooted())
77 return -EPERM; 95 goto fail_dec;
78 96
79 /* The creator needs a mapping in the parent user namespace 97 /* The creator needs a mapping in the parent user namespace
80 * or else we won't be able to reasonably tell userspace who 98 * or else we won't be able to reasonably tell userspace who
81 * created a user_namespace. 99 * created a user_namespace.
82 */ 100 */
101 ret = -EPERM;
83 if (!kuid_has_mapping(parent_ns, owner) || 102 if (!kuid_has_mapping(parent_ns, owner) ||
84 !kgid_has_mapping(parent_ns, group)) 103 !kgid_has_mapping(parent_ns, group))
85 return -EPERM; 104 goto fail_dec;
86 105
106 ret = -ENOMEM;
87 ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL); 107 ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
88 if (!ns) 108 if (!ns)
89 return -ENOMEM; 109 goto fail_dec;
90 110
91 ret = ns_alloc_inum(&ns->ns); 111 ret = ns_alloc_inum(&ns->ns);
92 if (ret) { 112 if (ret)
93 kmem_cache_free(user_ns_cachep, ns); 113 goto fail_free;
94 return ret;
95 }
96 ns->ns.ops = &userns_operations; 114 ns->ns.ops = &userns_operations;
97 115
98 atomic_set(&ns->count, 1); 116 atomic_set(&ns->count, 1);
@@ -101,18 +119,37 @@ int create_user_ns(struct cred *new)
101 ns->level = parent_ns->level + 1; 119 ns->level = parent_ns->level + 1;
102 ns->owner = owner; 120 ns->owner = owner;
103 ns->group = group; 121 ns->group = group;
122 INIT_WORK(&ns->work, free_user_ns);
123 for (i = 0; i < UCOUNT_COUNTS; i++) {
124 ns->ucount_max[i] = INT_MAX;
125 }
126 ns->ucounts = ucounts;
104 127
105 /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */ 128 /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
106 mutex_lock(&userns_state_mutex); 129 mutex_lock(&userns_state_mutex);
107 ns->flags = parent_ns->flags; 130 ns->flags = parent_ns->flags;
108 mutex_unlock(&userns_state_mutex); 131 mutex_unlock(&userns_state_mutex);
109 132
110 set_cred_user_ns(new, ns);
111
112#ifdef CONFIG_PERSISTENT_KEYRINGS 133#ifdef CONFIG_PERSISTENT_KEYRINGS
113 init_rwsem(&ns->persistent_keyring_register_sem); 134 init_rwsem(&ns->persistent_keyring_register_sem);
114#endif 135#endif
136 ret = -ENOMEM;
137 if (!setup_userns_sysctls(ns))
138 goto fail_keyring;
139
140 set_cred_user_ns(new, ns);
115 return 0; 141 return 0;
142fail_keyring:
143#ifdef CONFIG_PERSISTENT_KEYRINGS
144 key_put(ns->persistent_keyring_register);
145#endif
146 ns_free_inum(&ns->ns);
147fail_free:
148 kmem_cache_free(user_ns_cachep, ns);
149fail_dec:
150 dec_user_namespaces(ucounts);
151fail:
152 return ret;
116} 153}
117 154
118int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) 155int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
@@ -135,21 +172,30 @@ int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
135 return err; 172 return err;
136} 173}
137 174
138void free_user_ns(struct user_namespace *ns) 175static void free_user_ns(struct work_struct *work)
139{ 176{
140 struct user_namespace *parent; 177 struct user_namespace *parent, *ns =
178 container_of(work, struct user_namespace, work);
141 179
142 do { 180 do {
181 struct ucounts *ucounts = ns->ucounts;
143 parent = ns->parent; 182 parent = ns->parent;
183 retire_userns_sysctls(ns);
144#ifdef CONFIG_PERSISTENT_KEYRINGS 184#ifdef CONFIG_PERSISTENT_KEYRINGS
145 key_put(ns->persistent_keyring_register); 185 key_put(ns->persistent_keyring_register);
146#endif 186#endif
147 ns_free_inum(&ns->ns); 187 ns_free_inum(&ns->ns);
148 kmem_cache_free(user_ns_cachep, ns); 188 kmem_cache_free(user_ns_cachep, ns);
189 dec_user_namespaces(ucounts);
149 ns = parent; 190 ns = parent;
150 } while (atomic_dec_and_test(&parent->count)); 191 } while (atomic_dec_and_test(&parent->count));
151} 192}
152EXPORT_SYMBOL(free_user_ns); 193
194void __put_user_ns(struct user_namespace *ns)
195{
196 schedule_work(&ns->work);
197}
198EXPORT_SYMBOL(__put_user_ns);
153 199
154static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count) 200static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
155{ 201{
diff --git a/kernel/utsname.c b/kernel/utsname.c
index e1211a8a5c18..6976cd47dcf6 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -17,6 +17,16 @@
17#include <linux/user_namespace.h> 17#include <linux/user_namespace.h>
18#include <linux/proc_ns.h> 18#include <linux/proc_ns.h>
19 19
20static struct ucounts *inc_uts_namespaces(struct user_namespace *ns)
21{
22 return inc_ucount(ns, current_euid(), UCOUNT_UTS_NAMESPACES);
23}
24
25static void dec_uts_namespaces(struct ucounts *ucounts)
26{
27 dec_ucount(ucounts, UCOUNT_UTS_NAMESPACES);
28}
29
20static struct uts_namespace *create_uts_ns(void) 30static struct uts_namespace *create_uts_ns(void)
21{ 31{
22 struct uts_namespace *uts_ns; 32 struct uts_namespace *uts_ns;
@@ -36,18 +46,24 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
36 struct uts_namespace *old_ns) 46 struct uts_namespace *old_ns)
37{ 47{
38 struct uts_namespace *ns; 48 struct uts_namespace *ns;
49 struct ucounts *ucounts;
39 int err; 50 int err;
40 51
52 err = -ENOSPC;
53 ucounts = inc_uts_namespaces(user_ns);
54 if (!ucounts)
55 goto fail;
56
57 err = -ENOMEM;
41 ns = create_uts_ns(); 58 ns = create_uts_ns();
42 if (!ns) 59 if (!ns)
43 return ERR_PTR(-ENOMEM); 60 goto fail_dec;
44 61
45 err = ns_alloc_inum(&ns->ns); 62 err = ns_alloc_inum(&ns->ns);
46 if (err) { 63 if (err)
47 kfree(ns); 64 goto fail_free;
48 return ERR_PTR(err);
49 }
50 65
66 ns->ucounts = ucounts;
51 ns->ns.ops = &utsns_operations; 67 ns->ns.ops = &utsns_operations;
52 68
53 down_read(&uts_sem); 69 down_read(&uts_sem);
@@ -55,6 +71,13 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
55 ns->user_ns = get_user_ns(user_ns); 71 ns->user_ns = get_user_ns(user_ns);
56 up_read(&uts_sem); 72 up_read(&uts_sem);
57 return ns; 73 return ns;
74
75fail_free:
76 kfree(ns);
77fail_dec:
78 dec_uts_namespaces(ucounts);
79fail:
80 return ERR_PTR(err);
58} 81}
59 82
60/* 83/*
@@ -85,6 +108,7 @@ void free_uts_ns(struct kref *kref)
85 struct uts_namespace *ns; 108 struct uts_namespace *ns;
86 109
87 ns = container_of(kref, struct uts_namespace, kref); 110 ns = container_of(kref, struct uts_namespace, kref);
111 dec_uts_namespaces(ns->ucounts);
88 put_user_ns(ns->user_ns); 112 put_user_ns(ns->user_ns);
89 ns_free_inum(&ns->ns); 113 ns_free_inum(&ns->ns);
90 kfree(ns); 114 kfree(ns);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 861efa34f08c..e8be581b47b0 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -266,6 +266,16 @@ struct net *get_net_ns_by_id(struct net *net, int id)
266 return peer; 266 return peer;
267} 267}
268 268
269static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
270{
271 return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES);
272}
273
274static void dec_net_namespaces(struct ucounts *ucounts)
275{
276 dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
277}
278
269/* 279/*
270 * setup_net runs the initializers for the network namespace object. 280 * setup_net runs the initializers for the network namespace object.
271 */ 281 */
@@ -351,19 +361,27 @@ void net_drop_ns(void *p)
351struct net *copy_net_ns(unsigned long flags, 361struct net *copy_net_ns(unsigned long flags,
352 struct user_namespace *user_ns, struct net *old_net) 362 struct user_namespace *user_ns, struct net *old_net)
353{ 363{
364 struct ucounts *ucounts;
354 struct net *net; 365 struct net *net;
355 int rv; 366 int rv;
356 367
357 if (!(flags & CLONE_NEWNET)) 368 if (!(flags & CLONE_NEWNET))
358 return get_net(old_net); 369 return get_net(old_net);
359 370
371 ucounts = inc_net_namespaces(user_ns);
372 if (!ucounts)
373 return ERR_PTR(-ENOSPC);
374
360 net = net_alloc(); 375 net = net_alloc();
361 if (!net) 376 if (!net) {
377 dec_net_namespaces(ucounts);
362 return ERR_PTR(-ENOMEM); 378 return ERR_PTR(-ENOMEM);
379 }
363 380
364 get_user_ns(user_ns); 381 get_user_ns(user_ns);
365 382
366 mutex_lock(&net_mutex); 383 mutex_lock(&net_mutex);
384 net->ucounts = ucounts;
367 rv = setup_net(net, user_ns); 385 rv = setup_net(net, user_ns);
368 if (rv == 0) { 386 if (rv == 0) {
369 rtnl_lock(); 387 rtnl_lock();
@@ -372,6 +390,7 @@ struct net *copy_net_ns(unsigned long flags,
372 } 390 }
373 mutex_unlock(&net_mutex); 391 mutex_unlock(&net_mutex);
374 if (rv < 0) { 392 if (rv < 0) {
393 dec_net_namespaces(ucounts);
375 put_user_ns(user_ns); 394 put_user_ns(user_ns);
376 net_drop_ns(net); 395 net_drop_ns(net);
377 return ERR_PTR(rv); 396 return ERR_PTR(rv);
@@ -444,6 +463,7 @@ static void cleanup_net(struct work_struct *work)
444 /* Finally it is safe to free my network namespace structure */ 463 /* Finally it is safe to free my network namespace structure */
445 list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { 464 list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
446 list_del_init(&net->exit_list); 465 list_del_init(&net->exit_list);
466 dec_net_namespaces(net->ucounts);
447 put_user_ns(net->user_ns); 467 put_user_ns(net->user_ns);
448 net_drop_ns(net); 468 net_drop_ns(net);
449 } 469 }
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 46a71c701e7c..ba9b5d1a31df 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -27,9 +27,9 @@
27#endif 27#endif
28 28
29static struct ctl_table_set * 29static struct ctl_table_set *
30net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces) 30net_ctl_header_lookup(struct ctl_table_root *root)
31{ 31{
32 return &namespaces->net_ns->sysctls; 32 return &current->nsproxy->net_ns->sysctls;
33} 33}
34 34
35static int is_seen(struct ctl_table_set *set) 35static int is_seen(struct ctl_table_set *set)