summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/sysctl/README1
-rw-r--r--Documentation/sysctl/fs.txt7
-rw-r--r--Documentation/sysctl/user.txt66
-rw-r--r--fs/autofs4/waitq.c4
-rw-r--r--fs/mount.h3
-rw-r--r--fs/namespace.c77
-rw-r--r--fs/nsfs.c105
-rw-r--r--fs/pnode.c2
-rw-r--r--fs/pnode.h1
-rw-r--r--fs/proc/proc_sysctl.c14
-rw-r--r--include/linux/cgroup.h1
-rw-r--r--include/linux/ipc_namespace.h1
-rw-r--r--include/linux/mount.h2
-rw-r--r--include/linux/pid_namespace.h1
-rw-r--r--include/linux/proc_ns.h2
-rw-r--r--include/linux/sysctl.h3
-rw-r--r--include/linux/user_namespace.h44
-rw-r--r--include/linux/utsname.h1
-rw-r--r--include/net/net_namespace.h1
-rw-r--r--include/uapi/linux/nsfs.h13
-rw-r--r--ipc/namespace.c51
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/cgroup.c24
-rw-r--r--kernel/fork.c5
-rw-r--r--kernel/pid_namespace.c50
-rw-r--r--kernel/sysctl.c9
-rw-r--r--kernel/ucount.c235
-rw-r--r--kernel/user_namespace.c99
-rw-r--r--kernel/utsname.c40
-rw-r--r--net/core/net_namespace.c28
-rw-r--r--net/sysctl_net.c4
-rw-r--r--tools/testing/selftests/Makefile1
-rw-r--r--tools/testing/selftests/nsfs/Makefile12
-rw-r--r--tools/testing/selftests/nsfs/owner.c91
-rw-r--r--tools/testing/selftests/nsfs/pidns.c78
35 files changed, 1007 insertions, 71 deletions
diff --git a/Documentation/sysctl/README b/Documentation/sysctl/README
index 8c3306e01d52..91f54ffa0077 100644
--- a/Documentation/sysctl/README
+++ b/Documentation/sysctl/README
@@ -69,6 +69,7 @@ proc/ <empty>
69sunrpc/ SUN Remote Procedure Call (NFS) 69sunrpc/ SUN Remote Procedure Call (NFS)
70vm/ memory management tuning 70vm/ memory management tuning
71 buffer and cache management 71 buffer and cache management
72user/ Per user per user namespace limits
72 73
73These are the subdirs I have on my system. There might be more 74These are the subdirs I have on my system. There might be more
74or other subdirs in another setup. If you see another dir, I'd 75or other subdirs in another setup. If you see another dir, I'd
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index 302b5ed616a6..35e17f748ca7 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -265,6 +265,13 @@ aio-nr can grow to.
265 265
266============================================================== 266==============================================================
267 267
268mount-max:
269
270This denotes the maximum number of mounts that may exist
271in a mount namespace.
272
273==============================================================
274
268 275
2692. /proc/sys/fs/binfmt_misc 2762. /proc/sys/fs/binfmt_misc
270---------------------------------------------------------- 277----------------------------------------------------------
diff --git a/Documentation/sysctl/user.txt b/Documentation/sysctl/user.txt
new file mode 100644
index 000000000000..1291c498f78f
--- /dev/null
+++ b/Documentation/sysctl/user.txt
@@ -0,0 +1,66 @@
1Documentation for /proc/sys/user/* kernel version 4.9.0
2 (c) 2016 Eric Biederman <ebiederm@xmission.com>
3
4==============================================================
5
6This file contains the documentation for the sysctl files in
7/proc/sys/user.
8
9The files in this directory can be used to override the default
10limits on the number of namespaces and other objects that have
11per user per user namespace limits.
12
13The primary purpose of these limits is to stop programs that
14malfunction and attempt to create a ridiculous number of objects,
15before the malfunction becomes a system wide problem. It is the
16intention that the defaults of these limits are set high enough that
17no program in normal operation should run into these limits.
18
19The creation of per user per user namespace objects is charged to
20the user in the user namespace who created the object and
21verified to be below the per user limit in that user namespace.
22
23The creation of objects is also charged to all of the users
24who created user namespaces the creation of the object happens
25in (user namespaces can be nested) and verified to be below the per user
26limits in the user namespaces of those users.
27
28This recursive counting of created objects ensures that creating a
29user namespace does not allow a user to escape their current limits.
30
31Currently, these files are in /proc/sys/user:
32
33- max_cgroup_namespaces
34
35 The maximum number of cgroup namespaces that any user in the current
36 user namespace may create.
37
38- max_ipc_namespaces
39
40 The maximum number of ipc namespaces that any user in the current
41 user namespace may create.
42
43- max_mnt_namespaces
44
45 The maximum number of mount namespaces that any user in the current
46 user namespace may create.
47
48- max_net_namespaces
49
50 The maximum number of network namespaces that any user in the
51 current user namespace may create.
52
53- max_pid_namespaces
54
55 The maximum number of pid namespaces that any user in the current
56 user namespace may create.
57
58- max_user_namespaces
59
60 The maximum number of user namespaces that any user in the current
61 user namespace may create.
62
63- max_uts_namespaces
64
65 The maximum number of uts namespaces that any user in the current
66 user namespace may create.
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 431fd7ee3488..e44271dfceb6 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -431,8 +431,8 @@ int autofs4_wait(struct autofs_sb_info *sbi,
431 memcpy(&wq->name, &qstr, sizeof(struct qstr)); 431 memcpy(&wq->name, &qstr, sizeof(struct qstr));
432 wq->dev = autofs4_get_dev(sbi); 432 wq->dev = autofs4_get_dev(sbi);
433 wq->ino = autofs4_get_ino(sbi); 433 wq->ino = autofs4_get_ino(sbi);
434 wq->uid = current_uid(); 434 wq->uid = current_real_cred()->uid;
435 wq->gid = current_gid(); 435 wq->gid = current_real_cred()->gid;
436 wq->pid = pid; 436 wq->pid = pid;
437 wq->tgid = tgid; 437 wq->tgid = tgid;
438 wq->status = -EINTR; /* Status return if interrupted */ 438 wq->status = -EINTR; /* Status return if interrupted */
diff --git a/fs/mount.h b/fs/mount.h
index 14db05d424f7..d2e25d7b64b3 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -10,9 +10,12 @@ struct mnt_namespace {
10 struct mount * root; 10 struct mount * root;
11 struct list_head list; 11 struct list_head list;
12 struct user_namespace *user_ns; 12 struct user_namespace *user_ns;
13 struct ucounts *ucounts;
13 u64 seq; /* Sequence number to prevent loops */ 14 u64 seq; /* Sequence number to prevent loops */
14 wait_queue_head_t poll; 15 wait_queue_head_t poll;
15 u64 event; 16 u64 event;
17 unsigned int mounts; /* # of mounts in the namespace */
18 unsigned int pending_mounts;
16}; 19};
17 20
18struct mnt_pcp { 21struct mnt_pcp {
diff --git a/fs/namespace.c b/fs/namespace.c
index 7bb2cda3bfef..db1b5a38864e 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -27,6 +27,9 @@
27#include "pnode.h" 27#include "pnode.h"
28#include "internal.h" 28#include "internal.h"
29 29
30/* Maximum number of mounts in a mount namespace */
31unsigned int sysctl_mount_max __read_mostly = 100000;
32
30static unsigned int m_hash_mask __read_mostly; 33static unsigned int m_hash_mask __read_mostly;
31static unsigned int m_hash_shift __read_mostly; 34static unsigned int m_hash_shift __read_mostly;
32static unsigned int mp_hash_mask __read_mostly; 35static unsigned int mp_hash_mask __read_mostly;
@@ -899,6 +902,9 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
899 902
900 list_splice(&head, n->list.prev); 903 list_splice(&head, n->list.prev);
901 904
905 n->mounts += n->pending_mounts;
906 n->pending_mounts = 0;
907
902 attach_shadowed(mnt, parent, shadows); 908 attach_shadowed(mnt, parent, shadows);
903 touch_mnt_namespace(n); 909 touch_mnt_namespace(n);
904} 910}
@@ -1419,11 +1425,16 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
1419 propagate_umount(&tmp_list); 1425 propagate_umount(&tmp_list);
1420 1426
1421 while (!list_empty(&tmp_list)) { 1427 while (!list_empty(&tmp_list)) {
1428 struct mnt_namespace *ns;
1422 bool disconnect; 1429 bool disconnect;
1423 p = list_first_entry(&tmp_list, struct mount, mnt_list); 1430 p = list_first_entry(&tmp_list, struct mount, mnt_list);
1424 list_del_init(&p->mnt_expire); 1431 list_del_init(&p->mnt_expire);
1425 list_del_init(&p->mnt_list); 1432 list_del_init(&p->mnt_list);
1426 __touch_mnt_namespace(p->mnt_ns); 1433 ns = p->mnt_ns;
1434 if (ns) {
1435 ns->mounts--;
1436 __touch_mnt_namespace(ns);
1437 }
1427 p->mnt_ns = NULL; 1438 p->mnt_ns = NULL;
1428 if (how & UMOUNT_SYNC) 1439 if (how & UMOUNT_SYNC)
1429 p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; 1440 p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
@@ -1840,6 +1851,28 @@ static int invent_group_ids(struct mount *mnt, bool recurse)
1840 return 0; 1851 return 0;
1841} 1852}
1842 1853
1854int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
1855{
1856 unsigned int max = READ_ONCE(sysctl_mount_max);
1857 unsigned int mounts = 0, old, pending, sum;
1858 struct mount *p;
1859
1860 for (p = mnt; p; p = next_mnt(p, mnt))
1861 mounts++;
1862
1863 old = ns->mounts;
1864 pending = ns->pending_mounts;
1865 sum = old + pending;
1866 if ((old > sum) ||
1867 (pending > sum) ||
1868 (max < sum) ||
1869 (mounts > (max - sum)))
1870 return -ENOSPC;
1871
1872 ns->pending_mounts = pending + mounts;
1873 return 0;
1874}
1875
1843/* 1876/*
1844 * @source_mnt : mount tree to be attached 1877 * @source_mnt : mount tree to be attached
1845 * @nd : place the mount tree @source_mnt is attached 1878 * @nd : place the mount tree @source_mnt is attached
@@ -1909,10 +1942,18 @@ static int attach_recursive_mnt(struct mount *source_mnt,
1909 struct path *parent_path) 1942 struct path *parent_path)
1910{ 1943{
1911 HLIST_HEAD(tree_list); 1944 HLIST_HEAD(tree_list);
1945 struct mnt_namespace *ns = dest_mnt->mnt_ns;
1912 struct mount *child, *p; 1946 struct mount *child, *p;
1913 struct hlist_node *n; 1947 struct hlist_node *n;
1914 int err; 1948 int err;
1915 1949
1950 /* Is there space to add these mounts to the mount namespace? */
1951 if (!parent_path) {
1952 err = count_mounts(ns, source_mnt);
1953 if (err)
1954 goto out;
1955 }
1956
1916 if (IS_MNT_SHARED(dest_mnt)) { 1957 if (IS_MNT_SHARED(dest_mnt)) {
1917 err = invent_group_ids(source_mnt, true); 1958 err = invent_group_ids(source_mnt, true);
1918 if (err) 1959 if (err)
@@ -1949,11 +1990,13 @@ static int attach_recursive_mnt(struct mount *source_mnt,
1949 out_cleanup_ids: 1990 out_cleanup_ids:
1950 while (!hlist_empty(&tree_list)) { 1991 while (!hlist_empty(&tree_list)) {
1951 child = hlist_entry(tree_list.first, struct mount, mnt_hash); 1992 child = hlist_entry(tree_list.first, struct mount, mnt_hash);
1993 child->mnt_parent->mnt_ns->pending_mounts = 0;
1952 umount_tree(child, UMOUNT_SYNC); 1994 umount_tree(child, UMOUNT_SYNC);
1953 } 1995 }
1954 unlock_mount_hash(); 1996 unlock_mount_hash();
1955 cleanup_group_ids(source_mnt, NULL); 1997 cleanup_group_ids(source_mnt, NULL);
1956 out: 1998 out:
1999 ns->pending_mounts = 0;
1957 return err; 2000 return err;
1958} 2001}
1959 2002
@@ -2719,9 +2762,20 @@ dput_out:
2719 return retval; 2762 return retval;
2720} 2763}
2721 2764
2765static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns)
2766{
2767 return inc_ucount(ns, current_euid(), UCOUNT_MNT_NAMESPACES);
2768}
2769
2770static void dec_mnt_namespaces(struct ucounts *ucounts)
2771{
2772 dec_ucount(ucounts, UCOUNT_MNT_NAMESPACES);
2773}
2774
2722static void free_mnt_ns(struct mnt_namespace *ns) 2775static void free_mnt_ns(struct mnt_namespace *ns)
2723{ 2776{
2724 ns_free_inum(&ns->ns); 2777 ns_free_inum(&ns->ns);
2778 dec_mnt_namespaces(ns->ucounts);
2725 put_user_ns(ns->user_ns); 2779 put_user_ns(ns->user_ns);
2726 kfree(ns); 2780 kfree(ns);
2727} 2781}
@@ -2738,14 +2792,22 @@ static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
2738static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns) 2792static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
2739{ 2793{
2740 struct mnt_namespace *new_ns; 2794 struct mnt_namespace *new_ns;
2795 struct ucounts *ucounts;
2741 int ret; 2796 int ret;
2742 2797
2798 ucounts = inc_mnt_namespaces(user_ns);
2799 if (!ucounts)
2800 return ERR_PTR(-ENOSPC);
2801
2743 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); 2802 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
2744 if (!new_ns) 2803 if (!new_ns) {
2804 dec_mnt_namespaces(ucounts);
2745 return ERR_PTR(-ENOMEM); 2805 return ERR_PTR(-ENOMEM);
2806 }
2746 ret = ns_alloc_inum(&new_ns->ns); 2807 ret = ns_alloc_inum(&new_ns->ns);
2747 if (ret) { 2808 if (ret) {
2748 kfree(new_ns); 2809 kfree(new_ns);
2810 dec_mnt_namespaces(ucounts);
2749 return ERR_PTR(ret); 2811 return ERR_PTR(ret);
2750 } 2812 }
2751 new_ns->ns.ops = &mntns_operations; 2813 new_ns->ns.ops = &mntns_operations;
@@ -2756,6 +2818,9 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
2756 init_waitqueue_head(&new_ns->poll); 2818 init_waitqueue_head(&new_ns->poll);
2757 new_ns->event = 0; 2819 new_ns->event = 0;
2758 new_ns->user_ns = get_user_ns(user_ns); 2820 new_ns->user_ns = get_user_ns(user_ns);
2821 new_ns->ucounts = ucounts;
2822 new_ns->mounts = 0;
2823 new_ns->pending_mounts = 0;
2759 return new_ns; 2824 return new_ns;
2760} 2825}
2761 2826
@@ -2805,6 +2870,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
2805 q = new; 2870 q = new;
2806 while (p) { 2871 while (p) {
2807 q->mnt_ns = new_ns; 2872 q->mnt_ns = new_ns;
2873 new_ns->mounts++;
2808 if (new_fs) { 2874 if (new_fs) {
2809 if (&p->mnt == new_fs->root.mnt) { 2875 if (&p->mnt == new_fs->root.mnt) {
2810 new_fs->root.mnt = mntget(&q->mnt); 2876 new_fs->root.mnt = mntget(&q->mnt);
@@ -2843,6 +2909,7 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
2843 struct mount *mnt = real_mount(m); 2909 struct mount *mnt = real_mount(m);
2844 mnt->mnt_ns = new_ns; 2910 mnt->mnt_ns = new_ns;
2845 new_ns->root = mnt; 2911 new_ns->root = mnt;
2912 new_ns->mounts++;
2846 list_add(&mnt->mnt_list, &new_ns->list); 2913 list_add(&mnt->mnt_list, &new_ns->list);
2847 } else { 2914 } else {
2848 mntput(m); 2915 mntput(m);
@@ -3348,10 +3415,16 @@ static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
3348 return 0; 3415 return 0;
3349} 3416}
3350 3417
3418static struct user_namespace *mntns_owner(struct ns_common *ns)
3419{
3420 return to_mnt_ns(ns)->user_ns;
3421}
3422
3351const struct proc_ns_operations mntns_operations = { 3423const struct proc_ns_operations mntns_operations = {
3352 .name = "mnt", 3424 .name = "mnt",
3353 .type = CLONE_NEWNS, 3425 .type = CLONE_NEWNS,
3354 .get = mntns_get, 3426 .get = mntns_get,
3355 .put = mntns_put, 3427 .put = mntns_put,
3356 .install = mntns_install, 3428 .install = mntns_install,
3429 .owner = mntns_owner,
3357}; 3430};
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 8f20d6016e20..30bb10034120 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -5,11 +5,16 @@
5#include <linux/magic.h> 5#include <linux/magic.h>
6#include <linux/ktime.h> 6#include <linux/ktime.h>
7#include <linux/seq_file.h> 7#include <linux/seq_file.h>
8#include <linux/user_namespace.h>
9#include <linux/nsfs.h>
8 10
9static struct vfsmount *nsfs_mnt; 11static struct vfsmount *nsfs_mnt;
10 12
13static long ns_ioctl(struct file *filp, unsigned int ioctl,
14 unsigned long arg);
11static const struct file_operations ns_file_operations = { 15static const struct file_operations ns_file_operations = {
12 .llseek = no_llseek, 16 .llseek = no_llseek,
17 .unlocked_ioctl = ns_ioctl,
13}; 18};
14 19
15static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) 20static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
@@ -44,22 +49,14 @@ static void nsfs_evict(struct inode *inode)
44 ns->ops->put(ns); 49 ns->ops->put(ns);
45} 50}
46 51
47void *ns_get_path(struct path *path, struct task_struct *task, 52static void *__ns_get_path(struct path *path, struct ns_common *ns)
48 const struct proc_ns_operations *ns_ops)
49{ 53{
50 struct vfsmount *mnt = mntget(nsfs_mnt); 54 struct vfsmount *mnt = nsfs_mnt;
51 struct qstr qname = { .name = "", }; 55 struct qstr qname = { .name = "", };
52 struct dentry *dentry; 56 struct dentry *dentry;
53 struct inode *inode; 57 struct inode *inode;
54 struct ns_common *ns;
55 unsigned long d; 58 unsigned long d;
56 59
57again:
58 ns = ns_ops->get(task);
59 if (!ns) {
60 mntput(mnt);
61 return ERR_PTR(-ENOENT);
62 }
63 rcu_read_lock(); 60 rcu_read_lock();
64 d = atomic_long_read(&ns->stashed); 61 d = atomic_long_read(&ns->stashed);
65 if (!d) 62 if (!d)
@@ -68,17 +65,16 @@ again:
68 if (!lockref_get_not_dead(&dentry->d_lockref)) 65 if (!lockref_get_not_dead(&dentry->d_lockref))
69 goto slow; 66 goto slow;
70 rcu_read_unlock(); 67 rcu_read_unlock();
71 ns_ops->put(ns); 68 ns->ops->put(ns);
72got_it: 69got_it:
73 path->mnt = mnt; 70 path->mnt = mntget(mnt);
74 path->dentry = dentry; 71 path->dentry = dentry;
75 return NULL; 72 return NULL;
76slow: 73slow:
77 rcu_read_unlock(); 74 rcu_read_unlock();
78 inode = new_inode_pseudo(mnt->mnt_sb); 75 inode = new_inode_pseudo(mnt->mnt_sb);
79 if (!inode) { 76 if (!inode) {
80 ns_ops->put(ns); 77 ns->ops->put(ns);
81 mntput(mnt);
82 return ERR_PTR(-ENOMEM); 78 return ERR_PTR(-ENOMEM);
83 } 79 }
84 inode->i_ino = ns->inum; 80 inode->i_ino = ns->inum;
@@ -91,21 +87,96 @@ slow:
91 dentry = d_alloc_pseudo(mnt->mnt_sb, &qname); 87 dentry = d_alloc_pseudo(mnt->mnt_sb, &qname);
92 if (!dentry) { 88 if (!dentry) {
93 iput(inode); 89 iput(inode);
94 mntput(mnt);
95 return ERR_PTR(-ENOMEM); 90 return ERR_PTR(-ENOMEM);
96 } 91 }
97 d_instantiate(dentry, inode); 92 d_instantiate(dentry, inode);
98 dentry->d_fsdata = (void *)ns_ops; 93 dentry->d_fsdata = (void *)ns->ops;
99 d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry); 94 d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry);
100 if (d) { 95 if (d) {
101 d_delete(dentry); /* make sure ->d_prune() does nothing */ 96 d_delete(dentry); /* make sure ->d_prune() does nothing */
102 dput(dentry); 97 dput(dentry);
103 cpu_relax(); 98 cpu_relax();
104 goto again; 99 return ERR_PTR(-EAGAIN);
105 } 100 }
106 goto got_it; 101 goto got_it;
107} 102}
108 103
104void *ns_get_path(struct path *path, struct task_struct *task,
105 const struct proc_ns_operations *ns_ops)
106{
107 struct ns_common *ns;
108 void *ret;
109
110again:
111 ns = ns_ops->get(task);
112 if (!ns)
113 return ERR_PTR(-ENOENT);
114
115 ret = __ns_get_path(path, ns);
116 if (IS_ERR(ret) && PTR_ERR(ret) == -EAGAIN)
117 goto again;
118 return ret;
119}
120
121static int open_related_ns(struct ns_common *ns,
122 struct ns_common *(*get_ns)(struct ns_common *ns))
123{
124 struct path path = {};
125 struct file *f;
126 void *err;
127 int fd;
128
129 fd = get_unused_fd_flags(O_CLOEXEC);
130 if (fd < 0)
131 return fd;
132
133 while (1) {
134 struct ns_common *relative;
135
136 relative = get_ns(ns);
137 if (IS_ERR(relative)) {
138 put_unused_fd(fd);
139 return PTR_ERR(relative);
140 }
141
142 err = __ns_get_path(&path, relative);
143 if (IS_ERR(err) && PTR_ERR(err) == -EAGAIN)
144 continue;
145 break;
146 }
147 if (IS_ERR(err)) {
148 put_unused_fd(fd);
149 return PTR_ERR(err);
150 }
151
152 f = dentry_open(&path, O_RDONLY, current_cred());
153 path_put(&path);
154 if (IS_ERR(f)) {
155 put_unused_fd(fd);
156 fd = PTR_ERR(f);
157 } else
158 fd_install(fd, f);
159
160 return fd;
161}
162
163static long ns_ioctl(struct file *filp, unsigned int ioctl,
164 unsigned long arg)
165{
166 struct ns_common *ns = get_proc_ns(file_inode(filp));
167
168 switch (ioctl) {
169 case NS_GET_USERNS:
170 return open_related_ns(ns, ns_get_owner);
171 case NS_GET_PARENT:
172 if (!ns->ops->get_parent)
173 return -EINVAL;
174 return open_related_ns(ns, ns->ops->get_parent);
175 default:
176 return -ENOTTY;
177 }
178}
179
109int ns_get_name(char *buf, size_t size, struct task_struct *task, 180int ns_get_name(char *buf, size_t size, struct task_struct *task,
110 const struct proc_ns_operations *ns_ops) 181 const struct proc_ns_operations *ns_ops)
111{ 182{
diff --git a/fs/pnode.c b/fs/pnode.c
index 99899705b105..234a9ac49958 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -259,7 +259,7 @@ static int propagate_one(struct mount *m)
259 read_sequnlock_excl(&mount_lock); 259 read_sequnlock_excl(&mount_lock);
260 } 260 }
261 hlist_add_head(&child->mnt_hash, list); 261 hlist_add_head(&child->mnt_hash, list);
262 return 0; 262 return count_mounts(m->mnt_ns, child);
263} 263}
264 264
265/* 265/*
diff --git a/fs/pnode.h b/fs/pnode.h
index 0fcdbe7ca648..550f5a8b4fcf 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -52,4 +52,5 @@ void mnt_set_mountpoint(struct mount *, struct mountpoint *,
52struct mount *copy_tree(struct mount *, struct dentry *, int); 52struct mount *copy_tree(struct mount *, struct dentry *, int);
53bool is_path_reachable(struct mount *, struct dentry *, 53bool is_path_reachable(struct mount *, struct dentry *,
54 const struct path *root); 54 const struct path *root);
55int count_mounts(struct mnt_namespace *ns, struct mount *mnt);
55#endif /* _LINUX_PNODE_H */ 56#endif /* _LINUX_PNODE_H */
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 2ed3d71d4767..71025b9e2a4e 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -72,7 +72,7 @@ static DEFINE_SPINLOCK(sysctl_lock);
72 72
73static void drop_sysctl_table(struct ctl_table_header *header); 73static void drop_sysctl_table(struct ctl_table_header *header);
74static int sysctl_follow_link(struct ctl_table_header **phead, 74static int sysctl_follow_link(struct ctl_table_header **phead,
75 struct ctl_table **pentry, struct nsproxy *namespaces); 75 struct ctl_table **pentry);
76static int insert_links(struct ctl_table_header *head); 76static int insert_links(struct ctl_table_header *head);
77static void put_links(struct ctl_table_header *header); 77static void put_links(struct ctl_table_header *header);
78 78
@@ -319,11 +319,11 @@ static void sysctl_head_finish(struct ctl_table_header *head)
319} 319}
320 320
321static struct ctl_table_set * 321static struct ctl_table_set *
322lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) 322lookup_header_set(struct ctl_table_root *root)
323{ 323{
324 struct ctl_table_set *set = &root->default_set; 324 struct ctl_table_set *set = &root->default_set;
325 if (root->lookup) 325 if (root->lookup)
326 set = root->lookup(root, namespaces); 326 set = root->lookup(root);
327 return set; 327 return set;
328} 328}
329 329
@@ -496,7 +496,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
496 goto out; 496 goto out;
497 497
498 if (S_ISLNK(p->mode)) { 498 if (S_ISLNK(p->mode)) {
499 ret = sysctl_follow_link(&h, &p, current->nsproxy); 499 ret = sysctl_follow_link(&h, &p);
500 err = ERR_PTR(ret); 500 err = ERR_PTR(ret);
501 if (ret) 501 if (ret)
502 goto out; 502 goto out;
@@ -664,7 +664,7 @@ static bool proc_sys_link_fill_cache(struct file *file,
664 664
665 if (S_ISLNK(table->mode)) { 665 if (S_ISLNK(table->mode)) {
666 /* It is not an error if we can not follow the link ignore it */ 666 /* It is not an error if we can not follow the link ignore it */
667 int err = sysctl_follow_link(&head, &table, current->nsproxy); 667 int err = sysctl_follow_link(&head, &table);
668 if (err) 668 if (err)
669 goto out; 669 goto out;
670 } 670 }
@@ -981,7 +981,7 @@ static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir)
981} 981}
982 982
983static int sysctl_follow_link(struct ctl_table_header **phead, 983static int sysctl_follow_link(struct ctl_table_header **phead,
984 struct ctl_table **pentry, struct nsproxy *namespaces) 984 struct ctl_table **pentry)
985{ 985{
986 struct ctl_table_header *head; 986 struct ctl_table_header *head;
987 struct ctl_table_root *root; 987 struct ctl_table_root *root;
@@ -993,7 +993,7 @@ static int sysctl_follow_link(struct ctl_table_header **phead,
993 ret = 0; 993 ret = 0;
994 spin_lock(&sysctl_lock); 994 spin_lock(&sysctl_lock);
995 root = (*pentry)->data; 995 root = (*pentry)->data;
996 set = lookup_header_set(root, namespaces); 996 set = lookup_header_set(root);
997 dir = xlate_dir(set, (*phead)->parent); 997 dir = xlate_dir(set, (*phead)->parent);
998 if (IS_ERR(dir)) 998 if (IS_ERR(dir))
999 ret = PTR_ERR(dir); 999 ret = PTR_ERR(dir);
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index a4414a11eea7..440a72164a11 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -644,6 +644,7 @@ struct cgroup_namespace {
644 atomic_t count; 644 atomic_t count;
645 struct ns_common ns; 645 struct ns_common ns;
646 struct user_namespace *user_ns; 646 struct user_namespace *user_ns;
647 struct ucounts *ucounts;
647 struct css_set *root_cset; 648 struct css_set *root_cset;
648}; 649};
649 650
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index d10e54f03c09..848e5796400e 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -58,6 +58,7 @@ struct ipc_namespace {
58 58
59 /* user_ns which owns the ipc ns */ 59 /* user_ns which owns the ipc ns */
60 struct user_namespace *user_ns; 60 struct user_namespace *user_ns;
61 struct ucounts *ucounts;
61 62
62 struct ns_common ns; 63 struct ns_common ns;
63}; 64};
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 54a594d49733..1172cce949a4 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -96,4 +96,6 @@ extern void mark_mounts_for_expiry(struct list_head *mounts);
96 96
97extern dev_t name_to_dev_t(const char *name); 97extern dev_t name_to_dev_t(const char *name);
98 98
99extern unsigned int sysctl_mount_max;
100
99#endif /* _LINUX_MOUNT_H */ 101#endif /* _LINUX_MOUNT_H */
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 918b117a7cd3..34cce96741bc 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -40,6 +40,7 @@ struct pid_namespace {
40 struct fs_pin *bacct; 40 struct fs_pin *bacct;
41#endif 41#endif
42 struct user_namespace *user_ns; 42 struct user_namespace *user_ns;
43 struct ucounts *ucounts;
43 struct work_struct proc_work; 44 struct work_struct proc_work;
44 kgid_t pid_gid; 45 kgid_t pid_gid;
45 int hide_pid; 46 int hide_pid;
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index de0e7719d4c5..12cb8bd81d2d 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -18,6 +18,8 @@ struct proc_ns_operations {
18 struct ns_common *(*get)(struct task_struct *task); 18 struct ns_common *(*get)(struct task_struct *task);
19 void (*put)(struct ns_common *ns); 19 void (*put)(struct ns_common *ns);
20 int (*install)(struct nsproxy *nsproxy, struct ns_common *ns); 20 int (*install)(struct nsproxy *nsproxy, struct ns_common *ns);
21 struct user_namespace *(*owner)(struct ns_common *ns);
22 struct ns_common *(*get_parent)(struct ns_common *ns);
21}; 23};
22 24
23extern const struct proc_ns_operations netns_operations; 25extern const struct proc_ns_operations netns_operations;
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index ecc3e07c6e63..adf4e51cf597 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -158,8 +158,7 @@ struct ctl_table_set {
158 158
159struct ctl_table_root { 159struct ctl_table_root {
160 struct ctl_table_set default_set; 160 struct ctl_table_set default_set;
161 struct ctl_table_set *(*lookup)(struct ctl_table_root *root, 161 struct ctl_table_set *(*lookup)(struct ctl_table_root *root);
162 struct nsproxy *namespaces);
163 void (*set_ownership)(struct ctl_table_header *head, 162 void (*set_ownership)(struct ctl_table_header *head,
164 struct ctl_table *table, 163 struct ctl_table *table,
165 kuid_t *uid, kgid_t *gid); 164 kuid_t *uid, kgid_t *gid);
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 9217169c64cb..eb209d4523f5 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -22,6 +22,19 @@ struct uid_gid_map { /* 64 bytes -- 1 cache line */
22 22
23#define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED 23#define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED
24 24
25struct ucounts;
26
27enum ucount_type {
28 UCOUNT_USER_NAMESPACES,
29 UCOUNT_PID_NAMESPACES,
30 UCOUNT_UTS_NAMESPACES,
31 UCOUNT_IPC_NAMESPACES,
32 UCOUNT_NET_NAMESPACES,
33 UCOUNT_MNT_NAMESPACES,
34 UCOUNT_CGROUP_NAMESPACES,
35 UCOUNT_COUNTS,
36};
37
25struct user_namespace { 38struct user_namespace {
26 struct uid_gid_map uid_map; 39 struct uid_gid_map uid_map;
27 struct uid_gid_map gid_map; 40 struct uid_gid_map gid_map;
@@ -39,10 +52,30 @@ struct user_namespace {
39 struct key *persistent_keyring_register; 52 struct key *persistent_keyring_register;
40 struct rw_semaphore persistent_keyring_register_sem; 53 struct rw_semaphore persistent_keyring_register_sem;
41#endif 54#endif
55 struct work_struct work;
56#ifdef CONFIG_SYSCTL
57 struct ctl_table_set set;
58 struct ctl_table_header *sysctls;
59#endif
60 struct ucounts *ucounts;
61 int ucount_max[UCOUNT_COUNTS];
62};
63
64struct ucounts {
65 struct hlist_node node;
66 struct user_namespace *ns;
67 kuid_t uid;
68 atomic_t count;
69 atomic_t ucount[UCOUNT_COUNTS];
42}; 70};
43 71
44extern struct user_namespace init_user_ns; 72extern struct user_namespace init_user_ns;
45 73
74bool setup_userns_sysctls(struct user_namespace *ns);
75void retire_userns_sysctls(struct user_namespace *ns);
76struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type);
77void dec_ucount(struct ucounts *ucounts, enum ucount_type type);
78
46#ifdef CONFIG_USER_NS 79#ifdef CONFIG_USER_NS
47 80
48static inline struct user_namespace *get_user_ns(struct user_namespace *ns) 81static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
@@ -54,12 +87,12 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
54 87
55extern int create_user_ns(struct cred *new); 88extern int create_user_ns(struct cred *new);
56extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred); 89extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred);
57extern void free_user_ns(struct user_namespace *ns); 90extern void __put_user_ns(struct user_namespace *ns);
58 91
59static inline void put_user_ns(struct user_namespace *ns) 92static inline void put_user_ns(struct user_namespace *ns)
60{ 93{
61 if (ns && atomic_dec_and_test(&ns->count)) 94 if (ns && atomic_dec_and_test(&ns->count))
62 free_user_ns(ns); 95 __put_user_ns(ns);
63} 96}
64 97
65struct seq_operations; 98struct seq_operations;
@@ -73,6 +106,8 @@ extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t,
73extern int proc_setgroups_show(struct seq_file *m, void *v); 106extern int proc_setgroups_show(struct seq_file *m, void *v);
74extern bool userns_may_setgroups(const struct user_namespace *ns); 107extern bool userns_may_setgroups(const struct user_namespace *ns);
75extern bool current_in_userns(const struct user_namespace *target_ns); 108extern bool current_in_userns(const struct user_namespace *target_ns);
109
110struct ns_common *ns_get_owner(struct ns_common *ns);
76#else 111#else
77 112
78static inline struct user_namespace *get_user_ns(struct user_namespace *ns) 113static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
@@ -106,6 +141,11 @@ static inline bool current_in_userns(const struct user_namespace *target_ns)
106{ 141{
107 return true; 142 return true;
108} 143}
144
145static inline struct ns_common *ns_get_owner(struct ns_common *ns)
146{
147 return ERR_PTR(-EPERM);
148}
109#endif 149#endif
110 150
111#endif /* _LINUX_USER_H */ 151#endif /* _LINUX_USER_H */
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index 5093f58ae192..60f0bb83b313 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -24,6 +24,7 @@ struct uts_namespace {
24 struct kref kref; 24 struct kref kref;
25 struct new_utsname name; 25 struct new_utsname name;
26 struct user_namespace *user_ns; 26 struct user_namespace *user_ns;
27 struct ucounts *ucounts;
27 struct ns_common ns; 28 struct ns_common ns;
28}; 29};
29extern struct uts_namespace init_uts_ns; 30extern struct uts_namespace init_uts_ns;
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 0933c7455a30..fc4f757107df 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -60,6 +60,7 @@ struct net {
60 struct list_head exit_list; /* Use only net_mutex */ 60 struct list_head exit_list; /* Use only net_mutex */
61 61
62 struct user_namespace *user_ns; /* Owning user namespace */ 62 struct user_namespace *user_ns; /* Owning user namespace */
63 struct ucounts *ucounts;
63 spinlock_t nsid_lock; 64 spinlock_t nsid_lock;
64 struct idr netns_ids; 65 struct idr netns_ids;
65 66
diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h
new file mode 100644
index 000000000000..3af617230d1b
--- /dev/null
+++ b/include/uapi/linux/nsfs.h
@@ -0,0 +1,13 @@
1#ifndef __LINUX_NSFS_H
2#define __LINUX_NSFS_H
3
4#include <linux/ioctl.h>
5
/* ioctl "type" byte shared by every nsfs ioctl defined below */
#define NSIO	0xb7

/*
 * NS_GET_USERNS: returns a file descriptor that refers to the user
 * namespace owning the namespace the ioctl is issued on.
 */
#define NS_GET_USERNS	_IO(NSIO, 0x1)

/*
 * NS_GET_PARENT: returns a file descriptor that refers to the parent
 * of the namespace the ioctl is issued on.
 */
#define NS_GET_PARENT	_IO(NSIO, 0x2)
12
13#endif /* __LINUX_NSFS_H */
diff --git a/ipc/namespace.c b/ipc/namespace.c
index d87e6baa1323..0abdea496493 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -16,39 +16,61 @@
16 16
17#include "util.h" 17#include "util.h"
18 18
19static struct ucounts *inc_ipc_namespaces(struct user_namespace *ns)
20{
21 return inc_ucount(ns, current_euid(), UCOUNT_IPC_NAMESPACES);
22}
23
24static void dec_ipc_namespaces(struct ucounts *ucounts)
25{
26 dec_ucount(ucounts, UCOUNT_IPC_NAMESPACES);
27}
28
19static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, 29static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
20 struct ipc_namespace *old_ns) 30 struct ipc_namespace *old_ns)
21{ 31{
22 struct ipc_namespace *ns; 32 struct ipc_namespace *ns;
33 struct ucounts *ucounts;
23 int err; 34 int err;
24 35
36 err = -ENOSPC;
37 ucounts = inc_ipc_namespaces(user_ns);
38 if (!ucounts)
39 goto fail;
40
41 err = -ENOMEM;
25 ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL); 42 ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL);
26 if (ns == NULL) 43 if (ns == NULL)
27 return ERR_PTR(-ENOMEM); 44 goto fail_dec;
28 45
29 err = ns_alloc_inum(&ns->ns); 46 err = ns_alloc_inum(&ns->ns);
30 if (err) { 47 if (err)
31 kfree(ns); 48 goto fail_free;
32 return ERR_PTR(err);
33 }
34 ns->ns.ops = &ipcns_operations; 49 ns->ns.ops = &ipcns_operations;
35 50
36 atomic_set(&ns->count, 1); 51 atomic_set(&ns->count, 1);
37 ns->user_ns = get_user_ns(user_ns); 52 ns->user_ns = get_user_ns(user_ns);
53 ns->ucounts = ucounts;
38 54
39 err = mq_init_ns(ns); 55 err = mq_init_ns(ns);
40 if (err) { 56 if (err)
41 put_user_ns(ns->user_ns); 57 goto fail_put;
42 ns_free_inum(&ns->ns);
43 kfree(ns);
44 return ERR_PTR(err);
45 }
46 58
47 sem_init_ns(ns); 59 sem_init_ns(ns);
48 msg_init_ns(ns); 60 msg_init_ns(ns);
49 shm_init_ns(ns); 61 shm_init_ns(ns);
50 62
51 return ns; 63 return ns;
64
65fail_put:
66 put_user_ns(ns->user_ns);
67 ns_free_inum(&ns->ns);
68fail_free:
69 kfree(ns);
70fail_dec:
71 dec_ipc_namespaces(ucounts);
72fail:
73 return ERR_PTR(err);
52} 74}
53 75
54struct ipc_namespace *copy_ipcs(unsigned long flags, 76struct ipc_namespace *copy_ipcs(unsigned long flags,
@@ -96,6 +118,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
96 msg_exit_ns(ns); 118 msg_exit_ns(ns);
97 shm_exit_ns(ns); 119 shm_exit_ns(ns);
98 120
121 dec_ipc_namespaces(ns->ucounts);
99 put_user_ns(ns->user_ns); 122 put_user_ns(ns->user_ns);
100 ns_free_inum(&ns->ns); 123 ns_free_inum(&ns->ns);
101 kfree(ns); 124 kfree(ns);
@@ -165,10 +188,16 @@ static int ipcns_install(struct nsproxy *nsproxy, struct ns_common *new)
165 return 0; 188 return 0;
166} 189}
167 190
191static struct user_namespace *ipcns_owner(struct ns_common *ns)
192{
193 return to_ipc_ns(ns)->user_ns;
194}
195
168const struct proc_ns_operations ipcns_operations = { 196const struct proc_ns_operations ipcns_operations = {
169 .name = "ipc", 197 .name = "ipc",
170 .type = CLONE_NEWIPC, 198 .type = CLONE_NEWIPC,
171 .get = ipcns_get, 199 .get = ipcns_get,
172 .put = ipcns_put, 200 .put = ipcns_put,
173 .install = ipcns_install, 201 .install = ipcns_install,
202 .owner = ipcns_owner,
174}; 203};
diff --git a/kernel/Makefile b/kernel/Makefile
index e2ec54e2b952..eb26e12c6c2a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -9,7 +9,7 @@ obj-y = fork.o exec_domain.o panic.o \
9 extable.o params.o \ 9 extable.o params.o \
10 kthread.o sys_ni.o nsproxy.o \ 10 kthread.o sys_ni.o nsproxy.o \
11 notifier.o ksysfs.o cred.o reboot.o \ 11 notifier.o ksysfs.o cred.o reboot.o \
12 async.o range.o smpboot.o 12 async.o range.o smpboot.o ucount.o
13 13
14obj-$(CONFIG_MULTIUSER) += groups.o 14obj-$(CONFIG_MULTIUSER) += groups.o
15 15
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 9ba28310eab6..44066158f0d1 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -6328,6 +6328,16 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)
6328 6328
6329/* cgroup namespaces */ 6329/* cgroup namespaces */
6330 6330
6331static struct ucounts *inc_cgroup_namespaces(struct user_namespace *ns)
6332{
6333 return inc_ucount(ns, current_euid(), UCOUNT_CGROUP_NAMESPACES);
6334}
6335
6336static void dec_cgroup_namespaces(struct ucounts *ucounts)
6337{
6338 dec_ucount(ucounts, UCOUNT_CGROUP_NAMESPACES);
6339}
6340
6331static struct cgroup_namespace *alloc_cgroup_ns(void) 6341static struct cgroup_namespace *alloc_cgroup_ns(void)
6332{ 6342{
6333 struct cgroup_namespace *new_ns; 6343 struct cgroup_namespace *new_ns;
@@ -6349,6 +6359,7 @@ static struct cgroup_namespace *alloc_cgroup_ns(void)
6349void free_cgroup_ns(struct cgroup_namespace *ns) 6359void free_cgroup_ns(struct cgroup_namespace *ns)
6350{ 6360{
6351 put_css_set(ns->root_cset); 6361 put_css_set(ns->root_cset);
6362 dec_cgroup_namespaces(ns->ucounts);
6352 put_user_ns(ns->user_ns); 6363 put_user_ns(ns->user_ns);
6353 ns_free_inum(&ns->ns); 6364 ns_free_inum(&ns->ns);
6354 kfree(ns); 6365 kfree(ns);
@@ -6360,6 +6371,7 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
6360 struct cgroup_namespace *old_ns) 6371 struct cgroup_namespace *old_ns)
6361{ 6372{
6362 struct cgroup_namespace *new_ns; 6373 struct cgroup_namespace *new_ns;
6374 struct ucounts *ucounts;
6363 struct css_set *cset; 6375 struct css_set *cset;
6364 6376
6365 BUG_ON(!old_ns); 6377 BUG_ON(!old_ns);
@@ -6373,6 +6385,10 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
6373 if (!ns_capable(user_ns, CAP_SYS_ADMIN)) 6385 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
6374 return ERR_PTR(-EPERM); 6386 return ERR_PTR(-EPERM);
6375 6387
6388 ucounts = inc_cgroup_namespaces(user_ns);
6389 if (!ucounts)
6390 return ERR_PTR(-ENOSPC);
6391
6376 /* It is not safe to take cgroup_mutex here */ 6392 /* It is not safe to take cgroup_mutex here */
6377 spin_lock_irq(&css_set_lock); 6393 spin_lock_irq(&css_set_lock);
6378 cset = task_css_set(current); 6394 cset = task_css_set(current);
@@ -6382,10 +6398,12 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
6382 new_ns = alloc_cgroup_ns(); 6398 new_ns = alloc_cgroup_ns();
6383 if (IS_ERR(new_ns)) { 6399 if (IS_ERR(new_ns)) {
6384 put_css_set(cset); 6400 put_css_set(cset);
6401 dec_cgroup_namespaces(ucounts);
6385 return new_ns; 6402 return new_ns;
6386 } 6403 }
6387 6404
6388 new_ns->user_ns = get_user_ns(user_ns); 6405 new_ns->user_ns = get_user_ns(user_ns);
6406 new_ns->ucounts = ucounts;
6389 new_ns->root_cset = cset; 6407 new_ns->root_cset = cset;
6390 6408
6391 return new_ns; 6409 return new_ns;
@@ -6436,12 +6454,18 @@ static void cgroupns_put(struct ns_common *ns)
6436 put_cgroup_ns(to_cg_ns(ns)); 6454 put_cgroup_ns(to_cg_ns(ns));
6437} 6455}
6438 6456
6457static struct user_namespace *cgroupns_owner(struct ns_common *ns)
6458{
6459 return to_cg_ns(ns)->user_ns;
6460}
6461
6439const struct proc_ns_operations cgroupns_operations = { 6462const struct proc_ns_operations cgroupns_operations = {
6440 .name = "cgroup", 6463 .name = "cgroup",
6441 .type = CLONE_NEWCGROUP, 6464 .type = CLONE_NEWCGROUP,
6442 .get = cgroupns_get, 6465 .get = cgroupns_get,
6443 .put = cgroupns_put, 6466 .put = cgroupns_put,
6444 .install = cgroupns_install, 6467 .install = cgroupns_install,
6468 .owner = cgroupns_owner,
6445}; 6469};
6446 6470
6447static __init int cgroup_namespaces_init(void) 6471static __init int cgroup_namespaces_init(void)
diff --git a/kernel/fork.c b/kernel/fork.c
index c060c7e7c247..9a05bd93f8e7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -418,6 +418,7 @@ int arch_task_struct_size __read_mostly;
418 418
419void __init fork_init(void) 419void __init fork_init(void)
420{ 420{
421 int i;
421#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR 422#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
422#ifndef ARCH_MIN_TASKALIGN 423#ifndef ARCH_MIN_TASKALIGN
423#define ARCH_MIN_TASKALIGN L1_CACHE_BYTES 424#define ARCH_MIN_TASKALIGN L1_CACHE_BYTES
@@ -437,6 +438,10 @@ void __init fork_init(void)
437 init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; 438 init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
438 init_task.signal->rlim[RLIMIT_SIGPENDING] = 439 init_task.signal->rlim[RLIMIT_SIGPENDING] =
439 init_task.signal->rlim[RLIMIT_NPROC]; 440 init_task.signal->rlim[RLIMIT_NPROC];
441
442 for (i = 0; i < UCOUNT_COUNTS; i++) {
443 init_user_ns.ucount_max[i] = max_threads/2;
444 }
440} 445}
441 446
442int __weak arch_dup_task_struct(struct task_struct *dst, 447int __weak arch_dup_task_struct(struct task_struct *dst,
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index a65ba137fd15..df9e8e9e0be7 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -79,23 +79,36 @@ static void proc_cleanup_work(struct work_struct *work)
79/* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ 79/* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */
80#define MAX_PID_NS_LEVEL 32 80#define MAX_PID_NS_LEVEL 32
81 81
82static struct ucounts *inc_pid_namespaces(struct user_namespace *ns)
83{
84 return inc_ucount(ns, current_euid(), UCOUNT_PID_NAMESPACES);
85}
86
87static void dec_pid_namespaces(struct ucounts *ucounts)
88{
89 dec_ucount(ucounts, UCOUNT_PID_NAMESPACES);
90}
91
82static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns, 92static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns,
83 struct pid_namespace *parent_pid_ns) 93 struct pid_namespace *parent_pid_ns)
84{ 94{
85 struct pid_namespace *ns; 95 struct pid_namespace *ns;
86 unsigned int level = parent_pid_ns->level + 1; 96 unsigned int level = parent_pid_ns->level + 1;
97 struct ucounts *ucounts;
87 int i; 98 int i;
88 int err; 99 int err;
89 100
90 if (level > MAX_PID_NS_LEVEL) { 101 err = -ENOSPC;
91 err = -EINVAL; 102 if (level > MAX_PID_NS_LEVEL)
103 goto out;
104 ucounts = inc_pid_namespaces(user_ns);
105 if (!ucounts)
92 goto out; 106 goto out;
93 }
94 107
95 err = -ENOMEM; 108 err = -ENOMEM;
96 ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL); 109 ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);
97 if (ns == NULL) 110 if (ns == NULL)
98 goto out; 111 goto out_dec;
99 112
100 ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL); 113 ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
101 if (!ns->pidmap[0].page) 114 if (!ns->pidmap[0].page)
@@ -114,6 +127,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
114 ns->level = level; 127 ns->level = level;
115 ns->parent = get_pid_ns(parent_pid_ns); 128 ns->parent = get_pid_ns(parent_pid_ns);
116 ns->user_ns = get_user_ns(user_ns); 129 ns->user_ns = get_user_ns(user_ns);
130 ns->ucounts = ucounts;
117 ns->nr_hashed = PIDNS_HASH_ADDING; 131 ns->nr_hashed = PIDNS_HASH_ADDING;
118 INIT_WORK(&ns->proc_work, proc_cleanup_work); 132 INIT_WORK(&ns->proc_work, proc_cleanup_work);
119 133
@@ -129,6 +143,8 @@ out_free_map:
129 kfree(ns->pidmap[0].page); 143 kfree(ns->pidmap[0].page);
130out_free: 144out_free:
131 kmem_cache_free(pid_ns_cachep, ns); 145 kmem_cache_free(pid_ns_cachep, ns);
146out_dec:
147 dec_pid_namespaces(ucounts);
132out: 148out:
133 return ERR_PTR(err); 149 return ERR_PTR(err);
134} 150}
@@ -146,6 +162,7 @@ static void destroy_pid_namespace(struct pid_namespace *ns)
146 ns_free_inum(&ns->ns); 162 ns_free_inum(&ns->ns);
147 for (i = 0; i < PIDMAP_ENTRIES; i++) 163 for (i = 0; i < PIDMAP_ENTRIES; i++)
148 kfree(ns->pidmap[i].page); 164 kfree(ns->pidmap[i].page);
165 dec_pid_namespaces(ns->ucounts);
149 put_user_ns(ns->user_ns); 166 put_user_ns(ns->user_ns);
150 call_rcu(&ns->rcu, delayed_free_pidns); 167 call_rcu(&ns->rcu, delayed_free_pidns);
151} 168}
@@ -388,12 +405,37 @@ static int pidns_install(struct nsproxy *nsproxy, struct ns_common *ns)
388 return 0; 405 return 0;
389} 406}
390 407
408static struct ns_common *pidns_get_parent(struct ns_common *ns)
409{
410 struct pid_namespace *active = task_active_pid_ns(current);
411 struct pid_namespace *pid_ns, *p;
412
413 /* See if the parent is in the current namespace */
414 pid_ns = p = to_pid_ns(ns)->parent;
415 for (;;) {
416 if (!p)
417 return ERR_PTR(-EPERM);
418 if (p == active)
419 break;
420 p = p->parent;
421 }
422
423 return &get_pid_ns(pid_ns)->ns;
424}
425
426static struct user_namespace *pidns_owner(struct ns_common *ns)
427{
428 return to_pid_ns(ns)->user_ns;
429}
430
391const struct proc_ns_operations pidns_operations = { 431const struct proc_ns_operations pidns_operations = {
392 .name = "pid", 432 .name = "pid",
393 .type = CLONE_NEWPID, 433 .type = CLONE_NEWPID,
394 .get = pidns_get, 434 .get = pidns_get,
395 .put = pidns_put, 435 .put = pidns_put,
396 .install = pidns_install, 436 .install = pidns_install,
437 .owner = pidns_owner,
438 .get_parent = pidns_get_parent,
397}; 439};
398 440
399static __init int pid_namespaces_init(void) 441static __init int pid_namespaces_init(void)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index a13bbdaab47d..a43775c6646c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -65,6 +65,7 @@
65#include <linux/sched/sysctl.h> 65#include <linux/sched/sysctl.h>
66#include <linux/kexec.h> 66#include <linux/kexec.h>
67#include <linux/bpf.h> 67#include <linux/bpf.h>
68#include <linux/mount.h>
68 69
69#include <asm/uaccess.h> 70#include <asm/uaccess.h>
70#include <asm/processor.h> 71#include <asm/processor.h>
@@ -1838,6 +1839,14 @@ static struct ctl_table fs_table[] = {
1838 .mode = 0644, 1839 .mode = 0644,
1839 .proc_handler = proc_doulongvec_minmax, 1840 .proc_handler = proc_doulongvec_minmax,
1840 }, 1841 },
1842 {
1843 .procname = "mount-max",
1844 .data = &sysctl_mount_max,
1845 .maxlen = sizeof(unsigned int),
1846 .mode = 0644,
1847 .proc_handler = proc_dointvec_minmax,
1848 .extra1 = &one,
1849 },
1841 { } 1850 { }
1842}; 1851};
1843 1852
diff --git a/kernel/ucount.c b/kernel/ucount.c
new file mode 100644
index 000000000000..9d20d5dd298a
--- /dev/null
+++ b/kernel/ucount.c
@@ -0,0 +1,235 @@
1/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License as
4 * published by the Free Software Foundation, version 2 of the
5 * License.
6 */
7
8#include <linux/stat.h>
9#include <linux/sysctl.h>
10#include <linux/slab.h>
11#include <linux/hash.h>
12#include <linux/user_namespace.h>
13
14#define UCOUNTS_HASHTABLE_BITS 10
15static struct hlist_head ucounts_hashtable[(1 << UCOUNTS_HASHTABLE_BITS)];
16static DEFINE_SPINLOCK(ucounts_lock);
17
18#define ucounts_hashfn(ns, uid) \
19 hash_long((unsigned long)__kuid_val(uid) + (unsigned long)(ns), \
20 UCOUNTS_HASHTABLE_BITS)
21#define ucounts_hashentry(ns, uid) \
22 (ucounts_hashtable + ucounts_hashfn(ns, uid))
23
24
25#ifdef CONFIG_SYSCTL
26static struct ctl_table_set *
27set_lookup(struct ctl_table_root *root)
28{
29 return &current_user_ns()->set;
30}
31
32static int set_is_seen(struct ctl_table_set *set)
33{
34 return &current_user_ns()->set == set;
35}
36
37static int set_permissions(struct ctl_table_header *head,
38 struct ctl_table *table)
39{
40 struct user_namespace *user_ns =
41 container_of(head->set, struct user_namespace, set);
42 int mode;
43
44 /* Allow users with CAP_SYS_RESOURCE unrestrained access */
45 if (ns_capable(user_ns, CAP_SYS_RESOURCE))
46 mode = (table->mode & S_IRWXU) >> 6;
47 else
48 /* Allow all others at most read-only access */
49 mode = table->mode & S_IROTH;
50 return (mode << 6) | (mode << 3) | mode;
51}
52
53static struct ctl_table_root set_root = {
54 .lookup = set_lookup,
55 .permissions = set_permissions,
56};
57
58static int zero = 0;
59static int int_max = INT_MAX;
60#define UCOUNT_ENTRY(name) \
61 { \
62 .procname = name, \
63 .maxlen = sizeof(int), \
64 .mode = 0644, \
65 .proc_handler = proc_dointvec_minmax, \
66 .extra1 = &zero, \
67 .extra2 = &int_max, \
68 }
69static struct ctl_table user_table[] = {
70 UCOUNT_ENTRY("max_user_namespaces"),
71 UCOUNT_ENTRY("max_pid_namespaces"),
72 UCOUNT_ENTRY("max_uts_namespaces"),
73 UCOUNT_ENTRY("max_ipc_namespaces"),
74 UCOUNT_ENTRY("max_net_namespaces"),
75 UCOUNT_ENTRY("max_mnt_namespaces"),
76 UCOUNT_ENTRY("max_cgroup_namespaces"),
77 { }
78};
79#endif /* CONFIG_SYSCTL */
80
81bool setup_userns_sysctls(struct user_namespace *ns)
82{
83#ifdef CONFIG_SYSCTL
84 struct ctl_table *tbl;
85 setup_sysctl_set(&ns->set, &set_root, set_is_seen);
86 tbl = kmemdup(user_table, sizeof(user_table), GFP_KERNEL);
87 if (tbl) {
88 int i;
89 for (i = 0; i < UCOUNT_COUNTS; i++) {
90 tbl[i].data = &ns->ucount_max[i];
91 }
92 ns->sysctls = __register_sysctl_table(&ns->set, "user", tbl);
93 }
94 if (!ns->sysctls) {
95 kfree(tbl);
96 retire_sysctl_set(&ns->set);
97 return false;
98 }
99#endif
100 return true;
101}
102
/*
 * retire_userns_sysctls - tear down what setup_userns_sysctls() built
 * @ns: user namespace being destroyed
 *
 * Unregisters the "user" directory, retires the sysctl set and frees
 * the private table copy.  The table pointer is fetched from the header
 * before unregistering, and the copy is freed last.
 */
void retire_userns_sysctls(struct user_namespace *ns)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table_header *header = ns->sysctls;
	struct ctl_table *copy = header->ctl_table_arg;

	unregister_sysctl_table(header);
	retire_sysctl_set(&ns->set);
	kfree(copy);
#endif
}
114
115static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, struct hlist_head *hashent)
116{
117 struct ucounts *ucounts;
118
119 hlist_for_each_entry(ucounts, hashent, node) {
120 if (uid_eq(ucounts->uid, uid) && (ucounts->ns == ns))
121 return ucounts;
122 }
123 return NULL;
124}
125
126static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid)
127{
128 struct hlist_head *hashent = ucounts_hashentry(ns, uid);
129 struct ucounts *ucounts, *new;
130
131 spin_lock(&ucounts_lock);
132 ucounts = find_ucounts(ns, uid, hashent);
133 if (!ucounts) {
134 spin_unlock(&ucounts_lock);
135
136 new = kzalloc(sizeof(*new), GFP_KERNEL);
137 if (!new)
138 return NULL;
139
140 new->ns = ns;
141 new->uid = uid;
142 atomic_set(&new->count, 0);
143
144 spin_lock(&ucounts_lock);
145 ucounts = find_ucounts(ns, uid, hashent);
146 if (ucounts) {
147 kfree(new);
148 } else {
149 hlist_add_head(&new->node, hashent);
150 ucounts = new;
151 }
152 }
153 if (!atomic_add_unless(&ucounts->count, 1, INT_MAX))
154 ucounts = NULL;
155 spin_unlock(&ucounts_lock);
156 return ucounts;
157}
158
159static void put_ucounts(struct ucounts *ucounts)
160{
161 if (atomic_dec_and_test(&ucounts->count)) {
162 spin_lock(&ucounts_lock);
163 hlist_del_init(&ucounts->node);
164 spin_unlock(&ucounts_lock);
165
166 kfree(ucounts);
167 }
168}
169
170static inline bool atomic_inc_below(atomic_t *v, int u)
171{
172 int c, old;
173 c = atomic_read(v);
174 for (;;) {
175 if (unlikely(c >= u))
176 return false;
177 old = atomic_cmpxchg(v, c, c+1);
178 if (likely(old == c))
179 return true;
180 c = old;
181 }
182}
183
184struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid,
185 enum ucount_type type)
186{
187 struct ucounts *ucounts, *iter, *bad;
188 struct user_namespace *tns;
189 ucounts = get_ucounts(ns, uid);
190 for (iter = ucounts; iter; iter = tns->ucounts) {
191 int max;
192 tns = iter->ns;
193 max = READ_ONCE(tns->ucount_max[type]);
194 if (!atomic_inc_below(&iter->ucount[type], max))
195 goto fail;
196 }
197 return ucounts;
198fail:
199 bad = iter;
200 for (iter = ucounts; iter != bad; iter = iter->ns->ucounts)
201 atomic_dec(&iter->ucount[type]);
202
203 put_ucounts(ucounts);
204 return NULL;
205}
206
207void dec_ucount(struct ucounts *ucounts, enum ucount_type type)
208{
209 struct ucounts *iter;
210 for (iter = ucounts; iter; iter = iter->ns->ucounts) {
211 int dec = atomic_dec_if_positive(&iter->ucount[type]);
212 WARN_ON_ONCE(dec < 0);
213 }
214 put_ucounts(ucounts);
215}
216
217static __init int user_namespace_sysctl_init(void)
218{
219#ifdef CONFIG_SYSCTL
220 static struct ctl_table_header *user_header;
221 static struct ctl_table empty[1];
222 /*
223 * It is necessary to register the user directory in the
224 * default set so that registrations in the child sets work
225 * properly.
226 */
227 user_header = register_sysctl("user", empty);
228 BUG_ON(!user_header);
229 BUG_ON(!setup_userns_sysctls(&init_user_ns));
230#endif
231 return 0;
232}
233subsys_initcall(user_namespace_sysctl_init);
234
235
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 68f594212759..86b7854fec8e 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -29,6 +29,17 @@ static DEFINE_MUTEX(userns_state_mutex);
29static bool new_idmap_permitted(const struct file *file, 29static bool new_idmap_permitted(const struct file *file,
30 struct user_namespace *ns, int cap_setid, 30 struct user_namespace *ns, int cap_setid,
31 struct uid_gid_map *map); 31 struct uid_gid_map *map);
32static void free_user_ns(struct work_struct *work);
33
34static struct ucounts *inc_user_namespaces(struct user_namespace *ns, kuid_t uid)
35{
36 return inc_ucount(ns, uid, UCOUNT_USER_NAMESPACES);
37}
38
39static void dec_user_namespaces(struct ucounts *ucounts)
40{
41 return dec_ucount(ucounts, UCOUNT_USER_NAMESPACES);
42}
32 43
33static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) 44static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
34{ 45{
@@ -62,10 +73,16 @@ int create_user_ns(struct cred *new)
62 struct user_namespace *ns, *parent_ns = new->user_ns; 73 struct user_namespace *ns, *parent_ns = new->user_ns;
63 kuid_t owner = new->euid; 74 kuid_t owner = new->euid;
64 kgid_t group = new->egid; 75 kgid_t group = new->egid;
65 int ret; 76 struct ucounts *ucounts;
77 int ret, i;
66 78
79 ret = -ENOSPC;
67 if (parent_ns->level > 32) 80 if (parent_ns->level > 32)
68 return -EUSERS; 81 goto fail;
82
83 ucounts = inc_user_namespaces(parent_ns, owner);
84 if (!ucounts)
85 goto fail;
69 86
70 /* 87 /*
71 * Verify that we can not violate the policy of which files 88 * Verify that we can not violate the policy of which files
@@ -73,26 +90,27 @@ int create_user_ns(struct cred *new)
73 * by verifing that the root directory is at the root of the 90 * by verifing that the root directory is at the root of the
74 * mount namespace which allows all files to be accessed. 91 * mount namespace which allows all files to be accessed.
75 */ 92 */
93 ret = -EPERM;
76 if (current_chrooted()) 94 if (current_chrooted())
77 return -EPERM; 95 goto fail_dec;
78 96
79 /* The creator needs a mapping in the parent user namespace 97 /* The creator needs a mapping in the parent user namespace
80 * or else we won't be able to reasonably tell userspace who 98 * or else we won't be able to reasonably tell userspace who
81 * created a user_namespace. 99 * created a user_namespace.
82 */ 100 */
101 ret = -EPERM;
83 if (!kuid_has_mapping(parent_ns, owner) || 102 if (!kuid_has_mapping(parent_ns, owner) ||
84 !kgid_has_mapping(parent_ns, group)) 103 !kgid_has_mapping(parent_ns, group))
85 return -EPERM; 104 goto fail_dec;
86 105
106 ret = -ENOMEM;
87 ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL); 107 ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
88 if (!ns) 108 if (!ns)
89 return -ENOMEM; 109 goto fail_dec;
90 110
91 ret = ns_alloc_inum(&ns->ns); 111 ret = ns_alloc_inum(&ns->ns);
92 if (ret) { 112 if (ret)
93 kmem_cache_free(user_ns_cachep, ns); 113 goto fail_free;
94 return ret;
95 }
96 ns->ns.ops = &userns_operations; 114 ns->ns.ops = &userns_operations;
97 115
98 atomic_set(&ns->count, 1); 116 atomic_set(&ns->count, 1);
@@ -101,18 +119,37 @@ int create_user_ns(struct cred *new)
101 ns->level = parent_ns->level + 1; 119 ns->level = parent_ns->level + 1;
102 ns->owner = owner; 120 ns->owner = owner;
103 ns->group = group; 121 ns->group = group;
122 INIT_WORK(&ns->work, free_user_ns);
123 for (i = 0; i < UCOUNT_COUNTS; i++) {
124 ns->ucount_max[i] = INT_MAX;
125 }
126 ns->ucounts = ucounts;
104 127
105 /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */ 128 /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
106 mutex_lock(&userns_state_mutex); 129 mutex_lock(&userns_state_mutex);
107 ns->flags = parent_ns->flags; 130 ns->flags = parent_ns->flags;
108 mutex_unlock(&userns_state_mutex); 131 mutex_unlock(&userns_state_mutex);
109 132
110 set_cred_user_ns(new, ns);
111
112#ifdef CONFIG_PERSISTENT_KEYRINGS 133#ifdef CONFIG_PERSISTENT_KEYRINGS
113 init_rwsem(&ns->persistent_keyring_register_sem); 134 init_rwsem(&ns->persistent_keyring_register_sem);
114#endif 135#endif
136 ret = -ENOMEM;
137 if (!setup_userns_sysctls(ns))
138 goto fail_keyring;
139
140 set_cred_user_ns(new, ns);
115 return 0; 141 return 0;
142fail_keyring:
143#ifdef CONFIG_PERSISTENT_KEYRINGS
144 key_put(ns->persistent_keyring_register);
145#endif
146 ns_free_inum(&ns->ns);
147fail_free:
148 kmem_cache_free(user_ns_cachep, ns);
149fail_dec:
150 dec_user_namespaces(ucounts);
151fail:
152 return ret;
116} 153}
117 154
118int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) 155int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
@@ -135,21 +172,30 @@ int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
135 return err; 172 return err;
136} 173}
137 174
138void free_user_ns(struct user_namespace *ns) 175static void free_user_ns(struct work_struct *work)
139{ 176{
140 struct user_namespace *parent; 177 struct user_namespace *parent, *ns =
178 container_of(work, struct user_namespace, work);
141 179
142 do { 180 do {
181 struct ucounts *ucounts = ns->ucounts;
143 parent = ns->parent; 182 parent = ns->parent;
183 retire_userns_sysctls(ns);
144#ifdef CONFIG_PERSISTENT_KEYRINGS 184#ifdef CONFIG_PERSISTENT_KEYRINGS
145 key_put(ns->persistent_keyring_register); 185 key_put(ns->persistent_keyring_register);
146#endif 186#endif
147 ns_free_inum(&ns->ns); 187 ns_free_inum(&ns->ns);
148 kmem_cache_free(user_ns_cachep, ns); 188 kmem_cache_free(user_ns_cachep, ns);
189 dec_user_namespaces(ucounts);
149 ns = parent; 190 ns = parent;
150 } while (atomic_dec_and_test(&parent->count)); 191 } while (atomic_dec_and_test(&parent->count));
151} 192}
152EXPORT_SYMBOL(free_user_ns); 193
194void __put_user_ns(struct user_namespace *ns)
195{
196 schedule_work(&ns->work);
197}
198EXPORT_SYMBOL(__put_user_ns);
153 199
154static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count) 200static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
155{ 201{
@@ -1004,12 +1050,37 @@ static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
1004 return commit_creds(cred); 1050 return commit_creds(cred);
1005} 1051}
1006 1052
1053struct ns_common *ns_get_owner(struct ns_common *ns)
1054{
1055 struct user_namespace *my_user_ns = current_user_ns();
1056 struct user_namespace *owner, *p;
1057
1058 /* See if the owner is in the current user namespace */
1059 owner = p = ns->ops->owner(ns);
1060 for (;;) {
1061 if (!p)
1062 return ERR_PTR(-EPERM);
1063 if (p == my_user_ns)
1064 break;
1065 p = p->parent;
1066 }
1067
1068 return &get_user_ns(owner)->ns;
1069}
1070
1071static struct user_namespace *userns_owner(struct ns_common *ns)
1072{
1073 return to_user_ns(ns)->parent;
1074}
1075
1007const struct proc_ns_operations userns_operations = { 1076const struct proc_ns_operations userns_operations = {
1008 .name = "user", 1077 .name = "user",
1009 .type = CLONE_NEWUSER, 1078 .type = CLONE_NEWUSER,
1010 .get = userns_get, 1079 .get = userns_get,
1011 .put = userns_put, 1080 .put = userns_put,
1012 .install = userns_install, 1081 .install = userns_install,
1082 .owner = userns_owner,
1083 .get_parent = ns_get_owner,
1013}; 1084};
1014 1085
1015static __init int user_namespaces_init(void) 1086static __init int user_namespaces_init(void)
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 831ea7108232..6976cd47dcf6 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -17,6 +17,16 @@
17#include <linux/user_namespace.h> 17#include <linux/user_namespace.h>
18#include <linux/proc_ns.h> 18#include <linux/proc_ns.h>
19 19
20static struct ucounts *inc_uts_namespaces(struct user_namespace *ns)
21{
22 return inc_ucount(ns, current_euid(), UCOUNT_UTS_NAMESPACES);
23}
24
25static void dec_uts_namespaces(struct ucounts *ucounts)
26{
27 dec_ucount(ucounts, UCOUNT_UTS_NAMESPACES);
28}
29
20static struct uts_namespace *create_uts_ns(void) 30static struct uts_namespace *create_uts_ns(void)
21{ 31{
22 struct uts_namespace *uts_ns; 32 struct uts_namespace *uts_ns;
@@ -36,18 +46,24 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
36 struct uts_namespace *old_ns) 46 struct uts_namespace *old_ns)
37{ 47{
38 struct uts_namespace *ns; 48 struct uts_namespace *ns;
49 struct ucounts *ucounts;
39 int err; 50 int err;
40 51
52 err = -ENOSPC;
53 ucounts = inc_uts_namespaces(user_ns);
54 if (!ucounts)
55 goto fail;
56
57 err = -ENOMEM;
41 ns = create_uts_ns(); 58 ns = create_uts_ns();
42 if (!ns) 59 if (!ns)
43 return ERR_PTR(-ENOMEM); 60 goto fail_dec;
44 61
45 err = ns_alloc_inum(&ns->ns); 62 err = ns_alloc_inum(&ns->ns);
46 if (err) { 63 if (err)
47 kfree(ns); 64 goto fail_free;
48 return ERR_PTR(err);
49 }
50 65
66 ns->ucounts = ucounts;
51 ns->ns.ops = &utsns_operations; 67 ns->ns.ops = &utsns_operations;
52 68
53 down_read(&uts_sem); 69 down_read(&uts_sem);
@@ -55,6 +71,13 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
55 ns->user_ns = get_user_ns(user_ns); 71 ns->user_ns = get_user_ns(user_ns);
56 up_read(&uts_sem); 72 up_read(&uts_sem);
57 return ns; 73 return ns;
74
75fail_free:
76 kfree(ns);
77fail_dec:
78 dec_uts_namespaces(ucounts);
79fail:
80 return ERR_PTR(err);
58} 81}
59 82
60/* 83/*
@@ -85,6 +108,7 @@ void free_uts_ns(struct kref *kref)
85 struct uts_namespace *ns; 108 struct uts_namespace *ns;
86 109
87 ns = container_of(kref, struct uts_namespace, kref); 110 ns = container_of(kref, struct uts_namespace, kref);
111 dec_uts_namespaces(ns->ucounts);
88 put_user_ns(ns->user_ns); 112 put_user_ns(ns->user_ns);
89 ns_free_inum(&ns->ns); 113 ns_free_inum(&ns->ns);
90 kfree(ns); 114 kfree(ns);
@@ -130,10 +154,16 @@ static int utsns_install(struct nsproxy *nsproxy, struct ns_common *new)
130 return 0; 154 return 0;
131} 155}
132 156
157static struct user_namespace *utsns_owner(struct ns_common *ns)
158{
159 return to_uts_ns(ns)->user_ns;
160}
161
133const struct proc_ns_operations utsns_operations = { 162const struct proc_ns_operations utsns_operations = {
134 .name = "uts", 163 .name = "uts",
135 .type = CLONE_NEWUTS, 164 .type = CLONE_NEWUTS,
136 .get = utsns_get, 165 .get = utsns_get,
137 .put = utsns_put, 166 .put = utsns_put,
138 .install = utsns_install, 167 .install = utsns_install,
168 .owner = utsns_owner,
139}; 169};
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 42bdda0e616b..989434f36f96 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -309,6 +309,16 @@ out_undo:
309 309
310 310
311#ifdef CONFIG_NET_NS 311#ifdef CONFIG_NET_NS
312static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
313{
314 return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES);
315}
316
317static void dec_net_namespaces(struct ucounts *ucounts)
318{
319 dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
320}
321
312static struct kmem_cache *net_cachep; 322static struct kmem_cache *net_cachep;
313static struct workqueue_struct *netns_wq; 323static struct workqueue_struct *netns_wq;
314 324
@@ -350,19 +360,27 @@ void net_drop_ns(void *p)
350struct net *copy_net_ns(unsigned long flags, 360struct net *copy_net_ns(unsigned long flags,
351 struct user_namespace *user_ns, struct net *old_net) 361 struct user_namespace *user_ns, struct net *old_net)
352{ 362{
363 struct ucounts *ucounts;
353 struct net *net; 364 struct net *net;
354 int rv; 365 int rv;
355 366
356 if (!(flags & CLONE_NEWNET)) 367 if (!(flags & CLONE_NEWNET))
357 return get_net(old_net); 368 return get_net(old_net);
358 369
370 ucounts = inc_net_namespaces(user_ns);
371 if (!ucounts)
372 return ERR_PTR(-ENOSPC);
373
359 net = net_alloc(); 374 net = net_alloc();
360 if (!net) 375 if (!net) {
376 dec_net_namespaces(ucounts);
361 return ERR_PTR(-ENOMEM); 377 return ERR_PTR(-ENOMEM);
378 }
362 379
363 get_user_ns(user_ns); 380 get_user_ns(user_ns);
364 381
365 mutex_lock(&net_mutex); 382 mutex_lock(&net_mutex);
383 net->ucounts = ucounts;
366 rv = setup_net(net, user_ns); 384 rv = setup_net(net, user_ns);
367 if (rv == 0) { 385 if (rv == 0) {
368 rtnl_lock(); 386 rtnl_lock();
@@ -371,6 +389,7 @@ struct net *copy_net_ns(unsigned long flags,
371 } 389 }
372 mutex_unlock(&net_mutex); 390 mutex_unlock(&net_mutex);
373 if (rv < 0) { 391 if (rv < 0) {
392 dec_net_namespaces(ucounts);
374 put_user_ns(user_ns); 393 put_user_ns(user_ns);
375 net_drop_ns(net); 394 net_drop_ns(net);
376 return ERR_PTR(rv); 395 return ERR_PTR(rv);
@@ -443,6 +462,7 @@ static void cleanup_net(struct work_struct *work)
443 /* Finally it is safe to free my network namespace structure */ 462 /* Finally it is safe to free my network namespace structure */
444 list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { 463 list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
445 list_del_init(&net->exit_list); 464 list_del_init(&net->exit_list);
465 dec_net_namespaces(net->ucounts);
446 put_user_ns(net->user_ns); 466 put_user_ns(net->user_ns);
447 net_drop_ns(net); 467 net_drop_ns(net);
448 } 468 }
@@ -1004,11 +1024,17 @@ static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns)
1004 return 0; 1024 return 0;
1005} 1025}
1006 1026
1027static struct user_namespace *netns_owner(struct ns_common *ns)
1028{
1029 return to_net_ns(ns)->user_ns;
1030}
1031
1007const struct proc_ns_operations netns_operations = { 1032const struct proc_ns_operations netns_operations = {
1008 .name = "net", 1033 .name = "net",
1009 .type = CLONE_NEWNET, 1034 .type = CLONE_NEWNET,
1010 .get = netns_get, 1035 .get = netns_get,
1011 .put = netns_put, 1036 .put = netns_put,
1012 .install = netns_install, 1037 .install = netns_install,
1038 .owner = netns_owner,
1013}; 1039};
1014#endif 1040#endif
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index e0c71bd8f7cf..919981324171 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -27,9 +27,9 @@
27#endif 27#endif
28 28
29static struct ctl_table_set * 29static struct ctl_table_set *
30net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces) 30net_ctl_header_lookup(struct ctl_table_root *root)
31{ 31{
32 return &namespaces->net_ns->sysctls; 32 return &current->nsproxy->net_ns->sysctls;
33} 33}
34 34
35static int is_seen(struct ctl_table_set *set) 35static int is_seen(struct ctl_table_set *set)
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index ff9e5f20a5a7..f770dba2a6f6 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -15,6 +15,7 @@ TARGETS += memory-hotplug
15TARGETS += mount 15TARGETS += mount
16TARGETS += mqueue 16TARGETS += mqueue
17TARGETS += net 17TARGETS += net
18TARGETS += nsfs
18TARGETS += powerpc 19TARGETS += powerpc
19TARGETS += pstore 20TARGETS += pstore
20TARGETS += ptrace 21TARGETS += ptrace
diff --git a/tools/testing/selftests/nsfs/Makefile b/tools/testing/selftests/nsfs/Makefile
new file mode 100644
index 000000000000..2306054a901a
--- /dev/null
+++ b/tools/testing/selftests/nsfs/Makefile
@@ -0,0 +1,12 @@
# nsfs ioctl selftests: one binary per test, each built from the C file of
# the same name with warnings promoted to errors.
TEST_PROGS := owner pidns

CFLAGS := -Wall -Werror

all: $(TEST_PROGS)
owner: owner.c
pidns: pidns.c

clean:
	$(RM) $(TEST_PROGS)

include ../lib.mk
diff --git a/tools/testing/selftests/nsfs/owner.c b/tools/testing/selftests/nsfs/owner.c
new file mode 100644
index 000000000000..437205f8b714
--- /dev/null
+++ b/tools/testing/selftests/nsfs/owner.c
@@ -0,0 +1,91 @@
1#define _GNU_SOURCE
2#include <sched.h>
3#include <unistd.h>
4#include <stdio.h>
5#include <stdlib.h>
6#include <signal.h>
7#include <errno.h>
8#include <sys/types.h>
9#include <sys/stat.h>
10#include <fcntl.h>
11#include <sys/ioctl.h>
12#include <sys/prctl.h>
13#include <sys/wait.h>
14
15#define NSIO 0xb7
16#define NS_GET_USERNS _IO(NSIO, 0x1)
17
/*
 * Print "<function>:<line>:<message>: <strerror(errno)>" to stderr and
 * evaluate to 1, so a caller can write `return pr_err(...)` to report a
 * failure and exit with a non-zero status in one statement.
 */
#define pr_err(fmt, ...)						\
	(fprintf(stderr, "%s:%d:" fmt ": %m\n",				\
		 __func__, __LINE__, ##__VA_ARGS__), 1)
24
25int main(int argc, char *argvp[])
26{
27 int pfd[2], ns, uns, init_uns;
28 struct stat st1, st2;
29 char path[128];
30 pid_t pid;
31 char c;
32
33 if (pipe(pfd))
34 return 1;
35
36 pid = fork();
37 if (pid < 0)
38 return pr_err("fork");
39 if (pid == 0) {
40 prctl(PR_SET_PDEATHSIG, SIGKILL);
41 if (unshare(CLONE_NEWUTS | CLONE_NEWUSER))
42 return pr_err("unshare");
43 close(pfd[0]);
44 close(pfd[1]);
45 while (1)
46 sleep(1);
47 return 0;
48 }
49 close(pfd[1]);
50 if (read(pfd[0], &c, 1) != 0)
51 return pr_err("Unable to read from pipe");
52 close(pfd[0]);
53
54 snprintf(path, sizeof(path), "/proc/%d/ns/uts", pid);
55 ns = open(path, O_RDONLY);
56 if (ns < 0)
57 return pr_err("Unable to open %s", path);
58
59 uns = ioctl(ns, NS_GET_USERNS);
60 if (uns < 0)
61 return pr_err("Unable to get an owning user namespace");
62
63 if (fstat(uns, &st1))
64 return pr_err("fstat");
65
66 snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
67 if (stat(path, &st2))
68 return pr_err("stat");
69
70 if (st1.st_ino != st2.st_ino)
71 return pr_err("NS_GET_USERNS returned a wrong namespace");
72
73 init_uns = ioctl(uns, NS_GET_USERNS);
74 if (uns < 0)
75 return pr_err("Unable to get an owning user namespace");
76
77 if (ioctl(init_uns, NS_GET_USERNS) >= 0 || errno != EPERM)
78 return pr_err("Don't get EPERM");
79
80 if (unshare(CLONE_NEWUSER))
81 return pr_err("unshare");
82
83 if (ioctl(ns, NS_GET_USERNS) >= 0 || errno != EPERM)
84 return pr_err("Don't get EPERM");
85 if (ioctl(init_uns, NS_GET_USERNS) >= 0 || errno != EPERM)
86 return pr_err("Don't get EPERM");
87
88 kill(pid, SIGKILL);
89 wait(NULL);
90 return 0;
91}
diff --git a/tools/testing/selftests/nsfs/pidns.c b/tools/testing/selftests/nsfs/pidns.c
new file mode 100644
index 000000000000..ae3a0d68e966
--- /dev/null
+++ b/tools/testing/selftests/nsfs/pidns.c
@@ -0,0 +1,78 @@
1#define _GNU_SOURCE
2#include <sched.h>
3#include <unistd.h>
4#include <stdio.h>
5#include <stdlib.h>
6#include <signal.h>
7#include <errno.h>
8#include <sys/types.h>
9#include <sys/stat.h>
10#include <fcntl.h>
11#include <sys/ioctl.h>
12#include <sys/prctl.h>
13#include <sys/wait.h>
14
/*
 * Print "<function>:<line>:<message>: <strerror(errno)>" to stderr and
 * evaluate to 1, so a caller can write `return pr_err(...)` to report a
 * failure and exit with a non-zero status in one statement.
 */
#define pr_err(fmt, ...)						\
	(fprintf(stderr, "%s:%d:" fmt ": %m\n",				\
		 __func__, __LINE__, ##__VA_ARGS__), 1)
21
22#define NSIO 0xb7
23#define NS_GET_USERNS _IO(NSIO, 0x1)
24#define NS_GET_PARENT _IO(NSIO, 0x2)
25
/* Alignment applied to the buffer that serves as the clone() child stack. */
#define __stack_aligned__	__attribute__((aligned(16)))

/*
 * Stack storage for the clone()d child.
 *
 * stack_ptr is a zero-length array placed directly after stack, so its
 * address is one past the end of the buffer; main() passes it to clone() as
 * the child's initial stack pointer.
 *
 * The buffer used to be 128 bytes, which is less than the child's prctl()
 * and sleep() calls can be expected to need. clone(2) requires the caller to
 * supply a stack large enough for the child, so give it 64 KiB.
 */
struct cr_clone_arg {
	char stack[64 * 1024] __stack_aligned__;
	char stack_ptr[0];
};
31
/*
 * Entry point of the cloned child: request SIGKILL on parent death, then
 * idle in one-second sleeps. The loop never terminates, so the trailing
 * exit() is not reached; the process ends only when it is killed.
 */
static int child(void *args)
{
	prctl(PR_SET_PDEATHSIG, SIGKILL);

	for (;;)
		sleep(1);

	exit(0);
}
39
40int main(int argc, char *argv[])
41{
42 char *ns_strs[] = {"pid", "user"};
43 char path[] = "/proc/0123456789/ns/pid";
44 struct cr_clone_arg ca;
45 struct stat st1, st2;
46 int ns, pns, i;
47 pid_t pid;
48
49 pid = clone(child, ca.stack_ptr, CLONE_NEWUSER | CLONE_NEWPID | SIGCHLD, NULL);
50 if (pid < 0)
51 return pr_err("clone");
52
53 for (i = 0; i < 2; i++) {
54 snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns_strs[i]);
55 ns = open(path, O_RDONLY);
56 if (ns < 0)
57 return pr_err("Unable to open %s", path);
58
59 pns = ioctl(ns, NS_GET_PARENT);
60 if (pns < 0)
61 return pr_err("Unable to get a parent pidns");
62
63 snprintf(path, sizeof(path), "/proc/self/ns/%s", ns_strs[i]);
64 if (stat(path, &st2))
65 return pr_err("Unable to stat %s", path);
66 if (fstat(pns, &st1))
67 return pr_err("Unable to stat the parent pidns");
68 if (st1.st_ino != st2.st_ino)
69 return pr_err("NS_GET_PARENT returned a wrong namespace");
70
71 if (ioctl(pns, NS_GET_PARENT) >= 0 || errno != EPERM)
72 return pr_err("Don't get EPERM");;
73 }
74
75 kill(pid, SIGKILL);
76 wait(NULL);
77 return 0;
78}