diff options
35 files changed, 1007 insertions, 71 deletions
diff --git a/Documentation/sysctl/README b/Documentation/sysctl/README index 8c3306e01d52..91f54ffa0077 100644 --- a/Documentation/sysctl/README +++ b/Documentation/sysctl/README | |||
@@ -69,6 +69,7 @@ proc/ <empty> | |||
69 | sunrpc/ SUN Remote Procedure Call (NFS) | 69 | sunrpc/ SUN Remote Procedure Call (NFS) |
70 | vm/ memory management tuning | 70 | vm/ memory management tuning |
71 | buffer and cache management | 71 | buffer and cache management |
72 | user/ Per user per user namespace limits | ||
72 | 73 | ||
73 | These are the subdirs I have on my system. There might be more | 74 | These are the subdirs I have on my system. There might be more |
74 | or other subdirs in another setup. If you see another dir, I'd | 75 | or other subdirs in another setup. If you see another dir, I'd |
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index 302b5ed616a6..35e17f748ca7 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt | |||
@@ -265,6 +265,13 @@ aio-nr can grow to. | |||
265 | 265 | ||
266 | ============================================================== | 266 | ============================================================== |
267 | 267 | ||
268 | mount-max: | ||
269 | |||
270 | This denotes the maximum number of mounts that may exist | ||
271 | in a mount namespace. | ||
272 | |||
273 | ============================================================== | ||
274 | |||
268 | 275 | ||
269 | 2. /proc/sys/fs/binfmt_misc | 276 | 2. /proc/sys/fs/binfmt_misc |
270 | ---------------------------------------------------------- | 277 | ---------------------------------------------------------- |
diff --git a/Documentation/sysctl/user.txt b/Documentation/sysctl/user.txt new file mode 100644 index 000000000000..1291c498f78f --- /dev/null +++ b/Documentation/sysctl/user.txt | |||
@@ -0,0 +1,66 @@ | |||
1 | Documentation for /proc/sys/user/* kernel version 4.9.0 | ||
2 | (c) 2016 Eric Biederman <ebiederm@xmission.com> | ||
3 | |||
4 | ============================================================== | ||
5 | |||
6 | This file contains the documentation for the sysctl files in | |
7 | /proc/sys/user. | ||
8 | |||
9 | The files in this directory can be used to override the default | ||
10 | limits on the number of namespaces and other objects that have | ||
11 | per user per user namespace limits. | ||
12 | |||
13 | The primary purpose of these limits is to stop programs that | ||
14 | malfunction and attempt to create a ridiculous number of objects, | ||
15 | before the malfunction becomes a system wide problem. It is the | ||
16 | intention that the defaults of these limits are set high enough that | ||
17 | no program in normal operation should run into these limits. | ||
18 | |||
19 | The creation of per user per user namespace objects is charged to | |
20 | the user in the user namespace who created the object and | ||
21 | verified to be below the per user limit in that user namespace. | ||
22 | |||
23 | The creation of objects is also charged to all of the users | |
24 | who created the user namespaces in which the object is created | |
25 | (user namespaces can be nested) and verified to be below the per user | |
26 | limits in the user namespaces of those users. | |
27 | |||
28 | This recursive counting of created objects ensures that creating a | ||
29 | user namespace does not allow a user to escape their current limits. | ||
30 | |||
31 | Currently, these files are in /proc/sys/user: | ||
32 | |||
33 | - max_cgroup_namespaces | ||
34 | |||
35 | The maximum number of cgroup namespaces that any user in the current | ||
36 | user namespace may create. | ||
37 | |||
38 | - max_ipc_namespaces | ||
39 | |||
40 | The maximum number of ipc namespaces that any user in the current | ||
41 | user namespace may create. | ||
42 | |||
43 | - max_mnt_namespaces | ||
44 | |||
45 | The maximum number of mount namespaces that any user in the current | ||
46 | user namespace may create. | ||
47 | |||
48 | - max_net_namespaces | ||
49 | |||
50 | The maximum number of network namespaces that any user in the | ||
51 | current user namespace may create. | ||
52 | |||
53 | - max_pid_namespaces | ||
54 | |||
55 | The maximum number of pid namespaces that any user in the current | ||
56 | user namespace may create. | ||
57 | |||
58 | - max_user_namespaces | ||
59 | |||
60 | The maximum number of user namespaces that any user in the current | ||
61 | user namespace may create. | ||
62 | |||
63 | - max_uts_namespaces | ||
64 | |||
65 | The maximum number of uts namespaces that any user in the current | |
66 | user namespace may create. | ||
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 431fd7ee3488..e44271dfceb6 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c | |||
@@ -431,8 +431,8 @@ int autofs4_wait(struct autofs_sb_info *sbi, | |||
431 | memcpy(&wq->name, &qstr, sizeof(struct qstr)); | 431 | memcpy(&wq->name, &qstr, sizeof(struct qstr)); |
432 | wq->dev = autofs4_get_dev(sbi); | 432 | wq->dev = autofs4_get_dev(sbi); |
433 | wq->ino = autofs4_get_ino(sbi); | 433 | wq->ino = autofs4_get_ino(sbi); |
434 | wq->uid = current_uid(); | 434 | wq->uid = current_real_cred()->uid; |
435 | wq->gid = current_gid(); | 435 | wq->gid = current_real_cred()->gid; |
436 | wq->pid = pid; | 436 | wq->pid = pid; |
437 | wq->tgid = tgid; | 437 | wq->tgid = tgid; |
438 | wq->status = -EINTR; /* Status return if interrupted */ | 438 | wq->status = -EINTR; /* Status return if interrupted */ |
diff --git a/fs/mount.h b/fs/mount.h index 14db05d424f7..d2e25d7b64b3 100644 --- a/fs/mount.h +++ b/fs/mount.h | |||
@@ -10,9 +10,12 @@ struct mnt_namespace { | |||
10 | struct mount * root; | 10 | struct mount * root; |
11 | struct list_head list; | 11 | struct list_head list; |
12 | struct user_namespace *user_ns; | 12 | struct user_namespace *user_ns; |
13 | struct ucounts *ucounts; | ||
13 | u64 seq; /* Sequence number to prevent loops */ | 14 | u64 seq; /* Sequence number to prevent loops */ |
14 | wait_queue_head_t poll; | 15 | wait_queue_head_t poll; |
15 | u64 event; | 16 | u64 event; |
17 | unsigned int mounts; /* # of mounts in the namespace */ | ||
18 | unsigned int pending_mounts; | ||
16 | }; | 19 | }; |
17 | 20 | ||
18 | struct mnt_pcp { | 21 | struct mnt_pcp { |
diff --git a/fs/namespace.c b/fs/namespace.c index 7bb2cda3bfef..db1b5a38864e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -27,6 +27,9 @@ | |||
27 | #include "pnode.h" | 27 | #include "pnode.h" |
28 | #include "internal.h" | 28 | #include "internal.h" |
29 | 29 | ||
30 | /* Maximum number of mounts in a mount namespace */ | ||
31 | unsigned int sysctl_mount_max __read_mostly = 100000; | ||
32 | |||
30 | static unsigned int m_hash_mask __read_mostly; | 33 | static unsigned int m_hash_mask __read_mostly; |
31 | static unsigned int m_hash_shift __read_mostly; | 34 | static unsigned int m_hash_shift __read_mostly; |
32 | static unsigned int mp_hash_mask __read_mostly; | 35 | static unsigned int mp_hash_mask __read_mostly; |
@@ -899,6 +902,9 @@ static void commit_tree(struct mount *mnt, struct mount *shadows) | |||
899 | 902 | ||
900 | list_splice(&head, n->list.prev); | 903 | list_splice(&head, n->list.prev); |
901 | 904 | ||
905 | n->mounts += n->pending_mounts; | ||
906 | n->pending_mounts = 0; | ||
907 | |||
902 | attach_shadowed(mnt, parent, shadows); | 908 | attach_shadowed(mnt, parent, shadows); |
903 | touch_mnt_namespace(n); | 909 | touch_mnt_namespace(n); |
904 | } | 910 | } |
@@ -1419,11 +1425,16 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) | |||
1419 | propagate_umount(&tmp_list); | 1425 | propagate_umount(&tmp_list); |
1420 | 1426 | ||
1421 | while (!list_empty(&tmp_list)) { | 1427 | while (!list_empty(&tmp_list)) { |
1428 | struct mnt_namespace *ns; | ||
1422 | bool disconnect; | 1429 | bool disconnect; |
1423 | p = list_first_entry(&tmp_list, struct mount, mnt_list); | 1430 | p = list_first_entry(&tmp_list, struct mount, mnt_list); |
1424 | list_del_init(&p->mnt_expire); | 1431 | list_del_init(&p->mnt_expire); |
1425 | list_del_init(&p->mnt_list); | 1432 | list_del_init(&p->mnt_list); |
1426 | __touch_mnt_namespace(p->mnt_ns); | 1433 | ns = p->mnt_ns; |
1434 | if (ns) { | ||
1435 | ns->mounts--; | ||
1436 | __touch_mnt_namespace(ns); | ||
1437 | } | ||
1427 | p->mnt_ns = NULL; | 1438 | p->mnt_ns = NULL; |
1428 | if (how & UMOUNT_SYNC) | 1439 | if (how & UMOUNT_SYNC) |
1429 | p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; | 1440 | p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; |
@@ -1840,6 +1851,28 @@ static int invent_group_ids(struct mount *mnt, bool recurse) | |||
1840 | return 0; | 1851 | return 0; |
1841 | } | 1852 | } |
1842 | 1853 | ||
1854 | int count_mounts(struct mnt_namespace *ns, struct mount *mnt) | ||
1855 | { | ||
1856 | unsigned int max = READ_ONCE(sysctl_mount_max); | ||
1857 | unsigned int mounts = 0, old, pending, sum; | ||
1858 | struct mount *p; | ||
1859 | |||
1860 | for (p = mnt; p; p = next_mnt(p, mnt)) | ||
1861 | mounts++; | ||
1862 | |||
1863 | old = ns->mounts; | ||
1864 | pending = ns->pending_mounts; | ||
1865 | sum = old + pending; | ||
1866 | if ((old > sum) || | ||
1867 | (pending > sum) || | ||
1868 | (max < sum) || | ||
1869 | (mounts > (max - sum))) | ||
1870 | return -ENOSPC; | ||
1871 | |||
1872 | ns->pending_mounts = pending + mounts; | ||
1873 | return 0; | ||
1874 | } | ||
1875 | |||
1843 | /* | 1876 | /* |
1844 | * @source_mnt : mount tree to be attached | 1877 | * @source_mnt : mount tree to be attached |
1845 | * @nd : place the mount tree @source_mnt is attached | 1878 | * @nd : place the mount tree @source_mnt is attached |
@@ -1909,10 +1942,18 @@ static int attach_recursive_mnt(struct mount *source_mnt, | |||
1909 | struct path *parent_path) | 1942 | struct path *parent_path) |
1910 | { | 1943 | { |
1911 | HLIST_HEAD(tree_list); | 1944 | HLIST_HEAD(tree_list); |
1945 | struct mnt_namespace *ns = dest_mnt->mnt_ns; | ||
1912 | struct mount *child, *p; | 1946 | struct mount *child, *p; |
1913 | struct hlist_node *n; | 1947 | struct hlist_node *n; |
1914 | int err; | 1948 | int err; |
1915 | 1949 | ||
1950 | /* Is there space to add these mounts to the mount namespace? */ | ||
1951 | if (!parent_path) { | ||
1952 | err = count_mounts(ns, source_mnt); | ||
1953 | if (err) | ||
1954 | goto out; | ||
1955 | } | ||
1956 | |||
1916 | if (IS_MNT_SHARED(dest_mnt)) { | 1957 | if (IS_MNT_SHARED(dest_mnt)) { |
1917 | err = invent_group_ids(source_mnt, true); | 1958 | err = invent_group_ids(source_mnt, true); |
1918 | if (err) | 1959 | if (err) |
@@ -1949,11 +1990,13 @@ static int attach_recursive_mnt(struct mount *source_mnt, | |||
1949 | out_cleanup_ids: | 1990 | out_cleanup_ids: |
1950 | while (!hlist_empty(&tree_list)) { | 1991 | while (!hlist_empty(&tree_list)) { |
1951 | child = hlist_entry(tree_list.first, struct mount, mnt_hash); | 1992 | child = hlist_entry(tree_list.first, struct mount, mnt_hash); |
1993 | child->mnt_parent->mnt_ns->pending_mounts = 0; | ||
1952 | umount_tree(child, UMOUNT_SYNC); | 1994 | umount_tree(child, UMOUNT_SYNC); |
1953 | } | 1995 | } |
1954 | unlock_mount_hash(); | 1996 | unlock_mount_hash(); |
1955 | cleanup_group_ids(source_mnt, NULL); | 1997 | cleanup_group_ids(source_mnt, NULL); |
1956 | out: | 1998 | out: |
1999 | ns->pending_mounts = 0; | ||
1957 | return err; | 2000 | return err; |
1958 | } | 2001 | } |
1959 | 2002 | ||
@@ -2719,9 +2762,20 @@ dput_out: | |||
2719 | return retval; | 2762 | return retval; |
2720 | } | 2763 | } |
2721 | 2764 | ||
2765 | static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns) | ||
2766 | { | ||
2767 | return inc_ucount(ns, current_euid(), UCOUNT_MNT_NAMESPACES); | ||
2768 | } | ||
2769 | |||
2770 | static void dec_mnt_namespaces(struct ucounts *ucounts) | ||
2771 | { | ||
2772 | dec_ucount(ucounts, UCOUNT_MNT_NAMESPACES); | ||
2773 | } | ||
2774 | |||
2722 | static void free_mnt_ns(struct mnt_namespace *ns) | 2775 | static void free_mnt_ns(struct mnt_namespace *ns) |
2723 | { | 2776 | { |
2724 | ns_free_inum(&ns->ns); | 2777 | ns_free_inum(&ns->ns); |
2778 | dec_mnt_namespaces(ns->ucounts); | ||
2725 | put_user_ns(ns->user_ns); | 2779 | put_user_ns(ns->user_ns); |
2726 | kfree(ns); | 2780 | kfree(ns); |
2727 | } | 2781 | } |
@@ -2738,14 +2792,22 @@ static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1); | |||
2738 | static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns) | 2792 | static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns) |
2739 | { | 2793 | { |
2740 | struct mnt_namespace *new_ns; | 2794 | struct mnt_namespace *new_ns; |
2795 | struct ucounts *ucounts; | ||
2741 | int ret; | 2796 | int ret; |
2742 | 2797 | ||
2798 | ucounts = inc_mnt_namespaces(user_ns); | ||
2799 | if (!ucounts) | ||
2800 | return ERR_PTR(-ENOSPC); | ||
2801 | |||
2743 | new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); | 2802 | new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); |
2744 | if (!new_ns) | 2803 | if (!new_ns) { |
2804 | dec_mnt_namespaces(ucounts); | ||
2745 | return ERR_PTR(-ENOMEM); | 2805 | return ERR_PTR(-ENOMEM); |
2806 | } | ||
2746 | ret = ns_alloc_inum(&new_ns->ns); | 2807 | ret = ns_alloc_inum(&new_ns->ns); |
2747 | if (ret) { | 2808 | if (ret) { |
2748 | kfree(new_ns); | 2809 | kfree(new_ns); |
2810 | dec_mnt_namespaces(ucounts); | ||
2749 | return ERR_PTR(ret); | 2811 | return ERR_PTR(ret); |
2750 | } | 2812 | } |
2751 | new_ns->ns.ops = &mntns_operations; | 2813 | new_ns->ns.ops = &mntns_operations; |
@@ -2756,6 +2818,9 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns) | |||
2756 | init_waitqueue_head(&new_ns->poll); | 2818 | init_waitqueue_head(&new_ns->poll); |
2757 | new_ns->event = 0; | 2819 | new_ns->event = 0; |
2758 | new_ns->user_ns = get_user_ns(user_ns); | 2820 | new_ns->user_ns = get_user_ns(user_ns); |
2821 | new_ns->ucounts = ucounts; | ||
2822 | new_ns->mounts = 0; | ||
2823 | new_ns->pending_mounts = 0; | ||
2759 | return new_ns; | 2824 | return new_ns; |
2760 | } | 2825 | } |
2761 | 2826 | ||
@@ -2805,6 +2870,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, | |||
2805 | q = new; | 2870 | q = new; |
2806 | while (p) { | 2871 | while (p) { |
2807 | q->mnt_ns = new_ns; | 2872 | q->mnt_ns = new_ns; |
2873 | new_ns->mounts++; | ||
2808 | if (new_fs) { | 2874 | if (new_fs) { |
2809 | if (&p->mnt == new_fs->root.mnt) { | 2875 | if (&p->mnt == new_fs->root.mnt) { |
2810 | new_fs->root.mnt = mntget(&q->mnt); | 2876 | new_fs->root.mnt = mntget(&q->mnt); |
@@ -2843,6 +2909,7 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *m) | |||
2843 | struct mount *mnt = real_mount(m); | 2909 | struct mount *mnt = real_mount(m); |
2844 | mnt->mnt_ns = new_ns; | 2910 | mnt->mnt_ns = new_ns; |
2845 | new_ns->root = mnt; | 2911 | new_ns->root = mnt; |
2912 | new_ns->mounts++; | ||
2846 | list_add(&mnt->mnt_list, &new_ns->list); | 2913 | list_add(&mnt->mnt_list, &new_ns->list); |
2847 | } else { | 2914 | } else { |
2848 | mntput(m); | 2915 | mntput(m); |
@@ -3348,10 +3415,16 @@ static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns) | |||
3348 | return 0; | 3415 | return 0; |
3349 | } | 3416 | } |
3350 | 3417 | ||
3418 | static struct user_namespace *mntns_owner(struct ns_common *ns) | ||
3419 | { | ||
3420 | return to_mnt_ns(ns)->user_ns; | ||
3421 | } | ||
3422 | |||
3351 | const struct proc_ns_operations mntns_operations = { | 3423 | const struct proc_ns_operations mntns_operations = { |
3352 | .name = "mnt", | 3424 | .name = "mnt", |
3353 | .type = CLONE_NEWNS, | 3425 | .type = CLONE_NEWNS, |
3354 | .get = mntns_get, | 3426 | .get = mntns_get, |
3355 | .put = mntns_put, | 3427 | .put = mntns_put, |
3356 | .install = mntns_install, | 3428 | .install = mntns_install, |
3429 | .owner = mntns_owner, | ||
3357 | }; | 3430 | }; |
@@ -5,11 +5,16 @@ | |||
5 | #include <linux/magic.h> | 5 | #include <linux/magic.h> |
6 | #include <linux/ktime.h> | 6 | #include <linux/ktime.h> |
7 | #include <linux/seq_file.h> | 7 | #include <linux/seq_file.h> |
8 | #include <linux/user_namespace.h> | ||
9 | #include <linux/nsfs.h> | ||
8 | 10 | ||
9 | static struct vfsmount *nsfs_mnt; | 11 | static struct vfsmount *nsfs_mnt; |
10 | 12 | ||
13 | static long ns_ioctl(struct file *filp, unsigned int ioctl, | ||
14 | unsigned long arg); | ||
11 | static const struct file_operations ns_file_operations = { | 15 | static const struct file_operations ns_file_operations = { |
12 | .llseek = no_llseek, | 16 | .llseek = no_llseek, |
17 | .unlocked_ioctl = ns_ioctl, | ||
13 | }; | 18 | }; |
14 | 19 | ||
15 | static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) | 20 | static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) |
@@ -44,22 +49,14 @@ static void nsfs_evict(struct inode *inode) | |||
44 | ns->ops->put(ns); | 49 | ns->ops->put(ns); |
45 | } | 50 | } |
46 | 51 | ||
47 | void *ns_get_path(struct path *path, struct task_struct *task, | 52 | static void *__ns_get_path(struct path *path, struct ns_common *ns) |
48 | const struct proc_ns_operations *ns_ops) | ||
49 | { | 53 | { |
50 | struct vfsmount *mnt = mntget(nsfs_mnt); | 54 | struct vfsmount *mnt = nsfs_mnt; |
51 | struct qstr qname = { .name = "", }; | 55 | struct qstr qname = { .name = "", }; |
52 | struct dentry *dentry; | 56 | struct dentry *dentry; |
53 | struct inode *inode; | 57 | struct inode *inode; |
54 | struct ns_common *ns; | ||
55 | unsigned long d; | 58 | unsigned long d; |
56 | 59 | ||
57 | again: | ||
58 | ns = ns_ops->get(task); | ||
59 | if (!ns) { | ||
60 | mntput(mnt); | ||
61 | return ERR_PTR(-ENOENT); | ||
62 | } | ||
63 | rcu_read_lock(); | 60 | rcu_read_lock(); |
64 | d = atomic_long_read(&ns->stashed); | 61 | d = atomic_long_read(&ns->stashed); |
65 | if (!d) | 62 | if (!d) |
@@ -68,17 +65,16 @@ again: | |||
68 | if (!lockref_get_not_dead(&dentry->d_lockref)) | 65 | if (!lockref_get_not_dead(&dentry->d_lockref)) |
69 | goto slow; | 66 | goto slow; |
70 | rcu_read_unlock(); | 67 | rcu_read_unlock(); |
71 | ns_ops->put(ns); | 68 | ns->ops->put(ns); |
72 | got_it: | 69 | got_it: |
73 | path->mnt = mnt; | 70 | path->mnt = mntget(mnt); |
74 | path->dentry = dentry; | 71 | path->dentry = dentry; |
75 | return NULL; | 72 | return NULL; |
76 | slow: | 73 | slow: |
77 | rcu_read_unlock(); | 74 | rcu_read_unlock(); |
78 | inode = new_inode_pseudo(mnt->mnt_sb); | 75 | inode = new_inode_pseudo(mnt->mnt_sb); |
79 | if (!inode) { | 76 | if (!inode) { |
80 | ns_ops->put(ns); | 77 | ns->ops->put(ns); |
81 | mntput(mnt); | ||
82 | return ERR_PTR(-ENOMEM); | 78 | return ERR_PTR(-ENOMEM); |
83 | } | 79 | } |
84 | inode->i_ino = ns->inum; | 80 | inode->i_ino = ns->inum; |
@@ -91,21 +87,96 @@ slow: | |||
91 | dentry = d_alloc_pseudo(mnt->mnt_sb, &qname); | 87 | dentry = d_alloc_pseudo(mnt->mnt_sb, &qname); |
92 | if (!dentry) { | 88 | if (!dentry) { |
93 | iput(inode); | 89 | iput(inode); |
94 | mntput(mnt); | ||
95 | return ERR_PTR(-ENOMEM); | 90 | return ERR_PTR(-ENOMEM); |
96 | } | 91 | } |
97 | d_instantiate(dentry, inode); | 92 | d_instantiate(dentry, inode); |
98 | dentry->d_fsdata = (void *)ns_ops; | 93 | dentry->d_fsdata = (void *)ns->ops; |
99 | d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry); | 94 | d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry); |
100 | if (d) { | 95 | if (d) { |
101 | d_delete(dentry); /* make sure ->d_prune() does nothing */ | 96 | d_delete(dentry); /* make sure ->d_prune() does nothing */ |
102 | dput(dentry); | 97 | dput(dentry); |
103 | cpu_relax(); | 98 | cpu_relax(); |
104 | goto again; | 99 | return ERR_PTR(-EAGAIN); |
105 | } | 100 | } |
106 | goto got_it; | 101 | goto got_it; |
107 | } | 102 | } |
108 | 103 | ||
104 | void *ns_get_path(struct path *path, struct task_struct *task, | ||
105 | const struct proc_ns_operations *ns_ops) | ||
106 | { | ||
107 | struct ns_common *ns; | ||
108 | void *ret; | ||
109 | |||
110 | again: | ||
111 | ns = ns_ops->get(task); | ||
112 | if (!ns) | ||
113 | return ERR_PTR(-ENOENT); | ||
114 | |||
115 | ret = __ns_get_path(path, ns); | ||
116 | if (IS_ERR(ret) && PTR_ERR(ret) == -EAGAIN) | ||
117 | goto again; | ||
118 | return ret; | ||
119 | } | ||
120 | |||
121 | static int open_related_ns(struct ns_common *ns, | ||
122 | struct ns_common *(*get_ns)(struct ns_common *ns)) | ||
123 | { | ||
124 | struct path path = {}; | ||
125 | struct file *f; | ||
126 | void *err; | ||
127 | int fd; | ||
128 | |||
129 | fd = get_unused_fd_flags(O_CLOEXEC); | ||
130 | if (fd < 0) | ||
131 | return fd; | ||
132 | |||
133 | while (1) { | ||
134 | struct ns_common *relative; | ||
135 | |||
136 | relative = get_ns(ns); | ||
137 | if (IS_ERR(relative)) { | ||
138 | put_unused_fd(fd); | ||
139 | return PTR_ERR(relative); | ||
140 | } | ||
141 | |||
142 | err = __ns_get_path(&path, relative); | ||
143 | if (IS_ERR(err) && PTR_ERR(err) == -EAGAIN) | ||
144 | continue; | ||
145 | break; | ||
146 | } | ||
147 | if (IS_ERR(err)) { | ||
148 | put_unused_fd(fd); | ||
149 | return PTR_ERR(err); | ||
150 | } | ||
151 | |||
152 | f = dentry_open(&path, O_RDONLY, current_cred()); | ||
153 | path_put(&path); | ||
154 | if (IS_ERR(f)) { | ||
155 | put_unused_fd(fd); | ||
156 | fd = PTR_ERR(f); | ||
157 | } else | ||
158 | fd_install(fd, f); | ||
159 | |||
160 | return fd; | ||
161 | } | ||
162 | |||
163 | static long ns_ioctl(struct file *filp, unsigned int ioctl, | ||
164 | unsigned long arg) | ||
165 | { | ||
166 | struct ns_common *ns = get_proc_ns(file_inode(filp)); | ||
167 | |||
168 | switch (ioctl) { | ||
169 | case NS_GET_USERNS: | ||
170 | return open_related_ns(ns, ns_get_owner); | ||
171 | case NS_GET_PARENT: | ||
172 | if (!ns->ops->get_parent) | ||
173 | return -EINVAL; | ||
174 | return open_related_ns(ns, ns->ops->get_parent); | ||
175 | default: | ||
176 | return -ENOTTY; | ||
177 | } | ||
178 | } | ||
179 | |||
109 | int ns_get_name(char *buf, size_t size, struct task_struct *task, | 180 | int ns_get_name(char *buf, size_t size, struct task_struct *task, |
110 | const struct proc_ns_operations *ns_ops) | 181 | const struct proc_ns_operations *ns_ops) |
111 | { | 182 | { |
diff --git a/fs/pnode.c b/fs/pnode.c index 99899705b105..234a9ac49958 100644 --- a/fs/pnode.c +++ b/fs/pnode.c | |||
@@ -259,7 +259,7 @@ static int propagate_one(struct mount *m) | |||
259 | read_sequnlock_excl(&mount_lock); | 259 | read_sequnlock_excl(&mount_lock); |
260 | } | 260 | } |
261 | hlist_add_head(&child->mnt_hash, list); | 261 | hlist_add_head(&child->mnt_hash, list); |
262 | return 0; | 262 | return count_mounts(m->mnt_ns, child); |
263 | } | 263 | } |
264 | 264 | ||
265 | /* | 265 | /* |
diff --git a/fs/pnode.h b/fs/pnode.h index 0fcdbe7ca648..550f5a8b4fcf 100644 --- a/fs/pnode.h +++ b/fs/pnode.h | |||
@@ -52,4 +52,5 @@ void mnt_set_mountpoint(struct mount *, struct mountpoint *, | |||
52 | struct mount *copy_tree(struct mount *, struct dentry *, int); | 52 | struct mount *copy_tree(struct mount *, struct dentry *, int); |
53 | bool is_path_reachable(struct mount *, struct dentry *, | 53 | bool is_path_reachable(struct mount *, struct dentry *, |
54 | const struct path *root); | 54 | const struct path *root); |
55 | int count_mounts(struct mnt_namespace *ns, struct mount *mnt); | ||
55 | #endif /* _LINUX_PNODE_H */ | 56 | #endif /* _LINUX_PNODE_H */ |
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 2ed3d71d4767..71025b9e2a4e 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
@@ -72,7 +72,7 @@ static DEFINE_SPINLOCK(sysctl_lock); | |||
72 | 72 | ||
73 | static void drop_sysctl_table(struct ctl_table_header *header); | 73 | static void drop_sysctl_table(struct ctl_table_header *header); |
74 | static int sysctl_follow_link(struct ctl_table_header **phead, | 74 | static int sysctl_follow_link(struct ctl_table_header **phead, |
75 | struct ctl_table **pentry, struct nsproxy *namespaces); | 75 | struct ctl_table **pentry); |
76 | static int insert_links(struct ctl_table_header *head); | 76 | static int insert_links(struct ctl_table_header *head); |
77 | static void put_links(struct ctl_table_header *header); | 77 | static void put_links(struct ctl_table_header *header); |
78 | 78 | ||
@@ -319,11 +319,11 @@ static void sysctl_head_finish(struct ctl_table_header *head) | |||
319 | } | 319 | } |
320 | 320 | ||
321 | static struct ctl_table_set * | 321 | static struct ctl_table_set * |
322 | lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) | 322 | lookup_header_set(struct ctl_table_root *root) |
323 | { | 323 | { |
324 | struct ctl_table_set *set = &root->default_set; | 324 | struct ctl_table_set *set = &root->default_set; |
325 | if (root->lookup) | 325 | if (root->lookup) |
326 | set = root->lookup(root, namespaces); | 326 | set = root->lookup(root); |
327 | return set; | 327 | return set; |
328 | } | 328 | } |
329 | 329 | ||
@@ -496,7 +496,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, | |||
496 | goto out; | 496 | goto out; |
497 | 497 | ||
498 | if (S_ISLNK(p->mode)) { | 498 | if (S_ISLNK(p->mode)) { |
499 | ret = sysctl_follow_link(&h, &p, current->nsproxy); | 499 | ret = sysctl_follow_link(&h, &p); |
500 | err = ERR_PTR(ret); | 500 | err = ERR_PTR(ret); |
501 | if (ret) | 501 | if (ret) |
502 | goto out; | 502 | goto out; |
@@ -664,7 +664,7 @@ static bool proc_sys_link_fill_cache(struct file *file, | |||
664 | 664 | ||
665 | if (S_ISLNK(table->mode)) { | 665 | if (S_ISLNK(table->mode)) { |
666 | /* It is not an error if we can not follow the link ignore it */ | 666 | /* It is not an error if we can not follow the link ignore it */ |
667 | int err = sysctl_follow_link(&head, &table, current->nsproxy); | 667 | int err = sysctl_follow_link(&head, &table); |
668 | if (err) | 668 | if (err) |
669 | goto out; | 669 | goto out; |
670 | } | 670 | } |
@@ -981,7 +981,7 @@ static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir) | |||
981 | } | 981 | } |
982 | 982 | ||
983 | static int sysctl_follow_link(struct ctl_table_header **phead, | 983 | static int sysctl_follow_link(struct ctl_table_header **phead, |
984 | struct ctl_table **pentry, struct nsproxy *namespaces) | 984 | struct ctl_table **pentry) |
985 | { | 985 | { |
986 | struct ctl_table_header *head; | 986 | struct ctl_table_header *head; |
987 | struct ctl_table_root *root; | 987 | struct ctl_table_root *root; |
@@ -993,7 +993,7 @@ static int sysctl_follow_link(struct ctl_table_header **phead, | |||
993 | ret = 0; | 993 | ret = 0; |
994 | spin_lock(&sysctl_lock); | 994 | spin_lock(&sysctl_lock); |
995 | root = (*pentry)->data; | 995 | root = (*pentry)->data; |
996 | set = lookup_header_set(root, namespaces); | 996 | set = lookup_header_set(root); |
997 | dir = xlate_dir(set, (*phead)->parent); | 997 | dir = xlate_dir(set, (*phead)->parent); |
998 | if (IS_ERR(dir)) | 998 | if (IS_ERR(dir)) |
999 | ret = PTR_ERR(dir); | 999 | ret = PTR_ERR(dir); |
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a4414a11eea7..440a72164a11 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
@@ -644,6 +644,7 @@ struct cgroup_namespace { | |||
644 | atomic_t count; | 644 | atomic_t count; |
645 | struct ns_common ns; | 645 | struct ns_common ns; |
646 | struct user_namespace *user_ns; | 646 | struct user_namespace *user_ns; |
647 | struct ucounts *ucounts; | ||
647 | struct css_set *root_cset; | 648 | struct css_set *root_cset; |
648 | }; | 649 | }; |
649 | 650 | ||
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index d10e54f03c09..848e5796400e 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h | |||
@@ -58,6 +58,7 @@ struct ipc_namespace { | |||
58 | 58 | ||
59 | /* user_ns which owns the ipc ns */ | 59 | /* user_ns which owns the ipc ns */ |
60 | struct user_namespace *user_ns; | 60 | struct user_namespace *user_ns; |
61 | struct ucounts *ucounts; | ||
61 | 62 | ||
62 | struct ns_common ns; | 63 | struct ns_common ns; |
63 | }; | 64 | }; |
diff --git a/include/linux/mount.h b/include/linux/mount.h index 54a594d49733..1172cce949a4 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h | |||
@@ -96,4 +96,6 @@ extern void mark_mounts_for_expiry(struct list_head *mounts); | |||
96 | 96 | ||
97 | extern dev_t name_to_dev_t(const char *name); | 97 | extern dev_t name_to_dev_t(const char *name); |
98 | 98 | ||
99 | extern unsigned int sysctl_mount_max; | ||
100 | |||
99 | #endif /* _LINUX_MOUNT_H */ | 101 | #endif /* _LINUX_MOUNT_H */ |
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 918b117a7cd3..34cce96741bc 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h | |||
@@ -40,6 +40,7 @@ struct pid_namespace { | |||
40 | struct fs_pin *bacct; | 40 | struct fs_pin *bacct; |
41 | #endif | 41 | #endif |
42 | struct user_namespace *user_ns; | 42 | struct user_namespace *user_ns; |
43 | struct ucounts *ucounts; | ||
43 | struct work_struct proc_work; | 44 | struct work_struct proc_work; |
44 | kgid_t pid_gid; | 45 | kgid_t pid_gid; |
45 | int hide_pid; | 46 | int hide_pid; |
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index de0e7719d4c5..12cb8bd81d2d 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h | |||
@@ -18,6 +18,8 @@ struct proc_ns_operations { | |||
18 | struct ns_common *(*get)(struct task_struct *task); | 18 | struct ns_common *(*get)(struct task_struct *task); |
19 | void (*put)(struct ns_common *ns); | 19 | void (*put)(struct ns_common *ns); |
20 | int (*install)(struct nsproxy *nsproxy, struct ns_common *ns); | 20 | int (*install)(struct nsproxy *nsproxy, struct ns_common *ns); |
21 | struct user_namespace *(*owner)(struct ns_common *ns); | ||
22 | struct ns_common *(*get_parent)(struct ns_common *ns); | ||
21 | }; | 23 | }; |
22 | 24 | ||
23 | extern const struct proc_ns_operations netns_operations; | 25 | extern const struct proc_ns_operations netns_operations; |
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index ecc3e07c6e63..adf4e51cf597 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h | |||
@@ -158,8 +158,7 @@ struct ctl_table_set { | |||
158 | 158 | ||
159 | struct ctl_table_root { | 159 | struct ctl_table_root { |
160 | struct ctl_table_set default_set; | 160 | struct ctl_table_set default_set; |
161 | struct ctl_table_set *(*lookup)(struct ctl_table_root *root, | 161 | struct ctl_table_set *(*lookup)(struct ctl_table_root *root); |
162 | struct nsproxy *namespaces); | ||
163 | void (*set_ownership)(struct ctl_table_header *head, | 162 | void (*set_ownership)(struct ctl_table_header *head, |
164 | struct ctl_table *table, | 163 | struct ctl_table *table, |
165 | kuid_t *uid, kgid_t *gid); | 164 | kuid_t *uid, kgid_t *gid); |
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 9217169c64cb..eb209d4523f5 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h | |||
@@ -22,6 +22,19 @@ struct uid_gid_map { /* 64 bytes -- 1 cache line */ | |||
22 | 22 | ||
23 | #define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED | 23 | #define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED |
24 | 24 | ||
25 | struct ucounts; | ||
26 | |||
27 | enum ucount_type { | ||
28 | UCOUNT_USER_NAMESPACES, | ||
29 | UCOUNT_PID_NAMESPACES, | ||
30 | UCOUNT_UTS_NAMESPACES, | ||
31 | UCOUNT_IPC_NAMESPACES, | ||
32 | UCOUNT_NET_NAMESPACES, | ||
33 | UCOUNT_MNT_NAMESPACES, | ||
34 | UCOUNT_CGROUP_NAMESPACES, | ||
35 | UCOUNT_COUNTS, | ||
36 | }; | ||
37 | |||
25 | struct user_namespace { | 38 | struct user_namespace { |
26 | struct uid_gid_map uid_map; | 39 | struct uid_gid_map uid_map; |
27 | struct uid_gid_map gid_map; | 40 | struct uid_gid_map gid_map; |
@@ -39,10 +52,30 @@ struct user_namespace { | |||
39 | struct key *persistent_keyring_register; | 52 | struct key *persistent_keyring_register; |
40 | struct rw_semaphore persistent_keyring_register_sem; | 53 | struct rw_semaphore persistent_keyring_register_sem; |
41 | #endif | 54 | #endif |
55 | struct work_struct work; | ||
56 | #ifdef CONFIG_SYSCTL | ||
57 | struct ctl_table_set set; | ||
58 | struct ctl_table_header *sysctls; | ||
59 | #endif | ||
60 | struct ucounts *ucounts; | ||
61 | int ucount_max[UCOUNT_COUNTS]; | ||
62 | }; | ||
63 | |||
64 | struct ucounts { | ||
65 | struct hlist_node node; | ||
66 | struct user_namespace *ns; | ||
67 | kuid_t uid; | ||
68 | atomic_t count; | ||
69 | atomic_t ucount[UCOUNT_COUNTS]; | ||
42 | }; | 70 | }; |
43 | 71 | ||
44 | extern struct user_namespace init_user_ns; | 72 | extern struct user_namespace init_user_ns; |
45 | 73 | ||
74 | bool setup_userns_sysctls(struct user_namespace *ns); | ||
75 | void retire_userns_sysctls(struct user_namespace *ns); | ||
76 | struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type); | ||
77 | void dec_ucount(struct ucounts *ucounts, enum ucount_type type); | ||
78 | |||
46 | #ifdef CONFIG_USER_NS | 79 | #ifdef CONFIG_USER_NS |
47 | 80 | ||
48 | static inline struct user_namespace *get_user_ns(struct user_namespace *ns) | 81 | static inline struct user_namespace *get_user_ns(struct user_namespace *ns) |
@@ -54,12 +87,12 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns) | |||
54 | 87 | ||
55 | extern int create_user_ns(struct cred *new); | 88 | extern int create_user_ns(struct cred *new); |
56 | extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred); | 89 | extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred); |
57 | extern void free_user_ns(struct user_namespace *ns); | 90 | extern void __put_user_ns(struct user_namespace *ns); |
58 | 91 | ||
59 | static inline void put_user_ns(struct user_namespace *ns) | 92 | static inline void put_user_ns(struct user_namespace *ns) |
60 | { | 93 | { |
61 | if (ns && atomic_dec_and_test(&ns->count)) | 94 | if (ns && atomic_dec_and_test(&ns->count)) |
62 | free_user_ns(ns); | 95 | __put_user_ns(ns); |
63 | } | 96 | } |
64 | 97 | ||
65 | struct seq_operations; | 98 | struct seq_operations; |
@@ -73,6 +106,8 @@ extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, | |||
73 | extern int proc_setgroups_show(struct seq_file *m, void *v); | 106 | extern int proc_setgroups_show(struct seq_file *m, void *v); |
74 | extern bool userns_may_setgroups(const struct user_namespace *ns); | 107 | extern bool userns_may_setgroups(const struct user_namespace *ns); |
75 | extern bool current_in_userns(const struct user_namespace *target_ns); | 108 | extern bool current_in_userns(const struct user_namespace *target_ns); |
109 | |||
110 | struct ns_common *ns_get_owner(struct ns_common *ns); | ||
76 | #else | 111 | #else |
77 | 112 | ||
78 | static inline struct user_namespace *get_user_ns(struct user_namespace *ns) | 113 | static inline struct user_namespace *get_user_ns(struct user_namespace *ns) |
@@ -106,6 +141,11 @@ static inline bool current_in_userns(const struct user_namespace *target_ns) | |||
106 | { | 141 | { |
107 | return true; | 142 | return true; |
108 | } | 143 | } |
144 | |||
145 | static inline struct ns_common *ns_get_owner(struct ns_common *ns) | ||
146 | { | ||
147 | return ERR_PTR(-EPERM); | ||
148 | } | ||
109 | #endif | 149 | #endif |
110 | 150 | ||
111 | #endif /* _LINUX_USER_H */ | 151 | #endif /* _LINUX_USER_H */ |
diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 5093f58ae192..60f0bb83b313 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h | |||
@@ -24,6 +24,7 @@ struct uts_namespace { | |||
24 | struct kref kref; | 24 | struct kref kref; |
25 | struct new_utsname name; | 25 | struct new_utsname name; |
26 | struct user_namespace *user_ns; | 26 | struct user_namespace *user_ns; |
27 | struct ucounts *ucounts; | ||
27 | struct ns_common ns; | 28 | struct ns_common ns; |
28 | }; | 29 | }; |
29 | extern struct uts_namespace init_uts_ns; | 30 | extern struct uts_namespace init_uts_ns; |
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 0933c7455a30..fc4f757107df 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h | |||
@@ -60,6 +60,7 @@ struct net { | |||
60 | struct list_head exit_list; /* Use only net_mutex */ | 60 | struct list_head exit_list; /* Use only net_mutex */ |
61 | 61 | ||
62 | struct user_namespace *user_ns; /* Owning user namespace */ | 62 | struct user_namespace *user_ns; /* Owning user namespace */ |
63 | struct ucounts *ucounts; | ||
63 | spinlock_t nsid_lock; | 64 | spinlock_t nsid_lock; |
64 | struct idr netns_ids; | 65 | struct idr netns_ids; |
65 | 66 | ||
diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h new file mode 100644 index 000000000000..3af617230d1b --- /dev/null +++ b/include/uapi/linux/nsfs.h | |||
@@ -0,0 +1,13 @@ | |||
1 | #ifndef __LINUX_NSFS_H | ||
2 | #define __LINUX_NSFS_H | ||
3 | |||
4 | #include <linux/ioctl.h> | ||
5 | |||
6 | #define NSIO 0xb7 | ||
7 | |||
8 | /* Returns a file descriptor that refers to an owning user namespace */ | ||
9 | #define NS_GET_USERNS _IO(NSIO, 0x1) | ||
10 | /* Returns a file descriptor that refers to a parent namespace */ | ||
11 | #define NS_GET_PARENT _IO(NSIO, 0x2) | ||
12 | |||
13 | #endif /* __LINUX_NSFS_H */ | ||
diff --git a/ipc/namespace.c b/ipc/namespace.c index d87e6baa1323..0abdea496493 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c | |||
@@ -16,39 +16,61 @@ | |||
16 | 16 | ||
17 | #include "util.h" | 17 | #include "util.h" |
18 | 18 | ||
19 | static struct ucounts *inc_ipc_namespaces(struct user_namespace *ns) | ||
20 | { | ||
21 | return inc_ucount(ns, current_euid(), UCOUNT_IPC_NAMESPACES); | ||
22 | } | ||
23 | |||
24 | static void dec_ipc_namespaces(struct ucounts *ucounts) | ||
25 | { | ||
26 | dec_ucount(ucounts, UCOUNT_IPC_NAMESPACES); | ||
27 | } | ||
28 | |||
19 | static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, | 29 | static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, |
20 | struct ipc_namespace *old_ns) | 30 | struct ipc_namespace *old_ns) |
21 | { | 31 | { |
22 | struct ipc_namespace *ns; | 32 | struct ipc_namespace *ns; |
33 | struct ucounts *ucounts; | ||
23 | int err; | 34 | int err; |
24 | 35 | ||
36 | err = -ENOSPC; | ||
37 | ucounts = inc_ipc_namespaces(user_ns); | ||
38 | if (!ucounts) | ||
39 | goto fail; | ||
40 | |||
41 | err = -ENOMEM; | ||
25 | ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL); | 42 | ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL); |
26 | if (ns == NULL) | 43 | if (ns == NULL) |
27 | return ERR_PTR(-ENOMEM); | 44 | goto fail_dec; |
28 | 45 | ||
29 | err = ns_alloc_inum(&ns->ns); | 46 | err = ns_alloc_inum(&ns->ns); |
30 | if (err) { | 47 | if (err) |
31 | kfree(ns); | 48 | goto fail_free; |
32 | return ERR_PTR(err); | ||
33 | } | ||
34 | ns->ns.ops = &ipcns_operations; | 49 | ns->ns.ops = &ipcns_operations; |
35 | 50 | ||
36 | atomic_set(&ns->count, 1); | 51 | atomic_set(&ns->count, 1); |
37 | ns->user_ns = get_user_ns(user_ns); | 52 | ns->user_ns = get_user_ns(user_ns); |
53 | ns->ucounts = ucounts; | ||
38 | 54 | ||
39 | err = mq_init_ns(ns); | 55 | err = mq_init_ns(ns); |
40 | if (err) { | 56 | if (err) |
41 | put_user_ns(ns->user_ns); | 57 | goto fail_put; |
42 | ns_free_inum(&ns->ns); | ||
43 | kfree(ns); | ||
44 | return ERR_PTR(err); | ||
45 | } | ||
46 | 58 | ||
47 | sem_init_ns(ns); | 59 | sem_init_ns(ns); |
48 | msg_init_ns(ns); | 60 | msg_init_ns(ns); |
49 | shm_init_ns(ns); | 61 | shm_init_ns(ns); |
50 | 62 | ||
51 | return ns; | 63 | return ns; |
64 | |||
65 | fail_put: | ||
66 | put_user_ns(ns->user_ns); | ||
67 | ns_free_inum(&ns->ns); | ||
68 | fail_free: | ||
69 | kfree(ns); | ||
70 | fail_dec: | ||
71 | dec_ipc_namespaces(ucounts); | ||
72 | fail: | ||
73 | return ERR_PTR(err); | ||
52 | } | 74 | } |
53 | 75 | ||
54 | struct ipc_namespace *copy_ipcs(unsigned long flags, | 76 | struct ipc_namespace *copy_ipcs(unsigned long flags, |
@@ -96,6 +118,7 @@ static void free_ipc_ns(struct ipc_namespace *ns) | |||
96 | msg_exit_ns(ns); | 118 | msg_exit_ns(ns); |
97 | shm_exit_ns(ns); | 119 | shm_exit_ns(ns); |
98 | 120 | ||
121 | dec_ipc_namespaces(ns->ucounts); | ||
99 | put_user_ns(ns->user_ns); | 122 | put_user_ns(ns->user_ns); |
100 | ns_free_inum(&ns->ns); | 123 | ns_free_inum(&ns->ns); |
101 | kfree(ns); | 124 | kfree(ns); |
@@ -165,10 +188,16 @@ static int ipcns_install(struct nsproxy *nsproxy, struct ns_common *new) | |||
165 | return 0; | 188 | return 0; |
166 | } | 189 | } |
167 | 190 | ||
191 | static struct user_namespace *ipcns_owner(struct ns_common *ns) | ||
192 | { | ||
193 | return to_ipc_ns(ns)->user_ns; | ||
194 | } | ||
195 | |||
168 | const struct proc_ns_operations ipcns_operations = { | 196 | const struct proc_ns_operations ipcns_operations = { |
169 | .name = "ipc", | 197 | .name = "ipc", |
170 | .type = CLONE_NEWIPC, | 198 | .type = CLONE_NEWIPC, |
171 | .get = ipcns_get, | 199 | .get = ipcns_get, |
172 | .put = ipcns_put, | 200 | .put = ipcns_put, |
173 | .install = ipcns_install, | 201 | .install = ipcns_install, |
202 | .owner = ipcns_owner, | ||
174 | }; | 203 | }; |
diff --git a/kernel/Makefile b/kernel/Makefile index e2ec54e2b952..eb26e12c6c2a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -9,7 +9,7 @@ obj-y = fork.o exec_domain.o panic.o \ | |||
9 | extable.o params.o \ | 9 | extable.o params.o \ |
10 | kthread.o sys_ni.o nsproxy.o \ | 10 | kthread.o sys_ni.o nsproxy.o \ |
11 | notifier.o ksysfs.o cred.o reboot.o \ | 11 | notifier.o ksysfs.o cred.o reboot.o \ |
12 | async.o range.o smpboot.o | 12 | async.o range.o smpboot.o ucount.o |
13 | 13 | ||
14 | obj-$(CONFIG_MULTIUSER) += groups.o | 14 | obj-$(CONFIG_MULTIUSER) += groups.o |
15 | 15 | ||
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 9ba28310eab6..44066158f0d1 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -6328,6 +6328,16 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd) | |||
6328 | 6328 | ||
6329 | /* cgroup namespaces */ | 6329 | /* cgroup namespaces */ |
6330 | 6330 | ||
6331 | static struct ucounts *inc_cgroup_namespaces(struct user_namespace *ns) | ||
6332 | { | ||
6333 | return inc_ucount(ns, current_euid(), UCOUNT_CGROUP_NAMESPACES); | ||
6334 | } | ||
6335 | |||
6336 | static void dec_cgroup_namespaces(struct ucounts *ucounts) | ||
6337 | { | ||
6338 | dec_ucount(ucounts, UCOUNT_CGROUP_NAMESPACES); | ||
6339 | } | ||
6340 | |||
6331 | static struct cgroup_namespace *alloc_cgroup_ns(void) | 6341 | static struct cgroup_namespace *alloc_cgroup_ns(void) |
6332 | { | 6342 | { |
6333 | struct cgroup_namespace *new_ns; | 6343 | struct cgroup_namespace *new_ns; |
@@ -6349,6 +6359,7 @@ static struct cgroup_namespace *alloc_cgroup_ns(void) | |||
6349 | void free_cgroup_ns(struct cgroup_namespace *ns) | 6359 | void free_cgroup_ns(struct cgroup_namespace *ns) |
6350 | { | 6360 | { |
6351 | put_css_set(ns->root_cset); | 6361 | put_css_set(ns->root_cset); |
6362 | dec_cgroup_namespaces(ns->ucounts); | ||
6352 | put_user_ns(ns->user_ns); | 6363 | put_user_ns(ns->user_ns); |
6353 | ns_free_inum(&ns->ns); | 6364 | ns_free_inum(&ns->ns); |
6354 | kfree(ns); | 6365 | kfree(ns); |
@@ -6360,6 +6371,7 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, | |||
6360 | struct cgroup_namespace *old_ns) | 6371 | struct cgroup_namespace *old_ns) |
6361 | { | 6372 | { |
6362 | struct cgroup_namespace *new_ns; | 6373 | struct cgroup_namespace *new_ns; |
6374 | struct ucounts *ucounts; | ||
6363 | struct css_set *cset; | 6375 | struct css_set *cset; |
6364 | 6376 | ||
6365 | BUG_ON(!old_ns); | 6377 | BUG_ON(!old_ns); |
@@ -6373,6 +6385,10 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, | |||
6373 | if (!ns_capable(user_ns, CAP_SYS_ADMIN)) | 6385 | if (!ns_capable(user_ns, CAP_SYS_ADMIN)) |
6374 | return ERR_PTR(-EPERM); | 6386 | return ERR_PTR(-EPERM); |
6375 | 6387 | ||
6388 | ucounts = inc_cgroup_namespaces(user_ns); | ||
6389 | if (!ucounts) | ||
6390 | return ERR_PTR(-ENOSPC); | ||
6391 | |||
6376 | /* It is not safe to take cgroup_mutex here */ | 6392 | /* It is not safe to take cgroup_mutex here */ |
6377 | spin_lock_irq(&css_set_lock); | 6393 | spin_lock_irq(&css_set_lock); |
6378 | cset = task_css_set(current); | 6394 | cset = task_css_set(current); |
@@ -6382,10 +6398,12 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, | |||
6382 | new_ns = alloc_cgroup_ns(); | 6398 | new_ns = alloc_cgroup_ns(); |
6383 | if (IS_ERR(new_ns)) { | 6399 | if (IS_ERR(new_ns)) { |
6384 | put_css_set(cset); | 6400 | put_css_set(cset); |
6401 | dec_cgroup_namespaces(ucounts); | ||
6385 | return new_ns; | 6402 | return new_ns; |
6386 | } | 6403 | } |
6387 | 6404 | ||
6388 | new_ns->user_ns = get_user_ns(user_ns); | 6405 | new_ns->user_ns = get_user_ns(user_ns); |
6406 | new_ns->ucounts = ucounts; | ||
6389 | new_ns->root_cset = cset; | 6407 | new_ns->root_cset = cset; |
6390 | 6408 | ||
6391 | return new_ns; | 6409 | return new_ns; |
@@ -6436,12 +6454,18 @@ static void cgroupns_put(struct ns_common *ns) | |||
6436 | put_cgroup_ns(to_cg_ns(ns)); | 6454 | put_cgroup_ns(to_cg_ns(ns)); |
6437 | } | 6455 | } |
6438 | 6456 | ||
6457 | static struct user_namespace *cgroupns_owner(struct ns_common *ns) | ||
6458 | { | ||
6459 | return to_cg_ns(ns)->user_ns; | ||
6460 | } | ||
6461 | |||
6439 | const struct proc_ns_operations cgroupns_operations = { | 6462 | const struct proc_ns_operations cgroupns_operations = { |
6440 | .name = "cgroup", | 6463 | .name = "cgroup", |
6441 | .type = CLONE_NEWCGROUP, | 6464 | .type = CLONE_NEWCGROUP, |
6442 | .get = cgroupns_get, | 6465 | .get = cgroupns_get, |
6443 | .put = cgroupns_put, | 6466 | .put = cgroupns_put, |
6444 | .install = cgroupns_install, | 6467 | .install = cgroupns_install, |
6468 | .owner = cgroupns_owner, | ||
6445 | }; | 6469 | }; |
6446 | 6470 | ||
6447 | static __init int cgroup_namespaces_init(void) | 6471 | static __init int cgroup_namespaces_init(void) |
diff --git a/kernel/fork.c b/kernel/fork.c index c060c7e7c247..9a05bd93f8e7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -418,6 +418,7 @@ int arch_task_struct_size __read_mostly; | |||
418 | 418 | ||
419 | void __init fork_init(void) | 419 | void __init fork_init(void) |
420 | { | 420 | { |
421 | int i; | ||
421 | #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR | 422 | #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR |
422 | #ifndef ARCH_MIN_TASKALIGN | 423 | #ifndef ARCH_MIN_TASKALIGN |
423 | #define ARCH_MIN_TASKALIGN L1_CACHE_BYTES | 424 | #define ARCH_MIN_TASKALIGN L1_CACHE_BYTES |
@@ -437,6 +438,10 @@ void __init fork_init(void) | |||
437 | init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; | 438 | init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; |
438 | init_task.signal->rlim[RLIMIT_SIGPENDING] = | 439 | init_task.signal->rlim[RLIMIT_SIGPENDING] = |
439 | init_task.signal->rlim[RLIMIT_NPROC]; | 440 | init_task.signal->rlim[RLIMIT_NPROC]; |
441 | |||
442 | for (i = 0; i < UCOUNT_COUNTS; i++) { | ||
443 | init_user_ns.ucount_max[i] = max_threads/2; | ||
444 | } | ||
440 | } | 445 | } |
441 | 446 | ||
442 | int __weak arch_dup_task_struct(struct task_struct *dst, | 447 | int __weak arch_dup_task_struct(struct task_struct *dst, |
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index a65ba137fd15..df9e8e9e0be7 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c | |||
@@ -79,23 +79,36 @@ static void proc_cleanup_work(struct work_struct *work) | |||
79 | /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ | 79 | /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ |
80 | #define MAX_PID_NS_LEVEL 32 | 80 | #define MAX_PID_NS_LEVEL 32 |
81 | 81 | ||
82 | static struct ucounts *inc_pid_namespaces(struct user_namespace *ns) | ||
83 | { | ||
84 | return inc_ucount(ns, current_euid(), UCOUNT_PID_NAMESPACES); | ||
85 | } | ||
86 | |||
87 | static void dec_pid_namespaces(struct ucounts *ucounts) | ||
88 | { | ||
89 | dec_ucount(ucounts, UCOUNT_PID_NAMESPACES); | ||
90 | } | ||
91 | |||
82 | static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns, | 92 | static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns, |
83 | struct pid_namespace *parent_pid_ns) | 93 | struct pid_namespace *parent_pid_ns) |
84 | { | 94 | { |
85 | struct pid_namespace *ns; | 95 | struct pid_namespace *ns; |
86 | unsigned int level = parent_pid_ns->level + 1; | 96 | unsigned int level = parent_pid_ns->level + 1; |
97 | struct ucounts *ucounts; | ||
87 | int i; | 98 | int i; |
88 | int err; | 99 | int err; |
89 | 100 | ||
90 | if (level > MAX_PID_NS_LEVEL) { | 101 | err = -ENOSPC; |
91 | err = -EINVAL; | 102 | if (level > MAX_PID_NS_LEVEL) |
103 | goto out; | ||
104 | ucounts = inc_pid_namespaces(user_ns); | ||
105 | if (!ucounts) | ||
92 | goto out; | 106 | goto out; |
93 | } | ||
94 | 107 | ||
95 | err = -ENOMEM; | 108 | err = -ENOMEM; |
96 | ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL); | 109 | ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL); |
97 | if (ns == NULL) | 110 | if (ns == NULL) |
98 | goto out; | 111 | goto out_dec; |
99 | 112 | ||
100 | ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL); | 113 | ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL); |
101 | if (!ns->pidmap[0].page) | 114 | if (!ns->pidmap[0].page) |
@@ -114,6 +127,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns | |||
114 | ns->level = level; | 127 | ns->level = level; |
115 | ns->parent = get_pid_ns(parent_pid_ns); | 128 | ns->parent = get_pid_ns(parent_pid_ns); |
116 | ns->user_ns = get_user_ns(user_ns); | 129 | ns->user_ns = get_user_ns(user_ns); |
130 | ns->ucounts = ucounts; | ||
117 | ns->nr_hashed = PIDNS_HASH_ADDING; | 131 | ns->nr_hashed = PIDNS_HASH_ADDING; |
118 | INIT_WORK(&ns->proc_work, proc_cleanup_work); | 132 | INIT_WORK(&ns->proc_work, proc_cleanup_work); |
119 | 133 | ||
@@ -129,6 +143,8 @@ out_free_map: | |||
129 | kfree(ns->pidmap[0].page); | 143 | kfree(ns->pidmap[0].page); |
130 | out_free: | 144 | out_free: |
131 | kmem_cache_free(pid_ns_cachep, ns); | 145 | kmem_cache_free(pid_ns_cachep, ns); |
146 | out_dec: | ||
147 | dec_pid_namespaces(ucounts); | ||
132 | out: | 148 | out: |
133 | return ERR_PTR(err); | 149 | return ERR_PTR(err); |
134 | } | 150 | } |
@@ -146,6 +162,7 @@ static void destroy_pid_namespace(struct pid_namespace *ns) | |||
146 | ns_free_inum(&ns->ns); | 162 | ns_free_inum(&ns->ns); |
147 | for (i = 0; i < PIDMAP_ENTRIES; i++) | 163 | for (i = 0; i < PIDMAP_ENTRIES; i++) |
148 | kfree(ns->pidmap[i].page); | 164 | kfree(ns->pidmap[i].page); |
165 | dec_pid_namespaces(ns->ucounts); | ||
149 | put_user_ns(ns->user_ns); | 166 | put_user_ns(ns->user_ns); |
150 | call_rcu(&ns->rcu, delayed_free_pidns); | 167 | call_rcu(&ns->rcu, delayed_free_pidns); |
151 | } | 168 | } |
@@ -388,12 +405,37 @@ static int pidns_install(struct nsproxy *nsproxy, struct ns_common *ns) | |||
388 | return 0; | 405 | return 0; |
389 | } | 406 | } |
390 | 407 | ||
408 | static struct ns_common *pidns_get_parent(struct ns_common *ns) | ||
409 | { | ||
410 | struct pid_namespace *active = task_active_pid_ns(current); | ||
411 | struct pid_namespace *pid_ns, *p; | ||
412 | |||
413 | /* See if the parent is in the current namespace */ | ||
414 | pid_ns = p = to_pid_ns(ns)->parent; | ||
415 | for (;;) { | ||
416 | if (!p) | ||
417 | return ERR_PTR(-EPERM); | ||
418 | if (p == active) | ||
419 | break; | ||
420 | p = p->parent; | ||
421 | } | ||
422 | |||
423 | return &get_pid_ns(pid_ns)->ns; | ||
424 | } | ||
425 | |||
426 | static struct user_namespace *pidns_owner(struct ns_common *ns) | ||
427 | { | ||
428 | return to_pid_ns(ns)->user_ns; | ||
429 | } | ||
430 | |||
391 | const struct proc_ns_operations pidns_operations = { | 431 | const struct proc_ns_operations pidns_operations = { |
392 | .name = "pid", | 432 | .name = "pid", |
393 | .type = CLONE_NEWPID, | 433 | .type = CLONE_NEWPID, |
394 | .get = pidns_get, | 434 | .get = pidns_get, |
395 | .put = pidns_put, | 435 | .put = pidns_put, |
396 | .install = pidns_install, | 436 | .install = pidns_install, |
437 | .owner = pidns_owner, | ||
438 | .get_parent = pidns_get_parent, | ||
397 | }; | 439 | }; |
398 | 440 | ||
399 | static __init int pid_namespaces_init(void) | 441 | static __init int pid_namespaces_init(void) |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index a13bbdaab47d..a43775c6646c 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -65,6 +65,7 @@ | |||
65 | #include <linux/sched/sysctl.h> | 65 | #include <linux/sched/sysctl.h> |
66 | #include <linux/kexec.h> | 66 | #include <linux/kexec.h> |
67 | #include <linux/bpf.h> | 67 | #include <linux/bpf.h> |
68 | #include <linux/mount.h> | ||
68 | 69 | ||
69 | #include <asm/uaccess.h> | 70 | #include <asm/uaccess.h> |
70 | #include <asm/processor.h> | 71 | #include <asm/processor.h> |
@@ -1838,6 +1839,14 @@ static struct ctl_table fs_table[] = { | |||
1838 | .mode = 0644, | 1839 | .mode = 0644, |
1839 | .proc_handler = proc_doulongvec_minmax, | 1840 | .proc_handler = proc_doulongvec_minmax, |
1840 | }, | 1841 | }, |
1842 | { | ||
1843 | .procname = "mount-max", | ||
1844 | .data = &sysctl_mount_max, | ||
1845 | .maxlen = sizeof(unsigned int), | ||
1846 | .mode = 0644, | ||
1847 | .proc_handler = proc_dointvec_minmax, | ||
1848 | .extra1 = &one, | ||
1849 | }, | ||
1841 | { } | 1850 | { } |
1842 | }; | 1851 | }; |
1843 | 1852 | ||
diff --git a/kernel/ucount.c b/kernel/ucount.c new file mode 100644 index 000000000000..9d20d5dd298a --- /dev/null +++ b/kernel/ucount.c | |||
@@ -0,0 +1,235 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or | ||
3 | * modify it under the terms of the GNU General Public License as | ||
4 | * published by the Free Software Foundation, version 2 of the | ||
5 | * License. | ||
6 | */ | ||
7 | |||
8 | #include <linux/stat.h> | ||
9 | #include <linux/sysctl.h> | ||
10 | #include <linux/slab.h> | ||
11 | #include <linux/hash.h> | ||
12 | #include <linux/user_namespace.h> | ||
13 | |||
14 | #define UCOUNTS_HASHTABLE_BITS 10 | ||
15 | static struct hlist_head ucounts_hashtable[(1 << UCOUNTS_HASHTABLE_BITS)]; | ||
16 | static DEFINE_SPINLOCK(ucounts_lock); | ||
17 | |||
18 | #define ucounts_hashfn(ns, uid) \ | ||
19 | hash_long((unsigned long)__kuid_val(uid) + (unsigned long)(ns), \ | ||
20 | UCOUNTS_HASHTABLE_BITS) | ||
21 | #define ucounts_hashentry(ns, uid) \ | ||
22 | (ucounts_hashtable + ucounts_hashfn(ns, uid)) | ||
23 | |||
24 | |||
25 | #ifdef CONFIG_SYSCTL | ||
26 | static struct ctl_table_set * | ||
27 | set_lookup(struct ctl_table_root *root) | ||
28 | { | ||
29 | return &current_user_ns()->set; | ||
30 | } | ||
31 | |||
32 | static int set_is_seen(struct ctl_table_set *set) | ||
33 | { | ||
34 | return &current_user_ns()->set == set; | ||
35 | } | ||
36 | |||
37 | static int set_permissions(struct ctl_table_header *head, | ||
38 | struct ctl_table *table) | ||
39 | { | ||
40 | struct user_namespace *user_ns = | ||
41 | container_of(head->set, struct user_namespace, set); | ||
42 | int mode; | ||
43 | |||
44 | /* Allow users with CAP_SYS_RESOURCE unrestrained access */ | ||
45 | if (ns_capable(user_ns, CAP_SYS_RESOURCE)) | ||
46 | mode = (table->mode & S_IRWXU) >> 6; | ||
47 | else | ||
48 | /* Allow all others at most read-only access */ | ||
49 | mode = table->mode & S_IROTH; | ||
50 | return (mode << 6) | (mode << 3) | mode; | ||
51 | } | ||
52 | |||
53 | static struct ctl_table_root set_root = { | ||
54 | .lookup = set_lookup, | ||
55 | .permissions = set_permissions, | ||
56 | }; | ||
57 | |||
58 | static int zero = 0; | ||
59 | static int int_max = INT_MAX; | ||
60 | #define UCOUNT_ENTRY(name) \ | ||
61 | { \ | ||
62 | .procname = name, \ | ||
63 | .maxlen = sizeof(int), \ | ||
64 | .mode = 0644, \ | ||
65 | .proc_handler = proc_dointvec_minmax, \ | ||
66 | .extra1 = &zero, \ | ||
67 | .extra2 = &int_max, \ | ||
68 | } | ||
69 | static struct ctl_table user_table[] = { | ||
70 | UCOUNT_ENTRY("max_user_namespaces"), | ||
71 | UCOUNT_ENTRY("max_pid_namespaces"), | ||
72 | UCOUNT_ENTRY("max_uts_namespaces"), | ||
73 | UCOUNT_ENTRY("max_ipc_namespaces"), | ||
74 | UCOUNT_ENTRY("max_net_namespaces"), | ||
75 | UCOUNT_ENTRY("max_mnt_namespaces"), | ||
76 | UCOUNT_ENTRY("max_cgroup_namespaces"), | ||
77 | { } | ||
78 | }; | ||
79 | #endif /* CONFIG_SYSCTL */ | ||
80 | |||
81 | bool setup_userns_sysctls(struct user_namespace *ns) | ||
82 | { | ||
83 | #ifdef CONFIG_SYSCTL | ||
84 | struct ctl_table *tbl; | ||
85 | setup_sysctl_set(&ns->set, &set_root, set_is_seen); | ||
86 | tbl = kmemdup(user_table, sizeof(user_table), GFP_KERNEL); | ||
87 | if (tbl) { | ||
88 | int i; | ||
89 | for (i = 0; i < UCOUNT_COUNTS; i++) { | ||
90 | tbl[i].data = &ns->ucount_max[i]; | ||
91 | } | ||
92 | ns->sysctls = __register_sysctl_table(&ns->set, "user", tbl); | ||
93 | } | ||
94 | if (!ns->sysctls) { | ||
95 | kfree(tbl); | ||
96 | retire_sysctl_set(&ns->set); | ||
97 | return false; | ||
98 | } | ||
99 | #endif | ||
100 | return true; | ||
101 | } | ||
102 | |||
103 | void retire_userns_sysctls(struct user_namespace *ns) | ||
104 | { | ||
105 | #ifdef CONFIG_SYSCTL | ||
106 | struct ctl_table *tbl; | ||
107 | |||
108 | tbl = ns->sysctls->ctl_table_arg; | ||
109 | unregister_sysctl_table(ns->sysctls); | ||
110 | retire_sysctl_set(&ns->set); | ||
111 | kfree(tbl); | ||
112 | #endif | ||
113 | } | ||
114 | |||
115 | static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, struct hlist_head *hashent) | ||
116 | { | ||
117 | struct ucounts *ucounts; | ||
118 | |||
119 | hlist_for_each_entry(ucounts, hashent, node) { | ||
120 | if (uid_eq(ucounts->uid, uid) && (ucounts->ns == ns)) | ||
121 | return ucounts; | ||
122 | } | ||
123 | return NULL; | ||
124 | } | ||
125 | |||
126 | static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) | ||
127 | { | ||
128 | struct hlist_head *hashent = ucounts_hashentry(ns, uid); | ||
129 | struct ucounts *ucounts, *new; | ||
130 | |||
131 | spin_lock(&ucounts_lock); | ||
132 | ucounts = find_ucounts(ns, uid, hashent); | ||
133 | if (!ucounts) { | ||
134 | spin_unlock(&ucounts_lock); | ||
135 | |||
136 | new = kzalloc(sizeof(*new), GFP_KERNEL); | ||
137 | if (!new) | ||
138 | return NULL; | ||
139 | |||
140 | new->ns = ns; | ||
141 | new->uid = uid; | ||
142 | atomic_set(&new->count, 0); | ||
143 | |||
144 | spin_lock(&ucounts_lock); | ||
145 | ucounts = find_ucounts(ns, uid, hashent); | ||
146 | if (ucounts) { | ||
147 | kfree(new); | ||
148 | } else { | ||
149 | hlist_add_head(&new->node, hashent); | ||
150 | ucounts = new; | ||
151 | } | ||
152 | } | ||
153 | if (!atomic_add_unless(&ucounts->count, 1, INT_MAX)) | ||
154 | ucounts = NULL; | ||
155 | spin_unlock(&ucounts_lock); | ||
156 | return ucounts; | ||
157 | } | ||
158 | |||
159 | static void put_ucounts(struct ucounts *ucounts) | ||
160 | { | ||
161 | if (atomic_dec_and_test(&ucounts->count)) { | ||
162 | spin_lock(&ucounts_lock); | ||
163 | hlist_del_init(&ucounts->node); | ||
164 | spin_unlock(&ucounts_lock); | ||
165 | |||
166 | kfree(ucounts); | ||
167 | } | ||
168 | } | ||
169 | |||
170 | static inline bool atomic_inc_below(atomic_t *v, int u) | ||
171 | { | ||
172 | int c, old; | ||
173 | c = atomic_read(v); | ||
174 | for (;;) { | ||
175 | if (unlikely(c >= u)) | ||
176 | return false; | ||
177 | old = atomic_cmpxchg(v, c, c+1); | ||
178 | if (likely(old == c)) | ||
179 | return true; | ||
180 | c = old; | ||
181 | } | ||
182 | } | ||
183 | |||
184 | struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, | ||
185 | enum ucount_type type) | ||
186 | { | ||
187 | struct ucounts *ucounts, *iter, *bad; | ||
188 | struct user_namespace *tns; | ||
189 | ucounts = get_ucounts(ns, uid); | ||
190 | for (iter = ucounts; iter; iter = tns->ucounts) { | ||
191 | int max; | ||
192 | tns = iter->ns; | ||
193 | max = READ_ONCE(tns->ucount_max[type]); | ||
194 | if (!atomic_inc_below(&iter->ucount[type], max)) | ||
195 | goto fail; | ||
196 | } | ||
197 | return ucounts; | ||
198 | fail: | ||
199 | bad = iter; | ||
200 | for (iter = ucounts; iter != bad; iter = iter->ns->ucounts) | ||
201 | atomic_dec(&iter->ucount[type]); | ||
202 | |||
203 | put_ucounts(ucounts); | ||
204 | return NULL; | ||
205 | } | ||
206 | |||
207 | void dec_ucount(struct ucounts *ucounts, enum ucount_type type) | ||
208 | { | ||
209 | struct ucounts *iter; | ||
210 | for (iter = ucounts; iter; iter = iter->ns->ucounts) { | ||
211 | int dec = atomic_dec_if_positive(&iter->ucount[type]); | ||
212 | WARN_ON_ONCE(dec < 0); | ||
213 | } | ||
214 | put_ucounts(ucounts); | ||
215 | } | ||
216 | |||
217 | static __init int user_namespace_sysctl_init(void) | ||
218 | { | ||
219 | #ifdef CONFIG_SYSCTL | ||
220 | static struct ctl_table_header *user_header; | ||
221 | static struct ctl_table empty[1]; | ||
222 | /* | ||
223 | * It is necessary to register the user directory in the | ||
224 | * default set so that registrations in the child sets work | ||
225 | * properly. | ||
226 | */ | ||
227 | user_header = register_sysctl("user", empty); | ||
228 | BUG_ON(!user_header); | ||
229 | BUG_ON(!setup_userns_sysctls(&init_user_ns)); | ||
230 | #endif | ||
231 | return 0; | ||
232 | } | ||
233 | subsys_initcall(user_namespace_sysctl_init); | ||
234 | |||
235 | |||
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 68f594212759..86b7854fec8e 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c | |||
@@ -29,6 +29,17 @@ static DEFINE_MUTEX(userns_state_mutex); | |||
29 | static bool new_idmap_permitted(const struct file *file, | 29 | static bool new_idmap_permitted(const struct file *file, |
30 | struct user_namespace *ns, int cap_setid, | 30 | struct user_namespace *ns, int cap_setid, |
31 | struct uid_gid_map *map); | 31 | struct uid_gid_map *map); |
32 | static void free_user_ns(struct work_struct *work); | ||
33 | |||
34 | static struct ucounts *inc_user_namespaces(struct user_namespace *ns, kuid_t uid) | ||
35 | { | ||
36 | return inc_ucount(ns, uid, UCOUNT_USER_NAMESPACES); | ||
37 | } | ||
38 | |||
39 | static void dec_user_namespaces(struct ucounts *ucounts) | ||
40 | { | ||
41 | return dec_ucount(ucounts, UCOUNT_USER_NAMESPACES); | ||
42 | } | ||
32 | 43 | ||
33 | static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) | 44 | static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) |
34 | { | 45 | { |
@@ -62,10 +73,16 @@ int create_user_ns(struct cred *new) | |||
62 | struct user_namespace *ns, *parent_ns = new->user_ns; | 73 | struct user_namespace *ns, *parent_ns = new->user_ns; |
63 | kuid_t owner = new->euid; | 74 | kuid_t owner = new->euid; |
64 | kgid_t group = new->egid; | 75 | kgid_t group = new->egid; |
65 | int ret; | 76 | struct ucounts *ucounts; |
77 | int ret, i; | ||
66 | 78 | ||
79 | ret = -ENOSPC; | ||
67 | if (parent_ns->level > 32) | 80 | if (parent_ns->level > 32) |
68 | return -EUSERS; | 81 | goto fail; |
82 | |||
83 | ucounts = inc_user_namespaces(parent_ns, owner); | ||
84 | if (!ucounts) | ||
85 | goto fail; | ||
69 | 86 | ||
70 | /* | 87 | /* |
71 | * Verify that we can not violate the policy of which files | 88 | * Verify that we can not violate the policy of which files |
@@ -73,26 +90,27 @@ int create_user_ns(struct cred *new) | |||
73 | * by verifying that the root directory is at the root of the | 90 | * by verifying that the root directory is at the root of the |
74 | * mount namespace which allows all files to be accessed. | 91 | * mount namespace which allows all files to be accessed. |
75 | */ | 92 | */ |
93 | ret = -EPERM; | ||
76 | if (current_chrooted()) | 94 | if (current_chrooted()) |
77 | return -EPERM; | 95 | goto fail_dec; |
78 | 96 | ||
79 | /* The creator needs a mapping in the parent user namespace | 97 | /* The creator needs a mapping in the parent user namespace |
80 | * or else we won't be able to reasonably tell userspace who | 98 | * or else we won't be able to reasonably tell userspace who |
81 | * created a user_namespace. | 99 | * created a user_namespace. |
82 | */ | 100 | */ |
101 | ret = -EPERM; | ||
83 | if (!kuid_has_mapping(parent_ns, owner) || | 102 | if (!kuid_has_mapping(parent_ns, owner) || |
84 | !kgid_has_mapping(parent_ns, group)) | 103 | !kgid_has_mapping(parent_ns, group)) |
85 | return -EPERM; | 104 | goto fail_dec; |
86 | 105 | ||
106 | ret = -ENOMEM; | ||
87 | ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL); | 107 | ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL); |
88 | if (!ns) | 108 | if (!ns) |
89 | return -ENOMEM; | 109 | goto fail_dec; |
90 | 110 | ||
91 | ret = ns_alloc_inum(&ns->ns); | 111 | ret = ns_alloc_inum(&ns->ns); |
92 | if (ret) { | 112 | if (ret) |
93 | kmem_cache_free(user_ns_cachep, ns); | 113 | goto fail_free; |
94 | return ret; | ||
95 | } | ||
96 | ns->ns.ops = &userns_operations; | 114 | ns->ns.ops = &userns_operations; |
97 | 115 | ||
98 | atomic_set(&ns->count, 1); | 116 | atomic_set(&ns->count, 1); |
@@ -101,18 +119,37 @@ int create_user_ns(struct cred *new) | |||
101 | ns->level = parent_ns->level + 1; | 119 | ns->level = parent_ns->level + 1; |
102 | ns->owner = owner; | 120 | ns->owner = owner; |
103 | ns->group = group; | 121 | ns->group = group; |
122 | INIT_WORK(&ns->work, free_user_ns); | ||
123 | for (i = 0; i < UCOUNT_COUNTS; i++) { | ||
124 | ns->ucount_max[i] = INT_MAX; | ||
125 | } | ||
126 | ns->ucounts = ucounts; | ||
104 | 127 | ||
105 | /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */ | 128 | /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */ |
106 | mutex_lock(&userns_state_mutex); | 129 | mutex_lock(&userns_state_mutex); |
107 | ns->flags = parent_ns->flags; | 130 | ns->flags = parent_ns->flags; |
108 | mutex_unlock(&userns_state_mutex); | 131 | mutex_unlock(&userns_state_mutex); |
109 | 132 | ||
110 | set_cred_user_ns(new, ns); | ||
111 | |||
112 | #ifdef CONFIG_PERSISTENT_KEYRINGS | 133 | #ifdef CONFIG_PERSISTENT_KEYRINGS |
113 | init_rwsem(&ns->persistent_keyring_register_sem); | 134 | init_rwsem(&ns->persistent_keyring_register_sem); |
114 | #endif | 135 | #endif |
136 | ret = -ENOMEM; | ||
137 | if (!setup_userns_sysctls(ns)) | ||
138 | goto fail_keyring; | ||
139 | |||
140 | set_cred_user_ns(new, ns); | ||
115 | return 0; | 141 | return 0; |
142 | fail_keyring: | ||
143 | #ifdef CONFIG_PERSISTENT_KEYRINGS | ||
144 | key_put(ns->persistent_keyring_register); | ||
145 | #endif | ||
146 | ns_free_inum(&ns->ns); | ||
147 | fail_free: | ||
148 | kmem_cache_free(user_ns_cachep, ns); | ||
149 | fail_dec: | ||
150 | dec_user_namespaces(ucounts); | ||
151 | fail: | ||
152 | return ret; | ||
116 | } | 153 | } |
117 | 154 | ||
118 | int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) | 155 | int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) |
@@ -135,21 +172,30 @@ int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) | |||
135 | return err; | 172 | return err; |
136 | } | 173 | } |
137 | 174 | ||
138 | void free_user_ns(struct user_namespace *ns) | 175 | static void free_user_ns(struct work_struct *work) |
139 | { | 176 | { |
140 | struct user_namespace *parent; | 177 | struct user_namespace *parent, *ns = |
178 | container_of(work, struct user_namespace, work); | ||
141 | 179 | ||
142 | do { | 180 | do { |
181 | struct ucounts *ucounts = ns->ucounts; | ||
143 | parent = ns->parent; | 182 | parent = ns->parent; |
183 | retire_userns_sysctls(ns); | ||
144 | #ifdef CONFIG_PERSISTENT_KEYRINGS | 184 | #ifdef CONFIG_PERSISTENT_KEYRINGS |
145 | key_put(ns->persistent_keyring_register); | 185 | key_put(ns->persistent_keyring_register); |
146 | #endif | 186 | #endif |
147 | ns_free_inum(&ns->ns); | 187 | ns_free_inum(&ns->ns); |
148 | kmem_cache_free(user_ns_cachep, ns); | 188 | kmem_cache_free(user_ns_cachep, ns); |
189 | dec_user_namespaces(ucounts); | ||
149 | ns = parent; | 190 | ns = parent; |
150 | } while (atomic_dec_and_test(&parent->count)); | 191 | } while (atomic_dec_and_test(&parent->count)); |
151 | } | 192 | } |
152 | EXPORT_SYMBOL(free_user_ns); | 193 | |
194 | void __put_user_ns(struct user_namespace *ns) | ||
195 | { | ||
196 | schedule_work(&ns->work); | ||
197 | } | ||
198 | EXPORT_SYMBOL(__put_user_ns); | ||
153 | 199 | ||
154 | static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count) | 200 | static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count) |
155 | { | 201 | { |
@@ -1004,12 +1050,37 @@ static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns) | |||
1004 | return commit_creds(cred); | 1050 | return commit_creds(cred); |
1005 | } | 1051 | } |
1006 | 1052 | ||
1053 | struct ns_common *ns_get_owner(struct ns_common *ns) | ||
1054 | { | ||
1055 | struct user_namespace *my_user_ns = current_user_ns(); | ||
1056 | struct user_namespace *owner, *p; | ||
1057 | |||
1058 | /* See if the owner is in the current user namespace */ | ||
1059 | owner = p = ns->ops->owner(ns); | ||
1060 | for (;;) { | ||
1061 | if (!p) | ||
1062 | return ERR_PTR(-EPERM); | ||
1063 | if (p == my_user_ns) | ||
1064 | break; | ||
1065 | p = p->parent; | ||
1066 | } | ||
1067 | |||
1068 | return &get_user_ns(owner)->ns; | ||
1069 | } | ||
1070 | |||
1071 | static struct user_namespace *userns_owner(struct ns_common *ns) | ||
1072 | { | ||
1073 | return to_user_ns(ns)->parent; | ||
1074 | } | ||
1075 | |||
1007 | const struct proc_ns_operations userns_operations = { | 1076 | const struct proc_ns_operations userns_operations = { |
1008 | .name = "user", | 1077 | .name = "user", |
1009 | .type = CLONE_NEWUSER, | 1078 | .type = CLONE_NEWUSER, |
1010 | .get = userns_get, | 1079 | .get = userns_get, |
1011 | .put = userns_put, | 1080 | .put = userns_put, |
1012 | .install = userns_install, | 1081 | .install = userns_install, |
1082 | .owner = userns_owner, | ||
1083 | .get_parent = ns_get_owner, | ||
1013 | }; | 1084 | }; |
1014 | 1085 | ||
1015 | static __init int user_namespaces_init(void) | 1086 | static __init int user_namespaces_init(void) |
diff --git a/kernel/utsname.c b/kernel/utsname.c index 831ea7108232..6976cd47dcf6 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c | |||
@@ -17,6 +17,16 @@ | |||
17 | #include <linux/user_namespace.h> | 17 | #include <linux/user_namespace.h> |
18 | #include <linux/proc_ns.h> | 18 | #include <linux/proc_ns.h> |
19 | 19 | ||
20 | static struct ucounts *inc_uts_namespaces(struct user_namespace *ns) | ||
21 | { | ||
22 | return inc_ucount(ns, current_euid(), UCOUNT_UTS_NAMESPACES); | ||
23 | } | ||
24 | |||
25 | static void dec_uts_namespaces(struct ucounts *ucounts) | ||
26 | { | ||
27 | dec_ucount(ucounts, UCOUNT_UTS_NAMESPACES); | ||
28 | } | ||
29 | |||
20 | static struct uts_namespace *create_uts_ns(void) | 30 | static struct uts_namespace *create_uts_ns(void) |
21 | { | 31 | { |
22 | struct uts_namespace *uts_ns; | 32 | struct uts_namespace *uts_ns; |
@@ -36,18 +46,24 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns, | |||
36 | struct uts_namespace *old_ns) | 46 | struct uts_namespace *old_ns) |
37 | { | 47 | { |
38 | struct uts_namespace *ns; | 48 | struct uts_namespace *ns; |
49 | struct ucounts *ucounts; | ||
39 | int err; | 50 | int err; |
40 | 51 | ||
52 | err = -ENOSPC; | ||
53 | ucounts = inc_uts_namespaces(user_ns); | ||
54 | if (!ucounts) | ||
55 | goto fail; | ||
56 | |||
57 | err = -ENOMEM; | ||
41 | ns = create_uts_ns(); | 58 | ns = create_uts_ns(); |
42 | if (!ns) | 59 | if (!ns) |
43 | return ERR_PTR(-ENOMEM); | 60 | goto fail_dec; |
44 | 61 | ||
45 | err = ns_alloc_inum(&ns->ns); | 62 | err = ns_alloc_inum(&ns->ns); |
46 | if (err) { | 63 | if (err) |
47 | kfree(ns); | 64 | goto fail_free; |
48 | return ERR_PTR(err); | ||
49 | } | ||
50 | 65 | ||
66 | ns->ucounts = ucounts; | ||
51 | ns->ns.ops = &utsns_operations; | 67 | ns->ns.ops = &utsns_operations; |
52 | 68 | ||
53 | down_read(&uts_sem); | 69 | down_read(&uts_sem); |
@@ -55,6 +71,13 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns, | |||
55 | ns->user_ns = get_user_ns(user_ns); | 71 | ns->user_ns = get_user_ns(user_ns); |
56 | up_read(&uts_sem); | 72 | up_read(&uts_sem); |
57 | return ns; | 73 | return ns; |
74 | |||
75 | fail_free: | ||
76 | kfree(ns); | ||
77 | fail_dec: | ||
78 | dec_uts_namespaces(ucounts); | ||
79 | fail: | ||
80 | return ERR_PTR(err); | ||
58 | } | 81 | } |
59 | 82 | ||
60 | /* | 83 | /* |
@@ -85,6 +108,7 @@ void free_uts_ns(struct kref *kref) | |||
85 | struct uts_namespace *ns; | 108 | struct uts_namespace *ns; |
86 | 109 | ||
87 | ns = container_of(kref, struct uts_namespace, kref); | 110 | ns = container_of(kref, struct uts_namespace, kref); |
111 | dec_uts_namespaces(ns->ucounts); | ||
88 | put_user_ns(ns->user_ns); | 112 | put_user_ns(ns->user_ns); |
89 | ns_free_inum(&ns->ns); | 113 | ns_free_inum(&ns->ns); |
90 | kfree(ns); | 114 | kfree(ns); |
@@ -130,10 +154,16 @@ static int utsns_install(struct nsproxy *nsproxy, struct ns_common *new) | |||
130 | return 0; | 154 | return 0; |
131 | } | 155 | } |
132 | 156 | ||
157 | static struct user_namespace *utsns_owner(struct ns_common *ns) | ||
158 | { | ||
159 | return to_uts_ns(ns)->user_ns; | ||
160 | } | ||
161 | |||
133 | const struct proc_ns_operations utsns_operations = { | 162 | const struct proc_ns_operations utsns_operations = { |
134 | .name = "uts", | 163 | .name = "uts", |
135 | .type = CLONE_NEWUTS, | 164 | .type = CLONE_NEWUTS, |
136 | .get = utsns_get, | 165 | .get = utsns_get, |
137 | .put = utsns_put, | 166 | .put = utsns_put, |
138 | .install = utsns_install, | 167 | .install = utsns_install, |
168 | .owner = utsns_owner, | ||
139 | }; | 169 | }; |
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 42bdda0e616b..989434f36f96 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c | |||
@@ -309,6 +309,16 @@ out_undo: | |||
309 | 309 | ||
310 | 310 | ||
311 | #ifdef CONFIG_NET_NS | 311 | #ifdef CONFIG_NET_NS |
312 | static struct ucounts *inc_net_namespaces(struct user_namespace *ns) | ||
313 | { | ||
314 | return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES); | ||
315 | } | ||
316 | |||
317 | static void dec_net_namespaces(struct ucounts *ucounts) | ||
318 | { | ||
319 | dec_ucount(ucounts, UCOUNT_NET_NAMESPACES); | ||
320 | } | ||
321 | |||
312 | static struct kmem_cache *net_cachep; | 322 | static struct kmem_cache *net_cachep; |
313 | static struct workqueue_struct *netns_wq; | 323 | static struct workqueue_struct *netns_wq; |
314 | 324 | ||
@@ -350,19 +360,27 @@ void net_drop_ns(void *p) | |||
350 | struct net *copy_net_ns(unsigned long flags, | 360 | struct net *copy_net_ns(unsigned long flags, |
351 | struct user_namespace *user_ns, struct net *old_net) | 361 | struct user_namespace *user_ns, struct net *old_net) |
352 | { | 362 | { |
363 | struct ucounts *ucounts; | ||
353 | struct net *net; | 364 | struct net *net; |
354 | int rv; | 365 | int rv; |
355 | 366 | ||
356 | if (!(flags & CLONE_NEWNET)) | 367 | if (!(flags & CLONE_NEWNET)) |
357 | return get_net(old_net); | 368 | return get_net(old_net); |
358 | 369 | ||
370 | ucounts = inc_net_namespaces(user_ns); | ||
371 | if (!ucounts) | ||
372 | return ERR_PTR(-ENOSPC); | ||
373 | |||
359 | net = net_alloc(); | 374 | net = net_alloc(); |
360 | if (!net) | 375 | if (!net) { |
376 | dec_net_namespaces(ucounts); | ||
361 | return ERR_PTR(-ENOMEM); | 377 | return ERR_PTR(-ENOMEM); |
378 | } | ||
362 | 379 | ||
363 | get_user_ns(user_ns); | 380 | get_user_ns(user_ns); |
364 | 381 | ||
365 | mutex_lock(&net_mutex); | 382 | mutex_lock(&net_mutex); |
383 | net->ucounts = ucounts; | ||
366 | rv = setup_net(net, user_ns); | 384 | rv = setup_net(net, user_ns); |
367 | if (rv == 0) { | 385 | if (rv == 0) { |
368 | rtnl_lock(); | 386 | rtnl_lock(); |
@@ -371,6 +389,7 @@ struct net *copy_net_ns(unsigned long flags, | |||
371 | } | 389 | } |
372 | mutex_unlock(&net_mutex); | 390 | mutex_unlock(&net_mutex); |
373 | if (rv < 0) { | 391 | if (rv < 0) { |
392 | dec_net_namespaces(ucounts); | ||
374 | put_user_ns(user_ns); | 393 | put_user_ns(user_ns); |
375 | net_drop_ns(net); | 394 | net_drop_ns(net); |
376 | return ERR_PTR(rv); | 395 | return ERR_PTR(rv); |
@@ -443,6 +462,7 @@ static void cleanup_net(struct work_struct *work) | |||
443 | /* Finally it is safe to free my network namespace structure */ | 462 | /* Finally it is safe to free my network namespace structure */ |
444 | list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { | 463 | list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { |
445 | list_del_init(&net->exit_list); | 464 | list_del_init(&net->exit_list); |
465 | dec_net_namespaces(net->ucounts); | ||
446 | put_user_ns(net->user_ns); | 466 | put_user_ns(net->user_ns); |
447 | net_drop_ns(net); | 467 | net_drop_ns(net); |
448 | } | 468 | } |
@@ -1004,11 +1024,17 @@ static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns) | |||
1004 | return 0; | 1024 | return 0; |
1005 | } | 1025 | } |
1006 | 1026 | ||
1027 | static struct user_namespace *netns_owner(struct ns_common *ns) | ||
1028 | { | ||
1029 | return to_net_ns(ns)->user_ns; | ||
1030 | } | ||
1031 | |||
1007 | const struct proc_ns_operations netns_operations = { | 1032 | const struct proc_ns_operations netns_operations = { |
1008 | .name = "net", | 1033 | .name = "net", |
1009 | .type = CLONE_NEWNET, | 1034 | .type = CLONE_NEWNET, |
1010 | .get = netns_get, | 1035 | .get = netns_get, |
1011 | .put = netns_put, | 1036 | .put = netns_put, |
1012 | .install = netns_install, | 1037 | .install = netns_install, |
1038 | .owner = netns_owner, | ||
1013 | }; | 1039 | }; |
1014 | #endif | 1040 | #endif |
diff --git a/net/sysctl_net.c b/net/sysctl_net.c index e0c71bd8f7cf..919981324171 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c | |||
@@ -27,9 +27,9 @@ | |||
27 | #endif | 27 | #endif |
28 | 28 | ||
29 | static struct ctl_table_set * | 29 | static struct ctl_table_set * |
30 | net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces) | 30 | net_ctl_header_lookup(struct ctl_table_root *root) |
31 | { | 31 | { |
32 | return &namespaces->net_ns->sysctls; | 32 | return &current->nsproxy->net_ns->sysctls; |
33 | } | 33 | } |
34 | 34 | ||
35 | static int is_seen(struct ctl_table_set *set) | 35 | static int is_seen(struct ctl_table_set *set) |
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index ff9e5f20a5a7..f770dba2a6f6 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile | |||
@@ -15,6 +15,7 @@ TARGETS += memory-hotplug | |||
15 | TARGETS += mount | 15 | TARGETS += mount |
16 | TARGETS += mqueue | 16 | TARGETS += mqueue |
17 | TARGETS += net | 17 | TARGETS += net |
18 | TARGETS += nsfs | ||
18 | TARGETS += powerpc | 19 | TARGETS += powerpc |
19 | TARGETS += pstore | 20 | TARGETS += pstore |
20 | TARGETS += ptrace | 21 | TARGETS += ptrace |
diff --git a/tools/testing/selftests/nsfs/Makefile b/tools/testing/selftests/nsfs/Makefile new file mode 100644 index 000000000000..2306054a901a --- /dev/null +++ b/tools/testing/selftests/nsfs/Makefile | |||
@@ -0,0 +1,12 @@ | |||
1 | TEST_PROGS := owner pidns | ||
2 | |||
3 | CFLAGS := -Wall -Werror | ||
4 | |||
5 | all: owner pidns | ||
6 | owner: owner.c | ||
7 | pidns: pidns.c | ||
8 | |||
9 | clean: | ||
10 | $(RM) owner pidns | ||
11 | |||
12 | include ../lib.mk | ||
diff --git a/tools/testing/selftests/nsfs/owner.c b/tools/testing/selftests/nsfs/owner.c new file mode 100644 index 000000000000..437205f8b714 --- /dev/null +++ b/tools/testing/selftests/nsfs/owner.c | |||
@@ -0,0 +1,91 @@ | |||
1 | #define _GNU_SOURCE | ||
2 | #include <sched.h> | ||
3 | #include <unistd.h> | ||
4 | #include <stdio.h> | ||
5 | #include <stdlib.h> | ||
6 | #include <signal.h> | ||
7 | #include <errno.h> | ||
8 | #include <sys/types.h> | ||
9 | #include <sys/stat.h> | ||
10 | #include <fcntl.h> | ||
11 | #include <sys/ioctl.h> | ||
12 | #include <sys/prctl.h> | ||
13 | #include <sys/wait.h> | ||
14 | |||
15 | #define NSIO 0xb7 | ||
16 | #define NS_GET_USERNS _IO(NSIO, 0x1) | ||
17 | |||
18 | #define pr_err(fmt, ...) \ | ||
19 | ({ \ | ||
20 | fprintf(stderr, "%s:%d:" fmt ": %m\n", \ | ||
21 | __func__, __LINE__, ##__VA_ARGS__); \ | ||
22 | 1; \ | ||
23 | }) | ||
24 | |||
25 | int main(int argc, char *argvp[]) | ||
26 | { | ||
27 | int pfd[2], ns, uns, init_uns; | ||
28 | struct stat st1, st2; | ||
29 | char path[128]; | ||
30 | pid_t pid; | ||
31 | char c; | ||
32 | |||
33 | if (pipe(pfd)) | ||
34 | return 1; | ||
35 | |||
36 | pid = fork(); | ||
37 | if (pid < 0) | ||
38 | return pr_err("fork"); | ||
39 | if (pid == 0) { | ||
40 | prctl(PR_SET_PDEATHSIG, SIGKILL); | ||
41 | if (unshare(CLONE_NEWUTS | CLONE_NEWUSER)) | ||
42 | return pr_err("unshare"); | ||
43 | close(pfd[0]); | ||
44 | close(pfd[1]); | ||
45 | while (1) | ||
46 | sleep(1); | ||
47 | return 0; | ||
48 | } | ||
49 | close(pfd[1]); | ||
50 | if (read(pfd[0], &c, 1) != 0) | ||
51 | return pr_err("Unable to read from pipe"); | ||
52 | close(pfd[0]); | ||
53 | |||
54 | snprintf(path, sizeof(path), "/proc/%d/ns/uts", pid); | ||
55 | ns = open(path, O_RDONLY); | ||
56 | if (ns < 0) | ||
57 | return pr_err("Unable to open %s", path); | ||
58 | |||
59 | uns = ioctl(ns, NS_GET_USERNS); | ||
60 | if (uns < 0) | ||
61 | return pr_err("Unable to get an owning user namespace"); | ||
62 | |||
63 | if (fstat(uns, &st1)) | ||
64 | return pr_err("fstat"); | ||
65 | |||
66 | snprintf(path, sizeof(path), "/proc/%d/ns/user", pid); | ||
67 | if (stat(path, &st2)) | ||
68 | return pr_err("stat"); | ||
69 | |||
70 | if (st1.st_ino != st2.st_ino) | ||
71 | return pr_err("NS_GET_USERNS returned a wrong namespace"); | ||
72 | |||
73 | init_uns = ioctl(uns, NS_GET_USERNS); | ||
74 | if (uns < 0) | ||
75 | return pr_err("Unable to get an owning user namespace"); | ||
76 | |||
77 | if (ioctl(init_uns, NS_GET_USERNS) >= 0 || errno != EPERM) | ||
78 | return pr_err("Don't get EPERM"); | ||
79 | |||
80 | if (unshare(CLONE_NEWUSER)) | ||
81 | return pr_err("unshare"); | ||
82 | |||
83 | if (ioctl(ns, NS_GET_USERNS) >= 0 || errno != EPERM) | ||
84 | return pr_err("Don't get EPERM"); | ||
85 | if (ioctl(init_uns, NS_GET_USERNS) >= 0 || errno != EPERM) | ||
86 | return pr_err("Don't get EPERM"); | ||
87 | |||
88 | kill(pid, SIGKILL); | ||
89 | wait(NULL); | ||
90 | return 0; | ||
91 | } | ||
diff --git a/tools/testing/selftests/nsfs/pidns.c b/tools/testing/selftests/nsfs/pidns.c new file mode 100644 index 000000000000..ae3a0d68e966 --- /dev/null +++ b/tools/testing/selftests/nsfs/pidns.c | |||
@@ -0,0 +1,78 @@ | |||
1 | #define _GNU_SOURCE | ||
2 | #include <sched.h> | ||
3 | #include <unistd.h> | ||
4 | #include <stdio.h> | ||
5 | #include <stdlib.h> | ||
6 | #include <signal.h> | ||
7 | #include <errno.h> | ||
8 | #include <sys/types.h> | ||
9 | #include <sys/stat.h> | ||
10 | #include <fcntl.h> | ||
11 | #include <sys/ioctl.h> | ||
12 | #include <sys/prctl.h> | ||
13 | #include <sys/wait.h> | ||
14 | |||
15 | #define pr_err(fmt, ...) \ | ||
16 | ({ \ | ||
17 | fprintf(stderr, "%s:%d:" fmt ": %m\n", \ | ||
18 | __func__, __LINE__, ##__VA_ARGS__); \ | ||
19 | 1; \ | ||
20 | }) | ||
21 | |||
22 | #define NSIO 0xb7 | ||
23 | #define NS_GET_USERNS _IO(NSIO, 0x1) | ||
24 | #define NS_GET_PARENT _IO(NSIO, 0x2) | ||
25 | |||
26 | #define __stack_aligned__ __attribute__((aligned(16))) | ||
27 | struct cr_clone_arg { | ||
28 | char stack[128] __stack_aligned__; | ||
29 | char stack_ptr[0]; | ||
30 | }; | ||
31 | |||
32 | static int child(void *args) | ||
33 | { | ||
34 | prctl(PR_SET_PDEATHSIG, SIGKILL); | ||
35 | while (1) | ||
36 | sleep(1); | ||
37 | exit(0); | ||
38 | } | ||
39 | |||
40 | int main(int argc, char *argv[]) | ||
41 | { | ||
42 | char *ns_strs[] = {"pid", "user"}; | ||
43 | char path[] = "/proc/0123456789/ns/pid"; | ||
44 | struct cr_clone_arg ca; | ||
45 | struct stat st1, st2; | ||
46 | int ns, pns, i; | ||
47 | pid_t pid; | ||
48 | |||
49 | pid = clone(child, ca.stack_ptr, CLONE_NEWUSER | CLONE_NEWPID | SIGCHLD, NULL); | ||
50 | if (pid < 0) | ||
51 | return pr_err("clone"); | ||
52 | |||
53 | for (i = 0; i < 2; i++) { | ||
54 | snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns_strs[i]); | ||
55 | ns = open(path, O_RDONLY); | ||
56 | if (ns < 0) | ||
57 | return pr_err("Unable to open %s", path); | ||
58 | |||
59 | pns = ioctl(ns, NS_GET_PARENT); | ||
60 | if (pns < 0) | ||
61 | return pr_err("Unable to get a parent pidns"); | ||
62 | |||
63 | snprintf(path, sizeof(path), "/proc/self/ns/%s", ns_strs[i]); | ||
64 | if (stat(path, &st2)) | ||
65 | return pr_err("Unable to stat %s", path); | ||
66 | if (fstat(pns, &st1)) | ||
67 | return pr_err("Unable to stat the parent pidns"); | ||
68 | if (st1.st_ino != st2.st_ino) | ||
69 | return pr_err("NS_GET_PARENT returned a wrong namespace"); | ||
70 | |||
71 | if (ioctl(pns, NS_GET_PARENT) >= 0 || errno != EPERM) | ||
72 | return pr_err("Don't get EPERM");; | ||
73 | } | ||
74 | |||
75 | kill(pid, SIGKILL); | ||
76 | wait(NULL); | ||
77 | return 0; | ||
78 | } | ||