diff options
author | Eric W. Biederman <ebiederm@xmission.com> | 2016-09-28 01:27:17 -0400 |
---|---|---|
committer | Eric W. Biederman <ebiederm@xmission.com> | 2016-09-30 13:46:48 -0400 |
commit | d29216842a85c7970c536108e093963f02714498 (patch) | |
tree | acf8843f0c807e80fc3b15653f3b22136984c69a | |
parent | 2ed6afdee798658fe3c33b50c4a79d1bde45f1d8 (diff) |
mnt: Add a per mount namespace limit on the number of mounts
CAI Qian <caiqian@redhat.com> pointed out that the semantics
of shared subtrees make it possible to create an exponentially
increasing number of mounts in a mount namespace.
mkdir /tmp/1 /tmp/2
mount --make-rshared /
for i in $(seq 1 20) ; do mount --bind /tmp/1 /tmp/2 ; done
Will create 2^20 or 1048576 mounts, which is a practical problem
as some people have managed to hit this by accident.
As such CVE-2016-6213 was assigned.
Ian Kent <raven@themaw.net> described the situation for autofs users
as follows:
> The number of mounts for direct mount maps is usually not very large because of
> the way they are implemented, large direct mount maps can have performance
> problems. There can be anywhere from a few (likely case a few hundred) to less
> than 10000, plus mounts that have been triggered and not yet expired.
>
> Indirect mounts have one autofs mount at the root plus the number of mounts that
> have been triggered and not yet expired.
>
> The number of autofs indirect map entries can range from a few to the common
> case of several thousand and in rare cases up to between 30000 and 50000. I've
> not heard of people with maps larger than 50000 entries.
>
> The larger the number of map entries the greater the possibility for a large
> number of active mounts so it's not hard to expect cases of a 1000 or somewhat
> more active mounts.
So I am setting the default number of mounts allowed per mount
namespace at 100,000. This is more than enough for any use case I
know of, but small enough to quickly stop an exponential increase
in mounts, which should be perfect for catching misconfigurations and
malfunctioning programs.
For anyone who needs a higher limit this can be changed by writing
to the new /proc/sys/fs/mount-max sysctl.
Tested-by: CAI Qian <caiqian@redhat.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
-rw-r--r-- | Documentation/sysctl/fs.txt | 7 | ||||
-rw-r--r-- | fs/mount.h | 2 | ||||
-rw-r--r-- | fs/namespace.c | 49 | ||||
-rw-r--r-- | fs/pnode.c | 2 | ||||
-rw-r--r-- | fs/pnode.h | 1 | ||||
-rw-r--r-- | include/linux/mount.h | 2 | ||||
-rw-r--r-- | kernel/sysctl.c | 9 |
7 files changed, 70 insertions, 2 deletions
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index 302b5ed616a6..35e17f748ca7 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt | |||
@@ -265,6 +265,13 @@ aio-nr can grow to. | |||
265 | 265 | ||
266 | ============================================================== | 266 | ============================================================== |
267 | 267 | ||
268 | mount-max: | ||
269 | |||
270 | This denotes the maximum number of mounts that may exist | ||
271 | in a mount namespace. | ||
272 | |||
273 | ============================================================== | ||
274 | |||
268 | 275 | ||
269 | 2. /proc/sys/fs/binfmt_misc | 276 | 2. /proc/sys/fs/binfmt_misc |
270 | ---------------------------------------------------------- | 277 | ---------------------------------------------------------- |
diff --git a/fs/mount.h b/fs/mount.h index e037981d8351..d2e25d7b64b3 100644 --- a/fs/mount.h +++ b/fs/mount.h | |||
@@ -14,6 +14,8 @@ struct mnt_namespace { | |||
14 | u64 seq; /* Sequence number to prevent loops */ | 14 | u64 seq; /* Sequence number to prevent loops */ |
15 | wait_queue_head_t poll; | 15 | wait_queue_head_t poll; |
16 | u64 event; | 16 | u64 event; |
17 | unsigned int mounts; /* # of mounts in the namespace */ | ||
18 | unsigned int pending_mounts; | ||
17 | }; | 19 | }; |
18 | 20 | ||
19 | struct mnt_pcp { | 21 | struct mnt_pcp { |
diff --git a/fs/namespace.c b/fs/namespace.c index 8a0e90eb81d3..db1b5a38864e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -27,6 +27,9 @@ | |||
27 | #include "pnode.h" | 27 | #include "pnode.h" |
28 | #include "internal.h" | 28 | #include "internal.h" |
29 | 29 | ||
30 | /* Maximum number of mounts in a mount namespace */ | ||
31 | unsigned int sysctl_mount_max __read_mostly = 100000; | ||
32 | |||
30 | static unsigned int m_hash_mask __read_mostly; | 33 | static unsigned int m_hash_mask __read_mostly; |
31 | static unsigned int m_hash_shift __read_mostly; | 34 | static unsigned int m_hash_shift __read_mostly; |
32 | static unsigned int mp_hash_mask __read_mostly; | 35 | static unsigned int mp_hash_mask __read_mostly; |
@@ -899,6 +902,9 @@ static void commit_tree(struct mount *mnt, struct mount *shadows) | |||
899 | 902 | ||
900 | list_splice(&head, n->list.prev); | 903 | list_splice(&head, n->list.prev); |
901 | 904 | ||
905 | n->mounts += n->pending_mounts; | ||
906 | n->pending_mounts = 0; | ||
907 | |||
902 | attach_shadowed(mnt, parent, shadows); | 908 | attach_shadowed(mnt, parent, shadows); |
903 | touch_mnt_namespace(n); | 909 | touch_mnt_namespace(n); |
904 | } | 910 | } |
@@ -1419,11 +1425,16 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) | |||
1419 | propagate_umount(&tmp_list); | 1425 | propagate_umount(&tmp_list); |
1420 | 1426 | ||
1421 | while (!list_empty(&tmp_list)) { | 1427 | while (!list_empty(&tmp_list)) { |
1428 | struct mnt_namespace *ns; | ||
1422 | bool disconnect; | 1429 | bool disconnect; |
1423 | p = list_first_entry(&tmp_list, struct mount, mnt_list); | 1430 | p = list_first_entry(&tmp_list, struct mount, mnt_list); |
1424 | list_del_init(&p->mnt_expire); | 1431 | list_del_init(&p->mnt_expire); |
1425 | list_del_init(&p->mnt_list); | 1432 | list_del_init(&p->mnt_list); |
1426 | __touch_mnt_namespace(p->mnt_ns); | 1433 | ns = p->mnt_ns; |
1434 | if (ns) { | ||
1435 | ns->mounts--; | ||
1436 | __touch_mnt_namespace(ns); | ||
1437 | } | ||
1427 | p->mnt_ns = NULL; | 1438 | p->mnt_ns = NULL; |
1428 | if (how & UMOUNT_SYNC) | 1439 | if (how & UMOUNT_SYNC) |
1429 | p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; | 1440 | p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; |
@@ -1840,6 +1851,28 @@ static int invent_group_ids(struct mount *mnt, bool recurse) | |||
1840 | return 0; | 1851 | return 0; |
1841 | } | 1852 | } |
1842 | 1853 | ||
1854 | int count_mounts(struct mnt_namespace *ns, struct mount *mnt) | ||
1855 | { | ||
1856 | unsigned int max = READ_ONCE(sysctl_mount_max); | ||
1857 | unsigned int mounts = 0, old, pending, sum; | ||
1858 | struct mount *p; | ||
1859 | |||
1860 | for (p = mnt; p; p = next_mnt(p, mnt)) | ||
1861 | mounts++; | ||
1862 | |||
1863 | old = ns->mounts; | ||
1864 | pending = ns->pending_mounts; | ||
1865 | sum = old + pending; | ||
1866 | if ((old > sum) || | ||
1867 | (pending > sum) || | ||
1868 | (max < sum) || | ||
1869 | (mounts > (max - sum))) | ||
1870 | return -ENOSPC; | ||
1871 | |||
1872 | ns->pending_mounts = pending + mounts; | ||
1873 | return 0; | ||
1874 | } | ||
1875 | |||
1843 | /* | 1876 | /* |
1844 | * @source_mnt : mount tree to be attached | 1877 | * @source_mnt : mount tree to be attached |
1845 | * @nd : place the mount tree @source_mnt is attached | 1878 | * @nd : place the mount tree @source_mnt is attached |
@@ -1909,10 +1942,18 @@ static int attach_recursive_mnt(struct mount *source_mnt, | |||
1909 | struct path *parent_path) | 1942 | struct path *parent_path) |
1910 | { | 1943 | { |
1911 | HLIST_HEAD(tree_list); | 1944 | HLIST_HEAD(tree_list); |
1945 | struct mnt_namespace *ns = dest_mnt->mnt_ns; | ||
1912 | struct mount *child, *p; | 1946 | struct mount *child, *p; |
1913 | struct hlist_node *n; | 1947 | struct hlist_node *n; |
1914 | int err; | 1948 | int err; |
1915 | 1949 | ||
1950 | /* Is there space to add these mounts to the mount namespace? */ | ||
1951 | if (!parent_path) { | ||
1952 | err = count_mounts(ns, source_mnt); | ||
1953 | if (err) | ||
1954 | goto out; | ||
1955 | } | ||
1956 | |||
1916 | if (IS_MNT_SHARED(dest_mnt)) { | 1957 | if (IS_MNT_SHARED(dest_mnt)) { |
1917 | err = invent_group_ids(source_mnt, true); | 1958 | err = invent_group_ids(source_mnt, true); |
1918 | if (err) | 1959 | if (err) |
@@ -1949,11 +1990,13 @@ static int attach_recursive_mnt(struct mount *source_mnt, | |||
1949 | out_cleanup_ids: | 1990 | out_cleanup_ids: |
1950 | while (!hlist_empty(&tree_list)) { | 1991 | while (!hlist_empty(&tree_list)) { |
1951 | child = hlist_entry(tree_list.first, struct mount, mnt_hash); | 1992 | child = hlist_entry(tree_list.first, struct mount, mnt_hash); |
1993 | child->mnt_parent->mnt_ns->pending_mounts = 0; | ||
1952 | umount_tree(child, UMOUNT_SYNC); | 1994 | umount_tree(child, UMOUNT_SYNC); |
1953 | } | 1995 | } |
1954 | unlock_mount_hash(); | 1996 | unlock_mount_hash(); |
1955 | cleanup_group_ids(source_mnt, NULL); | 1997 | cleanup_group_ids(source_mnt, NULL); |
1956 | out: | 1998 | out: |
1999 | ns->pending_mounts = 0; | ||
1957 | return err; | 2000 | return err; |
1958 | } | 2001 | } |
1959 | 2002 | ||
@@ -2776,6 +2819,8 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns) | |||
2776 | new_ns->event = 0; | 2819 | new_ns->event = 0; |
2777 | new_ns->user_ns = get_user_ns(user_ns); | 2820 | new_ns->user_ns = get_user_ns(user_ns); |
2778 | new_ns->ucounts = ucounts; | 2821 | new_ns->ucounts = ucounts; |
2822 | new_ns->mounts = 0; | ||
2823 | new_ns->pending_mounts = 0; | ||
2779 | return new_ns; | 2824 | return new_ns; |
2780 | } | 2825 | } |
2781 | 2826 | ||
@@ -2825,6 +2870,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, | |||
2825 | q = new; | 2870 | q = new; |
2826 | while (p) { | 2871 | while (p) { |
2827 | q->mnt_ns = new_ns; | 2872 | q->mnt_ns = new_ns; |
2873 | new_ns->mounts++; | ||
2828 | if (new_fs) { | 2874 | if (new_fs) { |
2829 | if (&p->mnt == new_fs->root.mnt) { | 2875 | if (&p->mnt == new_fs->root.mnt) { |
2830 | new_fs->root.mnt = mntget(&q->mnt); | 2876 | new_fs->root.mnt = mntget(&q->mnt); |
@@ -2863,6 +2909,7 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *m) | |||
2863 | struct mount *mnt = real_mount(m); | 2909 | struct mount *mnt = real_mount(m); |
2864 | mnt->mnt_ns = new_ns; | 2910 | mnt->mnt_ns = new_ns; |
2865 | new_ns->root = mnt; | 2911 | new_ns->root = mnt; |
2912 | new_ns->mounts++; | ||
2866 | list_add(&mnt->mnt_list, &new_ns->list); | 2913 | list_add(&mnt->mnt_list, &new_ns->list); |
2867 | } else { | 2914 | } else { |
2868 | mntput(m); | 2915 | mntput(m); |
diff --git a/fs/pnode.c b/fs/pnode.c index 99899705b105..234a9ac49958 100644 --- a/fs/pnode.c +++ b/fs/pnode.c | |||
@@ -259,7 +259,7 @@ static int propagate_one(struct mount *m) | |||
259 | read_sequnlock_excl(&mount_lock); | 259 | read_sequnlock_excl(&mount_lock); |
260 | } | 260 | } |
261 | hlist_add_head(&child->mnt_hash, list); | 261 | hlist_add_head(&child->mnt_hash, list); |
262 | return 0; | 262 | return count_mounts(m->mnt_ns, child); |
263 | } | 263 | } |
264 | 264 | ||
265 | /* | 265 | /* |
diff --git a/fs/pnode.h b/fs/pnode.h index 0fcdbe7ca648..550f5a8b4fcf 100644 --- a/fs/pnode.h +++ b/fs/pnode.h | |||
@@ -52,4 +52,5 @@ void mnt_set_mountpoint(struct mount *, struct mountpoint *, | |||
52 | struct mount *copy_tree(struct mount *, struct dentry *, int); | 52 | struct mount *copy_tree(struct mount *, struct dentry *, int); |
53 | bool is_path_reachable(struct mount *, struct dentry *, | 53 | bool is_path_reachable(struct mount *, struct dentry *, |
54 | const struct path *root); | 54 | const struct path *root); |
55 | int count_mounts(struct mnt_namespace *ns, struct mount *mnt); | ||
55 | #endif /* _LINUX_PNODE_H */ | 56 | #endif /* _LINUX_PNODE_H */ |
diff --git a/include/linux/mount.h b/include/linux/mount.h index 54a594d49733..1172cce949a4 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h | |||
@@ -96,4 +96,6 @@ extern void mark_mounts_for_expiry(struct list_head *mounts); | |||
96 | 96 | ||
97 | extern dev_t name_to_dev_t(const char *name); | 97 | extern dev_t name_to_dev_t(const char *name); |
98 | 98 | ||
99 | extern unsigned int sysctl_mount_max; | ||
100 | |||
99 | #endif /* _LINUX_MOUNT_H */ | 101 | #endif /* _LINUX_MOUNT_H */ |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b43d0b27c1fe..03f18cc15697 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -65,6 +65,7 @@ | |||
65 | #include <linux/sched/sysctl.h> | 65 | #include <linux/sched/sysctl.h> |
66 | #include <linux/kexec.h> | 66 | #include <linux/kexec.h> |
67 | #include <linux/bpf.h> | 67 | #include <linux/bpf.h> |
68 | #include <linux/mount.h> | ||
68 | 69 | ||
69 | #include <asm/uaccess.h> | 70 | #include <asm/uaccess.h> |
70 | #include <asm/processor.h> | 71 | #include <asm/processor.h> |
@@ -1838,6 +1839,14 @@ static struct ctl_table fs_table[] = { | |||
1838 | .mode = 0644, | 1839 | .mode = 0644, |
1839 | .proc_handler = proc_doulongvec_minmax, | 1840 | .proc_handler = proc_doulongvec_minmax, |
1840 | }, | 1841 | }, |
1842 | { | ||
1843 | .procname = "mount-max", | ||
1844 | .data = &sysctl_mount_max, | ||
1845 | .maxlen = sizeof(unsigned int), | ||
1846 | .mode = 0644, | ||
1847 | .proc_handler = proc_dointvec_minmax, | ||
1848 | .extra1 = &one, | ||
1849 | }, | ||
1841 | { } | 1850 | { } |
1842 | }; | 1851 | }; |
1843 | 1852 | ||