author	Daniel Borkmann <daniel@iogearbox.net>	2017-01-18 09:14:17 -0500
committer	David S. Miller <davem@davemloft.net>	2017-01-18 17:12:26 -0500
commit	d407bd25a204bd66b7346dde24bd3d37ef0e0b05 (patch)
tree	01e49e08ca4f4eb258a2e2d9c67d03d503498696 /kernel
parent	9ed59592e3e379b2e9557dc1d9e9ec8fcbb33f16 (diff)
bpf: don't trigger OOM killer under pressure with map alloc
This patch adds two helpers, bpf_map_area_alloc() and bpf_map_area_free(), that are to be used for map allocations. Using kmalloc() for very large allocations can cause excessive work within the page allocator, so i) fall back earlier to vmalloc() when the attempt is considered costly anyway, and, even more importantly, ii) don't trigger the OOM killer with any of the allocators.

Since this is based on a user space request, for example when creating maps with element pre-allocation, we really want such requests to fail instead of killing other user space processes. Also, don't spam the kernel log with warnings should any of the allocations fail under pressure.

Given that, we can make the backend selection in bpf_map_area_alloc() generic and convert all maps over to use this API for spots with potentially large allocation requests. Note that replacing the one kmalloc_array() is fine, as overflow checks happen earlier in htab_map_alloc(): it must also protect the multiplication for vmalloc() should kmalloc_array() fail.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
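For context, here is a minimal user space sketch (illustrative only, not part of this patch) of the kind of request that reaches these allocation paths: creating a hash map with a large max_entries pre-allocates all elements at BPF_MAP_CREATE time, and with the helpers below such a request fails with -ENOMEM under memory pressure instead of OOM-killing unrelated processes. The helper name and the map parameters are example values.

/* Illustrative only -- not part of this patch. Creating a large hash
 * map pre-allocates all elements at BPF_MAP_CREATE time, exercising
 * the allocation paths converted below.
 */
#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int create_large_hash_map(void)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_type    = BPF_MAP_TYPE_HASH;
	attr.key_size    = 8;		/* e.g. a u64 key */
	attr.value_size  = 64;
	attr.max_entries = 1 << 20;	/* ~1M elements pre-allocated */

	/* fails with -ENOMEM under pressure instead of OOM-killing */
	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}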
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/bpf/arraymap.c	18
-rw-r--r--	kernel/bpf/hashtab.c	22
-rw-r--r--	kernel/bpf/stackmap.c	20
-rw-r--r--	kernel/bpf/syscall.c	26
4 files changed, 50 insertions(+), 36 deletions(-)
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 229a5d5df977..3d55d95dcf49 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -11,7 +11,6 @@
  */
 #include <linux/bpf.h>
 #include <linux/err.h>
-#include <linux/vmalloc.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/filter.h>
@@ -74,14 +73,10 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	if (array_size >= U32_MAX - PAGE_SIZE)
 		return ERR_PTR(-ENOMEM);
 
-
 	/* allocate all map elements and zero-initialize them */
-	array = kzalloc(array_size, GFP_USER | __GFP_NOWARN);
-	if (!array) {
-		array = vzalloc(array_size);
-		if (!array)
-			return ERR_PTR(-ENOMEM);
-	}
+	array = bpf_map_area_alloc(array_size);
+	if (!array)
+		return ERR_PTR(-ENOMEM);
 
 	/* copy mandatory map attributes */
 	array->map.map_type = attr->map_type;
@@ -97,7 +92,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 
 	if (array_size >= U32_MAX - PAGE_SIZE ||
 	    elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) {
-		kvfree(array);
+		bpf_map_area_free(array);
 		return ERR_PTR(-ENOMEM);
 	}
 out:
@@ -262,7 +257,7 @@ static void array_map_free(struct bpf_map *map)
 	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
 		bpf_array_free_percpu(array);
 
-	kvfree(array);
+	bpf_map_area_free(array);
 }
 
 static const struct bpf_map_ops array_ops = {
@@ -319,7 +314,8 @@ static void fd_array_map_free(struct bpf_map *map)
 	/* make sure it's empty */
 	for (i = 0; i < array->map.max_entries; i++)
 		BUG_ON(array->ptrs[i] != NULL);
-	kvfree(array);
+
+	bpf_map_area_free(array);
 }
 
 static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 3f2bb58952d8..a753bbe7df0a 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -13,7 +13,6 @@
 #include <linux/bpf.h>
 #include <linux/jhash.h>
 #include <linux/filter.h>
-#include <linux/vmalloc.h>
 #include "percpu_freelist.h"
 #include "bpf_lru_list.h"
 
@@ -103,7 +102,7 @@ static void htab_free_elems(struct bpf_htab *htab)
 		free_percpu(pptr);
 	}
 free_elems:
-	vfree(htab->elems);
+	bpf_map_area_free(htab->elems);
 }
 
 static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
@@ -125,7 +124,8 @@ static int prealloc_init(struct bpf_htab *htab)
 {
 	int err = -ENOMEM, i;
 
-	htab->elems = vzalloc(htab->elem_size * htab->map.max_entries);
+	htab->elems = bpf_map_area_alloc(htab->elem_size *
+					 htab->map.max_entries);
 	if (!htab->elems)
 		return -ENOMEM;
 
@@ -320,14 +320,10 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 		goto free_htab;
 
 	err = -ENOMEM;
-	htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct bucket),
-				      GFP_USER | __GFP_NOWARN);
-
-	if (!htab->buckets) {
-		htab->buckets = vmalloc(htab->n_buckets * sizeof(struct bucket));
-		if (!htab->buckets)
-			goto free_htab;
-	}
+	htab->buckets = bpf_map_area_alloc(htab->n_buckets *
+					   sizeof(struct bucket));
+	if (!htab->buckets)
+		goto free_htab;
 
 	for (i = 0; i < htab->n_buckets; i++) {
 		INIT_HLIST_HEAD(&htab->buckets[i].head);
@@ -354,7 +350,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 free_extra_elems:
 	free_percpu(htab->extra_elems);
 free_buckets:
-	kvfree(htab->buckets);
+	bpf_map_area_free(htab->buckets);
 free_htab:
 	kfree(htab);
 	return ERR_PTR(err);
@@ -1014,7 +1010,7 @@ static void htab_map_free(struct bpf_map *map)
 	prealloc_destroy(htab);
 
 	free_percpu(htab->extra_elems);
-	kvfree(htab->buckets);
+	bpf_map_area_free(htab->buckets);
 	kfree(htab);
 }
 
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 732ae16d12b7..be8519148c25 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -7,7 +7,6 @@
 #include <linux/bpf.h>
 #include <linux/jhash.h>
 #include <linux/filter.h>
-#include <linux/vmalloc.h>
 #include <linux/stacktrace.h>
 #include <linux/perf_event.h>
 #include "percpu_freelist.h"
@@ -32,7 +31,7 @@ static int prealloc_elems_and_freelist(struct bpf_stack_map *smap)
 	u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size;
 	int err;
 
-	smap->elems = vzalloc(elem_size * smap->map.max_entries);
+	smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries);
 	if (!smap->elems)
 		return -ENOMEM;
 
@@ -45,7 +44,7 @@ static int prealloc_elems_and_freelist(struct bpf_stack_map *smap)
 	return 0;
 
 free_elems:
-	vfree(smap->elems);
+	bpf_map_area_free(smap->elems);
 	return err;
 }
 
@@ -76,12 +75,9 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
 	if (cost >= U32_MAX - PAGE_SIZE)
 		return ERR_PTR(-E2BIG);
 
-	smap = kzalloc(cost, GFP_USER | __GFP_NOWARN);
-	if (!smap) {
-		smap = vzalloc(cost);
-		if (!smap)
-			return ERR_PTR(-ENOMEM);
-	}
+	smap = bpf_map_area_alloc(cost);
+	if (!smap)
+		return ERR_PTR(-ENOMEM);
 
 	err = -E2BIG;
 	cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
@@ -112,7 +108,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
 put_buffers:
 	put_callchain_buffers();
 free_smap:
-	kvfree(smap);
+	bpf_map_area_free(smap);
 	return ERR_PTR(err);
 }
 
@@ -262,9 +258,9 @@ static void stack_map_free(struct bpf_map *map)
 	/* wait for bpf programs to complete before freeing stack map */
 	synchronize_rcu();
 
-	vfree(smap->elems);
+	bpf_map_area_free(smap->elems);
 	pcpu_freelist_destroy(&smap->freelist);
-	kvfree(smap);
+	bpf_map_area_free(smap);
 	put_callchain_buffers();
 }
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 1d6b29e4e2c3..19b6129eab23 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -12,6 +12,8 @@
 #include <linux/bpf.h>
 #include <linux/syscalls.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/mmzone.h>
 #include <linux/anon_inodes.h>
 #include <linux/file.h>
 #include <linux/license.h>
@@ -49,6 +51,30 @@ void bpf_register_map_type(struct bpf_map_type_list *tl)
 	list_add(&tl->list_node, &bpf_map_types);
 }
 
+void *bpf_map_area_alloc(size_t size)
+{
+	/* We definitely need __GFP_NORETRY, so OOM killer doesn't
+	 * trigger under memory pressure as we really just want to
+	 * fail instead.
+	 */
+	const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
+	void *area;
+
+	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
+		area = kmalloc(size, GFP_USER | flags);
+		if (area != NULL)
+			return area;
+	}
+
+	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | flags,
+			 PAGE_KERNEL);
+}
+
+void bpf_map_area_free(void *area)
+{
+	kvfree(area);
+}
+
 int bpf_map_precharge_memlock(u32 pages)
 {
 	struct user_struct *user = get_current_user();
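A usage note on bpf_map_area_alloc() above (the concrete numbers are assumptions for a common configuration, not stated in the patch): PAGE_ALLOC_COSTLY_ORDER is 3, so with 4 KiB pages the kmalloc() attempt is only made for requests up to 4096 << 3 = 32768 bytes (32 KiB); larger requests go straight to __vmalloc(). bpf_map_area_free() can take either result because kvfree() detects vmalloc addresses and dispatches to vfree() or kfree() accordingly. A minimal sketch of the threshold arithmetic, runnable in user space:

/* Sketch only: the constants assume 4 KiB pages and
 * PAGE_ALLOC_COSTLY_ORDER == 3; PAGE_SIZE is config/arch dependent.
 */
#include <assert.h>

int main(void)
{
	const unsigned long page_size = 4096;	/* stands in for PAGE_SIZE */
	const unsigned long costly_order = 3;	/* PAGE_ALLOC_COSTLY_ORDER */
	const unsigned long threshold = page_size << costly_order;

	assert(threshold == 32768);		/* 32 KiB: largest size tried via kmalloc() */
	assert(33 * 1024UL > threshold);	/* a 33 KiB request goes straight to __vmalloc() */
	return 0;
}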