author    Thomas Gleixner <tglx@linutronix.de>  2015-01-12 04:51:13 -0500
committer Thomas Gleixner <tglx@linutronix.de>  2015-01-12 04:51:13 -0500
commit    2f5eaf66e580f64032b365a00157b6b58c266b37 (patch)
tree      7852017c864f0eb3833782e2a017952bd8531458 /kernel/bpf
parent    c291ee622165cb2c8d4e7af63fffd499354a23be (diff)
parent    91d1179212161f220938198b742c328ad38fd0a3 (diff)
Merge tag 'irqchip-urgent-3.19' of git://git.infradead.org/users/jcooper/linux into irq/urgent
irqchip urgent fixes for v3.19 from Jason Cooper:

 - mtk-sysirq: Fix error handling
 - hip04: Fix cpu map for 16bit value
 - gic-v3-its: Clear a warning regarding decimal constants
 - omap-intc: Fix legacy DMA regression
 - atmel-aic-common: Retain priority when changing type
Diffstat (limited to 'kernel/bpf')
-rw-r--r--  kernel/bpf/Makefile    |   2
-rw-r--r--  kernel/bpf/arraymap.c  | 156
-rw-r--r--  kernel/bpf/hashtab.c   | 367
-rw-r--r--  kernel/bpf/helpers.c   |  89
-rw-r--r--  kernel/bpf/syscall.c   |   6
-rw-r--r--  kernel/bpf/test_stub.c |  56
-rw-r--r--  kernel/bpf/verifier.c  | 171
7 files changed, 750 insertions, 97 deletions
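
The interesting part for kernel/bpf is the arrival of the first two real map types (hash and array) plus a new 'flags' argument threaded through map updates (see the syscall.c and helpers.c hunks below). For orientation, here is a minimal userspace sketch (not part of this commit; it assumes the union bpf_attr layout and bpf(2) syscall number of this kernel series) of creating an array map:

/* Hypothetical userspace sketch, assuming the union bpf_attr layout
 * introduced by this kernel series.
 */
#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int bpf_map_create_array(unsigned int max_entries)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_type    = BPF_MAP_TYPE_ARRAY;
	attr.key_size    = 4;		/* array maps require 4-byte keys */
	attr.value_size  = 8;
	attr.max_entries = max_entries;

	/* returns a map fd, or -1 with errno set */
	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}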
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 0daf7f6ae7df..a5ae60f0b0a2 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1,5 +1,5 @@
 obj-y := core.o
-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o hashtab.o arraymap.o helpers.o
 ifdef CONFIG_TEST_BPF
 obj-$(CONFIG_BPF_SYSCALL) += test_stub.o
 endif
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
new file mode 100644
index 000000000000..9eb4d8a7cd87
--- /dev/null
+++ b/kernel/bpf/arraymap.c
@@ -0,0 +1,156 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/bpf.h>
+#include <linux/err.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+
+struct bpf_array {
+	struct bpf_map map;
+	u32 elem_size;
+	char value[0] __aligned(8);
+};
+
+/* Called from syscall */
+static struct bpf_map *array_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_array *array;
+	u32 elem_size, array_size;
+
+	/* check sanity of attributes */
+	if (attr->max_entries == 0 || attr->key_size != 4 ||
+	    attr->value_size == 0)
+		return ERR_PTR(-EINVAL);
+
+	elem_size = round_up(attr->value_size, 8);
+
+	/* check round_up into zero and u32 overflow */
+	if (elem_size == 0 ||
+	    attr->max_entries > (U32_MAX - sizeof(*array)) / elem_size)
+		return ERR_PTR(-ENOMEM);
+
+	array_size = sizeof(*array) + attr->max_entries * elem_size;
+
+	/* allocate all map elements and zero-initialize them */
+	array = kzalloc(array_size, GFP_USER | __GFP_NOWARN);
+	if (!array) {
+		array = vzalloc(array_size);
+		if (!array)
+			return ERR_PTR(-ENOMEM);
+	}
+
+	/* copy mandatory map attributes */
+	array->map.key_size = attr->key_size;
+	array->map.value_size = attr->value_size;
+	array->map.max_entries = attr->max_entries;
+
+	array->elem_size = elem_size;
+
+	return &array->map;
+}
+
+/* Called from syscall or from eBPF program */
+static void *array_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	u32 index = *(u32 *)key;
+
+	if (index >= array->map.max_entries)
+		return NULL;
+
+	return array->value + array->elem_size * index;
+}
+
+/* Called from syscall */
+static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	u32 index = *(u32 *)key;
+	u32 *next = (u32 *)next_key;
+
+	if (index >= array->map.max_entries) {
+		*next = 0;
+		return 0;
+	}
+
+	if (index == array->map.max_entries - 1)
+		return -ENOENT;
+
+	*next = index + 1;
+	return 0;
+}
+
+/* Called from syscall or from eBPF program */
+static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
+				 u64 map_flags)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	u32 index = *(u32 *)key;
+
+	if (map_flags > BPF_EXIST)
+		/* unknown flags */
+		return -EINVAL;
+
+	if (index >= array->map.max_entries)
+		/* all elements were pre-allocated, cannot insert a new one */
+		return -E2BIG;
+
+	if (map_flags == BPF_NOEXIST)
+		/* all elements already exist */
+		return -EEXIST;
+
+	memcpy(array->value + array->elem_size * index, value, array->elem_size);
+	return 0;
+}
+
+/* Called from syscall or from eBPF program */
+static int array_map_delete_elem(struct bpf_map *map, void *key)
+{
+	return -EINVAL;
+}
+
+/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
+static void array_map_free(struct bpf_map *map)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+
+	/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
+	 * so the programs (can be more than one that used this map) were
+	 * disconnected from events. Wait for outstanding programs to complete
+	 * and free the array
+	 */
+	synchronize_rcu();
+
+	kvfree(array);
+}
+
+static struct bpf_map_ops array_ops = {
+	.map_alloc = array_map_alloc,
+	.map_free = array_map_free,
+	.map_get_next_key = array_map_get_next_key,
+	.map_lookup_elem = array_map_lookup_elem,
+	.map_update_elem = array_map_update_elem,
+	.map_delete_elem = array_map_delete_elem,
+};
+
+static struct bpf_map_type_list tl = {
+	.ops = &array_ops,
+	.type = BPF_MAP_TYPE_ARRAY,
+};
+
+static int __init register_array_map(void)
+{
+	bpf_register_map_type(&tl);
+	return 0;
+}
+late_initcall(register_array_map);
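
Since every array slot exists from creation time, the update flags behave differently here than for a hash map: BPF_NOEXIST can never succeed, an out-of-range index fails with E2BIG rather than growing the map, and deletes are rejected outright. A hedged userspace sketch of those semantics, reusing the headers from the earlier snippet (map_fd is assumed to be an array map fd):

union bpf_attr attr;
__u32 key = 0;
__u64 val = 42;

memset(&attr, 0, sizeof(attr));
attr.map_fd = map_fd;			/* assumed: fd from BPF_MAP_CREATE */
attr.key    = (__u64)(unsigned long)&key;
attr.value  = (__u64)(unsigned long)&val;

attr.flags = BPF_NOEXIST;		/* fails with EEXIST: the slot is pre-allocated */
syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));

attr.flags = BPF_ANY;			/* succeeds: overwrites the slot in place */
syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));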
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
new file mode 100644
index 000000000000..b3ba43674310
--- /dev/null
+++ b/kernel/bpf/hashtab.c
@@ -0,0 +1,367 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/bpf.h>
+#include <linux/jhash.h>
+#include <linux/filter.h>
+#include <linux/vmalloc.h>
+
+struct bpf_htab {
+	struct bpf_map map;
+	struct hlist_head *buckets;
+	spinlock_t lock;
+	u32 count;	/* number of elements in this hashtable */
+	u32 n_buckets;	/* number of hash buckets */
+	u32 elem_size;	/* size of each element in bytes */
+};
+
+/* each htab element is struct htab_elem + key + value */
+struct htab_elem {
+	struct hlist_node hash_node;
+	struct rcu_head rcu;
+	u32 hash;
+	char key[0] __aligned(8);
+};
+
+/* Called from syscall */
+static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_htab *htab;
+	int err, i;
+
+	htab = kzalloc(sizeof(*htab), GFP_USER);
+	if (!htab)
+		return ERR_PTR(-ENOMEM);
+
+	/* mandatory map attributes */
+	htab->map.key_size = attr->key_size;
+	htab->map.value_size = attr->value_size;
+	htab->map.max_entries = attr->max_entries;
+
+	/* check sanity of attributes.
+	 * value_size == 0 may be allowed in the future to use map as a set
+	 */
+	err = -EINVAL;
+	if (htab->map.max_entries == 0 || htab->map.key_size == 0 ||
+	    htab->map.value_size == 0)
+		goto free_htab;
+
+	/* hash table size must be power of 2 */
+	htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);
+
+	err = -E2BIG;
+	if (htab->map.key_size > MAX_BPF_STACK)
+		/* eBPF programs initialize keys on stack, so they cannot be
+		 * larger than max stack size
+		 */
+		goto free_htab;
+
+	err = -ENOMEM;
+	/* prevent zero size kmalloc and check for u32 overflow */
+	if (htab->n_buckets == 0 ||
+	    htab->n_buckets > U32_MAX / sizeof(struct hlist_head))
+		goto free_htab;
+
+	htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct hlist_head),
+				      GFP_USER | __GFP_NOWARN);
+
+	if (!htab->buckets) {
+		htab->buckets = vmalloc(htab->n_buckets * sizeof(struct hlist_head));
+		if (!htab->buckets)
+			goto free_htab;
+	}
+
+	for (i = 0; i < htab->n_buckets; i++)
+		INIT_HLIST_HEAD(&htab->buckets[i]);
+
+	spin_lock_init(&htab->lock);
+	htab->count = 0;
+
+	htab->elem_size = sizeof(struct htab_elem) +
+			  round_up(htab->map.key_size, 8) +
+			  htab->map.value_size;
+	return &htab->map;
+
+free_htab:
+	kfree(htab);
+	return ERR_PTR(err);
+}
+
+static inline u32 htab_map_hash(const void *key, u32 key_len)
+{
+	return jhash(key, key_len, 0);
+}
+
+static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
+{
+	return &htab->buckets[hash & (htab->n_buckets - 1)];
+}
+
+static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
+					 void *key, u32 key_size)
+{
+	struct htab_elem *l;
+
+	hlist_for_each_entry_rcu(l, head, hash_node)
+		if (l->hash == hash && !memcmp(&l->key, key, key_size))
+			return l;
+
+	return NULL;
+}
+
+/* Called from syscall or from eBPF program */
+static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct hlist_head *head;
+	struct htab_elem *l;
+	u32 hash, key_size;
+
+	/* Must be called with rcu_read_lock. */
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	key_size = map->key_size;
+
+	hash = htab_map_hash(key, key_size);
+
+	head = select_bucket(htab, hash);
+
+	l = lookup_elem_raw(head, hash, key, key_size);
+
+	if (l)
+		return l->key + round_up(map->key_size, 8);
+
+	return NULL;
+}
+
+/* Called from syscall */
+static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct hlist_head *head;
+	struct htab_elem *l, *next_l;
+	u32 hash, key_size;
+	int i;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	key_size = map->key_size;
+
+	hash = htab_map_hash(key, key_size);
+
+	head = select_bucket(htab, hash);
+
+	/* lookup the key */
+	l = lookup_elem_raw(head, hash, key, key_size);
+
+	if (!l) {
+		i = 0;
+		goto find_first_elem;
+	}
+
+	/* key was found, get next key in the same bucket */
+	next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)),
+				  struct htab_elem, hash_node);
+
+	if (next_l) {
+		/* if next elem in this hash list is non-zero, just return it */
+		memcpy(next_key, next_l->key, key_size);
+		return 0;
+	}
+
+	/* no more elements in this hash list, go to the next bucket */
+	i = hash & (htab->n_buckets - 1);
+	i++;
+
+find_first_elem:
+	/* iterate over buckets */
+	for (; i < htab->n_buckets; i++) {
+		head = select_bucket(htab, i);
+
+		/* pick first element in the bucket */
+		next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
+					  struct htab_elem, hash_node);
+		if (next_l) {
+			/* if it's not empty, just return it */
+			memcpy(next_key, next_l->key, key_size);
+			return 0;
+		}
+	}
+
+	/* iterated over all buckets and all elements */
+	return -ENOENT;
+}
+
+/* Called from syscall or from eBPF program */
+static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
+				u64 map_flags)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct htab_elem *l_new, *l_old;
+	struct hlist_head *head;
+	unsigned long flags;
+	u32 key_size;
+	int ret;
+
+	if (map_flags > BPF_EXIST)
+		/* unknown flags */
+		return -EINVAL;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	/* allocate new element outside of lock */
+	l_new = kmalloc(htab->elem_size, GFP_ATOMIC);
+	if (!l_new)
+		return -ENOMEM;
+
+	key_size = map->key_size;
+
+	memcpy(l_new->key, key, key_size);
+	memcpy(l_new->key + round_up(key_size, 8), value, map->value_size);
+
+	l_new->hash = htab_map_hash(l_new->key, key_size);
+
+	/* bpf_map_update_elem() can be called in_irq() */
+	spin_lock_irqsave(&htab->lock, flags);
+
+	head = select_bucket(htab, l_new->hash);
+
+	l_old = lookup_elem_raw(head, l_new->hash, key, key_size);
+
+	if (!l_old && unlikely(htab->count >= map->max_entries)) {
+		/* if elem with this 'key' doesn't exist and we've reached
+		 * max_entries limit, fail insertion of new elem
+		 */
+		ret = -E2BIG;
+		goto err;
+	}
+
+	if (l_old && map_flags == BPF_NOEXIST) {
+		/* elem already exists */
+		ret = -EEXIST;
+		goto err;
+	}
+
+	if (!l_old && map_flags == BPF_EXIST) {
+		/* elem doesn't exist, cannot update it */
+		ret = -ENOENT;
+		goto err;
+	}
+
+	/* add new element to the head of the list, so that concurrent
+	 * search will find it before old elem
+	 */
+	hlist_add_head_rcu(&l_new->hash_node, head);
+	if (l_old) {
+		hlist_del_rcu(&l_old->hash_node);
+		kfree_rcu(l_old, rcu);
+	} else {
+		htab->count++;
+	}
+	spin_unlock_irqrestore(&htab->lock, flags);
+
+	return 0;
+err:
+	spin_unlock_irqrestore(&htab->lock, flags);
+	kfree(l_new);
+	return ret;
+}
+
+/* Called from syscall or from eBPF program */
+static int htab_map_delete_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct hlist_head *head;
+	struct htab_elem *l;
+	unsigned long flags;
+	u32 hash, key_size;
+	int ret = -ENOENT;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	key_size = map->key_size;
+
+	hash = htab_map_hash(key, key_size);
+
+	spin_lock_irqsave(&htab->lock, flags);
+
+	head = select_bucket(htab, hash);
+
+	l = lookup_elem_raw(head, hash, key, key_size);
+
+	if (l) {
+		hlist_del_rcu(&l->hash_node);
+		htab->count--;
+		kfree_rcu(l, rcu);
+		ret = 0;
+	}
+
+	spin_unlock_irqrestore(&htab->lock, flags);
+	return ret;
+}
+
+static void delete_all_elements(struct bpf_htab *htab)
+{
+	int i;
+
+	for (i = 0; i < htab->n_buckets; i++) {
+		struct hlist_head *head = select_bucket(htab, i);
+		struct hlist_node *n;
+		struct htab_elem *l;
+
+		hlist_for_each_entry_safe(l, n, head, hash_node) {
+			hlist_del_rcu(&l->hash_node);
+			htab->count--;
+			kfree(l);
+		}
+	}
+}
+
+/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
+static void htab_map_free(struct bpf_map *map)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+
+	/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
+	 * so the programs (can be more than one that used this map) were
+	 * disconnected from events. Wait for outstanding critical sections in
+	 * these programs to complete
+	 */
+	synchronize_rcu();
+
+	/* some of kfree_rcu() callbacks for elements of this map may not have
+	 * executed. It's ok. Proceed to free residual elements and map itself
+	 */
+	delete_all_elements(htab);
+	kvfree(htab->buckets);
+	kfree(htab);
+}
+
+static struct bpf_map_ops htab_ops = {
+	.map_alloc = htab_map_alloc,
+	.map_free = htab_map_free,
+	.map_get_next_key = htab_map_get_next_key,
+	.map_lookup_elem = htab_map_lookup_elem,
+	.map_update_elem = htab_map_update_elem,
+	.map_delete_elem = htab_map_delete_elem,
+};
+
+static struct bpf_map_type_list tl = {
+	.ops = &htab_ops,
+	.type = BPF_MAP_TYPE_HASH,
+};
+
+static int __init register_htab_map(void)
+{
+	bpf_register_map_type(&tl);
+	return 0;
+}
+late_initcall(register_htab_map);
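
The get_next_key contract implemented above is what lets userspace enumerate a hash map without knowing its contents: a key that is absent restarts the walk from the first bucket, and -ENOENT marks the end of the table. A sketch of such a walk, under the same assumptions as the earlier snippets (map_fd is assumed to be a hash map with 8-byte keys):

__u64 key = 0, next_key;	/* key width must match the map definition */
union bpf_attr attr;

memset(&attr, 0, sizeof(attr));
attr.map_fd   = map_fd;
attr.key      = (__u64)(unsigned long)&key;
attr.next_key = (__u64)(unsigned long)&next_key;

/* starting from a key that is not in the map yields the first element;
 * each successful call advances bucket by bucket until -ENOENT
 */
while (syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr)) == 0) {
	/* ... use next_key, e.g. follow up with BPF_MAP_LOOKUP_ELEM ... */
	key = next_key;
}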
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
new file mode 100644
index 000000000000..9e3414d85459
--- /dev/null
+++ b/kernel/bpf/helpers.c
@@ -0,0 +1,89 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/bpf.h>
+#include <linux/rcupdate.h>
+
+/* If kernel subsystem is allowing eBPF programs to call this function,
+ * inside its own verifier_ops->get_func_proto() callback it should return
+ * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments
+ *
+ * Different map implementations will rely on rcu in map methods
+ * lookup/update/delete, therefore eBPF programs must run under rcu lock
+ * if program is allowed to access maps, so check rcu_read_lock_held in
+ * all three functions.
+ */
+static u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	/* verifier checked that R1 contains a valid pointer to bpf_map
+	 * and R2 points to a program stack and map->key_size bytes were
+	 * initialized
+	 */
+	struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
+	void *key = (void *) (unsigned long) r2;
+	void *value;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	value = map->ops->map_lookup_elem(map, key);
+
+	/* lookup() returns either pointer to element value or NULL
+	 * which is the meaning of PTR_TO_MAP_VALUE_OR_NULL type
+	 */
+	return (unsigned long) value;
+}
+
+struct bpf_func_proto bpf_map_lookup_elem_proto = {
+	.func = bpf_map_lookup_elem,
+	.gpl_only = false,
+	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
+	.arg1_type = ARG_CONST_MAP_PTR,
+	.arg2_type = ARG_PTR_TO_MAP_KEY,
+};
+
+static u64 bpf_map_update_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
+	void *key = (void *) (unsigned long) r2;
+	void *value = (void *) (unsigned long) r3;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	return map->ops->map_update_elem(map, key, value, r4);
+}
+
+struct bpf_func_proto bpf_map_update_elem_proto = {
+	.func = bpf_map_update_elem,
+	.gpl_only = false,
+	.ret_type = RET_INTEGER,
+	.arg1_type = ARG_CONST_MAP_PTR,
+	.arg2_type = ARG_PTR_TO_MAP_KEY,
+	.arg3_type = ARG_PTR_TO_MAP_VALUE,
+	.arg4_type = ARG_ANYTHING,
+};
+
+static u64 bpf_map_delete_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
+	void *key = (void *) (unsigned long) r2;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	return map->ops->map_delete_elem(map, key);
+}
+
+struct bpf_func_proto bpf_map_delete_elem_proto = {
+	.func = bpf_map_delete_elem,
+	.gpl_only = false,
+	.ret_type = RET_INTEGER,
+	.arg1_type = ARG_CONST_MAP_PTR,
+	.arg2_type = ARG_PTR_TO_MAP_KEY,
+};
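
The calling convention these protos encode (R1 = map pointer, R2 = pointer to a stack-built key, value pointer or NULL back in R0) is easiest to see in raw instruction form. A hedged sketch using the BPF_* instruction macros of this era (linux/filter.h plus the samples/bpf helpers); BPF_LD_MAP_FD emits the pseudo load that the verifier later rewrites into an in-kernel map pointer, and map_fd is assumed to come from BPF_MAP_CREATE:

/* Sketch only: build a 4-byte key at fp-4, call the lookup helper,
 * and write to the value if the element exists.
 */
struct bpf_insn prog[] = {
	BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),	/* *(u32 *)(fp - 4) = 0 (the key) */
	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),	/* r2 = fp */
	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),	/* r2 = fp - 4 = &key */
	BPF_LD_MAP_FD(BPF_REG_1, map_fd),	/* r1 = map (pseudo ld_imm64) */
	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),	/* verifier demands the NULL check */
	BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),	/* *(u64 *)value = 1 */
	BPF_MOV64_IMM(BPF_REG_0, 0),
	BPF_EXIT_INSN(),
};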
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ba61c8c16032..088ac0b1b106 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -169,7 +169,7 @@ static int map_lookup_elem(union bpf_attr *attr)
 	if (copy_from_user(key, ukey, map->key_size) != 0)
 		goto free_key;
 
-	err = -ESRCH;
+	err = -ENOENT;
 	rcu_read_lock();
 	value = map->ops->map_lookup_elem(map, key);
 	if (!value)
@@ -190,7 +190,7 @@ err_put:
 	return err;
 }
 
-#define BPF_MAP_UPDATE_ELEM_LAST_FIELD value
+#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
 
 static int map_update_elem(union bpf_attr *attr)
 {
@@ -231,7 +231,7 @@ static int map_update_elem(union bpf_attr *attr)
 	 * therefore all map accessors rely on this fact, so do the same here
 	 */
 	rcu_read_lock();
-	err = map->ops->map_update_elem(map, key, value);
+	err = map->ops->map_update_elem(map, key, value, attr->flags);
 	rcu_read_unlock();
 
 free_value:
diff --git a/kernel/bpf/test_stub.c b/kernel/bpf/test_stub.c
index fcaddff4003e..0ceae1e6e8b5 100644
--- a/kernel/bpf/test_stub.c
+++ b/kernel/bpf/test_stub.c
@@ -18,26 +18,18 @@ struct bpf_context {
 	u64 arg2;
 };
 
-static u64 test_func(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
-{
-	return 0;
-}
-
-static struct bpf_func_proto test_funcs[] = {
-	[BPF_FUNC_unspec] = {
-		.func = test_func,
-		.gpl_only = true,
-		.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
-		.arg1_type = ARG_CONST_MAP_PTR,
-		.arg2_type = ARG_PTR_TO_MAP_KEY,
-	},
-};
-
 static const struct bpf_func_proto *test_func_proto(enum bpf_func_id func_id)
 {
-	if (func_id < 0 || func_id >= ARRAY_SIZE(test_funcs))
+	switch (func_id) {
+	case BPF_FUNC_map_lookup_elem:
+		return &bpf_map_lookup_elem_proto;
+	case BPF_FUNC_map_update_elem:
+		return &bpf_map_update_elem_proto;
+	case BPF_FUNC_map_delete_elem:
+		return &bpf_map_delete_elem_proto;
+	default:
 		return NULL;
-	return &test_funcs[func_id];
+	}
 }
 
 static const struct bpf_context_access {
@@ -78,38 +70,8 @@ static struct bpf_prog_type_list tl_prog = {
 	.type = BPF_PROG_TYPE_UNSPEC,
 };
 
-static struct bpf_map *test_map_alloc(union bpf_attr *attr)
-{
-	struct bpf_map *map;
-
-	map = kzalloc(sizeof(*map), GFP_USER);
-	if (!map)
-		return ERR_PTR(-ENOMEM);
-
-	map->key_size = attr->key_size;
-	map->value_size = attr->value_size;
-	map->max_entries = attr->max_entries;
-	return map;
-}
-
-static void test_map_free(struct bpf_map *map)
-{
-	kfree(map);
-}
-
-static struct bpf_map_ops test_map_ops = {
-	.map_alloc = test_map_alloc,
-	.map_free = test_map_free,
-};
-
-static struct bpf_map_type_list tl_map = {
-	.ops = &test_map_ops,
-	.type = BPF_MAP_TYPE_UNSPEC,
-};
-
 static int __init register_test_ops(void)
 {
-	bpf_register_map_type(&tl_map);
 	bpf_register_prog_type(&tl_prog);
 	return 0;
 }
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9f81818f2941..a28e09c7825d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -153,22 +153,19 @@ struct reg_state {
 
 enum bpf_stack_slot_type {
 	STACK_INVALID, /* nothing was stored in this stack slot */
-	STACK_SPILL, /* 1st byte of register spilled into stack */
-	STACK_SPILL_PART, /* other 7 bytes of register spill */
+	STACK_SPILL, /* register spilled into stack */
 	STACK_MISC /* BPF program wrote some data into this slot */
 };
 
-struct bpf_stack_slot {
-	enum bpf_stack_slot_type stype;
-	struct reg_state reg_st;
-};
+#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */
 
 /* state of the program:
  * type of all registers and stack info
  */
 struct verifier_state {
 	struct reg_state regs[MAX_BPF_REG];
-	struct bpf_stack_slot stack[MAX_BPF_STACK];
+	u8 stack_slot_type[MAX_BPF_STACK];
+	struct reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE];
 };
 
 /* linked list of verifier states used to prune search */
@@ -259,10 +256,10 @@ static void print_verifier_state(struct verifier_env *env)
 			env->cur_state.regs[i].map_ptr->key_size,
 			env->cur_state.regs[i].map_ptr->value_size);
 	}
-	for (i = 0; i < MAX_BPF_STACK; i++) {
-		if (env->cur_state.stack[i].stype == STACK_SPILL)
+	for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
+		if (env->cur_state.stack_slot_type[i] == STACK_SPILL)
 			verbose(" fp%d=%s", -MAX_BPF_STACK + i,
-				reg_type_str[env->cur_state.stack[i].reg_st.type]);
+				reg_type_str[env->cur_state.spilled_regs[i / BPF_REG_SIZE].type]);
 	}
 	verbose("\n");
 }
@@ -539,8 +536,10 @@ static int bpf_size_to_bytes(int bpf_size)
 static int check_stack_write(struct verifier_state *state, int off, int size,
 			     int value_regno)
 {
-	struct bpf_stack_slot *slot;
 	int i;
+	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
+	 * so it's aligned access and [off, off + size) are within stack limits
+	 */
 
 	if (value_regno >= 0 &&
 	    (state->regs[value_regno].type == PTR_TO_MAP_VALUE ||
@@ -548,30 +547,24 @@ static int check_stack_write(struct verifier_state *state, int off, int size,
 	     state->regs[value_regno].type == PTR_TO_CTX)) {
 
 		/* register containing pointer is being spilled into stack */
-		if (size != 8) {
+		if (size != BPF_REG_SIZE) {
 			verbose("invalid size of register spill\n");
 			return -EACCES;
 		}
 
-		slot = &state->stack[MAX_BPF_STACK + off];
-		slot->stype = STACK_SPILL;
 		/* save register state */
-		slot->reg_st = state->regs[value_regno];
-		for (i = 1; i < 8; i++) {
-			slot = &state->stack[MAX_BPF_STACK + off + i];
-			slot->stype = STACK_SPILL_PART;
-			slot->reg_st.type = UNKNOWN_VALUE;
-			slot->reg_st.map_ptr = NULL;
-		}
-	} else {
+		state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] =
+			state->regs[value_regno];
 
+		for (i = 0; i < BPF_REG_SIZE; i++)
+			state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL;
+	} else {
 		/* regular write of data into stack */
-		for (i = 0; i < size; i++) {
-			slot = &state->stack[MAX_BPF_STACK + off + i];
-			slot->stype = STACK_MISC;
-			slot->reg_st.type = UNKNOWN_VALUE;
-			slot->reg_st.map_ptr = NULL;
-		}
+		state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] =
+			(struct reg_state) {};
+
+		for (i = 0; i < size; i++)
+			state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC;
 	}
 	return 0;
 }
@@ -579,19 +572,18 @@ static int check_stack_write(struct verifier_state *state, int off, int size,
 static int check_stack_read(struct verifier_state *state, int off, int size,
 			    int value_regno)
 {
+	u8 *slot_type;
 	int i;
-	struct bpf_stack_slot *slot;
 
-	slot = &state->stack[MAX_BPF_STACK + off];
+	slot_type = &state->stack_slot_type[MAX_BPF_STACK + off];
 
-	if (slot->stype == STACK_SPILL) {
-		if (size != 8) {
+	if (slot_type[0] == STACK_SPILL) {
+		if (size != BPF_REG_SIZE) {
 			verbose("invalid size of register spill\n");
 			return -EACCES;
 		}
-		for (i = 1; i < 8; i++) {
-			if (state->stack[MAX_BPF_STACK + off + i].stype !=
-			    STACK_SPILL_PART) {
+		for (i = 1; i < BPF_REG_SIZE; i++) {
+			if (slot_type[i] != STACK_SPILL) {
 				verbose("corrupted spill memory\n");
 				return -EACCES;
 			}
@@ -599,12 +591,12 @@ static int check_stack_read(struct verifier_state *state, int off, int size,
 
 		if (value_regno >= 0)
 			/* restore register state from stack */
-			state->regs[value_regno] = slot->reg_st;
+			state->regs[value_regno] =
+				state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE];
 		return 0;
 	} else {
 		for (i = 0; i < size; i++) {
-			if (state->stack[MAX_BPF_STACK + off + i].stype !=
-			    STACK_MISC) {
+			if (slot_type[i] != STACK_MISC) {
 				verbose("invalid read from stack off %d+%d size %d\n",
 					off, i, size);
 				return -EACCES;
@@ -747,7 +739,7 @@ static int check_stack_boundary(struct verifier_env *env,
 	}
 
 	for (i = 0; i < access_size; i++) {
-		if (state->stack[MAX_BPF_STACK + off + i].stype != STACK_MISC) {
+		if (state->stack_slot_type[MAX_BPF_STACK + off + i] != STACK_MISC) {
 			verbose("invalid indirect read from stack off %d+%d size %d\n",
 				off, i, access_size);
 			return -EACCES;
@@ -1180,6 +1172,70 @@ static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn)
 	return 0;
 }
 
+/* verify safety of LD_ABS|LD_IND instructions:
+ * - they can only appear in the programs where ctx == skb
+ * - since they are wrappers of function calls, they scratch R1-R5 registers,
+ *   preserve R6-R9, and store return value into R0
+ *
+ * Implicit input:
+ *   ctx == skb == R6 == CTX
+ *
+ * Explicit input:
+ *   SRC == any register
+ *   IMM == 32-bit immediate
+ *
+ * Output:
+ *   R0 - 8/16/32-bit skb data converted to cpu endianness
+ */
+static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn)
+{
+	struct reg_state *regs = env->cur_state.regs;
+	u8 mode = BPF_MODE(insn->code);
+	struct reg_state *reg;
+	int i, err;
+
+	if (env->prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) {
+		verbose("BPF_LD_ABS|IND instructions are only allowed in socket filters\n");
+		return -EINVAL;
+	}
+
+	if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
+	    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
+		verbose("BPF_LD_ABS uses reserved fields\n");
+		return -EINVAL;
+	}
+
+	/* check whether implicit source operand (register R6) is readable */
+	err = check_reg_arg(regs, BPF_REG_6, SRC_OP);
+	if (err)
+		return err;
+
+	if (regs[BPF_REG_6].type != PTR_TO_CTX) {
+		verbose("at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
+		return -EINVAL;
+	}
+
+	if (mode == BPF_IND) {
+		/* check explicit source operand */
+		err = check_reg_arg(regs, insn->src_reg, SRC_OP);
+		if (err)
+			return err;
+	}
+
+	/* reset caller saved regs to unreadable */
+	for (i = 0; i < CALLER_SAVED_REGS; i++) {
+		reg = regs + caller_saved[i];
+		reg->type = NOT_INIT;
+		reg->imm = 0;
+	}
+
+	/* mark destination R0 register as readable, since it contains
+	 * the value fetched from the packet
+	 */
+	regs[BPF_REG_0].type = UNKNOWN_VALUE;
+	return 0;
+}
+
 /* non-recursive DFS pseudo code
  * 1 procedure DFS-iterative(G,v):
  * 2 label v as discovered
@@ -1417,12 +1473,33 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
 	}
 
 	for (i = 0; i < MAX_BPF_STACK; i++) {
-		if (memcmp(&old->stack[i], &cur->stack[i],
-			   sizeof(old->stack[0])) != 0) {
-			if (old->stack[i].stype == STACK_INVALID)
-				continue;
+		if (old->stack_slot_type[i] == STACK_INVALID)
+			continue;
+		if (old->stack_slot_type[i] != cur->stack_slot_type[i])
+			/* Ex: old explored (safe) state has STACK_SPILL in
+			 * this stack slot, but current has STACK_MISC ->
+			 * these verifier states are not equivalent,
+			 * return false to continue verification of this path
+			 */
 			return false;
-		}
+		if (i % BPF_REG_SIZE)
+			continue;
+		if (memcmp(&old->spilled_regs[i / BPF_REG_SIZE],
+			   &cur->spilled_regs[i / BPF_REG_SIZE],
+			   sizeof(old->spilled_regs[0])))
+			/* when explored and current stack slot types are
+			 * the same, check that stored pointer types
+			 * are the same as well.
+			 * Ex: explored safe path could have stored
+			 * (struct reg_state) {.type = PTR_TO_STACK, .imm = -8}
+			 * but current path has stored:
+			 * (struct reg_state) {.type = PTR_TO_STACK, .imm = -16}
+			 * such verifier states are not equivalent.
+			 * return false to continue verification of this path
+			 */
+			return false;
+		else
+			continue;
 	}
 	return true;
 }
@@ -1664,8 +1741,10 @@ process_bpf_exit:
 			u8 mode = BPF_MODE(insn->code);
 
 			if (mode == BPF_ABS || mode == BPF_IND) {
-				verbose("LD_ABS is not supported yet\n");
-				return -EINVAL;
+				err = check_ld_abs(env, insn);
+				if (err)
+					return err;
+
 			} else if (mode == BPF_IMM) {
 				err = check_ld_imm(env, insn);
 				if (err)
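
For reference, roughly the smallest socket-filter fragment that satisfies the new check_ld_abs() looks like this (a sketch using the same instruction macros as the earlier snippet, loaded as BPF_PROG_TYPE_SOCKET_FILTER): R6 must hold the context before the access, R1-R5 are scratched by it, and the fetched byte lands in R0:

struct bpf_insn prog[] = {
	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),	/* r6 = ctx (skb), the implicit input */
	BPF_LD_ABS(BPF_B, 14),			/* r0 = *(u8 *)(skb->data + 14) */
	BPF_EXIT_INSN(),			/* return r0 */
};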