aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@plumgrid.com>2014-09-26 03:16:59 -0400
committerDavid S. Miller <davem@davemloft.net>2014-09-26 15:05:14 -0400
commitdb20fd2b01087bdfbe30bce314a198eefedcc42e (patch)
treec43a4e3edb46deae542c8e79d74d24703ffbdcca
parent749730ce42a2121e1c88350d69478bff3994b10a (diff)
bpf: add lookup/update/delete/iterate methods to BPF maps
'maps' is a generic storage of different types for sharing data between kernel and userspace. The maps are accessed from user space via BPF syscall, which has commands: - create a map with given type and attributes fd = bpf(BPF_MAP_CREATE, union bpf_attr *attr, u32 size) returns fd or negative error - lookup key in a given map referenced by fd err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size) using attr->map_fd, attr->key, attr->value returns zero and stores found elem into value or negative error - create or update key/value pair in a given map err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size) using attr->map_fd, attr->key, attr->value returns zero or negative error - find and delete element by key in a given map err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size) using attr->map_fd, attr->key - iterate map elements (based on input key return next_key) err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size) using attr->map_fd, attr->key, attr->next_key - close(fd) deletes the map Signed-off-by: Alexei Starovoitov <ast@plumgrid.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/bpf.h8
-rw-r--r--include/uapi/linux/bpf.h38
-rw-r--r--kernel/bpf/syscall.c235
3 files changed, 281 insertions, 0 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 48014a71f0fe..2887f3f9da59 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -9,6 +9,7 @@
9 9
10#include <uapi/linux/bpf.h> 10#include <uapi/linux/bpf.h>
11#include <linux/workqueue.h> 11#include <linux/workqueue.h>
12#include <linux/file.h>
12 13
13struct bpf_map; 14struct bpf_map;
14 15
@@ -17,6 +18,12 @@ struct bpf_map_ops {
17 /* funcs callable from userspace (via syscall) */ 18 /* funcs callable from userspace (via syscall) */
18 struct bpf_map *(*map_alloc)(union bpf_attr *attr); 19 struct bpf_map *(*map_alloc)(union bpf_attr *attr);
19 void (*map_free)(struct bpf_map *); 20 void (*map_free)(struct bpf_map *);
21 int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
22
23 /* funcs callable from userspace and from eBPF programs */
24 void *(*map_lookup_elem)(struct bpf_map *map, void *key);
25 int (*map_update_elem)(struct bpf_map *map, void *key, void *value);
26 int (*map_delete_elem)(struct bpf_map *map, void *key);
20}; 27};
21 28
22struct bpf_map { 29struct bpf_map {
@@ -37,5 +44,6 @@ struct bpf_map_type_list {
37 44
38void bpf_register_map_type(struct bpf_map_type_list *tl); 45void bpf_register_map_type(struct bpf_map_type_list *tl);
39void bpf_map_put(struct bpf_map *map); 46void bpf_map_put(struct bpf_map *map);
47struct bpf_map *bpf_map_get(struct fd f);
40 48
41#endif /* _LINUX_BPF_H */ 49#endif /* _LINUX_BPF_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f58a10f9670c..395cabd2ca0a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -70,6 +70,35 @@ enum bpf_cmd {
70 * map is deleted when fd is closed 70 * map is deleted when fd is closed
71 */ 71 */
72 BPF_MAP_CREATE, 72 BPF_MAP_CREATE,
73
74 /* lookup key in a given map
75 * err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
76 * Using attr->map_fd, attr->key, attr->value
77 * returns zero and stores found elem into value
78 * or negative error
79 */
80 BPF_MAP_LOOKUP_ELEM,
81
82 /* create or update key/value pair in a given map
83 * err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
84 * Using attr->map_fd, attr->key, attr->value
85 * returns zero or negative error
86 */
87 BPF_MAP_UPDATE_ELEM,
88
89 /* find and delete elem by key in a given map
90 * err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
91 * Using attr->map_fd, attr->key
92 * returns zero or negative error
93 */
94 BPF_MAP_DELETE_ELEM,
95
96 /* lookup key in a given map and return next key
97 * err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size)
98 * Using attr->map_fd, attr->key, attr->next_key
99 * returns zero and stores next key or negative error
100 */
101 BPF_MAP_GET_NEXT_KEY,
73}; 102};
74 103
75enum bpf_map_type { 104enum bpf_map_type {
@@ -83,6 +112,15 @@ union bpf_attr {
83 __u32 value_size; /* size of value in bytes */ 112 __u32 value_size; /* size of value in bytes */
84 __u32 max_entries; /* max number of entries in a map */ 113 __u32 max_entries; /* max number of entries in a map */
85 }; 114 };
115
116 struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
117 __u32 map_fd;
118 __aligned_u64 key;
119 union {
120 __aligned_u64 value;
121 __aligned_u64 next_key;
122 };
123 };
86} __attribute__((aligned(8))); 124} __attribute__((aligned(8)));
87 125
88#endif /* _UAPI__LINUX_BPF_H__ */ 126#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 428a0e23adc0..f94349ecaf61 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -13,6 +13,7 @@
13#include <linux/syscalls.h> 13#include <linux/syscalls.h>
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/anon_inodes.h> 15#include <linux/anon_inodes.h>
16#include <linux/file.h>
16 17
17static LIST_HEAD(bpf_map_types); 18static LIST_HEAD(bpf_map_types);
18 19
@@ -111,6 +112,228 @@ free_map:
111 return err; 112 return err;
112} 113}
113 114
115/* if error is returned, fd is released.
116 * On success caller should complete fd access with matching fdput()
117 */
118struct bpf_map *bpf_map_get(struct fd f)
119{
120 struct bpf_map *map;
121
122 if (!f.file)
123 return ERR_PTR(-EBADF);
124
125 if (f.file->f_op != &bpf_map_fops) {
126 fdput(f);
127 return ERR_PTR(-EINVAL);
128 }
129
130 map = f.file->private_data;
131
132 return map;
133}
134
135/* helper to convert user pointers passed inside __aligned_u64 fields */
136static void __user *u64_to_ptr(__u64 val)
137{
138 return (void __user *) (unsigned long) val;
139}
140
141/* last field in 'union bpf_attr' used by this command */
142#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
143
144static int map_lookup_elem(union bpf_attr *attr)
145{
146 void __user *ukey = u64_to_ptr(attr->key);
147 void __user *uvalue = u64_to_ptr(attr->value);
148 int ufd = attr->map_fd;
149 struct fd f = fdget(ufd);
150 struct bpf_map *map;
151 void *key, *value;
152 int err;
153
154 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
155 return -EINVAL;
156
157 map = bpf_map_get(f);
158 if (IS_ERR(map))
159 return PTR_ERR(map);
160
161 err = -ENOMEM;
162 key = kmalloc(map->key_size, GFP_USER);
163 if (!key)
164 goto err_put;
165
166 err = -EFAULT;
167 if (copy_from_user(key, ukey, map->key_size) != 0)
168 goto free_key;
169
170 err = -ESRCH;
171 rcu_read_lock();
172 value = map->ops->map_lookup_elem(map, key);
173 if (!value)
174 goto err_unlock;
175
176 err = -EFAULT;
177 if (copy_to_user(uvalue, value, map->value_size) != 0)
178 goto err_unlock;
179
180 err = 0;
181
182err_unlock:
183 rcu_read_unlock();
184free_key:
185 kfree(key);
186err_put:
187 fdput(f);
188 return err;
189}
190
191#define BPF_MAP_UPDATE_ELEM_LAST_FIELD value
192
193static int map_update_elem(union bpf_attr *attr)
194{
195 void __user *ukey = u64_to_ptr(attr->key);
196 void __user *uvalue = u64_to_ptr(attr->value);
197 int ufd = attr->map_fd;
198 struct fd f = fdget(ufd);
199 struct bpf_map *map;
200 void *key, *value;
201 int err;
202
203 if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
204 return -EINVAL;
205
206 map = bpf_map_get(f);
207 if (IS_ERR(map))
208 return PTR_ERR(map);
209
210 err = -ENOMEM;
211 key = kmalloc(map->key_size, GFP_USER);
212 if (!key)
213 goto err_put;
214
215 err = -EFAULT;
216 if (copy_from_user(key, ukey, map->key_size) != 0)
217 goto free_key;
218
219 err = -ENOMEM;
220 value = kmalloc(map->value_size, GFP_USER);
221 if (!value)
222 goto free_key;
223
224 err = -EFAULT;
225 if (copy_from_user(value, uvalue, map->value_size) != 0)
226 goto free_value;
227
228 /* eBPF program that use maps are running under rcu_read_lock(),
229 * therefore all map accessors rely on this fact, so do the same here
230 */
231 rcu_read_lock();
232 err = map->ops->map_update_elem(map, key, value);
233 rcu_read_unlock();
234
235free_value:
236 kfree(value);
237free_key:
238 kfree(key);
239err_put:
240 fdput(f);
241 return err;
242}
243
244#define BPF_MAP_DELETE_ELEM_LAST_FIELD key
245
246static int map_delete_elem(union bpf_attr *attr)
247{
248 void __user *ukey = u64_to_ptr(attr->key);
249 int ufd = attr->map_fd;
250 struct fd f = fdget(ufd);
251 struct bpf_map *map;
252 void *key;
253 int err;
254
255 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
256 return -EINVAL;
257
258 map = bpf_map_get(f);
259 if (IS_ERR(map))
260 return PTR_ERR(map);
261
262 err = -ENOMEM;
263 key = kmalloc(map->key_size, GFP_USER);
264 if (!key)
265 goto err_put;
266
267 err = -EFAULT;
268 if (copy_from_user(key, ukey, map->key_size) != 0)
269 goto free_key;
270
271 rcu_read_lock();
272 err = map->ops->map_delete_elem(map, key);
273 rcu_read_unlock();
274
275free_key:
276 kfree(key);
277err_put:
278 fdput(f);
279 return err;
280}
281
282/* last field in 'union bpf_attr' used by this command */
283#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
284
285static int map_get_next_key(union bpf_attr *attr)
286{
287 void __user *ukey = u64_to_ptr(attr->key);
288 void __user *unext_key = u64_to_ptr(attr->next_key);
289 int ufd = attr->map_fd;
290 struct fd f = fdget(ufd);
291 struct bpf_map *map;
292 void *key, *next_key;
293 int err;
294
295 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
296 return -EINVAL;
297
298 map = bpf_map_get(f);
299 if (IS_ERR(map))
300 return PTR_ERR(map);
301
302 err = -ENOMEM;
303 key = kmalloc(map->key_size, GFP_USER);
304 if (!key)
305 goto err_put;
306
307 err = -EFAULT;
308 if (copy_from_user(key, ukey, map->key_size) != 0)
309 goto free_key;
310
311 err = -ENOMEM;
312 next_key = kmalloc(map->key_size, GFP_USER);
313 if (!next_key)
314 goto free_key;
315
316 rcu_read_lock();
317 err = map->ops->map_get_next_key(map, key, next_key);
318 rcu_read_unlock();
319 if (err)
320 goto free_next_key;
321
322 err = -EFAULT;
323 if (copy_to_user(unext_key, next_key, map->key_size) != 0)
324 goto free_next_key;
325
326 err = 0;
327
328free_next_key:
329 kfree(next_key);
330free_key:
331 kfree(key);
332err_put:
333 fdput(f);
334 return err;
335}
336
114SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) 337SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
115{ 338{
116 union bpf_attr attr = {}; 339 union bpf_attr attr = {};
@@ -160,6 +383,18 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
160 case BPF_MAP_CREATE: 383 case BPF_MAP_CREATE:
161 err = map_create(&attr); 384 err = map_create(&attr);
162 break; 385 break;
386 case BPF_MAP_LOOKUP_ELEM:
387 err = map_lookup_elem(&attr);
388 break;
389 case BPF_MAP_UPDATE_ELEM:
390 err = map_update_elem(&attr);
391 break;
392 case BPF_MAP_DELETE_ELEM:
393 err = map_delete_elem(&attr);
394 break;
395 case BPF_MAP_GET_NEXT_KEY:
396 err = map_get_next_key(&attr);
397 break;
163 default: 398 default:
164 err = -EINVAL; 399 err = -EINVAL;
165 break; 400 break;