diff options
author | Alexei Starovoitov <ast@plumgrid.com> | 2014-09-26 03:16:59 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-09-26 15:05:14 -0400 |
commit | db20fd2b01087bdfbe30bce314a198eefedcc42e (patch) | |
tree | c43a4e3edb46deae542c8e79d74d24703ffbdcca /kernel/bpf | |
parent | 749730ce42a2121e1c88350d69478bff3994b10a (diff) |
bpf: add lookup/update/delete/iterate methods to BPF maps
'maps' is a generic storage of different types for sharing data between kernel
and userspace.
The maps are accessed from user space via BPF syscall, which has commands:
- create a map with given type and attributes
fd = bpf(BPF_MAP_CREATE, union bpf_attr *attr, u32 size)
returns fd or negative error
- lookup key in a given map referenced by fd
err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
using attr->map_fd, attr->key, attr->value
returns zero and stores found elem into value or negative error
- create or update key/value pair in a given map
err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
using attr->map_fd, attr->key, attr->value
returns zero or negative error
- find and delete element by key in a given map
err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
using attr->map_fd, attr->key
- iterate map elements (based on input key return next_key)
err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size)
using attr->map_fd, attr->key, attr->next_key
- close(fd) deletes the map
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel/bpf')
-rw-r--r-- | kernel/bpf/syscall.c | 235 |
1 files changed, 235 insertions, 0 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 428a0e23adc0..f94349ecaf61 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/syscalls.h> | 13 | #include <linux/syscalls.h> |
14 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
15 | #include <linux/anon_inodes.h> | 15 | #include <linux/anon_inodes.h> |
16 | #include <linux/file.h> | ||
16 | 17 | ||
17 | static LIST_HEAD(bpf_map_types); | 18 | static LIST_HEAD(bpf_map_types); |
18 | 19 | ||
@@ -111,6 +112,228 @@ free_map: | |||
111 | return err; | 112 | return err; |
112 | } | 113 | } |
113 | 114 | ||
115 | /* if error is returned, fd is released. | ||
116 | * On success caller should complete fd access with matching fdput() | ||
117 | */ | ||
118 | struct bpf_map *bpf_map_get(struct fd f) | ||
119 | { | ||
120 | struct bpf_map *map; | ||
121 | |||
122 | if (!f.file) | ||
123 | return ERR_PTR(-EBADF); | ||
124 | |||
125 | if (f.file->f_op != &bpf_map_fops) { | ||
126 | fdput(f); | ||
127 | return ERR_PTR(-EINVAL); | ||
128 | } | ||
129 | |||
130 | map = f.file->private_data; | ||
131 | |||
132 | return map; | ||
133 | } | ||
134 | |||
135 | /* helper to convert user pointers passed inside __aligned_u64 fields */ | ||
136 | static void __user *u64_to_ptr(__u64 val) | ||
137 | { | ||
138 | return (void __user *) (unsigned long) val; | ||
139 | } | ||
140 | |||
141 | /* last field in 'union bpf_attr' used by this command */ | ||
142 | #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value | ||
143 | |||
144 | static int map_lookup_elem(union bpf_attr *attr) | ||
145 | { | ||
146 | void __user *ukey = u64_to_ptr(attr->key); | ||
147 | void __user *uvalue = u64_to_ptr(attr->value); | ||
148 | int ufd = attr->map_fd; | ||
149 | struct fd f = fdget(ufd); | ||
150 | struct bpf_map *map; | ||
151 | void *key, *value; | ||
152 | int err; | ||
153 | |||
154 | if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) | ||
155 | return -EINVAL; | ||
156 | |||
157 | map = bpf_map_get(f); | ||
158 | if (IS_ERR(map)) | ||
159 | return PTR_ERR(map); | ||
160 | |||
161 | err = -ENOMEM; | ||
162 | key = kmalloc(map->key_size, GFP_USER); | ||
163 | if (!key) | ||
164 | goto err_put; | ||
165 | |||
166 | err = -EFAULT; | ||
167 | if (copy_from_user(key, ukey, map->key_size) != 0) | ||
168 | goto free_key; | ||
169 | |||
170 | err = -ESRCH; | ||
171 | rcu_read_lock(); | ||
172 | value = map->ops->map_lookup_elem(map, key); | ||
173 | if (!value) | ||
174 | goto err_unlock; | ||
175 | |||
176 | err = -EFAULT; | ||
177 | if (copy_to_user(uvalue, value, map->value_size) != 0) | ||
178 | goto err_unlock; | ||
179 | |||
180 | err = 0; | ||
181 | |||
182 | err_unlock: | ||
183 | rcu_read_unlock(); | ||
184 | free_key: | ||
185 | kfree(key); | ||
186 | err_put: | ||
187 | fdput(f); | ||
188 | return err; | ||
189 | } | ||
190 | |||
191 | #define BPF_MAP_UPDATE_ELEM_LAST_FIELD value | ||
192 | |||
193 | static int map_update_elem(union bpf_attr *attr) | ||
194 | { | ||
195 | void __user *ukey = u64_to_ptr(attr->key); | ||
196 | void __user *uvalue = u64_to_ptr(attr->value); | ||
197 | int ufd = attr->map_fd; | ||
198 | struct fd f = fdget(ufd); | ||
199 | struct bpf_map *map; | ||
200 | void *key, *value; | ||
201 | int err; | ||
202 | |||
203 | if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) | ||
204 | return -EINVAL; | ||
205 | |||
206 | map = bpf_map_get(f); | ||
207 | if (IS_ERR(map)) | ||
208 | return PTR_ERR(map); | ||
209 | |||
210 | err = -ENOMEM; | ||
211 | key = kmalloc(map->key_size, GFP_USER); | ||
212 | if (!key) | ||
213 | goto err_put; | ||
214 | |||
215 | err = -EFAULT; | ||
216 | if (copy_from_user(key, ukey, map->key_size) != 0) | ||
217 | goto free_key; | ||
218 | |||
219 | err = -ENOMEM; | ||
220 | value = kmalloc(map->value_size, GFP_USER); | ||
221 | if (!value) | ||
222 | goto free_key; | ||
223 | |||
224 | err = -EFAULT; | ||
225 | if (copy_from_user(value, uvalue, map->value_size) != 0) | ||
226 | goto free_value; | ||
227 | |||
228 | /* eBPF program that use maps are running under rcu_read_lock(), | ||
229 | * therefore all map accessors rely on this fact, so do the same here | ||
230 | */ | ||
231 | rcu_read_lock(); | ||
232 | err = map->ops->map_update_elem(map, key, value); | ||
233 | rcu_read_unlock(); | ||
234 | |||
235 | free_value: | ||
236 | kfree(value); | ||
237 | free_key: | ||
238 | kfree(key); | ||
239 | err_put: | ||
240 | fdput(f); | ||
241 | return err; | ||
242 | } | ||
243 | |||
244 | #define BPF_MAP_DELETE_ELEM_LAST_FIELD key | ||
245 | |||
246 | static int map_delete_elem(union bpf_attr *attr) | ||
247 | { | ||
248 | void __user *ukey = u64_to_ptr(attr->key); | ||
249 | int ufd = attr->map_fd; | ||
250 | struct fd f = fdget(ufd); | ||
251 | struct bpf_map *map; | ||
252 | void *key; | ||
253 | int err; | ||
254 | |||
255 | if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) | ||
256 | return -EINVAL; | ||
257 | |||
258 | map = bpf_map_get(f); | ||
259 | if (IS_ERR(map)) | ||
260 | return PTR_ERR(map); | ||
261 | |||
262 | err = -ENOMEM; | ||
263 | key = kmalloc(map->key_size, GFP_USER); | ||
264 | if (!key) | ||
265 | goto err_put; | ||
266 | |||
267 | err = -EFAULT; | ||
268 | if (copy_from_user(key, ukey, map->key_size) != 0) | ||
269 | goto free_key; | ||
270 | |||
271 | rcu_read_lock(); | ||
272 | err = map->ops->map_delete_elem(map, key); | ||
273 | rcu_read_unlock(); | ||
274 | |||
275 | free_key: | ||
276 | kfree(key); | ||
277 | err_put: | ||
278 | fdput(f); | ||
279 | return err; | ||
280 | } | ||
281 | |||
282 | /* last field in 'union bpf_attr' used by this command */ | ||
283 | #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key | ||
284 | |||
285 | static int map_get_next_key(union bpf_attr *attr) | ||
286 | { | ||
287 | void __user *ukey = u64_to_ptr(attr->key); | ||
288 | void __user *unext_key = u64_to_ptr(attr->next_key); | ||
289 | int ufd = attr->map_fd; | ||
290 | struct fd f = fdget(ufd); | ||
291 | struct bpf_map *map; | ||
292 | void *key, *next_key; | ||
293 | int err; | ||
294 | |||
295 | if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) | ||
296 | return -EINVAL; | ||
297 | |||
298 | map = bpf_map_get(f); | ||
299 | if (IS_ERR(map)) | ||
300 | return PTR_ERR(map); | ||
301 | |||
302 | err = -ENOMEM; | ||
303 | key = kmalloc(map->key_size, GFP_USER); | ||
304 | if (!key) | ||
305 | goto err_put; | ||
306 | |||
307 | err = -EFAULT; | ||
308 | if (copy_from_user(key, ukey, map->key_size) != 0) | ||
309 | goto free_key; | ||
310 | |||
311 | err = -ENOMEM; | ||
312 | next_key = kmalloc(map->key_size, GFP_USER); | ||
313 | if (!next_key) | ||
314 | goto free_key; | ||
315 | |||
316 | rcu_read_lock(); | ||
317 | err = map->ops->map_get_next_key(map, key, next_key); | ||
318 | rcu_read_unlock(); | ||
319 | if (err) | ||
320 | goto free_next_key; | ||
321 | |||
322 | err = -EFAULT; | ||
323 | if (copy_to_user(unext_key, next_key, map->key_size) != 0) | ||
324 | goto free_next_key; | ||
325 | |||
326 | err = 0; | ||
327 | |||
328 | free_next_key: | ||
329 | kfree(next_key); | ||
330 | free_key: | ||
331 | kfree(key); | ||
332 | err_put: | ||
333 | fdput(f); | ||
334 | return err; | ||
335 | } | ||
336 | |||
114 | SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) | 337 | SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) |
115 | { | 338 | { |
116 | union bpf_attr attr = {}; | 339 | union bpf_attr attr = {}; |
@@ -160,6 +383,18 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz | |||
160 | case BPF_MAP_CREATE: | 383 | case BPF_MAP_CREATE: |
161 | err = map_create(&attr); | 384 | err = map_create(&attr); |
162 | break; | 385 | break; |
386 | case BPF_MAP_LOOKUP_ELEM: | ||
387 | err = map_lookup_elem(&attr); | ||
388 | break; | ||
389 | case BPF_MAP_UPDATE_ELEM: | ||
390 | err = map_update_elem(&attr); | ||
391 | break; | ||
392 | case BPF_MAP_DELETE_ELEM: | ||
393 | err = map_delete_elem(&attr); | ||
394 | break; | ||
395 | case BPF_MAP_GET_NEXT_KEY: | ||
396 | err = map_get_next_key(&attr); | ||
397 | break; | ||
163 | default: | 398 | default: |
164 | err = -EINVAL; | 399 | err = -EINVAL; |
165 | break; | 400 | break; |