author    Alexei Starovoitov <ast@fb.com>  2016-02-02 01:39:55 -0500
committer David S. Miller <davem@davemloft.net>  2016-02-06 03:34:36 -0500
commit    15a07b33814d14ca817887dbea8530728dc0fbe4
tree      cf33026c34f2fbbe72e6ba3dd8079e3b0afa00ad /kernel/bpf/syscall.c
parent    a10423b87a7eae75da79ce80a8d9475047a674ee
bpf: add lookup/update support for per-cpu hash and array maps
The functions bpf_map_lookup_elem(map, key, value) and
bpf_map_update_elem(map, key, value, flags) need to get/set
values from all CPUs for per-cpu hash and array maps,
so that user space can aggregate/update them as necessary.
Example of single counter aggregation in user space:

  unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
  long values[nr_cpus];
  long value = 0;
  int i;

  bpf_lookup_elem(fd, key, values);
  for (i = 0; i < nr_cpus; i++)
          value += values[i];
User space must provide an array of round_up(value_size, 8) * nr_cpus
bytes to get/set the values, since the kernel uses 'long' copies of the
per-cpu values to try to copy each counter atomically.
This is best-effort, since bpf programs and user space race to access
the same memory.
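
The same sizing rule applies in the update direction. A minimal sketch,
assuming a bpf_update_elem(fd, key, value, flags) wrapper analogous to
the bpf_lookup_elem() above (fd and key are illustrative):

  unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
  long values[nr_cpus];   /* one 8-byte aligned slot per cpu */
  int i;

  for (i = 0; i < nr_cpus; i++)
          values[i] = 0;  /* e.g. reset the counter on every cpu */

  /* a single update call writes the value for all cpus at once */
  bpf_update_elem(fd, key, values, BPF_ANY);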
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel/bpf/syscall.c')
-rw-r--r--  kernel/bpf/syscall.c  57
1 file changed, 40 insertions(+), 17 deletions(-)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 637397059f76..c95a753c2007 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -239,6 +239,7 @@ static int map_lookup_elem(union bpf_attr *attr)
 	int ufd = attr->map_fd;
 	struct bpf_map *map;
 	void *key, *value, *ptr;
+	u32 value_size;
 	struct fd f;
 	int err;
 
@@ -259,23 +260,35 @@ static int map_lookup_elem(union bpf_attr *attr)
 	if (copy_from_user(key, ukey, map->key_size) != 0)
 		goto free_key;
 
+	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
+		value_size = round_up(map->value_size, 8) * num_possible_cpus();
+	else
+		value_size = map->value_size;
+
 	err = -ENOMEM;
-	value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN);
+	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
 	if (!value)
 		goto free_key;
 
-	rcu_read_lock();
-	ptr = map->ops->map_lookup_elem(map, key);
-	if (ptr)
-		memcpy(value, ptr, map->value_size);
-	rcu_read_unlock();
+	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
+		err = bpf_percpu_hash_copy(map, key, value);
+	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
+		err = bpf_percpu_array_copy(map, key, value);
+	} else {
+		rcu_read_lock();
+		ptr = map->ops->map_lookup_elem(map, key);
+		if (ptr)
+			memcpy(value, ptr, value_size);
+		rcu_read_unlock();
+		err = ptr ? 0 : -ENOENT;
+	}
 
-	err = -ENOENT;
-	if (!ptr)
+	if (err)
 		goto free_value;
 
 	err = -EFAULT;
-	if (copy_to_user(uvalue, value, map->value_size) != 0)
+	if (copy_to_user(uvalue, value, value_size) != 0)
 		goto free_value;
 
 	err = 0;
@@ -298,6 +311,7 @@ static int map_update_elem(union bpf_attr *attr)
 	int ufd = attr->map_fd;
 	struct bpf_map *map;
 	void *key, *value;
+	u32 value_size;
 	struct fd f;
 	int err;
 
@@ -318,21 +332,30 @@ static int map_update_elem(union bpf_attr *attr)
 	if (copy_from_user(key, ukey, map->key_size) != 0)
 		goto free_key;
 
+	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
+		value_size = round_up(map->value_size, 8) * num_possible_cpus();
+	else
+		value_size = map->value_size;
+
 	err = -ENOMEM;
-	value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN);
+	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
 	if (!value)
 		goto free_key;
 
 	err = -EFAULT;
-	if (copy_from_user(value, uvalue, map->value_size) != 0)
+	if (copy_from_user(value, uvalue, value_size) != 0)
 		goto free_value;
 
-	/* eBPF program that use maps are running under rcu_read_lock(),
-	 * therefore all map accessors rely on this fact, so do the same here
-	 */
-	rcu_read_lock();
-	err = map->ops->map_update_elem(map, key, value, attr->flags);
-	rcu_read_unlock();
+	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
+		err = bpf_percpu_hash_update(map, key, value, attr->flags);
+	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
+		err = bpf_percpu_array_update(map, key, value, attr->flags);
+	} else {
+		rcu_read_lock();
+		err = map->ops->map_update_elem(map, key, value, attr->flags);
+		rcu_read_unlock();
+	}
 
 free_value:
 	kfree(value);
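
For context, a user-space sketch of the syscall path that reaches the
map_lookup_elem() code above; the wrapper name and the fd/key/value
arguments are illustrative, not part of this patch:

  #include <linux/bpf.h>
  #include <stdint.h>
  #include <string.h>
  #include <sys/syscall.h>
  #include <unistd.h>

  /* illustrative thin wrapper over the bpf(2) syscall */
  static int bpf_lookup(int fd, const void *key, void *value)
  {
          union bpf_attr attr;

          memset(&attr, 0, sizeof(attr));
          attr.map_fd = fd;
          attr.key = (uint64_t)(unsigned long)key;
          attr.value = (uint64_t)(unsigned long)value;

          /* for a per-cpu map, 'value' must point to
           * round_up(value_size, 8) * num_possible_cpus() bytes
           */
          return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
  }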