Diffstat (limited to 'kernel/bpf/hashtab.c')
 kernel/bpf/hashtab.c | 84
 1 file changed, 73 insertions(+), 11 deletions(-)
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index fff3650d52fc..570eeca7bdfa 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -26,11 +26,18 @@ struct bpf_htab {
 	struct bucket *buckets;
 	void *elems;
 	struct pcpu_freelist freelist;
+	void __percpu *extra_elems;
 	atomic_t count;	/* number of elements in this hashtable */
 	u32 n_buckets;	/* number of hash buckets */
 	u32 elem_size;	/* size of each element in bytes */
 };
 
+enum extra_elem_state {
+	HTAB_NOT_AN_EXTRA_ELEM = 0,
+	HTAB_EXTRA_ELEM_FREE,
+	HTAB_EXTRA_ELEM_USED
+};
+
 /* each htab element is struct htab_elem + key + value */
 struct htab_elem {
 	union {
@@ -38,7 +45,10 @@ struct htab_elem {
 		struct bpf_htab *htab;
 		struct pcpu_freelist_node fnode;
 	};
-	struct rcu_head rcu;
+	union {
+		struct rcu_head rcu;
+		enum extra_elem_state state;
+	};
 	u32 hash;
 	char key[0] __aligned(8);
 };
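
Aliasing state with the rcu_head costs nothing: a per-cpu extra element is recycled by flipping its state back to HTAB_EXTRA_ELEM_FREE rather than being handed to call_rcu(), so the two members are never live at the same time. A minimal user-space sketch (the *_demo types are stand-ins, not the kernel definitions) of why the union adds no size:

#include <assert.h>

struct rcu_head_demo { void *next; void (*func)(struct rcu_head_demo *); };
enum extra_elem_state_demo { NOT_AN_EXTRA_ELEM, EXTRA_ELEM_FREE, EXTRA_ELEM_USED };

union overlay_demo {
	struct rcu_head_demo rcu;		/* used when a regular element is freed */
	enum extra_elem_state_demo state;	/* used only for extra elements */
};

/* the enum fits in the space the rcu_head already reserves */
static_assert(sizeof(union overlay_demo) == sizeof(struct rcu_head_demo),
	      "state member must not grow the element");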
@@ -113,6 +123,23 @@ free_elems:
 	return err;
 }
 
+static int alloc_extra_elems(struct bpf_htab *htab)
+{
+	void __percpu *pptr;
+	int cpu;
+
+	pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
+	if (!pptr)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
+			HTAB_EXTRA_ELEM_FREE;
+	}
+	htab->extra_elems = pptr;
+	return 0;
+}
+
 /* Called from syscall */
 static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 {
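
One spare element per possible CPU is sufficient: the spare is only claimed from inside an update that runs under the bucket's raw spinlock with IRQs disabled, so at most one full-map update can be in flight on a given CPU at a time. The 8-byte alignment passed to __alloc_percpu_gfp() matches the __aligned(8) requirement of the key/value area in struct htab_elem.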
@@ -185,6 +212,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	if (percpu)
 		cost += (u64) round_up(htab->map.value_size, 8) *
 			num_possible_cpus() * htab->map.max_entries;
+	else
+		cost += (u64) htab->elem_size * num_possible_cpus();
 
 	if (cost >= U32_MAX - PAGE_SIZE)
 		/* make sure page count doesn't overflow */
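
The else branch charges the new spare elements to the map's memory accounting. As a rough worked example (the 32-byte struct size is an assumption for a 64-bit kernel): with key_size = 4 and value_size = 8, elem_size = sizeof(struct htab_elem) + round_up(4, 8) + round_up(8, 8) ≈ 32 + 8 + 8 = 48 bytes, so a machine with 64 possible CPUs is charged an extra 48 * 64 = 3072 bytes, well under a page.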
@@ -212,14 +241,22 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 		raw_spin_lock_init(&htab->buckets[i].lock);
 	}
 
+	if (!percpu) {
+		err = alloc_extra_elems(htab);
+		if (err)
+			goto free_buckets;
+	}
+
 	if (!(attr->map_flags & BPF_F_NO_PREALLOC)) {
 		err = prealloc_elems_and_freelist(htab);
 		if (err)
-			goto free_buckets;
+			goto free_extra_elems;
 	}
 
 	return &htab->map;
 
+free_extra_elems:
+	free_percpu(htab->extra_elems);
 free_buckets:
 	kvfree(htab->buckets);
 free_htab:
@@ -349,7 +386,6 @@ static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
 	if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
 		free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
 	kfree(l);
-
 }
 
 static void htab_elem_free_rcu(struct rcu_head *head)
@@ -370,6 +406,11 @@ static void htab_elem_free_rcu(struct rcu_head *head)
 
 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 {
+	if (l->state == HTAB_EXTRA_ELEM_USED) {
+		l->state = HTAB_EXTRA_ELEM_FREE;
+		return;
+	}
+
 	if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
 		pcpu_freelist_push(&htab->freelist, &l->fnode);
 	} else {
@@ -381,25 +422,44 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 					 void *value, u32 key_size, u32 hash,
-					 bool percpu, bool onallcpus)
+					 bool percpu, bool onallcpus,
+					 bool old_elem_exists)
 {
 	u32 size = htab->map.value_size;
 	bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
 	struct htab_elem *l_new;
 	void __percpu *pptr;
+	int err = 0;
 
 	if (prealloc) {
 		l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
 		if (!l_new)
-			return ERR_PTR(-E2BIG);
+			err = -E2BIG;
 	} else {
 		if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
 			atomic_dec(&htab->count);
-			return ERR_PTR(-E2BIG);
+			err = -E2BIG;
+		} else {
+			l_new = kmalloc(htab->elem_size,
+					GFP_ATOMIC | __GFP_NOWARN);
+			if (!l_new)
+				return ERR_PTR(-ENOMEM);
 		}
-		l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
-		if (!l_new)
-			return ERR_PTR(-ENOMEM);
+	}
+
+	if (err) {
+		if (!old_elem_exists)
+			return ERR_PTR(err);
+
+		/* if we're updating the existing element and the hash table
+		 * is full, use per-cpu extra elems
+		 */
+		l_new = this_cpu_ptr(htab->extra_elems);
+		if (l_new->state != HTAB_EXTRA_ELEM_FREE)
+			return ERR_PTR(-E2BIG);
+		l_new->state = HTAB_EXTRA_ELEM_USED;
+	} else {
+		l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
 	}
 
 	memcpy(l_new->key, key, key_size);
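
The net effect of the reworked allocator: pool exhaustion (err set) is fatal only when the key is new; for an update of an existing key the element is taken from htab->extra_elems instead, and free_htab_elem() later recycles it by flipping its state back. A stand-alone model of that claim/release protocol (single-CPU view; names are illustrative, not kernel API):

#include <stdio.h>

enum state { NOT_AN_EXTRA, EXTRA_FREE, EXTRA_USED };

struct elem { enum state state; /* key/value omitted */ };

static struct elem extra = { EXTRA_FREE };	/* models this_cpu_ptr(extra_elems) */

/* called only when the normal pool is exhausted */
static struct elem *claim_spare(int old_elem_exists)
{
	if (!old_elem_exists)
		return NULL;		/* new key on a full map: -E2BIG */
	if (extra.state != EXTRA_FREE)
		return NULL;		/* spare is still linked into the table */
	extra.state = EXTRA_USED;
	return &extra;
}

/* mirror of free_htab_elem(): a used spare is recycled, not freed */
static void release(struct elem *e)
{
	if (e->state == EXTRA_USED)
		e->state = EXTRA_FREE;
	/* regular elements would go back to the freelist (not modelled) */
}

int main(void)
{
	struct elem *e = claim_spare(1);
	printf("claimed: %s\n", e ? "yes" : "no");
	release(e);
	printf("spare free again: %s\n", extra.state == EXTRA_FREE ? "yes" : "no");
	return 0;
}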
@@ -489,7 +549,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	if (ret)
 		goto err;
 
-	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false);
+	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
+				!!l_old);
 	if (IS_ERR(l_new)) {
 		/* all pre-allocated elements are in use or memory exhausted */
 		ret = PTR_ERR(l_new);
@@ -563,7 +624,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 		}
 	} else {
 		l_new = alloc_htab_elem(htab, key, value, key_size,
-					hash, true, onallcpus);
+					hash, true, onallcpus, false);
 		if (IS_ERR(l_new)) {
 			ret = PTR_ERR(l_new);
 			goto err;
@@ -652,6 +713,7 @@ static void htab_map_free(struct bpf_map *map)
 		htab_free_elems(htab);
 		pcpu_freelist_destroy(&htab->freelist);
 	}
+	free_percpu(htab->extra_elems);
 	kvfree(htab->buckets);
 	kfree(htab);
 }
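
With this change a preallocated hash map keeps accepting overwrites of existing keys even when every element is in use, restoring the earlier behaviour of bpf_map_update_elem() for full maps. A hypothetical user-space check (map_fd, the key values and the libbpf wrapper usage are assumptions, not part of this patch):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>

/* map_fd refers to a BPF_MAP_TYPE_HASH map already filled to max_entries;
 * existing_key is present in the map, fresh_key is not */
static void check_full_map(int map_fd, __u32 existing_key, __u32 fresh_key)
{
	__u64 val = 42;

	/* overwriting an existing element succeeds on a full map,
	 * served from the per-cpu extra element when needed */
	if (bpf_map_update_elem(map_fd, &existing_key, &val, BPF_EXIST))
		fprintf(stderr, "unexpected: %s\n", strerror(errno));

	/* inserting a genuinely new key is still rejected */
	if (bpf_map_update_elem(map_fd, &fresh_key, &val, BPF_NOEXIST) &&
	    errno == E2BIG)
		printf("new key rejected as expected (E2BIG)\n");
}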