Diffstat (limited to 'kernel/bpf/hashtab.c')
-rw-r--r--	kernel/bpf/hashtab.c	84
1 file changed, 73 insertions(+), 11 deletions(-)
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index fff3650d52fc..570eeca7bdfa 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -26,11 +26,18 @@ struct bpf_htab {
 	struct bucket *buckets;
 	void *elems;
 	struct pcpu_freelist freelist;
+	void __percpu *extra_elems;
 	atomic_t count;	/* number of elements in this hashtable */
 	u32 n_buckets;	/* number of hash buckets */
 	u32 elem_size;	/* size of each element in bytes */
 };
 
+enum extra_elem_state {
+	HTAB_NOT_AN_EXTRA_ELEM = 0,
+	HTAB_EXTRA_ELEM_FREE,
+	HTAB_EXTRA_ELEM_USED
+};
+
 /* each htab element is struct htab_elem + key + value */
 struct htab_elem {
 	union {
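
A brief aside on what these fields buy: each possible CPU gets one reserved ("extra") element so that overwriting an existing key can still succeed when a non-percpu map is already at max_entries. A minimal user-space model of the three-state lifecycle (a sketch with invented names, not the kernel API):

    enum extra_elem_state {
        HTAB_NOT_AN_EXTRA_ELEM = 0, /* ordinary, non-reserved element */
        HTAB_EXTRA_ELEM_FREE,       /* this CPU's spare is available */
        HTAB_EXTRA_ELEM_USED        /* spare currently linked in a bucket */
    };

    int main(void)
    {
        enum extra_elem_state spare = HTAB_EXTRA_ELEM_FREE; /* alloc_extra_elems() */

        spare = HTAB_EXTRA_ELEM_USED; /* full table, update of an existing key */
        spare = HTAB_EXTRA_ELEM_FREE; /* release: a state flip, never a kfree */
        return spare == HTAB_EXTRA_ELEM_FREE ? 0 : 1;
    }
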
@@ -38,7 +45,10 @@ struct htab_elem {
 		struct bpf_htab *htab;
 		struct pcpu_freelist_node fnode;
 	};
-	struct rcu_head rcu;
+	union {
+		struct rcu_head rcu;
+		enum extra_elem_state state;
+	};
 	u32 hash;
 	char key[0] __aligned(8);
 };
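
The union is safe because the two members are never needed at the same time: state only matters while an element is live (and a per-CPU spare is never handed to RCU at all), while rcu is used only after an ordinary element has been unlinked. A compilable sketch of the overlay, with the kernel type stubbed out:

    /* stub of the kernel's struct rcu_head; layout is illustrative */
    struct rcu_head { void *next; void (*func)(struct rcu_head *); };

    enum extra_elem_state { HTAB_NOT_AN_EXTRA_ELEM = 0, HTAB_EXTRA_ELEM_FREE, HTAB_EXTRA_ELEM_USED };

    struct htab_elem_model {
        union {
            struct rcu_head rcu;         /* only after unlink, ordinary elems */
            enum extra_elem_state state; /* only while live; spares skip RCU */
        };
    };

    int main(void)
    {
        struct htab_elem_model m = { .state = HTAB_EXTRA_ELEM_FREE };
        return m.state == HTAB_EXTRA_ELEM_FREE ? 0 : 1;
    }
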
@@ -113,6 +123,23 @@ free_elems:
 	return err;
 }
 
+static int alloc_extra_elems(struct bpf_htab *htab)
+{
+	void __percpu *pptr;
+	int cpu;
+
+	pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
+	if (!pptr)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
+			HTAB_EXTRA_ELEM_FREE;
+	}
+	htab->extra_elems = pptr;
+	return 0;
+}
+
 /* Called from syscall */
 static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 {
@@ -185,6 +212,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	if (percpu)
 		cost += (u64) round_up(htab->map.value_size, 8) *
 			num_possible_cpus() * htab->map.max_entries;
+	else
+		cost += (u64) htab->elem_size * num_possible_cpus();
 
 	if (cost >= U32_MAX - PAGE_SIZE)
 		/* make sure page count doesn't overflow */
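
The new else branch charges the spares to the map's accounted memory: one elem_size per possible CPU for non-percpu maps. A worked example with made-up numbers (elem_size and the CPU count are illustrative, not derived from the patch):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t elem_size = 64;      /* htab_elem + key + value, say */
        uint64_t max_entries = 1024;
        uint64_t possible_cpus = 4;
        uint64_t cost = elem_size * max_entries; /* pre-patch baseline: 65536 */

        cost += elem_size * possible_cpus;       /* the patch's extra_elems charge */
        printf("accounted bytes: %llu\n", (unsigned long long)cost); /* 65792 */
        return 0;
    }
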
@@ -212,14 +241,22 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 		raw_spin_lock_init(&htab->buckets[i].lock);
 	}
 
+	if (!percpu) {
+		err = alloc_extra_elems(htab);
+		if (err)
+			goto free_buckets;
+	}
+
 	if (!(attr->map_flags & BPF_F_NO_PREALLOC)) {
 		err = prealloc_elems_and_freelist(htab);
 		if (err)
-			goto free_buckets;
+			goto free_extra_elems;
 	}
 
 	return &htab->map;
 
+free_extra_elems:
+	free_percpu(htab->extra_elems);
 free_buckets:
 	kvfree(htab->buckets);
 free_htab:
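
Note the unwind order: a failure in alloc_extra_elems() jumps to free_buckets, while a later prealloc failure enters at the new free_extra_elems label and falls through to free_buckets, so each label releases exactly what was allocated before the failing step. A generic model of the pattern (names invented):

    #include <stdlib.h>

    static int setup_model(int prealloc_fails)
    {
        void *buckets, *extra;

        buckets = malloc(64);
        if (!buckets)
            goto out;
        extra = malloc(64);     /* alloc_extra_elems() */
        if (!extra)
            goto free_buckets;
        if (prealloc_fails)     /* prealloc_elems_and_freelist() */
            goto free_extra_elems;
        return 0;               /* success: the caller now owns both */

    free_extra_elems:
        free(extra);
    free_buckets:
        free(buckets);
    out:
        return -1;
    }

    int main(void)
    {
        return setup_model(1) == -1 ? 0 : 1; /* exercises the full unwind */
    }
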
@@ -349,7 +386,6 @@ static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
 	if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
 		free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
 	kfree(l);
-
 }
 
 static void htab_elem_free_rcu(struct rcu_head *head)
@@ -370,6 +406,11 @@ static void htab_elem_free_rcu(struct rcu_head *head)
 
 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 {
+	if (l->state == HTAB_EXTRA_ELEM_USED) {
+		l->state = HTAB_EXTRA_ELEM_FREE;
+		return;
+	}
+
 	if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
 		pcpu_freelist_push(&htab->freelist, &l->fnode);
 	} else {
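
Releasing a spare is just a state store: it never touches the per-cpu freelist, kfree(), or RCU, and the element stays bound to the CPU that owns it. A minimal model of the new early return (invented names):

    enum extra_elem_state { HTAB_NOT_AN_EXTRA_ELEM = 0, HTAB_EXTRA_ELEM_FREE, HTAB_EXTRA_ELEM_USED };

    struct elem_model { enum extra_elem_state state; };

    /* mirrors the early return added to free_htab_elem() */
    static int release_model(struct elem_model *l)
    {
        if (l->state == HTAB_EXTRA_ELEM_USED) {
            l->state = HTAB_EXTRA_ELEM_FREE; /* no freelist, no RCU */
            return 1;
        }
        return 0; /* ordinary element: freelist push or RCU-deferred kfree */
    }

    int main(void)
    {
        struct elem_model spare = { HTAB_EXTRA_ELEM_USED };
        return release_model(&spare) && spare.state == HTAB_EXTRA_ELEM_FREE ? 0 : 1;
    }
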
@@ -381,25 +422,44 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 					 void *value, u32 key_size, u32 hash,
-					 bool percpu, bool onallcpus)
+					 bool percpu, bool onallcpus,
+					 bool old_elem_exists)
 {
 	u32 size = htab->map.value_size;
 	bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
 	struct htab_elem *l_new;
 	void __percpu *pptr;
+	int err = 0;
 
 	if (prealloc) {
 		l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
 		if (!l_new)
-			return ERR_PTR(-E2BIG);
+			err = -E2BIG;
 	} else {
 		if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
 			atomic_dec(&htab->count);
-			return ERR_PTR(-E2BIG);
+			err = -E2BIG;
+		} else {
+			l_new = kmalloc(htab->elem_size,
+					GFP_ATOMIC | __GFP_NOWARN);
+			if (!l_new)
+				return ERR_PTR(-ENOMEM);
 		}
-		l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
-		if (!l_new)
-			return ERR_PTR(-ENOMEM);
+	}
+
+	if (err) {
+		if (!old_elem_exists)
+			return ERR_PTR(err);
+
+		/* if we're updating the existing element and the hash table
+		 * is full, use per-cpu extra elems
+		 */
+		l_new = this_cpu_ptr(htab->extra_elems);
+		if (l_new->state != HTAB_EXTRA_ELEM_FREE)
+			return ERR_PTR(-E2BIG);
+		l_new->state = HTAB_EXTRA_ELEM_USED;
+	} else {
+		l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
 	}
 
 	memcpy(l_new->key, key, key_size);
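
Putting the allocation policy together: a compressed, single-CPU user-space model of the decision alloc_htab_elem() now makes for a preallocated map (the helper names and the two-slot pool are invented for illustration; the real code pops from the per-cpu freelist and takes the spare via this_cpu_ptr()):

    #include <stddef.h>
    #include <stdio.h>

    enum extra_elem_state { HTAB_NOT_AN_EXTRA_ELEM = 0, HTAB_EXTRA_ELEM_FREE, HTAB_EXTRA_ELEM_USED };

    struct elem { enum extra_elem_state state; };

    static struct elem pool[2];  /* "preallocated" elements */
    static size_t pool_used;
    static struct elem extra = { HTAB_EXTRA_ELEM_FREE }; /* this CPU's spare */

    /* model of alloc_htab_elem(), prealloc-only variant */
    static struct elem *alloc_elem(int old_elem_exists)
    {
        if (pool_used < 2) {            /* pcpu_freelist_pop() succeeded */
            struct elem *l = &pool[pool_used++];
            l->state = HTAB_NOT_AN_EXTRA_ELEM;
            return l;
        }
        /* table full: only an overwrite of an existing key may proceed */
        if (!old_elem_exists)
            return NULL;                /* ERR_PTR(-E2BIG) in the kernel */
        if (extra.state != HTAB_EXTRA_ELEM_FREE)
            return NULL;                /* spare already in use: -E2BIG */
        extra.state = HTAB_EXTRA_ELEM_USED;
        return &extra;
    }

    int main(void)
    {
        alloc_elem(0);                  /* fills slot 0 */
        alloc_elem(0);                  /* fills slot 1: table now full */
        printf("insert new key: %s\n", alloc_elem(0) ? "ok" : "-E2BIG");
        printf("update old key: %s\n", alloc_elem(1) ? "ok (spare)" : "-E2BIG");
        printf("update again:   %s\n", alloc_elem(1) ? "ok" : "-E2BIG (spare busy)");
        return 0;
    }

In the real code the displaced old element is pushed back to the free list by free_htab_elem(), and the spare itself is handed back the same way, by flipping its state to FREE, so one spare per CPU covers one in-flight full-table overwrite.
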
@@ -489,7 +549,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	if (ret)
 		goto err;
 
-	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false);
+	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
+				!!l_old);
 	if (IS_ERR(l_new)) {
 		/* all pre-allocated elements are in use or memory exhausted */
 		ret = PTR_ERR(l_new);
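
The !!l_old idiom collapses the looked-up element pointer into a strict 0/1 for the new bool parameter, for example:

    #include <assert.h>
    #include <stdbool.h>

    int main(void)
    {
        int x;
        void *l_old = &x;

        assert(!!l_old == true);   /* existing element -> 1 */
        l_old = 0;
        assert(!!l_old == false);  /* lookup miss -> 0 */
        return 0;
    }
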
@@ -563,7 +624,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 		}
 	} else {
 		l_new = alloc_htab_elem(htab, key, value, key_size,
-					hash, true, onallcpus);
+					hash, true, onallcpus, false);
 		if (IS_ERR(l_new)) {
 			ret = PTR_ERR(l_new);
 			goto err;
@@ -652,6 +713,7 @@ static void htab_map_free(struct bpf_map *map)
 		htab_free_elems(htab);
 		pcpu_freelist_destroy(&htab->freelist);
 	}
+	free_percpu(htab->extra_elems);
 	kvfree(htab->buckets);
 	kfree(htab);
 }
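
For per-cpu maps extra_elems is never allocated and remains NULL, which is fine here: free_percpu(), like kfree(), is a no-op on a NULL pointer. The user-space equivalent of that guarantee:

    #include <stdlib.h>

    int main(void)
    {
        void *extra_elems = NULL; /* never allocated for per-cpu maps */

        free(extra_elems);        /* free(NULL) is a no-op, as is free_percpu(NULL) */
        return 0;
    }
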