Diffstat (limited to 'kernel/bpf/hashtab.c')
-rw-r--r--	kernel/bpf/hashtab.c	144
1 file changed, 71 insertions(+), 73 deletions(-)
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index afe5bab376c9..361a69dfe543 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -30,18 +30,12 @@ struct bpf_htab {
 		struct pcpu_freelist freelist;
 		struct bpf_lru lru;
 	};
-	void __percpu *extra_elems;
+	struct htab_elem *__percpu *extra_elems;
 	atomic_t count;	/* number of elements in this hashtable */
 	u32 n_buckets;	/* number of hash buckets */
 	u32 elem_size;	/* size of each element in bytes */
 };
 
-enum extra_elem_state {
-	HTAB_NOT_AN_EXTRA_ELEM = 0,
-	HTAB_EXTRA_ELEM_FREE,
-	HTAB_EXTRA_ELEM_USED
-};
-
 /* each htab element is struct htab_elem + key + value */
 struct htab_elem {
 	union {
@@ -56,7 +50,6 @@ struct htab_elem {
 	};
 	union {
 		struct rcu_head rcu;
-		enum extra_elem_state state;
 		struct bpf_lru_node lru_node;
 	};
 	u32 hash;
@@ -77,6 +70,11 @@ static bool htab_is_percpu(const struct bpf_htab *htab)
 		htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
 }
 
+static bool htab_is_prealloc(const struct bpf_htab *htab)
+{
+	return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
+}
+
 static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
 				     void __percpu *pptr)
 {
@@ -128,17 +126,20 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
 
 static int prealloc_init(struct bpf_htab *htab)
 {
+	u32 num_entries = htab->map.max_entries;
 	int err = -ENOMEM, i;
 
-	htab->elems = bpf_map_area_alloc(htab->elem_size *
-					 htab->map.max_entries);
+	if (!htab_is_percpu(htab) && !htab_is_lru(htab))
+		num_entries += num_possible_cpus();
+
+	htab->elems = bpf_map_area_alloc(htab->elem_size * num_entries);
 	if (!htab->elems)
 		return -ENOMEM;
 
 	if (!htab_is_percpu(htab))
 		goto skip_percpu_elems;
 
-	for (i = 0; i < htab->map.max_entries; i++) {
+	for (i = 0; i < num_entries; i++) {
 		u32 size = round_up(htab->map.value_size, 8);
 		void __percpu *pptr;
 
@@ -166,11 +167,11 @@ skip_percpu_elems:
 	if (htab_is_lru(htab))
 		bpf_lru_populate(&htab->lru, htab->elems,
 				 offsetof(struct htab_elem, lru_node),
-				 htab->elem_size, htab->map.max_entries);
+				 htab->elem_size, num_entries);
 	else
 		pcpu_freelist_populate(&htab->freelist,
 			htab->elems + offsetof(struct htab_elem, fnode),
-			htab->elem_size, htab->map.max_entries);
+			htab->elem_size, num_entries);
 
 	return 0;
 
@@ -191,16 +192,22 @@ static void prealloc_destroy(struct bpf_htab *htab)
 
 static int alloc_extra_elems(struct bpf_htab *htab)
 {
-	void __percpu *pptr;
+	struct htab_elem *__percpu *pptr, *l_new;
+	struct pcpu_freelist_node *l;
 	int cpu;
 
-	pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
+	pptr = __alloc_percpu_gfp(sizeof(struct htab_elem *), 8,
+				  GFP_USER | __GFP_NOWARN);
 	if (!pptr)
 		return -ENOMEM;
 
 	for_each_possible_cpu(cpu) {
-		((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
-			HTAB_EXTRA_ELEM_FREE;
+		l = pcpu_freelist_pop(&htab->freelist);
+		/* pop will succeed, since prealloc_init()
+		 * preallocated extra num_possible_cpus elements
+		 */
+		l_new = container_of(l, struct htab_elem, fnode);
+		*per_cpu_ptr(pptr, cpu) = l_new;
 	}
 	htab->extra_elems = pptr;
 	return 0;
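
The unchecked pcpu_freelist_pop() above leans on the invariant established in the prealloc_init() hunk: for non-percpu, non-LRU maps the element area is now sized max_entries + num_possible_cpus(), so a surplus element per possible CPU is guaranteed to be on the freelist. A minimal userspace sketch of that invariant (pool, extra, pop and the constants are invented for illustration; this is not kernel code):

#include <assert.h>
#include <stddef.h>

#define MAX_ENTRIES 4
#define NR_CPUS     2

struct node { struct node *next; };

static struct node pool[MAX_ENTRIES + NR_CPUS];	/* deliberately oversized */
static struct node *freelist;
static struct node *extra[NR_CPUS];		/* one spare per "CPU" */

static void populate(void)
{
	for (size_t i = 0; i < MAX_ENTRIES + NR_CPUS; i++) {
		pool[i].next = freelist;
		freelist = &pool[i];
	}
}

static struct node *pop(void)
{
	struct node *n = freelist;

	if (n)
		freelist = n->next;
	return n;
}

int main(void)
{
	populate();
	/* reserving one spare per CPU cannot fail... */
	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		extra[cpu] = pop();
		assert(extra[cpu]);
	}
	/* ...and the full max_entries budget is still left for inserts */
	for (int i = 0; i < MAX_ENTRIES; i++)
		assert(pop());
	assert(!pop());	/* map is now full */
	return 0;
}
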
@@ -342,25 +349,25 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 		raw_spin_lock_init(&htab->buckets[i].lock);
 	}
 
-	if (!percpu && !lru) {
-		/* lru itself can remove the least used element, so
-		 * there is no need for an extra elem during map_update.
-		 */
-		err = alloc_extra_elems(htab);
-		if (err)
-			goto free_buckets;
-	}
-
 	if (prealloc) {
 		err = prealloc_init(htab);
 		if (err)
-			goto free_extra_elems;
+			goto free_buckets;
+
+		if (!percpu && !lru) {
+			/* lru itself can remove the least used element, so
+			 * there is no need for an extra elem during map_update.
+			 */
+			err = alloc_extra_elems(htab);
+			if (err)
+				goto free_prealloc;
+		}
 	}
 
 	return &htab->map;
 
-free_extra_elems:
-	free_percpu(htab->extra_elems);
+free_prealloc:
+	prealloc_destroy(htab);
 free_buckets:
 	bpf_map_area_free(htab->buckets);
 free_htab:
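
Because alloc_extra_elems() now pops from the freelist, it must run after prealloc_init(), and the error unwind flips accordingly: its failure path tears down the preallocated area first, hence free_prealloc replacing free_extra_elems. A hedged userspace sketch of the same staged-init, goto-unwind pattern (setup and all allocations here are invented stand-ins, not the kernel API):

#include <stdio.h>
#include <stdlib.h>

/* acquire in order: buckets, prealloc, extra; release in reverse */
static int setup(void)
{
	void *buckets, *prealloc, *extra;

	buckets = malloc(64);
	if (!buckets)
		return -1;

	prealloc = malloc(64);		/* stands in for prealloc_init() */
	if (!prealloc)
		goto free_buckets;

	extra = malloc(64);		/* stands in for alloc_extra_elems() */
	if (!extra)
		goto free_prealloc;

	puts("all stages up");
	free(extra);
	free(prealloc);
	free(buckets);
	return 0;

free_prealloc:
	free(prealloc);			/* acquired later, released first */
free_buckets:
	free(buckets);
	return -1;
}

int main(void)
{
	return setup() ? EXIT_FAILURE : EXIT_SUCCESS;
}
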
@@ -575,12 +582,7 @@ static void htab_elem_free_rcu(struct rcu_head *head)
 
 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 {
-	if (l->state == HTAB_EXTRA_ELEM_USED) {
-		l->state = HTAB_EXTRA_ELEM_FREE;
-		return;
-	}
-
-	if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
+	if (htab_is_prealloc(htab)) {
 		pcpu_freelist_push(&htab->freelist, &l->fnode);
 	} else {
 		atomic_dec(&htab->count);
@@ -610,47 +612,43 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 					 void *value, u32 key_size, u32 hash,
 					 bool percpu, bool onallcpus,
-					 bool old_elem_exists)
+					 struct htab_elem *old_elem)
 {
 	u32 size = htab->map.value_size;
-	bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
-	struct htab_elem *l_new;
+	bool prealloc = htab_is_prealloc(htab);
+	struct htab_elem *l_new, **pl_new;
 	void __percpu *pptr;
-	int err = 0;
 
 	if (prealloc) {
-		struct pcpu_freelist_node *l;
+		if (old_elem) {
+			/* if we're updating the existing element,
+			 * use per-cpu extra elems to avoid freelist_pop/push
+			 */
+			pl_new = this_cpu_ptr(htab->extra_elems);
+			l_new = *pl_new;
+			*pl_new = old_elem;
+		} else {
+			struct pcpu_freelist_node *l;
 
-		l = pcpu_freelist_pop(&htab->freelist);
-		if (!l)
-			err = -E2BIG;
-		else
-			l_new = container_of(l, struct htab_elem, fnode);
-	} else {
-		if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
-			atomic_dec(&htab->count);
-			err = -E2BIG;
-		} else {
-			l_new = kmalloc(htab->elem_size,
-					GFP_ATOMIC | __GFP_NOWARN);
-			if (!l_new)
-				return ERR_PTR(-ENOMEM);
+			l = pcpu_freelist_pop(&htab->freelist);
+			if (!l)
+				return ERR_PTR(-E2BIG);
+			l_new = container_of(l, struct htab_elem, fnode);
 		}
-	}
-
-	if (err) {
-		if (!old_elem_exists)
-			return ERR_PTR(err);
-
-		/* if we're updating the existing element and the hash table
-		 * is full, use per-cpu extra elems
-		 */
-		l_new = this_cpu_ptr(htab->extra_elems);
-		if (l_new->state != HTAB_EXTRA_ELEM_FREE)
-			return ERR_PTR(-E2BIG);
-		l_new->state = HTAB_EXTRA_ELEM_USED;
 	} else {
-		l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
+		if (atomic_inc_return(&htab->count) > htab->map.max_entries)
+			if (!old_elem) {
+				/* when map is full and update() is replacing
+				 * old element, it's ok to allocate, since
+				 * old element will be freed immediately.
+				 * Otherwise return an error
+				 */
+				atomic_dec(&htab->count);
+				return ERR_PTR(-E2BIG);
+			}
+		l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
+		if (!l_new)
+			return ERR_PTR(-ENOMEM);
 	}
 
 	memcpy(l_new->key, key, key_size);
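
The prealloc update path now recycles elements by exchange: the per-cpu spare becomes the new element and the displaced old element becomes the next spare, so an update of an existing key generates no freelist traffic, and the caller no longer frees the old element for preallocated maps (see the !htab_is_prealloc() check in the next hunk). A toy userspace model of that exchange (spare, replace and struct elem are invented names, not the kernel API):

#include <stdio.h>
#include <stdlib.h>

struct elem {
	int key;
	int value;
};

/* stand-in for this_cpu_ptr(htab->extra_elems): one spare per CPU */
static struct elem *spare;

/* update of an existing key: consume the spare as the new element and
 * park the displaced old element as the next spare, with no allocator
 * or freelist round trip */
static struct elem *replace(struct elem *old, int key, int value)
{
	struct elem *fresh = spare;

	fresh->key = key;
	fresh->value = value;
	spare = old;
	return fresh;
}

int main(void)
{
	struct elem *cur = malloc(sizeof(*cur));

	spare = malloc(sizeof(*spare));
	cur->key = 1;
	cur->value = 10;

	cur = replace(cur, 1, 20);	/* old element is now the spare */
	printf("key=%d value=%d\n", cur->key, cur->value);

	free(cur);
	free(spare);
	return 0;
}
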
@@ -731,7 +729,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 		goto err;
 
 	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
-				!!l_old);
+				l_old);
 	if (IS_ERR(l_new)) {
 		/* all pre-allocated elements are in use or memory exhausted */
 		ret = PTR_ERR(l_new);
@@ -744,7 +742,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	hlist_nulls_add_head_rcu(&l_new->hash_node, head);
 	if (l_old) {
 		hlist_nulls_del_rcu(&l_old->hash_node);
-		free_htab_elem(htab, l_old);
+		if (!htab_is_prealloc(htab))
+			free_htab_elem(htab, l_old);
 	}
 	ret = 0;
 err:
@@ -856,7 +855,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 				value, onallcpus);
 	} else {
 		l_new = alloc_htab_elem(htab, key, value, key_size,
-					hash, true, onallcpus, false);
+					hash, true, onallcpus, NULL);
 		if (IS_ERR(l_new)) {
 			ret = PTR_ERR(l_new);
 			goto err;
@@ -1024,8 +1023,7 @@ static void delete_all_elements(struct bpf_htab *htab)
 
 		hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
 			hlist_nulls_del_rcu(&l->hash_node);
-			if (l->state != HTAB_EXTRA_ELEM_USED)
-				htab_elem_free(htab, l);
+			htab_elem_free(htab, l);
 		}
 	}
 }
@@ -1045,7 +1043,7 @@ static void htab_map_free(struct bpf_map *map)
 	 * not have executed. Wait for them.
 	 */
 	rcu_barrier();
-	if (htab->map.map_flags & BPF_F_NO_PREALLOC)
+	if (!htab_is_prealloc(htab))
 		delete_all_elements(htab);
 	else
 		prealloc_destroy(htab);