Diffstat (limited to 'kernel/bpf/hashtab.c')

 -rw-r--r--  kernel/bpf/hashtab.c | 144
 1 file changed, 71 insertions, 73 deletions
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index afe5bab376c9..361a69dfe543 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -30,18 +30,12 @@ struct bpf_htab {
 		struct pcpu_freelist freelist;
 		struct bpf_lru lru;
 	};
-	void __percpu *extra_elems;
+	struct htab_elem *__percpu *extra_elems;
 	atomic_t count;	/* number of elements in this hashtable */
 	u32 n_buckets;	/* number of hash buckets */
 	u32 elem_size;	/* size of each element in bytes */
 };
 
-enum extra_elem_state {
-	HTAB_NOT_AN_EXTRA_ELEM = 0,
-	HTAB_EXTRA_ELEM_FREE,
-	HTAB_EXTRA_ELEM_USED
-};
-
 /* each htab element is struct htab_elem + key + value */
 struct htab_elem {
 	union {
@@ -56,7 +50,6 @@ struct htab_elem {
 	};
 	union {
 		struct rcu_head rcu;
-		enum extra_elem_state state;
 		struct bpf_lru_node lru_node;
 	};
 	u32 hash;
@@ -77,6 +70,11 @@ static bool htab_is_percpu(const struct bpf_htab *htab)
 		htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
 }
 
+static bool htab_is_prealloc(const struct bpf_htab *htab)
+{
+	return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
+}
+
 static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
 				     void __percpu *pptr)
 {
@@ -128,17 +126,20 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
 
 static int prealloc_init(struct bpf_htab *htab)
 {
+	u32 num_entries = htab->map.max_entries;
 	int err = -ENOMEM, i;
 
-	htab->elems = bpf_map_area_alloc(htab->elem_size *
-					 htab->map.max_entries);
+	if (!htab_is_percpu(htab) && !htab_is_lru(htab))
+		num_entries += num_possible_cpus();
+
+	htab->elems = bpf_map_area_alloc(htab->elem_size * num_entries);
 	if (!htab->elems)
 		return -ENOMEM;
 
 	if (!htab_is_percpu(htab))
 		goto skip_percpu_elems;
 
-	for (i = 0; i < htab->map.max_entries; i++) {
+	for (i = 0; i < num_entries; i++) {
 		u32 size = round_up(htab->map.value_size, 8);
 		void __percpu *pptr;
 
@@ -166,11 +167,11 @@ skip_percpu_elems:
 	if (htab_is_lru(htab))
 		bpf_lru_populate(&htab->lru, htab->elems,
 				 offsetof(struct htab_elem, lru_node),
-				 htab->elem_size, htab->map.max_entries);
+				 htab->elem_size, num_entries);
 	else
 		pcpu_freelist_populate(&htab->freelist,
 				       htab->elems + offsetof(struct htab_elem, fnode),
-				       htab->elem_size, htab->map.max_entries);
+				       htab->elem_size, num_entries);
 
 	return 0;
 
@@ -191,16 +192,22 @@ static void prealloc_destroy(struct bpf_htab *htab)
 
 static int alloc_extra_elems(struct bpf_htab *htab)
 {
-	void __percpu *pptr;
+	struct htab_elem *__percpu *pptr, *l_new;
+	struct pcpu_freelist_node *l;
 	int cpu;
 
-	pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
+	pptr = __alloc_percpu_gfp(sizeof(struct htab_elem *), 8,
+				  GFP_USER | __GFP_NOWARN);
 	if (!pptr)
 		return -ENOMEM;
 
 	for_each_possible_cpu(cpu) {
-		((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
-			HTAB_EXTRA_ELEM_FREE;
+		l = pcpu_freelist_pop(&htab->freelist);
+		/* pop will succeed, since prealloc_init()
+		 * preallocated extra num_possible_cpus elements
+		 */
+		l_new = container_of(l, struct htab_elem, fnode);
+		*per_cpu_ptr(pptr, cpu) = l_new;
 	}
 	htab->extra_elems = pptr;
 	return 0;
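
With the extra_elem_state enum gone, alloc_extra_elems() no longer tags elements as free or used; prealloc_init() now preallocates max_entries plus num_possible_cpus() elements, and each CPU pops one of them off the freelist up front as its private spare. Below is a minimal userspace sketch of that idea, not kernel code; NCPUS, MAX_ENTRIES, grab_extra_elems() and the other names are made up for illustration.

#include <stdio.h>

#define NCPUS 4            /* stands in for num_possible_cpus() */
#define MAX_ENTRIES 8      /* stands in for htab->map.max_entries */

struct elem {
	struct elem *next;     /* freelist linkage, like pcpu_freelist_node */
	long payload;
};

static struct elem pool[MAX_ENTRIES + NCPUS]; /* prealloc_init() now sizes for both */
static struct elem *freelist;
static struct elem *extra_elems[NCPUS];       /* stands in for the per-cpu pointers */

static void freelist_push(struct elem *e)
{
	e->next = freelist;
	freelist = e;
}

static struct elem *freelist_pop(void)
{
	struct elem *e = freelist;

	if (e)
		freelist = e->next;
	return e;
}

/* model of alloc_extra_elems(): each CPU pops one preallocated element
 * up front; the pop cannot fail because the pool was sized
 * MAX_ENTRIES + NCPUS above
 */
static void grab_extra_elems(void)
{
	int cpu;

	for (cpu = 0; cpu < NCPUS; cpu++)
		extra_elems[cpu] = freelist_pop();
}

int main(void)
{
	int i;

	for (i = 0; i < MAX_ENTRIES + NCPUS; i++)
		freelist_push(&pool[i]);

	grab_extra_elems();

	for (i = 0; i < NCPUS; i++)
		printf("cpu%d spare = pool[%ld]\n", i, (long)(extra_elems[i] - pool));
	return 0;
}
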
@@ -342,25 +349,25 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 		raw_spin_lock_init(&htab->buckets[i].lock);
 	}
 
-	if (!percpu && !lru) {
-		/* lru itself can remove the least used element, so
-		 * there is no need for an extra elem during map_update.
-		 */
-		err = alloc_extra_elems(htab);
-		if (err)
-			goto free_buckets;
-	}
-
 	if (prealloc) {
 		err = prealloc_init(htab);
 		if (err)
-			goto free_extra_elems;
+			goto free_buckets;
+
+		if (!percpu && !lru) {
+			/* lru itself can remove the least used element, so
+			 * there is no need for an extra elem during map_update.
+			 */
+			err = alloc_extra_elems(htab);
+			if (err)
+				goto free_prealloc;
+		}
 	}
 
 	return &htab->map;
 
-free_extra_elems:
-	free_percpu(htab->extra_elems);
+free_prealloc:
+	prealloc_destroy(htab);
 free_buckets:
 	bpf_map_area_free(htab->buckets);
 free_htab:
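
Because the extra elements now come out of the preallocated freelist, alloc_extra_elems() has to run after prealloc_init(), and its failure path has to unwind through prealloc_destroy() instead of free_percpu(). A rough userspace sketch of that ordering, with hypothetical *_model() helpers standing in for the real functions:

#include <stdio.h>

/* hypothetical stand-ins for the two setup steps and the teardown */
static int prealloc_init_model(void)         { return 0; }
static void prealloc_destroy_model(void)     { puts("unwind: prealloc_destroy()"); }
static int alloc_extra_elems_model(int fail) { return fail ? -1 : 0; }

/* model of the reordered setup in htab_map_alloc(): the freelist must be
 * populated before alloc_extra_elems() pops from it, and a failure in the
 * second step now unwinds through prealloc_destroy() rather than
 * free_percpu(extra_elems)
 */
static int setup(int fail_extra)
{
	int err;

	err = prealloc_init_model();
	if (err)
		return err;                   /* goto free_buckets */

	err = alloc_extra_elems_model(fail_extra);
	if (err) {
		prealloc_destroy_model();     /* goto free_prealloc */
		return err;
	}
	return 0;
}

int main(void)
{
	printf("setup() -> %d\n", setup(0));
	printf("setup() with failing extra elems -> %d\n", setup(1));
	return 0;
}
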
@@ -575,12 +582,7 @@ static void htab_elem_free_rcu(struct rcu_head *head)
 
 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 {
-	if (l->state == HTAB_EXTRA_ELEM_USED) {
-		l->state = HTAB_EXTRA_ELEM_FREE;
-		return;
-	}
-
-	if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
+	if (htab_is_prealloc(htab)) {
 		pcpu_freelist_push(&htab->freelist, &l->fnode);
 	} else {
 		atomic_dec(&htab->count);
@@ -610,47 +612,43 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 					 void *value, u32 key_size, u32 hash,
 					 bool percpu, bool onallcpus,
-					 bool old_elem_exists)
+					 struct htab_elem *old_elem)
 {
 	u32 size = htab->map.value_size;
-	bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
-	struct htab_elem *l_new;
+	bool prealloc = htab_is_prealloc(htab);
+	struct htab_elem *l_new, **pl_new;
 	void __percpu *pptr;
-	int err = 0;
 
 	if (prealloc) {
-		struct pcpu_freelist_node *l;
+		if (old_elem) {
+			/* if we're updating the existing element,
+			 * use per-cpu extra elems to avoid freelist_pop/push
+			 */
+			pl_new = this_cpu_ptr(htab->extra_elems);
+			l_new = *pl_new;
+			*pl_new = old_elem;
+		} else {
+			struct pcpu_freelist_node *l;
 
-		l = pcpu_freelist_pop(&htab->freelist);
-		if (!l)
-			err = -E2BIG;
-		else
-			l_new = container_of(l, struct htab_elem, fnode);
-	} else {
-		if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
-			atomic_dec(&htab->count);
-			err = -E2BIG;
-		} else {
-			l_new = kmalloc(htab->elem_size,
-					GFP_ATOMIC | __GFP_NOWARN);
-			if (!l_new)
-				return ERR_PTR(-ENOMEM);
+			l = pcpu_freelist_pop(&htab->freelist);
+			if (!l)
+				return ERR_PTR(-E2BIG);
+			l_new = container_of(l, struct htab_elem, fnode);
 		}
-	}
-
-	if (err) {
-		if (!old_elem_exists)
-			return ERR_PTR(err);
-
-		/* if we're updating the existing element and the hash table
-		 * is full, use per-cpu extra elems
-		 */
-		l_new = this_cpu_ptr(htab->extra_elems);
-		if (l_new->state != HTAB_EXTRA_ELEM_FREE)
-			return ERR_PTR(-E2BIG);
-		l_new->state = HTAB_EXTRA_ELEM_USED;
 	} else {
-		l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
+		if (atomic_inc_return(&htab->count) > htab->map.max_entries)
+			if (!old_elem) {
+				/* when map is full and update() is replacing
+				 * old element, it's ok to allocate, since
+				 * old element will be freed immediately.
+				 * Otherwise return an error
+				 */
+				atomic_dec(&htab->count);
+				return ERR_PTR(-E2BIG);
+			}
+		l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
+		if (!l_new)
+			return ERR_PTR(-ENOMEM);
 	}
 
 	memcpy(l_new->key, key, key_size);
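
The heart of the change is the prealloc update path in alloc_htab_elem(): when an existing key is being replaced, the current CPU's spare becomes the new element and the displaced old element becomes the spare, so a full map can still update in place without any freelist traffic (and, as the later hunk in htab_map_update_elem() shows, without calling free_htab_elem() on the old element). A minimal userspace sketch of that swap, assuming a single slot in place of this_cpu_ptr(htab->extra_elems) and a made-up alloc_for_update() helper:

#include <stdio.h>

struct elem {
	int key;
	long value;
};

/* a single slot stands in for this_cpu_ptr(htab->extra_elems) */
static struct elem spare_storage = { .key = -1 };
static struct elem *extra_elem = &spare_storage;

/* model of the prealloc update path in alloc_htab_elem(): reuse the
 * per-cpu spare as the new element and hand the old element back as the
 * next spare, so replacing a key never pops from or pushes to the freelist
 */
static struct elem *alloc_for_update(struct elem *old_elem, int key, long value)
{
	struct elem *l_new = extra_elem;

	extra_elem = old_elem;       /* old element becomes the new spare */
	l_new->key = key;
	l_new->value = value;
	return l_new;
}

int main(void)
{
	struct elem first = { .key = 7, .value = 1 };
	struct elem *cur = &first;

	cur = alloc_for_update(cur, 7, 2);   /* update key 7 in place */
	printf("key=%d value=%ld, spare now holds the replaced slot\n",
	       cur->key, cur->value);
	return 0;
}
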
@@ -731,7 +729,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 		goto err;
 
 	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
-				!!l_old);
+				l_old);
 	if (IS_ERR(l_new)) {
 		/* all pre-allocated elements are in use or memory exhausted */
 		ret = PTR_ERR(l_new);
@@ -744,7 +742,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	hlist_nulls_add_head_rcu(&l_new->hash_node, head);
 	if (l_old) {
 		hlist_nulls_del_rcu(&l_old->hash_node);
-		free_htab_elem(htab, l_old);
+		if (!htab_is_prealloc(htab))
+			free_htab_elem(htab, l_old);
 	}
 	ret = 0;
 err:
@@ -856,7 +855,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 				value, onallcpus);
 	} else {
 		l_new = alloc_htab_elem(htab, key, value, key_size,
-					hash, true, onallcpus, false);
+					hash, true, onallcpus, NULL);
 		if (IS_ERR(l_new)) {
 			ret = PTR_ERR(l_new);
 			goto err;
@@ -1024,8 +1023,7 @@ static void delete_all_elements(struct bpf_htab *htab)
 
 		hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
 			hlist_nulls_del_rcu(&l->hash_node);
-			if (l->state != HTAB_EXTRA_ELEM_USED)
-				htab_elem_free(htab, l);
+			htab_elem_free(htab, l);
 		}
 	}
 }
@@ -1045,7 +1043,7 @@ static void htab_map_free(struct bpf_map *map)
 	 * not have executed. Wait for them.
 	 */
 	rcu_barrier();
-	if (htab->map.map_flags & BPF_F_NO_PREALLOC)
+	if (!htab_is_prealloc(htab))
 		delete_all_elements(htab);
 	else
 		prealloc_destroy(htab);