Diffstat (limited to 'kernel')
-rw-r--r--	kernel/bpf/hashtab.c	119
-rw-r--r--	kernel/bpf/lpm_trie.c	6
2 files changed, 86 insertions(+), 39 deletions(-)
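
In short: the hashtab.c change converts the BPF hash map's bucket lists from hlist to hlist_nulls. Preallocated elements are recycled through a per-CPU freelist without an intervening RCU grace period, so a lockless lookup can be carried from one bucket's chain onto another's mid-walk; per-bucket nulls markers let the reader detect that and retry (see the lookup_nulls_elem_raw() hunk below). The lpm_trie.c change adds a map_get_next_key stub so the map-iteration syscall fails cleanly on LPM tries rather than calling through a missing callback.
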
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 3ea87fb19a94..afe5bab376c9 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -13,11 +13,12 @@
 #include <linux/bpf.h>
 #include <linux/jhash.h>
 #include <linux/filter.h>
+#include <linux/rculist_nulls.h>
 #include "percpu_freelist.h"
 #include "bpf_lru_list.h"
 
 struct bucket {
-        struct hlist_head head;
+        struct hlist_nulls_head head;
         raw_spinlock_t lock;
 };
 
@@ -44,9 +45,14 @@ enum extra_elem_state {
 /* each htab element is struct htab_elem + key + value */
 struct htab_elem {
         union {
-                struct hlist_node hash_node;
-                struct bpf_htab *htab;
-                struct pcpu_freelist_node fnode;
+                struct hlist_nulls_node hash_node;
+                struct {
+                        void *padding;
+                        union {
+                                struct bpf_htab *htab;
+                                struct pcpu_freelist_node fnode;
+                        };
+                };
         };
         union {
                 struct rcu_head rcu;
@@ -162,7 +168,8 @@ skip_percpu_elems:
                                     offsetof(struct htab_elem, lru_node),
                                     htab->elem_size, htab->map.max_entries);
         else
-                pcpu_freelist_populate(&htab->freelist, htab->elems,
+                pcpu_freelist_populate(&htab->freelist,
+                                       htab->elems + offsetof(struct htab_elem, fnode),
                                        htab->elem_size, htab->map.max_entries);
 
         return 0;
@@ -217,6 +224,11 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
         int err, i;
         u64 cost;
 
+        BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
+                     offsetof(struct htab_elem, hash_node.pprev));
+        BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
+                     offsetof(struct htab_elem, hash_node.pprev));
+
         if (lru && !capable(CAP_SYS_ADMIN))
                 /* LRU implementation is much complicated than other
                  * maps.  Hence, limit to CAP_SYS_ADMIN for now.
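
The union layout and BUILD_BUG_ONs above are load-bearing: an element parked on the freelist can still be visible to a concurrent lookup walking its old chain, and that walker reads hash_node.next, so the freelist's next pointer (and the htab backpointer) must overlay hash_node.pprev instead; the void *padding member is what shifts them there. A minimal userspace sketch of the same invariant, with hypothetical pointer-sized stand-ins for the kernel types:

#include <assert.h>
#include <stddef.h>

/* Hypothetical stand-ins for the kernel types; only the pointer
 * layout matters here.
 */
struct nulls_node { void *next; void *pprev; };
struct freelist_node { void *next; };

struct elem {
        union {
                struct nulls_node hash_node;
                struct {
                        void *padding; /* keeps fnode.next clear of hash_node.next */
                        union {
                                void *htab;
                                struct freelist_node fnode;
                        };
                };
        };
};

int main(void)
{
        /* the same invariants the BUILD_BUG_ONs enforce in htab_map_alloc() */
        static_assert(offsetof(struct elem, htab) ==
                      offsetof(struct elem, hash_node.pprev), "layout");
        static_assert(offsetof(struct elem, fnode.next) ==
                      offsetof(struct elem, hash_node.pprev), "layout");
        return 0;
}
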
@@ -326,7 +338,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
                 goto free_htab;
 
         for (i = 0; i < htab->n_buckets; i++) {
-                INIT_HLIST_HEAD(&htab->buckets[i].head);
+                INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
                 raw_spin_lock_init(&htab->buckets[i].lock);
         }
 
@@ -366,20 +378,44 @@ static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
         return &htab->buckets[hash & (htab->n_buckets - 1)];
 }
 
-static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
+static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash)
 {
         return &__select_bucket(htab, hash)->head;
 }
 
-static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
+/* this lookup function can only be called with bucket lock taken */
+static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
                                          void *key, u32 key_size)
 {
+        struct hlist_nulls_node *n;
+        struct htab_elem *l;
+
+        hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
+                if (l->hash == hash && !memcmp(&l->key, key, key_size))
+                        return l;
+
+        return NULL;
+}
+
+/* can be called without bucket lock. it will repeat the loop in
+ * the unlikely event when elements moved from one bucket into another
+ * while link list is being walked
+ */
+static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
+                                               u32 hash, void *key,
+                                               u32 key_size, u32 n_buckets)
+{
+        struct hlist_nulls_node *n;
         struct htab_elem *l;
 
-        hlist_for_each_entry_rcu(l, head, hash_node)
+again:
+        hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
                 if (l->hash == hash && !memcmp(&l->key, key, key_size))
                         return l;
 
+        if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1))))
+                goto again;
+
         return NULL;
 }
 
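
The retry in lookup_nulls_elem_raw() is the standard hlist_nulls reader idiom: each bucket's list terminates in a distinct nulls marker encoding the bucket index (set by INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i) above), so a reader dragged onto another chain by a concurrently recycled element ends on the wrong marker and starts over. A freestanding sketch of the idiom, in the spirit of Documentation/RCU/rculist_nulls.txt; 'struct obj', 'table' and 'n_buckets' (a power of two) are hypothetical:

#include <linux/jhash.h>
#include <linux/rculist_nulls.h>

/* Hypothetical object type; 'node' plays the role of hash_node above. */
struct obj {
        u32 key;
        struct hlist_nulls_node node;
};

/* Lockless lookup; caller must hold rcu_read_lock(). Each head in
 * 'table' was initialized with its own index as the nulls value.
 */
static struct obj *obj_lookup(struct hlist_nulls_head *table,
                              u32 n_buckets, u32 key)
{
        u32 slot = jhash_1word(key, 0) & (n_buckets - 1);
        struct hlist_nulls_node *n;
        struct obj *o;

again:
        hlist_nulls_for_each_entry_rcu(o, n, &table[slot], node)
                if (o->key == key)
                        return o;

        /* The walk ended on a nulls marker. If it is not this bucket's
         * marker, an element we traversed was freed and reinserted into
         * another bucket mid-walk: restart the search.
         */
        if (get_nulls_value(n) != slot)
                goto again;

        return NULL;
}
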
@@ -387,7 +423,7 @@ static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
 static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
 {
         struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-        struct hlist_head *head;
+        struct hlist_nulls_head *head;
         struct htab_elem *l;
         u32 hash, key_size;
 
@@ -400,7 +436,7 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
 
         head = select_bucket(htab, hash);
 
-        l = lookup_elem_raw(head, hash, key, key_size);
+        l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
 
         return l;
 }
@@ -433,8 +469,9 @@ static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
 static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
 {
         struct bpf_htab *htab = (struct bpf_htab *)arg;
-        struct htab_elem *l, *tgt_l;
-        struct hlist_head *head;
+        struct htab_elem *l = NULL, *tgt_l;
+        struct hlist_nulls_head *head;
+        struct hlist_nulls_node *n;
         unsigned long flags;
         struct bucket *b;
 
@@ -444,9 +481,9 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
 
         raw_spin_lock_irqsave(&b->lock, flags);
 
-        hlist_for_each_entry_rcu(l, head, hash_node)
+        hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
                 if (l == tgt_l) {
-                        hlist_del_rcu(&l->hash_node);
+                        hlist_nulls_del_rcu(&l->hash_node);
                         break;
                 }
 
@@ -459,7 +496,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
 static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 {
         struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-        struct hlist_head *head;
+        struct hlist_nulls_head *head;
         struct htab_elem *l, *next_l;
         u32 hash, key_size;
         int i;
@@ -473,7 +510,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
         head = select_bucket(htab, hash);
 
         /* lookup the key */
-        l = lookup_elem_raw(head, hash, key, key_size);
+        l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
 
         if (!l) {
                 i = 0;
@@ -481,7 +518,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
         }
 
         /* key was found, get next key in the same bucket */
-        next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)),
+        next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)),
                                   struct htab_elem, hash_node);
 
         if (next_l) {
@@ -500,7 +537,7 @@ find_first_elem:
                 head = select_bucket(htab, i);
 
                 /* pick first element in the bucket */
-                next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
+                next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)),
                                           struct htab_elem, hash_node);
                 if (next_l) {
                         /* if it's not empty, just return it */
@@ -582,9 +619,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
         int err = 0;
 
         if (prealloc) {
-                l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
-                if (!l_new)
+                struct pcpu_freelist_node *l;
+
+                l = pcpu_freelist_pop(&htab->freelist);
+                if (!l)
                         err = -E2BIG;
+                else
+                        l_new = container_of(l, struct htab_elem, fnode);
         } else {
                 if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
                         atomic_dec(&htab->count);
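
With the freelist now populated at offsetof(struct htab_elem, fnode) (see the skip_percpu_elems hunk above), pcpu_freelist_pop() hands back a pointer to the embedded node, and the element is recovered with container_of() rather than a cast. A minimal userspace illustration of that pattern, with hypothetical stand-in types and a simplified macro (the kernel's version adds type checking):

#include <stddef.h>
#include <stdio.h>

/* Simplified container_of(): step back from a member pointer to the
 * enclosing structure.
 */
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct node { struct node *next; };
struct elem { int key; struct node fnode; };

int main(void)
{
        struct elem e = { .key = 42 };
        struct node *n = &e.fnode;      /* what a freelist would hand back */
        struct elem *back = container_of(n, struct elem, fnode);

        printf("%d\n", back->key);      /* prints 42 */
        return 0;
}
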
@@ -661,7 +702,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 {
         struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
         struct htab_elem *l_new = NULL, *l_old;
-        struct hlist_head *head;
+        struct hlist_nulls_head *head;
         unsigned long flags;
         struct bucket *b;
         u32 key_size, hash;
@@ -700,9 +741,9 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
         /* add new element to the head of the list, so that
          * concurrent search will find it before old elem
          */
-        hlist_add_head_rcu(&l_new->hash_node, head);
+        hlist_nulls_add_head_rcu(&l_new->hash_node, head);
         if (l_old) {
-                hlist_del_rcu(&l_old->hash_node);
+                hlist_nulls_del_rcu(&l_old->hash_node);
                 free_htab_elem(htab, l_old);
         }
         ret = 0;
@@ -716,7 +757,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
 {
         struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
         struct htab_elem *l_new, *l_old = NULL;
-        struct hlist_head *head;
+        struct hlist_nulls_head *head;
         unsigned long flags;
         struct bucket *b;
         u32 key_size, hash;
@@ -757,10 +798,10 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
         /* add new element to the head of the list, so that
          * concurrent search will find it before old elem
          */
-        hlist_add_head_rcu(&l_new->hash_node, head);
+        hlist_nulls_add_head_rcu(&l_new->hash_node, head);
         if (l_old) {
                 bpf_lru_node_set_ref(&l_new->lru_node);
-                hlist_del_rcu(&l_old->hash_node);
+                hlist_nulls_del_rcu(&l_old->hash_node);
         }
         ret = 0;
 
@@ -781,7 +822,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 {
         struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
         struct htab_elem *l_new = NULL, *l_old;
-        struct hlist_head *head;
+        struct hlist_nulls_head *head;
         unsigned long flags;
         struct bucket *b;
         u32 key_size, hash;
@@ -820,7 +861,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
                         ret = PTR_ERR(l_new);
                         goto err;
                 }
-                hlist_add_head_rcu(&l_new->hash_node, head);
+                hlist_nulls_add_head_rcu(&l_new->hash_node, head);
         }
         ret = 0;
 err:
@@ -834,7 +875,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 {
         struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
         struct htab_elem *l_new = NULL, *l_old;
-        struct hlist_head *head;
+        struct hlist_nulls_head *head;
         unsigned long flags;
         struct bucket *b;
         u32 key_size, hash;
@@ -882,7 +923,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
         } else {
                 pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size),
                                 value, onallcpus);
-                hlist_add_head_rcu(&l_new->hash_node, head);
+                hlist_nulls_add_head_rcu(&l_new->hash_node, head);
                 l_new = NULL;
         }
         ret = 0;
@@ -910,7 +951,7 @@ static int htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 static int htab_map_delete_elem(struct bpf_map *map, void *key)
 {
         struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-        struct hlist_head *head;
+        struct hlist_nulls_head *head;
         struct bucket *b;
         struct htab_elem *l;
         unsigned long flags;
@@ -930,7 +971,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
         l = lookup_elem_raw(head, hash, key, key_size);
 
         if (l) {
-                hlist_del_rcu(&l->hash_node);
+                hlist_nulls_del_rcu(&l->hash_node);
                 free_htab_elem(htab, l);
                 ret = 0;
         }
@@ -942,7 +983,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
 static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
 {
         struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-        struct hlist_head *head;
+        struct hlist_nulls_head *head;
         struct bucket *b;
         struct htab_elem *l;
         unsigned long flags;
@@ -962,7 +1003,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
         l = lookup_elem_raw(head, hash, key, key_size);
 
         if (l) {
-                hlist_del_rcu(&l->hash_node);
+                hlist_nulls_del_rcu(&l->hash_node);
                 ret = 0;
         }
 
@@ -977,12 +1018,12 @@ static void delete_all_elements(struct bpf_htab *htab)
         int i;
 
         for (i = 0; i < htab->n_buckets; i++) {
-                struct hlist_head *head = select_bucket(htab, i);
-                struct hlist_node *n;
+                struct hlist_nulls_head *head = select_bucket(htab, i);
+                struct hlist_nulls_node *n;
                 struct htab_elem *l;
 
-                hlist_for_each_entry_safe(l, n, head, hash_node) {
-                        hlist_del_rcu(&l->hash_node);
+                hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
+                        hlist_nulls_del_rcu(&l->hash_node);
                         if (l->state != HTAB_EXTRA_ELEM_USED)
                                 htab_elem_free(htab, l);
                 }
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 8bfe0afaee10..b37bd9ab7f57 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -500,9 +500,15 @@ unlock:
         raw_spin_unlock(&trie->lock);
 }
 
+static int trie_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+        return -ENOTSUPP;
+}
+
 static const struct bpf_map_ops trie_ops = {
         .map_alloc = trie_alloc,
         .map_free = trie_free,
+        .map_get_next_key = trie_get_next_key,
         .map_lookup_elem = trie_lookup_elem,
         .map_update_elem = trie_update_elem,
         .map_delete_elem = trie_delete_elem,
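
For context: the generic map syscall code dispatches BPF_MAP_GET_NEXT_KEY straight to map->ops->map_get_next_key, so leaving the callback NULL would make a kernel NULL dereference reachable from userspace; the stub turns that into a clean error until trie iteration is actually implemented. A hedged userspace sketch of what the syscall now returns ('map_fd' is assumed to be an already-created LPM trie; the kernel-internal ENOTSUPP surfaces as errno 524, which has no userspace constant):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* Raw BPF_MAP_GET_NEXT_KEY wrapper around the bpf(2) syscall. */
static int bpf_map_get_next_key(int map_fd, const void *key, void *next_key)
{
        union bpf_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.map_fd = map_fd;
        attr.key = (__u64)(unsigned long)key;
        attr.next_key = (__u64)(unsigned long)next_key;

        return syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
}

/* With this patch the call fails gracefully instead of oopsing:
 *
 *     if (bpf_map_get_next_key(map_fd, NULL, &next) < 0)
 *             printf("get_next_key: errno=%d\n", errno);  // 524 (ENOTSUPP)
 */
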