author		Alexei Starovoitov <ast@fb.com>	2017-03-07 23:00:13 -0500
committer	David S. Miller <davem@davemloft.net>	2017-03-09 16:27:17 -0500
commit		4fe8435909fddc97b81472026aa954e06dd192a5 (patch)
tree		31f7007c13a16573229090569e6fc3ca5c496a50
parent		9f691549f76d488a0c74397b3e51e943865ea01f (diff)
bpf: convert htab map to hlist_nulls
When all map elements are pre-allocated, one cpu can delete and reuse an htab_elem while another cpu is still walking the hlist. In such a case the lookup may miss the element. Convert hlist to hlist_nulls to avoid this scenario. When the bucket lock is taken there is no need for such precautions, so only convert map_lookup and map_get_next to nulls.

The race window is extremely small and only reproducible with an explicit udelay() inside lookup_nulls_elem_raw().

Similar to hlist, add hlist_nulls_for_each_entry_safe() and hlist_nulls_entry_safe() helpers.

Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements")
Reported-by: Jonathan Perry <jonperry@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
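For readers unfamiliar with list_nulls, the following is a minimal, self-contained sketch of the nulls-marker retry idea this patch relies on. It is illustrative only: the names (NULLS_MARKER, IS_NULLS, NULLS_VALUE, struct node) and the pointer encoding are simplified stand-ins rather than the kernel's list_nulls/RCU API, and memory barriers are omitted.

/*
 * Sketch of the "nulls" terminator trick: every bucket's list ends in a
 * fake pointer that encodes the bucket index, so a lock-free walker that
 * got moved onto a recycled element can detect that it finished in the
 * wrong bucket and restart the walk.
 */
#include <stdint.h>

struct node {
        struct node *next;      /* real node, or a nulls marker */
        uint32_t hash;
        uint32_t key;
};

/* bucket index shifted up one, low bit set => "this is a terminator" */
#define NULLS_MARKER(bucket)    ((struct node *)(((uintptr_t)(bucket) << 1) | 1))
#define IS_NULLS(p)             ((uintptr_t)(p) & 1)
#define NULLS_VALUE(p)          ((uint32_t)((uintptr_t)(p) >> 1))

struct bucket {
        struct node *first;     /* initialised to NULLS_MARKER(bucket index) */
};

static struct node *lookup(struct bucket *buckets, uint32_t n_buckets,
                           uint32_t hash, uint32_t key)
{
        uint32_t b = hash & (n_buckets - 1);
        struct node *p;

again:
        for (p = buckets[b].first; !IS_NULLS(p); p = p->next)
                if (p->hash == hash && p->key == key)
                        return p;

        /* The walk ended on another bucket's terminator: an element under
         * our feet was freed and reinserted elsewhere, so restart.
         */
        if (NULLS_VALUE(p) != b)
                goto again;

        return NULL;            /* reliably not present */
}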
-rw-r--r--	include/linux/list_nulls.h	5
-rw-r--r--	include/linux/rculist_nulls.h	14
-rw-r--r--	kernel/bpf/hashtab.c	94
3 files changed, 79 insertions(+), 34 deletions(-)
diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h
index b01fe1009084..87ff4f58a2f0 100644
--- a/include/linux/list_nulls.h
+++ b/include/linux/list_nulls.h
@@ -29,6 +29,11 @@ struct hlist_nulls_node {
 	((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls))
 
 #define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member)
+
+#define hlist_nulls_entry_safe(ptr, type, member) \
+	({ typeof(ptr) ____ptr = (ptr); \
+	   !is_a_nulls(____ptr) ? hlist_nulls_entry(____ptr, type, member) : NULL; \
+	})
 /**
  * ptr_is_a_nulls - Test if a ptr is a nulls
  * @ptr: ptr to be tested
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index 4ae95f7e8597..a23a33153180 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -156,5 +156,19 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
 		({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \
 		pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)))
 
+/**
+ * hlist_nulls_for_each_entry_safe -
+ *   iterate over list of given type safe against removal of list entry
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_nulls_node to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_nulls_node within the struct.
+ */
+#define hlist_nulls_for_each_entry_safe(tpos, pos, head, member)	\
+	for (({barrier();}),						\
+	     pos = rcu_dereference_raw(hlist_nulls_first_rcu(head));	\
+		(!is_a_nulls(pos)) &&					\
+		({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); \
+		   pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)); 1; });)
 #endif
 #endif
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 63c86a7be2a1..afe5bab376c9 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -13,11 +13,12 @@
 #include <linux/bpf.h>
 #include <linux/jhash.h>
 #include <linux/filter.h>
+#include <linux/rculist_nulls.h>
 #include "percpu_freelist.h"
 #include "bpf_lru_list.h"
 
 struct bucket {
-	struct hlist_head head;
+	struct hlist_nulls_head head;
 	raw_spinlock_t lock;
 };
 
@@ -44,7 +45,7 @@ enum extra_elem_state {
 /* each htab element is struct htab_elem + key + value */
 struct htab_elem {
 	union {
-		struct hlist_node hash_node;
+		struct hlist_nulls_node hash_node;
 		struct {
 			void *padding;
 			union {
@@ -337,7 +338,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 		goto free_htab;
 
 	for (i = 0; i < htab->n_buckets; i++) {
-		INIT_HLIST_HEAD(&htab->buckets[i].head);
+		INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
 		raw_spin_lock_init(&htab->buckets[i].lock);
 	}
 
@@ -377,28 +378,52 @@ static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
 	return &htab->buckets[hash & (htab->n_buckets - 1)];
 }
 
-static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
+static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash)
 {
 	return &__select_bucket(htab, hash)->head;
 }
 
-static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
+/* this lookup function can only be called with bucket lock taken */
+static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
 					 void *key, u32 key_size)
 {
+	struct hlist_nulls_node *n;
 	struct htab_elem *l;
 
-	hlist_for_each_entry_rcu(l, head, hash_node)
+	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
 		if (l->hash == hash && !memcmp(&l->key, key, key_size))
 			return l;
 
 	return NULL;
 }
 
+/* can be called without bucket lock. it will repeat the loop in
+ * the unlikely event when elements moved from one bucket into another
+ * while link list is being walked
+ */
+static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
+					       u32 hash, void *key,
+					       u32 key_size, u32 n_buckets)
+{
+	struct hlist_nulls_node *n;
+	struct htab_elem *l;
+
+again:
+	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
+		if (l->hash == hash && !memcmp(&l->key, key, key_size))
+			return l;
+
+	if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1))))
+		goto again;
+
+	return NULL;
+}
+
 /* Called from syscall or from eBPF program */
 static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-	struct hlist_head *head;
+	struct hlist_nulls_head *head;
 	struct htab_elem *l;
 	u32 hash, key_size;
 
@@ -411,7 +436,7 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
 
 	head = select_bucket(htab, hash);
 
-	l = lookup_elem_raw(head, hash, key, key_size);
+	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
 
 	return l;
 }
@@ -444,8 +469,9 @@ static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
 static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
 {
 	struct bpf_htab *htab = (struct bpf_htab *)arg;
-	struct htab_elem *l, *tgt_l;
-	struct hlist_head *head;
+	struct htab_elem *l = NULL, *tgt_l;
+	struct hlist_nulls_head *head;
+	struct hlist_nulls_node *n;
 	unsigned long flags;
 	struct bucket *b;
 
@@ -455,9 +481,9 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
 
 	raw_spin_lock_irqsave(&b->lock, flags);
 
-	hlist_for_each_entry_rcu(l, head, hash_node)
+	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
 		if (l == tgt_l) {
-			hlist_del_rcu(&l->hash_node);
+			hlist_nulls_del_rcu(&l->hash_node);
 			break;
 		}
 
@@ -470,7 +496,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
 static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-	struct hlist_head *head;
+	struct hlist_nulls_head *head;
 	struct htab_elem *l, *next_l;
 	u32 hash, key_size;
 	int i;
@@ -484,7 +510,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	head = select_bucket(htab, hash);
 
 	/* lookup the key */
-	l = lookup_elem_raw(head, hash, key, key_size);
+	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
 
 	if (!l) {
 		i = 0;
@@ -492,7 +518,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	}
 
 	/* key was found, get next key in the same bucket */
-	next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)),
+	next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)),
 				  struct htab_elem, hash_node);
 
 	if (next_l) {
@@ -511,7 +537,7 @@ find_first_elem:
 		head = select_bucket(htab, i);
 
 		/* pick first element in the bucket */
-		next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
+		next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)),
 					  struct htab_elem, hash_node);
 		if (next_l) {
 			/* if it's not empty, just return it */
@@ -676,7 +702,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
 	struct htab_elem *l_new = NULL, *l_old;
-	struct hlist_head *head;
+	struct hlist_nulls_head *head;
 	unsigned long flags;
 	struct bucket *b;
 	u32 key_size, hash;
@@ -715,9 +741,9 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	/* add new element to the head of the list, so that
 	 * concurrent search will find it before old elem
 	 */
-	hlist_add_head_rcu(&l_new->hash_node, head);
+	hlist_nulls_add_head_rcu(&l_new->hash_node, head);
 	if (l_old) {
-		hlist_del_rcu(&l_old->hash_node);
+		hlist_nulls_del_rcu(&l_old->hash_node);
 		free_htab_elem(htab, l_old);
 	}
 	ret = 0;
@@ -731,7 +757,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
 	struct htab_elem *l_new, *l_old = NULL;
-	struct hlist_head *head;
+	struct hlist_nulls_head *head;
 	unsigned long flags;
 	struct bucket *b;
 	u32 key_size, hash;
@@ -772,10 +798,10 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
 	/* add new element to the head of the list, so that
 	 * concurrent search will find it before old elem
 	 */
-	hlist_add_head_rcu(&l_new->hash_node, head);
+	hlist_nulls_add_head_rcu(&l_new->hash_node, head);
 	if (l_old) {
 		bpf_lru_node_set_ref(&l_new->lru_node);
-		hlist_del_rcu(&l_old->hash_node);
+		hlist_nulls_del_rcu(&l_old->hash_node);
 	}
 	ret = 0;
 
@@ -796,7 +822,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
 	struct htab_elem *l_new = NULL, *l_old;
-	struct hlist_head *head;
+	struct hlist_nulls_head *head;
 	unsigned long flags;
 	struct bucket *b;
 	u32 key_size, hash;
@@ -835,7 +861,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 			ret = PTR_ERR(l_new);
 			goto err;
 		}
-		hlist_add_head_rcu(&l_new->hash_node, head);
+		hlist_nulls_add_head_rcu(&l_new->hash_node, head);
 	}
 	ret = 0;
 err:
@@ -849,7 +875,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
 	struct htab_elem *l_new = NULL, *l_old;
-	struct hlist_head *head;
+	struct hlist_nulls_head *head;
 	unsigned long flags;
 	struct bucket *b;
 	u32 key_size, hash;
@@ -897,7 +923,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 	} else {
 		pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size),
 				value, onallcpus);
-		hlist_add_head_rcu(&l_new->hash_node, head);
+		hlist_nulls_add_head_rcu(&l_new->hash_node, head);
 		l_new = NULL;
 	}
 	ret = 0;
@@ -925,7 +951,7 @@ static int htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 static int htab_map_delete_elem(struct bpf_map *map, void *key)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-	struct hlist_head *head;
+	struct hlist_nulls_head *head;
 	struct bucket *b;
 	struct htab_elem *l;
 	unsigned long flags;
@@ -945,7 +971,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
 	l = lookup_elem_raw(head, hash, key, key_size);
 
 	if (l) {
-		hlist_del_rcu(&l->hash_node);
+		hlist_nulls_del_rcu(&l->hash_node);
 		free_htab_elem(htab, l);
 		ret = 0;
 	}
@@ -957,7 +983,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
 static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-	struct hlist_head *head;
+	struct hlist_nulls_head *head;
 	struct bucket *b;
 	struct htab_elem *l;
 	unsigned long flags;
@@ -977,7 +1003,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
 	l = lookup_elem_raw(head, hash, key, key_size);
 
 	if (l) {
-		hlist_del_rcu(&l->hash_node);
+		hlist_nulls_del_rcu(&l->hash_node);
 		ret = 0;
 	}
 
@@ -992,12 +1018,12 @@ static void delete_all_elements(struct bpf_htab *htab)
 	int i;
 
 	for (i = 0; i < htab->n_buckets; i++) {
-		struct hlist_head *head = select_bucket(htab, i);
-		struct hlist_node *n;
+		struct hlist_nulls_head *head = select_bucket(htab, i);
+		struct hlist_nulls_node *n;
 		struct htab_elem *l;
 
-		hlist_for_each_entry_safe(l, n, head, hash_node) {
-			hlist_del_rcu(&l->hash_node);
+		hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
+			hlist_nulls_del_rcu(&l->hash_node);
 			if (l->state != HTAB_EXTRA_ELEM_USED)
 				htab_elem_free(htab, l);
 		}