diff options
| author | Eric Dumazet <dada1@cosmosbay.com> | 2008-11-16 22:37:55 -0500 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2008-11-16 22:37:55 -0500 |
| commit | bbaffaca4810de1a25e32ecaf836eeaacc7a3d11 (patch) | |
| tree | f0b215577427ab12ab4ae7b3f45cd6ad3fe16499 | |
| parent | e8b2dfe9b4501ed0047459b2756ba26e5a940a69 (diff) | |
rcu: Introduce hlist_nulls variant of hlist
hlist uses NULL value to finish a chain.
The hlist_nulls variant uses the low-order bit, set to 1, to signal an end-of-list marker.
This allows many different end markers to be stored, so that some RCU lockless
algorithms (used in the TCP/UDP stack, for example) can save some memory barriers in
fast paths.
Two new files are added :
include/linux/list_nulls.h
- mimics hlist part of include/linux/list.h, derived to hlist_nulls variant
include/linux/rculist_nulls.h
- mimics hlist part of include/linux/rculist.h, derived to hlist_nulls variant
Only four helpers are declared for the moment :
hlist_nulls_del_init_rcu(), hlist_nulls_del_rcu(),
hlist_nulls_add_head_rcu() and hlist_nulls_for_each_entry_rcu()
prefetch() calls were removed, since the end of a list is no longer a NULL value.
Such prefetches could trigger useless (and possibly dangerous) memory transactions.
Example of use (extracted from __udp4_lib_lookup())
struct sock *sk, *result;
struct hlist_nulls_node *node;
unsigned short hnum = ntohs(dport);
unsigned int hash = udp_hashfn(net, hnum);
struct udp_hslot *hslot = &udptable->hash[hash];
int score, badness;
rcu_read_lock();
begin:
result = NULL;
badness = -1;
sk_nulls_for_each_rcu(sk, node, &hslot->head) {
score = compute_score(sk, net, saddr, hnum, sport,
daddr, dport, dif);
if (score > badness) {
result = sk;
badness = score;
}
}
/*
* if the nulls value we got at the end of this lookup is
* not the expected one, we must restart lookup.
* We probably met an item that was moved to another chain.
*/
if (get_nulls_value(node) != hash)
goto begin;
if (result) {
if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
result = NULL;
else if (unlikely(compute_score(result, net, saddr, hnum, sport,
daddr, dport, dif) < badness)) {
sock_put(result);
goto begin;
}
}
rcu_read_unlock();
return result;
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: David S. Miller <davem@davemloft.net>
| -rw-r--r-- | include/linux/list_nulls.h | 94 | ||||
| -rw-r--r-- | include/linux/rculist_nulls.h | 110 |
2 files changed, 204 insertions, 0 deletions
diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h new file mode 100644 index 000000000000..93150ecf3ea4 --- /dev/null +++ b/include/linux/list_nulls.h | |||
| @@ -0,0 +1,94 @@ | |||
| 1 | #ifndef _LINUX_LIST_NULLS_H | ||
| 2 | #define _LINUX_LIST_NULLS_H | ||
| 3 | |||
| 4 | /* | ||
| 5 | * Special version of lists, where end of list is not a NULL pointer, | ||
| 6 | * but a 'nulls' marker, which can have many different values. | ||
| 7 | * (up to 2^31 different values guaranteed on all platforms) | ||
| 8 | * | ||
| 9 | * In the standard hlist, termination of a list is the NULL pointer. | ||
| 10 | * In this special 'nulls' variant, we use the fact that objects stored in | ||
| 11 | * a list are aligned on a word (4 or 8 bytes alignment). | ||
| 12 | * We therefore use the least significant bit of 'ptr' : | ||
| 13 | * Set to 1 : This is a 'nulls' end-of-list marker (ptr >> 1) | ||
| 14 | * Set to 0 : This is a pointer to some object (ptr) | ||
| 15 | */ | ||
| 16 | |||
| 17 | struct hlist_nulls_head { /* head of a 'nulls' hash chain */ | ||
| 18 | struct hlist_nulls_node *first; /* first node, or a 'nulls' marker (low bit set) when the chain is empty */ | ||
| 19 | }; | ||
| 20 | |||
| 21 | struct hlist_nulls_node { /* link member of an object stored on a 'nulls' chain */ | ||
| 22 | struct hlist_nulls_node *next, **pprev; /* next: following node or the end-of-chain 'nulls' marker; pprev: address of the pointer that points at this node */ | ||
| 23 | }; | ||
| 24 | #define INIT_HLIST_NULLS_HEAD(ptr, nulls) \ | ||
| 25 | ((ptr)->first = (struct hlist_nulls_node *) (1UL | (((long)nulls) << 1))) /* make the chain empty: ->first becomes the marker (nulls << 1) | 1. NOTE(review): 'nulls' is not parenthesized inside the cast and is shifted as a signed long - pass a simple, non-negative expression */ | ||
| 26 | |||
| 27 | #define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member) /* thin wrapper around container_of(): the 'type' object whose 'member' field is at 'ptr' */ | ||
| 28 | /** | ||
| 29 | * is_a_nulls - Test if a ptr is a 'nulls' end-of-list marker | ||
| 30 | * @ptr: ptr to be tested | ||
| 31 | * Returns 1 when the low-order bit of @ptr is set (a marker), 0 otherwise. | ||
| 32 | */ | ||
| 33 | static inline int is_a_nulls(const struct hlist_nulls_node *ptr) | ||
| 34 | { | ||
| 35 | return ((unsigned long)ptr & 1); | ||
| 36 | } | ||
| 37 | |||
| 38 | /** | ||
| 39 | * get_nulls_value - Get the 'nulls' value of the end of chain | ||
| 40 | * @ptr: end of chain | ||
| 41 | * Returns @ptr shifted right by one bit, i.e. the marker without its tag bit. | ||
| 42 | * Should be called only if is_a_nulls(ptr); | ||
| 43 | */ | ||
| 44 | static inline unsigned long get_nulls_value(const struct hlist_nulls_node *ptr) | ||
| 45 | { | ||
| 46 | return ((unsigned long)ptr) >> 1; | ||
| 47 | } | ||
| 48 | |||
| 49 | static inline int hlist_nulls_unhashed(const struct hlist_nulls_node *h) | ||
| 50 | { | ||
| 51 | return !h->pprev; /* a node with a NULL pprev is treated as not on any chain */ | ||
| 52 | } | ||
| 53 | |||
| 54 | static inline int hlist_nulls_empty(const struct hlist_nulls_head *h) | ||
| 55 | { | ||
| 56 | return is_a_nulls(h->first); /* empty when ->first is already the end-of-chain marker */ | ||
| 57 | } | ||
| 58 | |||
| 59 | static inline void __hlist_nulls_del(struct hlist_nulls_node *n) /* unlink n from its chain; n->next and n->pprev themselves are not modified */ | ||
| 60 | { | ||
| 61 | struct hlist_nulls_node *next = n->next; | ||
| 62 | struct hlist_nulls_node **pprev = n->pprev; | ||
| 63 | *pprev = next; /* whatever pointed at n now points past it */ | ||
| 64 | if (!is_a_nulls(next)) /* a 'nulls' marker is not a node: it has no pprev to update */ | ||
| 65 | next->pprev = pprev; | ||
| 66 | } | ||
| 67 | |||
| 68 | /** | ||
| 69 | * hlist_nulls_for_each_entry - iterate over list of given type | ||
| 70 | * @tpos: the type * to use as a loop cursor. | ||
| 71 | * @pos: the &struct hlist_nulls_node to use as a loop cursor. | ||
| 72 | * @head: the head for your list. | ||
| 73 | * @member: the name of the hlist_nulls_node within the struct. | ||
| 74 | * The loop stops at the first 'nulls' marker, which is left in @pos. | ||
| 75 | */ | ||
| 76 | #define hlist_nulls_for_each_entry(tpos, pos, head, member) \ | ||
| 77 | for (pos = (head)->first; \ | ||
| 78 | (!is_a_nulls(pos)) && \ | ||
| 79 | ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \ | ||
| 80 | pos = pos->next) | ||
| 81 | |||
| 82 | /** | ||
| 83 | * hlist_nulls_for_each_entry_from - iterate over a hlist_nulls continuing from current point | ||
| 84 | * @tpos: the type * to use as a loop cursor. | ||
| 85 | * @pos: the &struct hlist_nulls_node to use as a loop cursor; iteration starts at its current value. | ||
| 86 | * @member: the name of the hlist_nulls_node within the struct. | ||
| 87 | * The loop stops at the first 'nulls' marker, which is left in @pos. | ||
| 88 | */ | ||
| 89 | #define hlist_nulls_for_each_entry_from(tpos, pos, member) \ | ||
| 90 | for (; (!is_a_nulls(pos)) && \ | ||
| 91 | ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \ | ||
| 92 | pos = pos->next) | ||
| 93 | |||
| 94 | #endif | ||
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h new file mode 100644 index 000000000000..f9ddd03961a8 --- /dev/null +++ b/include/linux/rculist_nulls.h | |||
| @@ -0,0 +1,110 @@ | |||
| 1 | #ifndef _LINUX_RCULIST_NULLS_H | ||
| 2 | #define _LINUX_RCULIST_NULLS_H | ||
| 3 | |||
| 4 | #ifdef __KERNEL__ | ||
| 5 | |||
| 6 | /* | ||
| 7 | * RCU-protected list version | ||
| 8 | */ | ||
| 9 | #include <linux/list_nulls.h> | ||
| 10 | #include <linux/rcupdate.h> | ||
| 11 | |||
| 12 | /** | ||
| 13 | * hlist_nulls_del_init_rcu - deletes entry from hash list with re-initialization | ||
| 14 | * @n: the element to delete from the hash list. | ||
| 15 | * Does nothing if @n is already unhashed. | ||
| 16 | * Note: hlist_nulls_unhashed() on the node returns true after this. It is | ||
| 17 | * useful for RCU based read lockfree traversal if the writer side | ||
| 18 | * must know if the list entry is still hashed or already unhashed. | ||
| 19 | * | ||
| 20 | * In particular, it means that we can not poison the forward pointers | ||
| 21 | * that may still be used for walking the hash list and we can only | ||
| 22 | * zero the pprev pointer so hlist_nulls_unhashed() will return true after | ||
| 23 | * this. | ||
| 24 | * | ||
| 25 | * The caller must take whatever precautions are necessary (such as | ||
| 26 | * holding appropriate locks) to avoid racing with another | ||
| 27 | * list-mutation primitive, such as hlist_nulls_add_head_rcu() or | ||
| 28 | * hlist_nulls_del_rcu(), running on this same list. However, it is | ||
| 29 | * perfectly legal to run concurrently with the _rcu list-traversal | ||
| 30 | * primitives, such as hlist_nulls_for_each_entry_rcu(). | ||
| 31 | */ | ||
| 32 | static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) | ||
| 33 | { | ||
| 34 | if (!hlist_nulls_unhashed(n)) { | ||
| 35 | __hlist_nulls_del(n); | ||
| 36 | n->pprev = NULL; | ||
| 37 | } | ||
| 38 | } | ||
| 39 | |||
| 40 | /** | ||
| 41 | * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization | ||
| 42 | * @n: the element to delete from the hash list. | ||
| 43 | * | ||
| 44 | * Note: hlist_nulls_unhashed() on entry does not return true after this, | ||
| 45 | * the entry is in an undefined state. It is useful for RCU based | ||
| 46 | * lockfree traversal. | ||
| 47 | * | ||
| 48 | * In particular, it means that we can not poison the forward | ||
| 49 | * pointers that may still be used for walking the hash list. | ||
| 50 | * | ||
| 51 | * The caller must take whatever precautions are necessary | ||
| 52 | * (such as holding appropriate locks) to avoid racing | ||
| 53 | * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() | ||
| 54 | * or hlist_nulls_del_rcu(), running on this same list. | ||
| 55 | * However, it is perfectly legal to run concurrently with | ||
| 56 | * the _rcu list-traversal primitives, such as | ||
| 57 | * hlist_nulls_for_each_entry_rcu(). | ||
| 58 | */ | ||
| 59 | static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n) | ||
| 60 | { | ||
| 61 | __hlist_nulls_del(n); | ||
| 62 | n->pprev = LIST_POISON2; /* only pprev is poisoned; n->next is left as it was */ | ||
| 63 | } | ||
| 64 | |||
| 65 | /** | ||
| 66 | * hlist_nulls_add_head_rcu - add an element at the head of a hlist_nulls | ||
| 67 | * @n: the element to add to the hash list. | ||
| 68 | * @h: the list to add to. | ||
| 69 | * | ||
| 70 | * Description: | ||
| 71 | * Adds the specified element to the specified hlist_nulls, | ||
| 72 | * while permitting racing traversals. | ||
| 73 | * | ||
| 74 | * The caller must take whatever precautions are necessary | ||
| 75 | * (such as holding appropriate locks) to avoid racing | ||
| 76 | * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() | ||
| 77 | * or hlist_nulls_del_rcu(), running on this same list. | ||
| 78 | * However, it is perfectly legal to run concurrently with | ||
| 79 | * the _rcu list-traversal primitives, such as | ||
| 80 | * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency | ||
| 81 | * problems on Alpha CPUs. Regardless of the type of CPU, the | ||
| 82 | * list-traversal primitive must be guarded by rcu_read_lock(). | ||
| 83 | */ | ||
| 84 | static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, | ||
| 85 | struct hlist_nulls_head *h) | ||
| 86 | { | ||
| 87 | struct hlist_nulls_node *first = h->first; | ||
| 88 | |||
| 89 | n->next = first; | ||
| 90 | n->pprev = &h->first; | ||
| 91 | rcu_assign_pointer(h->first, n); /* n is linked into the head only after its own next/pprev are set */ | ||
| 92 | if (!is_a_nulls(first)) /* the old first node, if there was one, now hangs off n */ | ||
| 93 | first->pprev = &n->next; | ||
| 94 | } | ||
| 95 | /** | ||
| 96 | * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type | ||
| 97 | * @tpos: the type * to use as a loop cursor. | ||
| 98 | * @pos: the &struct hlist_nulls_node to use as a loop cursor. | ||
| 99 | * @head: the head for your list. | ||
| 100 | * @member: the name of the hlist_nulls_node within the struct. | ||
| 101 | * The loop stops at the first 'nulls' marker, which is left in @pos. | ||
| 102 | */ | ||
| 103 | #define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ | ||
| 104 | for (pos = rcu_dereference((head)->first); \ | ||
| 105 | (!is_a_nulls(pos)) && \ | ||
| 106 | ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ | ||
| 107 | pos = rcu_dereference(pos->next)) | ||
| 108 | |||
| 109 | #endif | ||
| 110 | #endif | ||
