aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Dumazet <dada1@cosmosbay.com>2008-10-29 14:19:58 -0400
committerDavid S. Miller <davem@davemloft.net>2008-10-29 14:19:58 -0400
commit96631ed16c514cf8b28fab991a076985ce378c26 (patch)
tree60023b60c6eaf2acdc8fcad258585e4425ebaf91
parentf52b5054ec108aaa9e903850d6b62af8ae3fe6ae (diff)
udp: introduce sk_for_each_rcu_safenext()
Corey Minyard found a race added in commit 271b72c7fa82c2c7a795bc16896149933110672d (udp: RCU handling for Unicast packets.):

"If the socket is moved from one list to another list in-between the time the hash is calculated and the next field is accessed, and the socket has moved to the end of the new list, the traversal will not complete properly on the list it should have, since the socket will be on the end of the new list and there's not a way to tell it's on a new list and restart the list traversal. I think that this can be solved by pre-fetching the 'next' field (with proper barriers) before checking the hash."

This patch corrects this problem, introducing a new sk_for_each_rcu_safenext() macro.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/rculist.h17
-rw-r--r--include/net/sock.h4
-rw-r--r--net/ipv4/udp.c4
-rw-r--r--net/ipv6/udp.c4
4 files changed, 23 insertions, 6 deletions
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index e649bd3f2c97..3ba2998b22ba 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -383,5 +383,22 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
383 ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ 383 ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \
384 pos = rcu_dereference(pos->next)) 384 pos = rcu_dereference(pos->next))
385 385
386/**
387 * hlist_for_each_entry_rcu_safenext - iterate over rcu list of given type
388 * @tpos: the type * to use as a loop cursor.
389 * @pos: the &struct hlist_node to use as a loop cursor.
390 * @head: the head for your list.
391 * @member: the name of the hlist_node within the struct.
392 * @next: the &struct hlist_node to use as a next cursor
393 *
394 * Special version of hlist_for_each_entry_rcu that makes sure
395 * each next pointer is fetched before each iteration.
396 */
397#define hlist_for_each_entry_rcu_safenext(tpos, pos, head, member, next) \
398 for (pos = rcu_dereference((head)->first); \
399 pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) && \
400 ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \
401 pos = rcu_dereference(next))
402
386#endif /* __KERNEL__ */ 403#endif /* __KERNEL__ */
387#endif 404#endif
diff --git a/include/net/sock.h b/include/net/sock.h
index 0bea25db5471..a4f6d3fc0470 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -419,8 +419,8 @@ static __inline__ void sk_add_bind_node(struct sock *sk,
419 419
420#define sk_for_each(__sk, node, list) \ 420#define sk_for_each(__sk, node, list) \
421 hlist_for_each_entry(__sk, node, list, sk_node) 421 hlist_for_each_entry(__sk, node, list, sk_node)
422#define sk_for_each_rcu(__sk, node, list) \ 422#define sk_for_each_rcu_safenext(__sk, node, list, next) \
423 hlist_for_each_entry_rcu(__sk, node, list, sk_node) 423 hlist_for_each_entry_rcu_safenext(__sk, node, list, sk_node, next)
424#define sk_for_each_from(__sk, node) \ 424#define sk_for_each_from(__sk, node) \
425 if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ 425 if (__sk && ({ node = &(__sk)->sk_node; 1; })) \
426 hlist_for_each_entry_from(__sk, node, sk_node) 426 hlist_for_each_entry_from(__sk, node, sk_node)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ced820318f94..c3ecec8a9e1c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -256,7 +256,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
256 int dif, struct udp_table *udptable) 256 int dif, struct udp_table *udptable)
257{ 257{
258 struct sock *sk, *result; 258 struct sock *sk, *result;
259 struct hlist_node *node; 259 struct hlist_node *node, *next;
260 unsigned short hnum = ntohs(dport); 260 unsigned short hnum = ntohs(dport);
261 unsigned int hash = udp_hashfn(net, hnum); 261 unsigned int hash = udp_hashfn(net, hnum);
262 struct udp_hslot *hslot = &udptable->hash[hash]; 262 struct udp_hslot *hslot = &udptable->hash[hash];
@@ -266,7 +266,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
266begin: 266begin:
267 result = NULL; 267 result = NULL;
268 badness = -1; 268 badness = -1;
269 sk_for_each_rcu(sk, node, &hslot->head) { 269 sk_for_each_rcu_safenext(sk, node, &hslot->head, next) {
270 /* 270 /*
271 * lockless reader, and SLAB_DESTROY_BY_RCU items: 271 * lockless reader, and SLAB_DESTROY_BY_RCU items:
272 * We must check this item was not moved to another chain 272 * We must check this item was not moved to another chain
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1d9790e43dfc..32d914db6c4f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -98,7 +98,7 @@ static struct sock *__udp6_lib_lookup(struct net *net,
98 int dif, struct udp_table *udptable) 98 int dif, struct udp_table *udptable)
99{ 99{
100 struct sock *sk, *result; 100 struct sock *sk, *result;
101 struct hlist_node *node; 101 struct hlist_node *node, *next;
102 unsigned short hnum = ntohs(dport); 102 unsigned short hnum = ntohs(dport);
103 unsigned int hash = udp_hashfn(net, hnum); 103 unsigned int hash = udp_hashfn(net, hnum);
104 struct udp_hslot *hslot = &udptable->hash[hash]; 104 struct udp_hslot *hslot = &udptable->hash[hash];
@@ -108,7 +108,7 @@ static struct sock *__udp6_lib_lookup(struct net *net,
108begin: 108begin:
109 result = NULL; 109 result = NULL;
110 badness = -1; 110 badness = -1;
111 sk_for_each_rcu(sk, node, &hslot->head) { 111 sk_for_each_rcu_safenext(sk, node, &hslot->head, next) {
112 /* 112 /*
113 * lockless reader, and SLAB_DESTROY_BY_RCU items: 113 * lockless reader, and SLAB_DESTROY_BY_RCU items:
114 * We must check this item was not moved to another chain 114 * We must check this item was not moved to another chain