diff options
author | Eric Dumazet <dada1@cosmosbay.com> | 2008-11-16 22:39:21 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2008-11-16 22:39:21 -0500 |
commit | 88ab1932eac721c6e7336708558fa5ed02c85c80 (patch) | |
tree | c8788a1e3de08100bca341fa4180adfe5d02880f /net | |
parent | bbaffaca4810de1a25e32ecaf836eeaacc7a3d11 (diff) |
udp: Use hlist_nulls in UDP RCU code
This is a straightforward patch, using hlist_nulls infrastructure.
RCUification of UDP was already done two weeks ago.
Using hlist_nulls permits us to avoid some memory barriers, both
at lookup time and delete time.
The patch is large because it adds new macros to include/net/sock.h.
These macros will be used by TCP & DCCP in the next patch.
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv4/udp.c | 47 | ||||
-rw-r--r-- | net/ipv6/udp.c | 26 |
2 files changed, 36 insertions, 37 deletions
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 54badc9a019d..fea2d873dd41 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -127,9 +127,9 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, | |||
127 | const struct sock *sk2)) | 127 | const struct sock *sk2)) |
128 | { | 128 | { |
129 | struct sock *sk2; | 129 | struct sock *sk2; |
130 | struct hlist_node *node; | 130 | struct hlist_nulls_node *node; |
131 | 131 | ||
132 | sk_for_each(sk2, node, &hslot->head) | 132 | sk_nulls_for_each(sk2, node, &hslot->head) |
133 | if (net_eq(sock_net(sk2), net) && | 133 | if (net_eq(sock_net(sk2), net) && |
134 | sk2 != sk && | 134 | sk2 != sk && |
135 | sk2->sk_hash == num && | 135 | sk2->sk_hash == num && |
@@ -189,12 +189,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, | |||
189 | inet_sk(sk)->num = snum; | 189 | inet_sk(sk)->num = snum; |
190 | sk->sk_hash = snum; | 190 | sk->sk_hash = snum; |
191 | if (sk_unhashed(sk)) { | 191 | if (sk_unhashed(sk)) { |
192 | /* | 192 | sk_nulls_add_node_rcu(sk, &hslot->head); |
193 | * We need that previous write to sk->sk_hash committed | ||
194 | * before write to sk->next done in following add_node() variant | ||
195 | */ | ||
196 | smp_wmb(); | ||
197 | sk_add_node_rcu(sk, &hslot->head); | ||
198 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); | 193 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
199 | } | 194 | } |
200 | error = 0; | 195 | error = 0; |
@@ -261,7 +256,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, | |||
261 | int dif, struct udp_table *udptable) | 256 | int dif, struct udp_table *udptable) |
262 | { | 257 | { |
263 | struct sock *sk, *result; | 258 | struct sock *sk, *result; |
264 | struct hlist_node *node, *next; | 259 | struct hlist_nulls_node *node; |
265 | unsigned short hnum = ntohs(dport); | 260 | unsigned short hnum = ntohs(dport); |
266 | unsigned int hash = udp_hashfn(net, hnum); | 261 | unsigned int hash = udp_hashfn(net, hnum); |
267 | struct udp_hslot *hslot = &udptable->hash[hash]; | 262 | struct udp_hslot *hslot = &udptable->hash[hash]; |
@@ -271,13 +266,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, | |||
271 | begin: | 266 | begin: |
272 | result = NULL; | 267 | result = NULL; |
273 | badness = -1; | 268 | badness = -1; |
274 | sk_for_each_rcu_safenext(sk, node, &hslot->head, next) { | 269 | sk_nulls_for_each_rcu(sk, node, &hslot->head) { |
275 | /* | ||
276 | * lockless reader, and SLAB_DESTROY_BY_RCU items: | ||
277 | * We must check this item was not moved to another chain | ||
278 | */ | ||
279 | if (udp_hashfn(net, sk->sk_hash) != hash) | ||
280 | goto begin; | ||
281 | score = compute_score(sk, net, saddr, hnum, sport, | 270 | score = compute_score(sk, net, saddr, hnum, sport, |
282 | daddr, dport, dif); | 271 | daddr, dport, dif); |
283 | if (score > badness) { | 272 | if (score > badness) { |
@@ -285,6 +274,14 @@ begin: | |||
285 | badness = score; | 274 | badness = score; |
286 | } | 275 | } |
287 | } | 276 | } |
277 | /* | ||
278 | * if the nulls value we got at the end of this lookup is | ||
279 | * not the expected one, we must restart lookup. | ||
280 | * We probably met an item that was moved to another chain. | ||
281 | */ | ||
282 | if (get_nulls_value(node) != hash) | ||
283 | goto begin; | ||
284 | |||
288 | if (result) { | 285 | if (result) { |
289 | if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) | 286 | if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) |
290 | result = NULL; | 287 | result = NULL; |
@@ -325,11 +322,11 @@ static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, | |||
325 | __be16 rmt_port, __be32 rmt_addr, | 322 | __be16 rmt_port, __be32 rmt_addr, |
326 | int dif) | 323 | int dif) |
327 | { | 324 | { |
328 | struct hlist_node *node; | 325 | struct hlist_nulls_node *node; |
329 | struct sock *s = sk; | 326 | struct sock *s = sk; |
330 | unsigned short hnum = ntohs(loc_port); | 327 | unsigned short hnum = ntohs(loc_port); |
331 | 328 | ||
332 | sk_for_each_from(s, node) { | 329 | sk_nulls_for_each_from(s, node) { |
333 | struct inet_sock *inet = inet_sk(s); | 330 | struct inet_sock *inet = inet_sk(s); |
334 | 331 | ||
335 | if (!net_eq(sock_net(s), net) || | 332 | if (!net_eq(sock_net(s), net) || |
@@ -977,7 +974,7 @@ void udp_lib_unhash(struct sock *sk) | |||
977 | struct udp_hslot *hslot = &udptable->hash[hash]; | 974 | struct udp_hslot *hslot = &udptable->hash[hash]; |
978 | 975 | ||
979 | spin_lock_bh(&hslot->lock); | 976 | spin_lock_bh(&hslot->lock); |
980 | if (sk_del_node_init_rcu(sk)) { | 977 | if (sk_nulls_del_node_init_rcu(sk)) { |
981 | inet_sk(sk)->num = 0; | 978 | inet_sk(sk)->num = 0; |
982 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); | 979 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); |
983 | } | 980 | } |
@@ -1130,7 +1127,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, | |||
1130 | int dif; | 1127 | int dif; |
1131 | 1128 | ||
1132 | spin_lock(&hslot->lock); | 1129 | spin_lock(&hslot->lock); |
1133 | sk = sk_head(&hslot->head); | 1130 | sk = sk_nulls_head(&hslot->head); |
1134 | dif = skb->dev->ifindex; | 1131 | dif = skb->dev->ifindex; |
1135 | sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); | 1132 | sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); |
1136 | if (sk) { | 1133 | if (sk) { |
@@ -1139,7 +1136,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, | |||
1139 | do { | 1136 | do { |
1140 | struct sk_buff *skb1 = skb; | 1137 | struct sk_buff *skb1 = skb; |
1141 | 1138 | ||
1142 | sknext = udp_v4_mcast_next(net, sk_next(sk), uh->dest, | 1139 | sknext = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest, |
1143 | daddr, uh->source, saddr, | 1140 | daddr, uh->source, saddr, |
1144 | dif); | 1141 | dif); |
1145 | if (sknext) | 1142 | if (sknext) |
@@ -1560,10 +1557,10 @@ static struct sock *udp_get_first(struct seq_file *seq, int start) | |||
1560 | struct net *net = seq_file_net(seq); | 1557 | struct net *net = seq_file_net(seq); |
1561 | 1558 | ||
1562 | for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { | 1559 | for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { |
1563 | struct hlist_node *node; | 1560 | struct hlist_nulls_node *node; |
1564 | struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; | 1561 | struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; |
1565 | spin_lock_bh(&hslot->lock); | 1562 | spin_lock_bh(&hslot->lock); |
1566 | sk_for_each(sk, node, &hslot->head) { | 1563 | sk_nulls_for_each(sk, node, &hslot->head) { |
1567 | if (!net_eq(sock_net(sk), net)) | 1564 | if (!net_eq(sock_net(sk), net)) |
1568 | continue; | 1565 | continue; |
1569 | if (sk->sk_family == state->family) | 1566 | if (sk->sk_family == state->family) |
@@ -1582,7 +1579,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) | |||
1582 | struct net *net = seq_file_net(seq); | 1579 | struct net *net = seq_file_net(seq); |
1583 | 1580 | ||
1584 | do { | 1581 | do { |
1585 | sk = sk_next(sk); | 1582 | sk = sk_nulls_next(sk); |
1586 | } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); | 1583 | } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); |
1587 | 1584 | ||
1588 | if (!sk) { | 1585 | if (!sk) { |
@@ -1753,7 +1750,7 @@ void __init udp_table_init(struct udp_table *table) | |||
1753 | int i; | 1750 | int i; |
1754 | 1751 | ||
1755 | for (i = 0; i < UDP_HTABLE_SIZE; i++) { | 1752 | for (i = 0; i < UDP_HTABLE_SIZE; i++) { |
1756 | INIT_HLIST_HEAD(&table->hash[i].head); | 1753 | INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); |
1757 | spin_lock_init(&table->hash[i].lock); | 1754 | spin_lock_init(&table->hash[i].lock); |
1758 | } | 1755 | } |
1759 | } | 1756 | } |
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 8dafa36b1ba5..fd2d9ad4a8a3 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c | |||
@@ -98,7 +98,7 @@ static struct sock *__udp6_lib_lookup(struct net *net, | |||
98 | int dif, struct udp_table *udptable) | 98 | int dif, struct udp_table *udptable) |
99 | { | 99 | { |
100 | struct sock *sk, *result; | 100 | struct sock *sk, *result; |
101 | struct hlist_node *node, *next; | 101 | struct hlist_nulls_node *node; |
102 | unsigned short hnum = ntohs(dport); | 102 | unsigned short hnum = ntohs(dport); |
103 | unsigned int hash = udp_hashfn(net, hnum); | 103 | unsigned int hash = udp_hashfn(net, hnum); |
104 | struct udp_hslot *hslot = &udptable->hash[hash]; | 104 | struct udp_hslot *hslot = &udptable->hash[hash]; |
@@ -108,19 +108,21 @@ static struct sock *__udp6_lib_lookup(struct net *net, | |||
108 | begin: | 108 | begin: |
109 | result = NULL; | 109 | result = NULL; |
110 | badness = -1; | 110 | badness = -1; |
111 | sk_for_each_rcu_safenext(sk, node, &hslot->head, next) { | 111 | sk_nulls_for_each_rcu(sk, node, &hslot->head) { |
112 | /* | ||
113 | * lockless reader, and SLAB_DESTROY_BY_RCU items: | ||
114 | * We must check this item was not moved to another chain | ||
115 | */ | ||
116 | if (udp_hashfn(net, sk->sk_hash) != hash) | ||
117 | goto begin; | ||
118 | score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); | 112 | score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); |
119 | if (score > badness) { | 113 | if (score > badness) { |
120 | result = sk; | 114 | result = sk; |
121 | badness = score; | 115 | badness = score; |
122 | } | 116 | } |
123 | } | 117 | } |
118 | /* | ||
119 | * if the nulls value we got at the end of this lookup is | ||
120 | * not the expected one, we must restart lookup. | ||
121 | * We probably met an item that was moved to another chain. | ||
122 | */ | ||
123 | if (get_nulls_value(node) != hash) | ||
124 | goto begin; | ||
125 | |||
124 | if (result) { | 126 | if (result) { |
125 | if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) | 127 | if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) |
126 | result = NULL; | 128 | result = NULL; |
@@ -374,11 +376,11 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, | |||
374 | __be16 rmt_port, struct in6_addr *rmt_addr, | 376 | __be16 rmt_port, struct in6_addr *rmt_addr, |
375 | int dif) | 377 | int dif) |
376 | { | 378 | { |
377 | struct hlist_node *node; | 379 | struct hlist_nulls_node *node; |
378 | struct sock *s = sk; | 380 | struct sock *s = sk; |
379 | unsigned short num = ntohs(loc_port); | 381 | unsigned short num = ntohs(loc_port); |
380 | 382 | ||
381 | sk_for_each_from(s, node) { | 383 | sk_nulls_for_each_from(s, node) { |
382 | struct inet_sock *inet = inet_sk(s); | 384 | struct inet_sock *inet = inet_sk(s); |
383 | 385 | ||
384 | if (!net_eq(sock_net(s), net)) | 386 | if (!net_eq(sock_net(s), net)) |
@@ -423,7 +425,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, | |||
423 | int dif; | 425 | int dif; |
424 | 426 | ||
425 | spin_lock(&hslot->lock); | 427 | spin_lock(&hslot->lock); |
426 | sk = sk_head(&hslot->head); | 428 | sk = sk_nulls_head(&hslot->head); |
427 | dif = inet6_iif(skb); | 429 | dif = inet6_iif(skb); |
428 | sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); | 430 | sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); |
429 | if (!sk) { | 431 | if (!sk) { |
@@ -432,7 +434,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, | |||
432 | } | 434 | } |
433 | 435 | ||
434 | sk2 = sk; | 436 | sk2 = sk; |
435 | while ((sk2 = udp_v6_mcast_next(net, sk_next(sk2), uh->dest, daddr, | 437 | while ((sk2 = udp_v6_mcast_next(net, sk_nulls_next(sk2), uh->dest, daddr, |
436 | uh->source, saddr, dif))) { | 438 | uh->source, saddr, dif))) { |
437 | struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); | 439 | struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); |
438 | if (buff) { | 440 | if (buff) { |