aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
author: Eric Dumazet <dada1@cosmosbay.com> 2008-10-29 04:41:45 -0400
committer: David S. Miller <davem@davemloft.net> 2008-10-29 04:41:45 -0400
commit: 645ca708f936b2fbeb79e52d7823e3eb2c0905f8 (patch)
tree: b384696994ee3cb04759a7bfffc29a48e4bf40f6 /net/ipv6
parent: b189db5d299c6824780af5590564ff608adb3dea (diff)
udp: introduce struct udp_table and multiple spinlocks
UDP sockets are hashed in a 128-slot hash table. This hash table is protected by *one* rwlock. This rwlock is read-locked each time an incoming UDP message is handled. This rwlock is write-locked each time a socket must be inserted into the hash table (bind time), or deleted from this table (close time). This is not scalable on SMP machines: 1) Even in read mode, lock() and unlock() are atomic operations and must dirty a contended cache line, shared by all CPUs. 2) A writer might be starved if many readers are 'in flight'. This can happen on a machine with some NIC receiving many UDP messages. A user process can be delayed a long time at socket creation/dismantle time. This patch prepares the RCU migration by introducing 'struct udp_table' and 'struct udp_hslot', and using one spinlock per chain, to reduce contention on the central rwlock. Introducing one spinlock per chain reduces latencies for port randomization on heavily loaded UDP servers. This also speeds up bindings to specific ports. udp_lib_unhash() was uninlined, as it was becoming too big. Some cleanups were done to ease review of the following patch (RCUification of UDP Unicast lookups). Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6')
-rw-r--r--  net/ipv6/udp.c       112
-rw-r--r--  net/ipv6/udp_impl.h    4
-rw-r--r--  net/ipv6/udplite.c     8
3 files changed, 68 insertions, 56 deletions
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e51da8c092fa..ccee7244ca0f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -54,62 +54,73 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum)
54 return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal); 54 return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal);
55} 55}
56 56
57static inline int compute_score(struct sock *sk, struct net *net,
58 unsigned short hnum,
59 struct in6_addr *saddr, __be16 sport,
60 struct in6_addr *daddr, __be16 dport,
61 int dif)
62{
63 int score = -1;
64
65 if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
66 sk->sk_family == PF_INET6) {
67 struct ipv6_pinfo *np = inet6_sk(sk);
68 struct inet_sock *inet = inet_sk(sk);
69
70 score = 0;
71 if (inet->dport) {
72 if (inet->dport != sport)
73 return -1;
74 score++;
75 }
76 if (!ipv6_addr_any(&np->rcv_saddr)) {
77 if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
78 return -1;
79 score++;
80 }
81 if (!ipv6_addr_any(&np->daddr)) {
82 if (!ipv6_addr_equal(&np->daddr, saddr))
83 return -1;
84 score++;
85 }
86 if (sk->sk_bound_dev_if) {
87 if (sk->sk_bound_dev_if != dif)
88 return -1;
89 score++;
90 }
91 }
92 return score;
93}
94
57static struct sock *__udp6_lib_lookup(struct net *net, 95static struct sock *__udp6_lib_lookup(struct net *net,
58 struct in6_addr *saddr, __be16 sport, 96 struct in6_addr *saddr, __be16 sport,
59 struct in6_addr *daddr, __be16 dport, 97 struct in6_addr *daddr, __be16 dport,
60 int dif, struct hlist_head udptable[]) 98 int dif, struct udp_table *udptable)
61{ 99{
62 struct sock *sk, *result = NULL; 100 struct sock *sk, *result = NULL;
63 struct hlist_node *node; 101 struct hlist_node *node;
64 unsigned short hnum = ntohs(dport); 102 unsigned short hnum = ntohs(dport);
65 int badness = -1; 103 unsigned int hash = udp_hashfn(net, hnum);
66 104 struct udp_hslot *hslot = &udptable->hash[hash];
67 read_lock(&udp_hash_lock); 105 int score, badness = -1;
68 sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) { 106
69 struct inet_sock *inet = inet_sk(sk); 107 spin_lock(&hslot->lock);
70 108 sk_for_each(sk, node, &hslot->head) {
71 if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && 109 score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif);
72 sk->sk_family == PF_INET6) { 110 if (score > badness) {
73 struct ipv6_pinfo *np = inet6_sk(sk); 111 result = sk;
74 int score = 0; 112 badness = score;
75 if (inet->dport) {
76 if (inet->dport != sport)
77 continue;
78 score++;
79 }
80 if (!ipv6_addr_any(&np->rcv_saddr)) {
81 if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
82 continue;
83 score++;
84 }
85 if (!ipv6_addr_any(&np->daddr)) {
86 if (!ipv6_addr_equal(&np->daddr, saddr))
87 continue;
88 score++;
89 }
90 if (sk->sk_bound_dev_if) {
91 if (sk->sk_bound_dev_if != dif)
92 continue;
93 score++;
94 }
95 if (score == 4) {
96 result = sk;
97 break;
98 } else if (score > badness) {
99 result = sk;
100 badness = score;
101 }
102 } 113 }
103 } 114 }
104 if (result) 115 if (result)
105 sock_hold(result); 116 sock_hold(result);
106 read_unlock(&udp_hash_lock); 117 spin_unlock(&hslot->lock);
107 return result; 118 return result;
108} 119}
109 120
110static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, 121static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
111 __be16 sport, __be16 dport, 122 __be16 sport, __be16 dport,
112 struct hlist_head udptable[]) 123 struct udp_table *udptable)
113{ 124{
114 struct sock *sk; 125 struct sock *sk;
115 struct ipv6hdr *iph = ipv6_hdr(skb); 126 struct ipv6hdr *iph = ipv6_hdr(skb);
@@ -239,7 +250,7 @@ csum_copy_err:
239 250
240void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 251void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
241 int type, int code, int offset, __be32 info, 252 int type, int code, int offset, __be32 info,
242 struct hlist_head udptable[] ) 253 struct udp_table *udptable)
243{ 254{
244 struct ipv6_pinfo *np; 255 struct ipv6_pinfo *np;
245 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; 256 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
@@ -275,7 +286,7 @@ static __inline__ void udpv6_err(struct sk_buff *skb,
275 struct inet6_skb_parm *opt, int type, 286 struct inet6_skb_parm *opt, int type,
276 int code, int offset, __be32 info ) 287 int code, int offset, __be32 info )
277{ 288{
278 __udp6_lib_err(skb, opt, type, code, offset, info, udp_hash); 289 __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
279} 290}
280 291
281int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) 292int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
@@ -374,14 +385,15 @@ static struct sock *udp_v6_mcast_next(struct sock *sk,
374 */ 385 */
375static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, 386static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
376 struct in6_addr *saddr, struct in6_addr *daddr, 387 struct in6_addr *saddr, struct in6_addr *daddr,
377 struct hlist_head udptable[]) 388 struct udp_table *udptable)
378{ 389{
379 struct sock *sk, *sk2; 390 struct sock *sk, *sk2;
380 const struct udphdr *uh = udp_hdr(skb); 391 const struct udphdr *uh = udp_hdr(skb);
392 struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))];
381 int dif; 393 int dif;
382 394
383 read_lock(&udp_hash_lock); 395 spin_lock(&hslot->lock);
384 sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]); 396 sk = sk_head(&hslot->head);
385 dif = inet6_iif(skb); 397 dif = inet6_iif(skb);
386 sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); 398 sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
387 if (!sk) { 399 if (!sk) {
@@ -409,7 +421,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
409 sk_add_backlog(sk, skb); 421 sk_add_backlog(sk, skb);
410 bh_unlock_sock(sk); 422 bh_unlock_sock(sk);
411out: 423out:
412 read_unlock(&udp_hash_lock); 424 spin_unlock(&hslot->lock);
413 return 0; 425 return 0;
414} 426}
415 427
@@ -447,7 +459,7 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh,
447 return 0; 459 return 0;
448} 460}
449 461
450int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], 462int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
451 int proto) 463 int proto)
452{ 464{
453 struct sock *sk; 465 struct sock *sk;
@@ -544,7 +556,7 @@ discard:
544 556
545static __inline__ int udpv6_rcv(struct sk_buff *skb) 557static __inline__ int udpv6_rcv(struct sk_buff *skb)
546{ 558{
547 return __udp6_lib_rcv(skb, udp_hash, IPPROTO_UDP); 559 return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP);
548} 560}
549 561
550/* 562/*
@@ -1008,7 +1020,7 @@ int udp6_seq_show(struct seq_file *seq, void *v)
1008static struct udp_seq_afinfo udp6_seq_afinfo = { 1020static struct udp_seq_afinfo udp6_seq_afinfo = {
1009 .name = "udp6", 1021 .name = "udp6",
1010 .family = AF_INET6, 1022 .family = AF_INET6,
1011 .hashtable = udp_hash, 1023 .udp_table = &udp_table,
1012 .seq_fops = { 1024 .seq_fops = {
1013 .owner = THIS_MODULE, 1025 .owner = THIS_MODULE,
1014 }, 1026 },
@@ -1050,7 +1062,7 @@ struct proto udpv6_prot = {
1050 .sysctl_wmem = &sysctl_udp_wmem_min, 1062 .sysctl_wmem = &sysctl_udp_wmem_min,
1051 .sysctl_rmem = &sysctl_udp_rmem_min, 1063 .sysctl_rmem = &sysctl_udp_rmem_min,
1052 .obj_size = sizeof(struct udp6_sock), 1064 .obj_size = sizeof(struct udp6_sock),
1053 .h.udp_hash = udp_hash, 1065 .h.udp_table = &udp_table,
1054#ifdef CONFIG_COMPAT 1066#ifdef CONFIG_COMPAT
1055 .compat_setsockopt = compat_udpv6_setsockopt, 1067 .compat_setsockopt = compat_udpv6_setsockopt,
1056 .compat_getsockopt = compat_udpv6_getsockopt, 1068 .compat_getsockopt = compat_udpv6_getsockopt,
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 92dd7da766d8..23779208c334 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -7,9 +7,9 @@
7#include <net/inet_common.h> 7#include <net/inet_common.h>
8#include <net/transp_v6.h> 8#include <net/transp_v6.h>
9 9
10extern int __udp6_lib_rcv(struct sk_buff *, struct hlist_head [], int ); 10extern int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int );
11extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, 11extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *,
12 int , int , int , __be32 , struct hlist_head []); 12 int , int , int , __be32 , struct udp_table *);
13 13
14extern int udp_v6_get_port(struct sock *sk, unsigned short snum); 14extern int udp_v6_get_port(struct sock *sk, unsigned short snum);
15 15
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 3cd1a1ac3d6c..f1e892a99e05 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -15,14 +15,14 @@
15 15
16static int udplitev6_rcv(struct sk_buff *skb) 16static int udplitev6_rcv(struct sk_buff *skb)
17{ 17{
18 return __udp6_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); 18 return __udp6_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE);
19} 19}
20 20
21static void udplitev6_err(struct sk_buff *skb, 21static void udplitev6_err(struct sk_buff *skb,
22 struct inet6_skb_parm *opt, 22 struct inet6_skb_parm *opt,
23 int type, int code, int offset, __be32 info) 23 int type, int code, int offset, __be32 info)
24{ 24{
25 __udp6_lib_err(skb, opt, type, code, offset, info, udplite_hash); 25 __udp6_lib_err(skb, opt, type, code, offset, info, &udplite_table);
26} 26}
27 27
28static struct inet6_protocol udplitev6_protocol = { 28static struct inet6_protocol udplitev6_protocol = {
@@ -49,7 +49,7 @@ struct proto udplitev6_prot = {
49 .unhash = udp_lib_unhash, 49 .unhash = udp_lib_unhash,
50 .get_port = udp_v6_get_port, 50 .get_port = udp_v6_get_port,
51 .obj_size = sizeof(struct udp6_sock), 51 .obj_size = sizeof(struct udp6_sock),
52 .h.udp_hash = udplite_hash, 52 .h.udp_table = &udplite_table,
53#ifdef CONFIG_COMPAT 53#ifdef CONFIG_COMPAT
54 .compat_setsockopt = compat_udpv6_setsockopt, 54 .compat_setsockopt = compat_udpv6_setsockopt,
55 .compat_getsockopt = compat_udpv6_getsockopt, 55 .compat_getsockopt = compat_udpv6_getsockopt,
@@ -95,7 +95,7 @@ void udplitev6_exit(void)
95static struct udp_seq_afinfo udplite6_seq_afinfo = { 95static struct udp_seq_afinfo udplite6_seq_afinfo = {
96 .name = "udplite6", 96 .name = "udplite6",
97 .family = AF_INET6, 97 .family = AF_INET6,
98 .hashtable = udplite_hash, 98 .udp_table = &udplite_table,
99 .seq_fops = { 99 .seq_fops = {
100 .owner = THIS_MODULE, 100 .owner = THIS_MODULE,
101 }, 101 },