aboutsummaryrefslogtreecommitdiffstats
path: root/include/net
diff options
context:
space:
mode:
authorEric Dumazet <dada1@cosmosbay.com>2008-10-29 04:41:45 -0400
committerDavid S. Miller <davem@davemloft.net>2008-10-29 04:41:45 -0400
commit645ca708f936b2fbeb79e52d7823e3eb2c0905f8 (patch)
treeb384696994ee3cb04759a7bfffc29a48e4bf40f6 /include/net
parentb189db5d299c6824780af5590564ff608adb3dea (diff)
udp: introduce struct udp_table and multiple spinlocks
UDP sockets are hashed in a 128 slots hash table. This hash table is protected by *one* rwlock. This rwlock is readlocked each time an incoming UDP message is handled. This rwlock is writelocked each time a socket must be inserted in hash table (bind time), or deleted from this table (close time) This is not scalable on SMP machines : 1) Even in read mode, lock() and unlock() are atomic operations and must dirty a contended cache line, shared by all cpus. 2) A writer might be starved if many readers are 'in flight'. This can happen on a machine with some NIC receiving many UDP messages. User process can be delayed a long time at socket creation/dismantle time. This patch prepares RCU migration, by introducing 'struct udp_table and struct udp_hslot', and using one spinlock per chain, to reduce contention on central rwlock. Introducing one spinlock per chain reduces latencies, for port randomization on heavily loaded UDP servers. This also speedup bindings to specific ports. udp_lib_unhash() was uninlined, becoming to big. Some cleanups were done to ease review of following patch (RCUification of UDP Unicast lookups) Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/net')
-rw-r--r--include/net/sock.h2
-rw-r--r--include/net/udp.h25
-rw-r--r--include/net/udplite.h2
3 files changed, 14 insertions, 15 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index d6b750a25078..d200dfbe1ef6 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -599,7 +599,7 @@ struct proto {
599 599
600 union { 600 union {
601 struct inet_hashinfo *hashinfo; 601 struct inet_hashinfo *hashinfo;
602 struct hlist_head *udp_hash; 602 struct udp_table *udp_table;
603 struct raw_hashinfo *raw_hash; 603 struct raw_hashinfo *raw_hash;
604 } h; 604 } h;
605 605
diff --git a/include/net/udp.h b/include/net/udp.h
index 1e205095ea68..df2bfe545374 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -50,8 +50,15 @@ struct udp_skb_cb {
50}; 50};
51#define UDP_SKB_CB(__skb) ((struct udp_skb_cb *)((__skb)->cb)) 51#define UDP_SKB_CB(__skb) ((struct udp_skb_cb *)((__skb)->cb))
52 52
53extern struct hlist_head udp_hash[UDP_HTABLE_SIZE]; 53struct udp_hslot {
54extern rwlock_t udp_hash_lock; 54 struct hlist_head head;
55 spinlock_t lock;
56} __attribute__((aligned(2 * sizeof(long))));
57struct udp_table {
58 struct udp_hslot hash[UDP_HTABLE_SIZE];
59};
60extern struct udp_table udp_table;
61extern void udp_table_init(struct udp_table *);
55 62
56 63
57/* Note: this must match 'valbool' in sock_setsockopt */ 64/* Note: this must match 'valbool' in sock_setsockopt */
@@ -110,15 +117,7 @@ static inline void udp_lib_hash(struct sock *sk)
110 BUG(); 117 BUG();
111} 118}
112 119
113static inline void udp_lib_unhash(struct sock *sk) 120extern void udp_lib_unhash(struct sock *sk);
114{
115 write_lock_bh(&udp_hash_lock);
116 if (sk_del_node_init(sk)) {
117 inet_sk(sk)->num = 0;
118 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
119 }
120 write_unlock_bh(&udp_hash_lock);
121}
122 121
123static inline void udp_lib_close(struct sock *sk, long timeout) 122static inline void udp_lib_close(struct sock *sk, long timeout)
124{ 123{
@@ -187,7 +186,7 @@ extern struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
187struct udp_seq_afinfo { 186struct udp_seq_afinfo {
188 char *name; 187 char *name;
189 sa_family_t family; 188 sa_family_t family;
190 struct hlist_head *hashtable; 189 struct udp_table *udp_table;
191 struct file_operations seq_fops; 190 struct file_operations seq_fops;
192 struct seq_operations seq_ops; 191 struct seq_operations seq_ops;
193}; 192};
@@ -196,7 +195,7 @@ struct udp_iter_state {
196 struct seq_net_private p; 195 struct seq_net_private p;
197 sa_family_t family; 196 sa_family_t family;
198 int bucket; 197 int bucket;
199 struct hlist_head *hashtable; 198 struct udp_table *udp_table;
200}; 199};
201 200
202#ifdef CONFIG_PROC_FS 201#ifdef CONFIG_PROC_FS
diff --git a/include/net/udplite.h b/include/net/udplite.h
index b76b2e377af4..afdffe607b24 100644
--- a/include/net/udplite.h
+++ b/include/net/udplite.h
@@ -11,7 +11,7 @@
11#define UDPLITE_RECV_CSCOV 11 /* receiver partial coverage (threshold ) */ 11#define UDPLITE_RECV_CSCOV 11 /* receiver partial coverage (threshold ) */
12 12
13extern struct proto udplite_prot; 13extern struct proto udplite_prot;
14extern struct hlist_head udplite_hash[UDP_HTABLE_SIZE]; 14extern struct udp_table udplite_table;
15 15
16/* 16/*
17 * Checksum computation is all in software, hence simpler getfrag. 17 * Checksum computation is all in software, hence simpler getfrag.