aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
author: Eric Dumazet <dada1@cosmosbay.com> 2008-11-16 22:40:17 -0500
committer: David S. Miller <davem@davemloft.net> 2008-11-16 22:40:17 -0500
commit: 3ab5aee7fe840b5b1b35a8d1ac11c3de5281e611 (patch)
tree: 468296b7be813643248d4ca67497d6ddb6934fc6 /net/ipv6
parent: 88ab1932eac721c6e7336708558fa5ed02c85c80 (diff)
net: Convert TCP & DCCP hash tables to use RCU / hlist_nulls
RCU was added to UDP lookups, using a fast infrastructure:
- The sockets' kmem_cache uses SLAB_DESTROY_BY_RCU, so we don't pay the price of call_rcu() at freeing time.
- hlist_nulls permits the use of few memory barriers.

This patch uses the same infrastructure for TCP/DCCP established and timewait sockets.

Thanks to SLAB_DESTROY_BY_RCU, there is no slowdown for applications using short-lived TCP connections. A follow-up patch converting rwlocks to spinlocks will speed up this case even further.

__inet_lookup_established() is pretty fast now that we don't have to dirty a contended cache line (read_lock/read_unlock).

Only the established and timewait hash tables are converted to RCU (the bind table and listen table still use traditional locking).

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6')
-rw-r--r--  net/ipv6/inet6_hashtables.c  70
-rw-r--r--  net/ipv6/tcp_ipv6.c  1
2 files changed, 48 insertions, 23 deletions
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 1646a5658255..c1b4d401fd95 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -25,24 +25,28 @@
25void __inet6_hash(struct sock *sk) 25void __inet6_hash(struct sock *sk)
26{ 26{
27 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 27 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
28 struct hlist_head *list;
29 rwlock_t *lock; 28 rwlock_t *lock;
30 29
31 WARN_ON(!sk_unhashed(sk)); 30 WARN_ON(!sk_unhashed(sk));
32 31
33 if (sk->sk_state == TCP_LISTEN) { 32 if (sk->sk_state == TCP_LISTEN) {
33 struct hlist_head *list;
34
34 list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; 35 list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
35 lock = &hashinfo->lhash_lock; 36 lock = &hashinfo->lhash_lock;
36 inet_listen_wlock(hashinfo); 37 inet_listen_wlock(hashinfo);
38 __sk_add_node(sk, list);
37 } else { 39 } else {
38 unsigned int hash; 40 unsigned int hash;
41 struct hlist_nulls_head *list;
42
39 sk->sk_hash = hash = inet6_sk_ehashfn(sk); 43 sk->sk_hash = hash = inet6_sk_ehashfn(sk);
40 list = &inet_ehash_bucket(hashinfo, hash)->chain; 44 list = &inet_ehash_bucket(hashinfo, hash)->chain;
41 lock = inet_ehash_lockp(hashinfo, hash); 45 lock = inet_ehash_lockp(hashinfo, hash);
42 write_lock(lock); 46 write_lock(lock);
47 __sk_nulls_add_node_rcu(sk, list);
43 } 48 }
44 49
45 __sk_add_node(sk, list);
46 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 50 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
47 write_unlock(lock); 51 write_unlock(lock);
48} 52}
@@ -63,33 +67,53 @@ struct sock *__inet6_lookup_established(struct net *net,
63 const int dif) 67 const int dif)
64{ 68{
65 struct sock *sk; 69 struct sock *sk;
66 const struct hlist_node *node; 70 const struct hlist_nulls_node *node;
67 const __portpair ports = INET_COMBINED_PORTS(sport, hnum); 71 const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
68 /* Optimize here for direct hit, only listening connections can 72 /* Optimize here for direct hit, only listening connections can
69 * have wildcards anyways. 73 * have wildcards anyways.
70 */ 74 */
71 unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport); 75 unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport);
72 struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); 76 unsigned int slot = hash & (hashinfo->ehash_size - 1);
73 rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); 77 struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
74 78
75 prefetch(head->chain.first); 79
76 read_lock(lock); 80 rcu_read_lock();
77 sk_for_each(sk, node, &head->chain) { 81begin:
82 sk_nulls_for_each_rcu(sk, node, &head->chain) {
78 /* For IPV6 do the cheaper port and family tests first. */ 83 /* For IPV6 do the cheaper port and family tests first. */
79 if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) 84 if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
80 goto hit; /* You sunk my battleship! */ 85 if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
86 goto begintw;
87 if (!INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
88 sock_put(sk);
89 goto begin;
90 }
91 goto out;
92 }
81 } 93 }
94 if (get_nulls_value(node) != slot)
95 goto begin;
96
97begintw:
82 /* Must check for a TIME_WAIT'er before going to listener hash. */ 98 /* Must check for a TIME_WAIT'er before going to listener hash. */
83 sk_for_each(sk, node, &head->twchain) { 99 sk_nulls_for_each_rcu(sk, node, &head->twchain) {
84 if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) 100 if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
85 goto hit; 101 if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) {
102 sk = NULL;
103 goto out;
104 }
105 if (!INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
106 sock_put(sk);
107 goto begintw;
108 }
109 goto out;
110 }
86 } 111 }
87 read_unlock(lock); 112 if (get_nulls_value(node) != slot)
88 return NULL; 113 goto begintw;
89 114 sk = NULL;
90hit: 115out:
91 sock_hold(sk); 116 rcu_read_unlock();
92 read_unlock(lock);
93 return sk; 117 return sk;
94} 118}
95EXPORT_SYMBOL(__inet6_lookup_established); 119EXPORT_SYMBOL(__inet6_lookup_established);
@@ -172,14 +196,14 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
172 struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); 196 struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
173 rwlock_t *lock = inet_ehash_lockp(hinfo, hash); 197 rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
174 struct sock *sk2; 198 struct sock *sk2;
175 const struct hlist_node *node; 199 const struct hlist_nulls_node *node;
176 struct inet_timewait_sock *tw; 200 struct inet_timewait_sock *tw;
177 201
178 prefetch(head->chain.first); 202 prefetch(head->chain.first);
179 write_lock(lock); 203 write_lock(lock);
180 204
181 /* Check TIME-WAIT sockets first. */ 205 /* Check TIME-WAIT sockets first. */
182 sk_for_each(sk2, node, &head->twchain) { 206 sk_nulls_for_each(sk2, node, &head->twchain) {
183 tw = inet_twsk(sk2); 207 tw = inet_twsk(sk2);
184 208
185 if (INET6_TW_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) { 209 if (INET6_TW_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) {
@@ -192,7 +216,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
192 tw = NULL; 216 tw = NULL;
193 217
194 /* And established part... */ 218 /* And established part... */
195 sk_for_each(sk2, node, &head->chain) { 219 sk_nulls_for_each(sk2, node, &head->chain) {
196 if (INET6_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) 220 if (INET6_MATCH(sk2, net, hash, saddr, daddr, ports, dif))
197 goto not_unique; 221 goto not_unique;
198 } 222 }
@@ -203,7 +227,7 @@ unique:
203 inet->num = lport; 227 inet->num = lport;
204 inet->sport = htons(lport); 228 inet->sport = htons(lport);
205 WARN_ON(!sk_unhashed(sk)); 229 WARN_ON(!sk_unhashed(sk));
206 __sk_add_node(sk, &head->chain); 230 __sk_nulls_add_node_rcu(sk, &head->chain);
207 sk->sk_hash = hash; 231 sk->sk_hash = hash;
208 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 232 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
209 write_unlock(lock); 233 write_unlock(lock);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 984276463a8d..b35787056313 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2043,6 +2043,7 @@ struct proto tcpv6_prot = {
2043 .sysctl_rmem = sysctl_tcp_rmem, 2043 .sysctl_rmem = sysctl_tcp_rmem,
2044 .max_header = MAX_TCP_HEADER, 2044 .max_header = MAX_TCP_HEADER,
2045 .obj_size = sizeof(struct tcp6_sock), 2045 .obj_size = sizeof(struct tcp6_sock),
2046 .slab_flags = SLAB_DESTROY_BY_RCU,
2046 .twsk_prot = &tcp6_timewait_sock_ops, 2047 .twsk_prot = &tcp6_timewait_sock_ops,
2047 .rsk_prot = &tcp6_request_sock_ops, 2048 .rsk_prot = &tcp6_request_sock_ops,
2048 .h.hashinfo = &tcp_hashinfo, 2049 .h.hashinfo = &tcp_hashinfo,