diff options
author | Eric Dumazet <dada1@cosmosbay.com> | 2008-11-16 22:40:17 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2008-11-16 22:40:17 -0500 |
commit | 3ab5aee7fe840b5b1b35a8d1ac11c3de5281e611 (patch) | |
tree | 468296b7be813643248d4ca67497d6ddb6934fc6 /net/ipv4/tcp_ipv4.c | |
parent | 88ab1932eac721c6e7336708558fa5ed02c85c80 (diff) |
net: Convert TCP & DCCP hash tables to use RCU / hlist_nulls
RCU was added to UDP lookups, using a fast infrastructure:
- socket kmem_caches use SLAB_DESTROY_BY_RCU and don't pay the
price of call_rcu() at freeing time.
- hlist_nulls allows using few memory barriers.
This patch uses the same infrastructure for TCP/DCCP established
and timewait sockets.
Thanks to SLAB_DESTROY_BY_RCU, there is no slowdown for applications
using short-lived TCP connections. A follow-up patch converting
rwlocks to spinlocks will even speed up this case.
__inet_lookup_established() is pretty fast now that we don't have to
dirty a contended cache line (read_lock/read_unlock).
Only the established and timewait hash tables are converted to RCU
(the bind table and listen table still use traditional locking).
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 25 |
1 files changed, 13 insertions, 12 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d49233f409b..b2e3ab2287b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -1857,16 +1857,16 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock); | |||
1857 | #ifdef CONFIG_PROC_FS | 1857 | #ifdef CONFIG_PROC_FS |
1858 | /* Proc filesystem TCP sock list dumping. */ | 1858 | /* Proc filesystem TCP sock list dumping. */ |
1859 | 1859 | ||
1860 | static inline struct inet_timewait_sock *tw_head(struct hlist_head *head) | 1860 | static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head) |
1861 | { | 1861 | { |
1862 | return hlist_empty(head) ? NULL : | 1862 | return hlist_nulls_empty(head) ? NULL : |
1863 | list_entry(head->first, struct inet_timewait_sock, tw_node); | 1863 | list_entry(head->first, struct inet_timewait_sock, tw_node); |
1864 | } | 1864 | } |
1865 | 1865 | ||
1866 | static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) | 1866 | static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) |
1867 | { | 1867 | { |
1868 | return tw->tw_node.next ? | 1868 | return !is_a_nulls(tw->tw_node.next) ? |
1869 | hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; | 1869 | hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; |
1870 | } | 1870 | } |
1871 | 1871 | ||
1872 | static void *listening_get_next(struct seq_file *seq, void *cur) | 1872 | static void *listening_get_next(struct seq_file *seq, void *cur) |
@@ -1954,8 +1954,8 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos) | |||
1954 | 1954 | ||
1955 | static inline int empty_bucket(struct tcp_iter_state *st) | 1955 | static inline int empty_bucket(struct tcp_iter_state *st) |
1956 | { | 1956 | { |
1957 | return hlist_empty(&tcp_hashinfo.ehash[st->bucket].chain) && | 1957 | return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) && |
1958 | hlist_empty(&tcp_hashinfo.ehash[st->bucket].twchain); | 1958 | hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); |
1959 | } | 1959 | } |
1960 | 1960 | ||
1961 | static void *established_get_first(struct seq_file *seq) | 1961 | static void *established_get_first(struct seq_file *seq) |
@@ -1966,7 +1966,7 @@ static void *established_get_first(struct seq_file *seq) | |||
1966 | 1966 | ||
1967 | for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { | 1967 | for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { |
1968 | struct sock *sk; | 1968 | struct sock *sk; |
1969 | struct hlist_node *node; | 1969 | struct hlist_nulls_node *node; |
1970 | struct inet_timewait_sock *tw; | 1970 | struct inet_timewait_sock *tw; |
1971 | rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); | 1971 | rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); |
1972 | 1972 | ||
@@ -1975,7 +1975,7 @@ static void *established_get_first(struct seq_file *seq) | |||
1975 | continue; | 1975 | continue; |
1976 | 1976 | ||
1977 | read_lock_bh(lock); | 1977 | read_lock_bh(lock); |
1978 | sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { | 1978 | sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { |
1979 | if (sk->sk_family != st->family || | 1979 | if (sk->sk_family != st->family || |
1980 | !net_eq(sock_net(sk), net)) { | 1980 | !net_eq(sock_net(sk), net)) { |
1981 | continue; | 1981 | continue; |
@@ -2004,7 +2004,7 @@ static void *established_get_next(struct seq_file *seq, void *cur) | |||
2004 | { | 2004 | { |
2005 | struct sock *sk = cur; | 2005 | struct sock *sk = cur; |
2006 | struct inet_timewait_sock *tw; | 2006 | struct inet_timewait_sock *tw; |
2007 | struct hlist_node *node; | 2007 | struct hlist_nulls_node *node; |
2008 | struct tcp_iter_state *st = seq->private; | 2008 | struct tcp_iter_state *st = seq->private; |
2009 | struct net *net = seq_file_net(seq); | 2009 | struct net *net = seq_file_net(seq); |
2010 | 2010 | ||
@@ -2032,11 +2032,11 @@ get_tw: | |||
2032 | return NULL; | 2032 | return NULL; |
2033 | 2033 | ||
2034 | read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); | 2034 | read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); |
2035 | sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); | 2035 | sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain); |
2036 | } else | 2036 | } else |
2037 | sk = sk_next(sk); | 2037 | sk = sk_nulls_next(sk); |
2038 | 2038 | ||
2039 | sk_for_each_from(sk, node) { | 2039 | sk_nulls_for_each_from(sk, node) { |
2040 | if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) | 2040 | if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) |
2041 | goto found; | 2041 | goto found; |
2042 | } | 2042 | } |
@@ -2375,6 +2375,7 @@ struct proto tcp_prot = { | |||
2375 | .sysctl_rmem = sysctl_tcp_rmem, | 2375 | .sysctl_rmem = sysctl_tcp_rmem, |
2376 | .max_header = MAX_TCP_HEADER, | 2376 | .max_header = MAX_TCP_HEADER, |
2377 | .obj_size = sizeof(struct tcp_sock), | 2377 | .obj_size = sizeof(struct tcp_sock), |
2378 | .slab_flags = SLAB_DESTROY_BY_RCU, | ||
2378 | .twsk_prot = &tcp_timewait_sock_ops, | 2379 | .twsk_prot = &tcp_timewait_sock_ops, |
2379 | .rsk_prot = &tcp_request_sock_ops, | 2380 | .rsk_prot = &tcp_request_sock_ops, |
2380 | .h.hashinfo = &tcp_hashinfo, | 2381 | .h.hashinfo = &tcp_hashinfo, |