author	Eric Dumazet <dada1@cosmosbay.com>	2007-11-07 05:40:20 -0500
committer	David S. Miller <davem@sunset.davemloft.net>	2007-11-07 07:15:11 -0500
commit	230140cffa7feae90ad50bf259db1fa07674f3a7 (patch)
tree	815472add31606423a508a17806b7884f0ab3e2e /net/ipv4/tcp_ipv4.c
parent	efac52762b1e3fe3035d29e82d8ee1aebc45e4a7 (diff)
[INET]: Remove per bucket rwlock in tcp/dccp ehash table.
As was done two years ago on the IP route cache table (commit 22c047ccbc68fa8f3fa57f0e8f906479a062c426), we can avoid using one lock per hash bucket for the huge TCP/DCCP hash tables. On a typical x86_64 platform, this saves about 2MB or 4MB of RAM, for little performance difference. (We hit a different cache line for the rwlock, but the bucket cache line then has a better sharing factor among CPUs, since we dirty it less often.) For netstat or ss commands that want a full scan of the hash table, we perform fewer memory accesses.

Using a 'small' table of hashed rwlocks should be more than enough to provide correct SMP concurrency between different buckets, without using too much memory. The sizing of this table depends on num_possible_cpus() and various CONFIG settings.

This patch provides some locking abstraction that may ease future work using a different model for the TCP/DCCP table.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
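The locking abstraction described above boils down to lock striping: rather than embedding an rwlock in every bucket, keep a small power-of-two array of rwlocks and map each bucket onto one of them by masking its index. A minimal sketch of the helper used in the diff below, following the naming from this patch (inet_ehash_lockp(), ehash_locks, ehash_locks_mask) but simplified for illustration, not the verbatim kernel source:

	/*
	 * Illustrative sketch: many ehash buckets share one rwlock,
	 * selected by masking the bucket index into a small
	 * power-of-two lock array.
	 */
	struct inet_hashinfo {
		struct inet_ehash_bucket *ehash;       /* large bucket table */
		rwlock_t                 *ehash_locks; /* small lock array   */
		unsigned int              ehash_size;
		unsigned int              ehash_locks_mask; /* array size - 1 */
	};

	static inline rwlock_t *inet_ehash_lockp(struct inet_hashinfo *hashinfo,
						 unsigned int hash)
	{
		return &hashinfo->ehash_locks[hash & hashinfo->ehash_locks_mask];
	}

Because the number of stripes is scaled from num_possible_cpus(), readers and writers touching different buckets rarely collide on the same lock, while the lock array occupies a few cache lines instead of one rwlock per bucket.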
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r--	net/ipv4/tcp_ipv4.c	11
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e9127cdced20..e566f3c67677 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2049,8 +2049,9 @@ static void *established_get_first(struct seq_file *seq)
 		struct sock *sk;
 		struct hlist_node *node;
 		struct inet_timewait_sock *tw;
+		rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
 
-		read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
+		read_lock_bh(lock);
 		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
 			if (sk->sk_family != st->family) {
 				continue;
@@ -2067,7 +2068,7 @@ static void *established_get_first(struct seq_file *seq)
 			rc = tw;
 			goto out;
 		}
-		read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
+		read_unlock_bh(lock);
 		st->state = TCP_SEQ_STATE_ESTABLISHED;
 	}
 out:
@@ -2094,11 +2095,11 @@ get_tw:
 			cur = tw;
 			goto out;
 		}
-		read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
+		read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
 		st->state = TCP_SEQ_STATE_ESTABLISHED;
 
 		if (++st->bucket < tcp_hashinfo.ehash_size) {
-			read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
+			read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
 			sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
 		} else {
 			cur = NULL;
@@ -2206,7 +2207,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
 	case TCP_SEQ_STATE_TIME_WAIT:
 	case TCP_SEQ_STATE_ESTABLISHED:
 		if (v)
-			read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
+			read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
 		break;
 	}
 }