author     Andi Kleen <ak@suse.de>                 2008-08-28 04:08:02 -0400
committer  David S. Miller <davem@davemloft.net>   2008-08-28 04:08:02 -0400
commit     6eac56040787c3ff604fe7d48bbbb7897cd1387c
tree       1d3271c33d8d65bfea4aaf5d770f73ccd6da5825 /net/ipv4/tcp_ipv4.c
parent     4d40555250320520c5398569457962b3984fc75e
tcp: Skip empty hash buckets faster in /proc/net/tcp
On most systems, most of the TCP established/time-wait hash buckets are empty.
When walking the hash table for /proc/net/tcp, their read locks would
always be acquired just to find out they're empty. This patch changes the code
to check first whether a bucket has any entries before taking the lock; that
check is much cheaper than taking the lock. Since the hash tables are large,
this makes a measurable difference when processing /proc/net/tcp,
especially on architectures with a slow read_lock (e.g. PPC).
On a 2GB Core2 system, time cat /proc/net/tcp > /dev/null (with a mostly
empty hash table) goes from 0.046s to 0.005s.
On systems with slower atomics (like P4 or POWER4) or larger hash tables
(more RAM) the difference is much higher.
This can be noticeable because some daemons regularly scan /proc/net/tcp.
Original idea for this patch from Marcus Meissner, but redone by me.
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
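For illustration only, before the diff itself: a minimal, self-contained C sketch of the same check-before-lock pattern, assuming POSIX threads. The table layout and all names here (NBUCKETS, struct bucket, walk_table, the plain singly linked chain) are hypothetical stand-ins for the kernel's ehash chain/twchain and inet_ehash_lockp(), not the patched code.

/* Toy hash table with per-bucket rwlocks (hypothetical names throughout). */
#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

#define NBUCKETS 1024

struct node {
	struct node *next;
	int value;
};

struct bucket {
	struct node *head;              /* NULL <=> chain is empty */
	pthread_rwlock_t lock;
};

static struct bucket table[NBUCKETS];

/* Lockless emptiness test, analogous to hlist_empty() on the chain head. */
static inline int empty_bucket(const struct bucket *b)
{
	return b->head == NULL;
}

static void walk_table(void)
{
	for (size_t i = 0; i < NBUCKETS; i++) {
		struct bucket *b = &table[i];

		/* Fast path: skip empty buckets without touching their lock. */
		if (empty_bucket(b))
			continue;

		pthread_rwlock_rdlock(&b->lock);
		for (struct node *n = b->head; n; n = n->next)
			printf("bucket %zu: %d\n", i, n->value);
		pthread_rwlock_unlock(&b->lock);
	}
}

int main(void)
{
	static struct node n = { NULL, 42 };

	for (size_t i = 0; i < NBUCKETS; i++)
		pthread_rwlock_init(&table[i].lock, NULL);
	table[7].head = &n;

	walk_table();                   /* prints "bucket 7: 42" */
	return 0;
}

As in the patch, the unlocked head-pointer test is a benign race: a stale read at worst costs one unnecessary lock round trip, or skips entries inserted concurrently, which is acceptable for a /proc-style snapshot that makes no atomicity guarantee across buckets.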
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
 net/ipv4/tcp_ipv4.c | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 44c1e934824b..37ca3843c40b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1946,6 +1946,12 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
 	return rc;
 }
 
+static inline int empty_bucket(struct tcp_iter_state *st)
+{
+	return hlist_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
+		hlist_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
+}
+
 static void *established_get_first(struct seq_file *seq)
 {
 	struct tcp_iter_state* st = seq->private;
@@ -1958,6 +1964,10 @@ static void *established_get_first(struct seq_file *seq)
 		struct inet_timewait_sock *tw;
 		rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
 
+		/* Lockless fast path for the common case of empty buckets */
+		if (empty_bucket(st))
+			continue;
+
 		read_lock_bh(lock);
 		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
 			if (sk->sk_family != st->family ||
@@ -2008,13 +2018,15 @@ get_tw:
 		read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
 		st->state = TCP_SEQ_STATE_ESTABLISHED;
 
-		if (++st->bucket < tcp_hashinfo.ehash_size) {
-			read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
-			sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
-		} else {
-			cur = NULL;
-			goto out;
-		}
+		/* Look for next non empty bucket */
+		while (++st->bucket < tcp_hashinfo.ehash_size &&
+				empty_bucket(st))
+			;
+		if (st->bucket >= tcp_hashinfo.ehash_size)
+			return NULL;
+
+		read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+		sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
 	} else
 		sk = sk_next(sk);
 