aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorTom Herbert <therbert@google.com>2010-06-07 03:43:42 -0400
committerDavid S. Miller <davem@davemloft.net>2010-06-07 03:43:42 -0400
commita8b690f98baf9fb1902b8eeab801351ea603fa3a (patch)
treeb48940354a3236d33e33ca4f1b9169c9545cd551 /net/ipv4
parent83038a2a7062f6cbbdcfaff47284566f060a5af1 (diff)
tcp: Fix slowness in reading /proc/net/tcp
This patch addresses a serious performance issue in reading the TCP sockets table (/proc/net/tcp). Reading the full table is done by a number of sequential read operations. At each read operation, a seek is done to find the last socket that was previously read. This seek operation requires that the sockets in the table be counted up to the current file position, and counting each of these requires taking a lock for each non-empty bucket. The whole algorithm is O(n^2). The fix is to cache the last bucket value, offset within the bucket, and the file position returned by the last read operation. On the next sequential read, the bucket and offset are used to find the last read socket immediately without needing to scan the previous buckets of the table. This algorithm to read the whole table is O(n). The improvement offered by this patch is easily shown by cat'ing /proc/net/tcp on a machine with a lot of connections. With about 182K connections in the table, I see the following: - Without patch time cat /proc/net/tcp > /dev/null real 1m56.729s user 0m0.214s sys 1m56.344s - With patch time cat /proc/net/tcp > /dev/null real 0m0.894s user 0m0.290s sys 0m0.594s Signed-off-by: Tom Herbert <therbert@google.com> Acked-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/tcp_ipv4.c92
1 files changed, 84 insertions, 8 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index acdc4c989853..7f976af27bf0 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1980,6 +1980,11 @@ static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1980 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; 1980 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1981} 1981}
1982 1982
1983/*
1984 * Get next listener socket following cur. If cur is NULL, get first socket
1985 * starting from bucket given in st->bucket; when st->bucket is zero the
1986 * very first socket in the hash table is returned.
1987 */
1983static void *listening_get_next(struct seq_file *seq, void *cur) 1988static void *listening_get_next(struct seq_file *seq, void *cur)
1984{ 1989{
1985 struct inet_connection_sock *icsk; 1990 struct inet_connection_sock *icsk;
@@ -1990,14 +1995,15 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
1990 struct net *net = seq_file_net(seq); 1995 struct net *net = seq_file_net(seq);
1991 1996
1992 if (!sk) { 1997 if (!sk) {
1993 st->bucket = 0; 1998 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1994 ilb = &tcp_hashinfo.listening_hash[0];
1995 spin_lock_bh(&ilb->lock); 1999 spin_lock_bh(&ilb->lock);
1996 sk = sk_nulls_head(&ilb->head); 2000 sk = sk_nulls_head(&ilb->head);
2001 st->offset = 0;
1997 goto get_sk; 2002 goto get_sk;
1998 } 2003 }
1999 ilb = &tcp_hashinfo.listening_hash[st->bucket]; 2004 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2000 ++st->num; 2005 ++st->num;
2006 ++st->offset;
2001 2007
2002 if (st->state == TCP_SEQ_STATE_OPENREQ) { 2008 if (st->state == TCP_SEQ_STATE_OPENREQ) {
2003 struct request_sock *req = cur; 2009 struct request_sock *req = cur;
@@ -2012,6 +2018,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
2012 } 2018 }
2013 req = req->dl_next; 2019 req = req->dl_next;
2014 } 2020 }
2021 st->offset = 0;
2015 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) 2022 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
2016 break; 2023 break;
2017get_req: 2024get_req:
@@ -2047,6 +2054,7 @@ start_req:
2047 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 2054 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2048 } 2055 }
2049 spin_unlock_bh(&ilb->lock); 2056 spin_unlock_bh(&ilb->lock);
2057 st->offset = 0;
2050 if (++st->bucket < INET_LHTABLE_SIZE) { 2058 if (++st->bucket < INET_LHTABLE_SIZE) {
2051 ilb = &tcp_hashinfo.listening_hash[st->bucket]; 2059 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2052 spin_lock_bh(&ilb->lock); 2060 spin_lock_bh(&ilb->lock);
@@ -2060,7 +2068,12 @@ out:
2060 2068
2061static void *listening_get_idx(struct seq_file *seq, loff_t *pos) 2069static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2062{ 2070{
2063 void *rc = listening_get_next(seq, NULL); 2071 struct tcp_iter_state *st = seq->private;
2072 void *rc;
2073
2074 st->bucket = 0;
2075 st->offset = 0;
2076 rc = listening_get_next(seq, NULL);
2064 2077
2065 while (rc && *pos) { 2078 while (rc && *pos) {
2066 rc = listening_get_next(seq, rc); 2079 rc = listening_get_next(seq, rc);
@@ -2075,13 +2088,18 @@ static inline int empty_bucket(struct tcp_iter_state *st)
2075 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); 2088 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2076} 2089}
2077 2090
2091/*
2092 * Get first established socket starting from bucket given in st->bucket.
2093 * If st->bucket is zero, the very first socket in the hash is returned.
2094 */
2078static void *established_get_first(struct seq_file *seq) 2095static void *established_get_first(struct seq_file *seq)
2079{ 2096{
2080 struct tcp_iter_state *st = seq->private; 2097 struct tcp_iter_state *st = seq->private;
2081 struct net *net = seq_file_net(seq); 2098 struct net *net = seq_file_net(seq);
2082 void *rc = NULL; 2099 void *rc = NULL;
2083 2100
2084 for (st->bucket = 0; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { 2101 st->offset = 0;
2102 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2085 struct sock *sk; 2103 struct sock *sk;
2086 struct hlist_nulls_node *node; 2104 struct hlist_nulls_node *node;
2087 struct inet_timewait_sock *tw; 2105 struct inet_timewait_sock *tw;
@@ -2126,6 +2144,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
2126 struct net *net = seq_file_net(seq); 2144 struct net *net = seq_file_net(seq);
2127 2145
2128 ++st->num; 2146 ++st->num;
2147 ++st->offset;
2129 2148
2130 if (st->state == TCP_SEQ_STATE_TIME_WAIT) { 2149 if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2131 tw = cur; 2150 tw = cur;
@@ -2142,6 +2161,7 @@ get_tw:
2142 st->state = TCP_SEQ_STATE_ESTABLISHED; 2161 st->state = TCP_SEQ_STATE_ESTABLISHED;
2143 2162
2144 /* Look for next non empty bucket */ 2163 /* Look for next non empty bucket */
2164 st->offset = 0;
2145 while (++st->bucket <= tcp_hashinfo.ehash_mask && 2165 while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2146 empty_bucket(st)) 2166 empty_bucket(st))
2147 ; 2167 ;
@@ -2169,7 +2189,11 @@ out:
2169 2189
2170static void *established_get_idx(struct seq_file *seq, loff_t pos) 2190static void *established_get_idx(struct seq_file *seq, loff_t pos)
2171{ 2191{
2172 void *rc = established_get_first(seq); 2192 struct tcp_iter_state *st = seq->private;
2193 void *rc;
2194
2195 st->bucket = 0;
2196 rc = established_get_first(seq);
2173 2197
2174 while (rc && pos) { 2198 while (rc && pos) {
2175 rc = established_get_next(seq, rc); 2199 rc = established_get_next(seq, rc);
@@ -2194,24 +2218,72 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2194 return rc; 2218 return rc;
2195} 2219}
2196 2220
2221static void *tcp_seek_last_pos(struct seq_file *seq)
2222{
2223 struct tcp_iter_state *st = seq->private;
2224 int offset = st->offset;
2225 int orig_num = st->num;
2226 void *rc = NULL;
2227
2228 switch (st->state) {
2229 case TCP_SEQ_STATE_OPENREQ:
2230 case TCP_SEQ_STATE_LISTENING:
2231 if (st->bucket >= INET_LHTABLE_SIZE)
2232 break;
2233 st->state = TCP_SEQ_STATE_LISTENING;
2234 rc = listening_get_next(seq, NULL);
2235 while (offset-- && rc)
2236 rc = listening_get_next(seq, rc);
2237 if (rc)
2238 break;
2239 st->bucket = 0;
2240 /* Fallthrough */
2241 case TCP_SEQ_STATE_ESTABLISHED:
2242 case TCP_SEQ_STATE_TIME_WAIT:
2243 st->state = TCP_SEQ_STATE_ESTABLISHED;
2244 if (st->bucket > tcp_hashinfo.ehash_mask)
2245 break;
2246 rc = established_get_first(seq);
2247 while (offset-- && rc)
2248 rc = established_get_next(seq, rc);
2249 }
2250
2251 st->num = orig_num;
2252
2253 return rc;
2254}
2255
2197static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) 2256static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2198{ 2257{
2199 struct tcp_iter_state *st = seq->private; 2258 struct tcp_iter_state *st = seq->private;
2259 void *rc;
2260
2261 if (*pos && *pos == st->last_pos) {
2262 rc = tcp_seek_last_pos(seq);
2263 if (rc)
2264 goto out;
2265 }
2266
2200 st->state = TCP_SEQ_STATE_LISTENING; 2267 st->state = TCP_SEQ_STATE_LISTENING;
2201 st->num = 0; 2268 st->num = 0;
2202 return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 2269 st->bucket = 0;
2270 st->offset = 0;
2271 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2272
2273out:
2274 st->last_pos = *pos;
2275 return rc;
2203} 2276}
2204 2277
2205static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2278static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2206{ 2279{
2280 struct tcp_iter_state *st = seq->private;
2207 void *rc = NULL; 2281 void *rc = NULL;
2208 struct tcp_iter_state *st;
2209 2282
2210 if (v == SEQ_START_TOKEN) { 2283 if (v == SEQ_START_TOKEN) {
2211 rc = tcp_get_idx(seq, 0); 2284 rc = tcp_get_idx(seq, 0);
2212 goto out; 2285 goto out;
2213 } 2286 }
2214 st = seq->private;
2215 2287
2216 switch (st->state) { 2288 switch (st->state) {
2217 case TCP_SEQ_STATE_OPENREQ: 2289 case TCP_SEQ_STATE_OPENREQ:
@@ -2219,6 +2291,8 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2219 rc = listening_get_next(seq, v); 2291 rc = listening_get_next(seq, v);
2220 if (!rc) { 2292 if (!rc) {
2221 st->state = TCP_SEQ_STATE_ESTABLISHED; 2293 st->state = TCP_SEQ_STATE_ESTABLISHED;
2294 st->bucket = 0;
2295 st->offset = 0;
2222 rc = established_get_first(seq); 2296 rc = established_get_first(seq);
2223 } 2297 }
2224 break; 2298 break;
@@ -2229,6 +2303,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2229 } 2303 }
2230out: 2304out:
2231 ++*pos; 2305 ++*pos;
2306 st->last_pos = *pos;
2232 return rc; 2307 return rc;
2233} 2308}
2234 2309
@@ -2267,6 +2342,7 @@ static int tcp_seq_open(struct inode *inode, struct file *file)
2267 2342
2268 s = ((struct seq_file *)file->private_data)->private; 2343 s = ((struct seq_file *)file->private_data)->private;
2269 s->family = afinfo->family; 2344 s->family = afinfo->family;
2345 s->last_pos = 0;
2270 return 0; 2346 return 0;
2271} 2347}
2272 2348