diff options
author | Eric Dumazet <edumazet@google.com> | 2012-06-08 01:03:21 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-06-08 17:27:23 -0400 |
commit | 7123aaa3a1416529ce461e98108e6b343b294643 (patch) | |
tree | 094070a521666979523411422109cce72a1db8c4 | |
parent | 54db0cc2ba0d38166acc2d6bae21721405305537 (diff) |
af_unix: speedup /proc/net/unix
/proc/net/unix has quadratic behavior, and can hold unix_table_lock for
a while if a high number of unix sockets are alive. (90 ms for 200k
sockets...)
We already have a hash table, so it's quite easy to use it.
The problem is that unbound sockets are still hashed in a single hash slot
(unix_socket_table[UNIX_HASH_SIZE]).
This patch also spreads unbound sockets to 256 hash slots, to speed up
both /proc/net/unix and unix_diag.
Time to read /proc/net/unix with 200k unix sockets :
(time dd if=/proc/net/unix of=/dev/null bs=4k)
before : 520 secs
after : 2 secs
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/af_unix.h | 3 | ||||
-rw-r--r-- | net/unix/af_unix.c | 110 | ||||
-rw-r--r-- | net/unix/diag.c | 6 |
3 files changed, 70 insertions, 49 deletions
diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 2ee33da36a7a..b5f8988e4283 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h | |||
@@ -14,10 +14,11 @@ extern struct sock *unix_get_socket(struct file *filp); | |||
14 | extern struct sock *unix_peer_get(struct sock *); | 14 | extern struct sock *unix_peer_get(struct sock *); |
15 | 15 | ||
16 | #define UNIX_HASH_SIZE 256 | 16 | #define UNIX_HASH_SIZE 256 |
17 | #define UNIX_HASH_BITS 8 | ||
17 | 18 | ||
18 | extern unsigned int unix_tot_inflight; | 19 | extern unsigned int unix_tot_inflight; |
19 | extern spinlock_t unix_table_lock; | 20 | extern spinlock_t unix_table_lock; |
20 | extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; | 21 | extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; |
21 | 22 | ||
22 | struct unix_address { | 23 | struct unix_address { |
23 | atomic_t refcnt; | 24 | atomic_t refcnt; |
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 641f2e47f165..cf83f6b5ac91 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c | |||
@@ -115,15 +115,24 @@ | |||
115 | #include <net/checksum.h> | 115 | #include <net/checksum.h> |
116 | #include <linux/security.h> | 116 | #include <linux/security.h> |
117 | 117 | ||
118 | struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; | 118 | struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; |
119 | EXPORT_SYMBOL_GPL(unix_socket_table); | 119 | EXPORT_SYMBOL_GPL(unix_socket_table); |
120 | DEFINE_SPINLOCK(unix_table_lock); | 120 | DEFINE_SPINLOCK(unix_table_lock); |
121 | EXPORT_SYMBOL_GPL(unix_table_lock); | 121 | EXPORT_SYMBOL_GPL(unix_table_lock); |
122 | static atomic_long_t unix_nr_socks; | 122 | static atomic_long_t unix_nr_socks; |
123 | 123 | ||
124 | #define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) | ||
125 | 124 | ||
126 | #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE) | 125 | static struct hlist_head *unix_sockets_unbound(void *addr) |
126 | { | ||
127 | unsigned long hash = (unsigned long)addr; | ||
128 | |||
129 | hash ^= hash >> 16; | ||
130 | hash ^= hash >> 8; | ||
131 | hash %= UNIX_HASH_SIZE; | ||
132 | return &unix_socket_table[UNIX_HASH_SIZE + hash]; | ||
133 | } | ||
134 | |||
135 | #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE) | ||
127 | 136 | ||
128 | #ifdef CONFIG_SECURITY_NETWORK | 137 | #ifdef CONFIG_SECURITY_NETWORK |
129 | static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) | 138 | static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) |
@@ -645,7 +654,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock) | |||
645 | INIT_LIST_HEAD(&u->link); | 654 | INIT_LIST_HEAD(&u->link); |
646 | mutex_init(&u->readlock); /* single task reading lock */ | 655 | mutex_init(&u->readlock); /* single task reading lock */ |
647 | init_waitqueue_head(&u->peer_wait); | 656 | init_waitqueue_head(&u->peer_wait); |
648 | unix_insert_socket(unix_sockets_unbound, sk); | 657 | unix_insert_socket(unix_sockets_unbound(sk), sk); |
649 | out: | 658 | out: |
650 | if (sk == NULL) | 659 | if (sk == NULL) |
651 | atomic_long_dec(&unix_nr_socks); | 660 | atomic_long_dec(&unix_nr_socks); |
@@ -2239,47 +2248,58 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, | |||
2239 | } | 2248 | } |
2240 | 2249 | ||
2241 | #ifdef CONFIG_PROC_FS | 2250 | #ifdef CONFIG_PROC_FS |
2242 | static struct sock *first_unix_socket(int *i) | ||
2243 | { | ||
2244 | for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) { | ||
2245 | if (!hlist_empty(&unix_socket_table[*i])) | ||
2246 | return __sk_head(&unix_socket_table[*i]); | ||
2247 | } | ||
2248 | return NULL; | ||
2249 | } | ||
2250 | 2251 | ||
2251 | static struct sock *next_unix_socket(int *i, struct sock *s) | 2252 | #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) |
2252 | { | 2253 | |
2253 | struct sock *next = sk_next(s); | 2254 | #define get_bucket(x) ((x) >> BUCKET_SPACE) |
2254 | /* More in this chain? */ | 2255 | #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1)) |
2255 | if (next) | 2256 | #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) |
2256 | return next; | ||
2257 | /* Look for next non-empty chain. */ | ||
2258 | for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) { | ||
2259 | if (!hlist_empty(&unix_socket_table[*i])) | ||
2260 | return __sk_head(&unix_socket_table[*i]); | ||
2261 | } | ||
2262 | return NULL; | ||
2263 | } | ||
2264 | 2257 | ||
2265 | struct unix_iter_state { | 2258 | struct unix_iter_state { |
2266 | struct seq_net_private p; | 2259 | struct seq_net_private p; |
2267 | int i; | ||
2268 | }; | 2260 | }; |
2269 | 2261 | ||
2270 | static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos) | 2262 | static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) |
2271 | { | 2263 | { |
2272 | struct unix_iter_state *iter = seq->private; | 2264 | unsigned long offset = get_offset(*pos); |
2273 | loff_t off = 0; | 2265 | unsigned long bucket = get_bucket(*pos); |
2274 | struct sock *s; | 2266 | struct sock *sk; |
2267 | unsigned long count = 0; | ||
2275 | 2268 | ||
2276 | for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) { | 2269 | for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) { |
2277 | if (sock_net(s) != seq_file_net(seq)) | 2270 | if (sock_net(sk) != seq_file_net(seq)) |
2278 | continue; | 2271 | continue; |
2279 | if (off == pos) | 2272 | if (++count == offset) |
2280 | return s; | 2273 | break; |
2281 | ++off; | ||
2282 | } | 2274 | } |
2275 | |||
2276 | return sk; | ||
2277 | } | ||
2278 | |||
2279 | static struct sock *unix_next_socket(struct seq_file *seq, | ||
2280 | struct sock *sk, | ||
2281 | loff_t *pos) | ||
2282 | { | ||
2283 | unsigned long bucket; | ||
2284 | |||
2285 | while (sk > (struct sock *)SEQ_START_TOKEN) { | ||
2286 | sk = sk_next(sk); | ||
2287 | if (!sk) | ||
2288 | goto next_bucket; | ||
2289 | if (sock_net(sk) == seq_file_net(seq)) | ||
2290 | return sk; | ||
2291 | } | ||
2292 | |||
2293 | do { | ||
2294 | sk = unix_from_bucket(seq, pos); | ||
2295 | if (sk) | ||
2296 | return sk; | ||
2297 | |||
2298 | next_bucket: | ||
2299 | bucket = get_bucket(*pos) + 1; | ||
2300 | *pos = set_bucket_offset(bucket, 1); | ||
2301 | } while (bucket < ARRAY_SIZE(unix_socket_table)); | ||
2302 | |||
2283 | return NULL; | 2303 | return NULL; |
2284 | } | 2304 | } |
2285 | 2305 | ||
@@ -2287,22 +2307,20 @@ static void *unix_seq_start(struct seq_file *seq, loff_t *pos) | |||
2287 | __acquires(unix_table_lock) | 2307 | __acquires(unix_table_lock) |
2288 | { | 2308 | { |
2289 | spin_lock(&unix_table_lock); | 2309 | spin_lock(&unix_table_lock); |
2290 | return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN; | 2310 | |
2311 | if (!*pos) | ||
2312 | return SEQ_START_TOKEN; | ||
2313 | |||
2314 | if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table)) | ||
2315 | return NULL; | ||
2316 | |||
2317 | return unix_next_socket(seq, NULL, pos); | ||
2291 | } | 2318 | } |
2292 | 2319 | ||
2293 | static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 2320 | static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
2294 | { | 2321 | { |
2295 | struct unix_iter_state *iter = seq->private; | ||
2296 | struct sock *sk = v; | ||
2297 | ++*pos; | 2322 | ++*pos; |
2298 | 2323 | return unix_next_socket(seq, v, pos); | |
2299 | if (v == SEQ_START_TOKEN) | ||
2300 | sk = first_unix_socket(&iter->i); | ||
2301 | else | ||
2302 | sk = next_unix_socket(&iter->i, sk); | ||
2303 | while (sk && (sock_net(sk) != seq_file_net(seq))) | ||
2304 | sk = next_unix_socket(&iter->i, sk); | ||
2305 | return sk; | ||
2306 | } | 2324 | } |
2307 | 2325 | ||
2308 | static void unix_seq_stop(struct seq_file *seq, void *v) | 2326 | static void unix_seq_stop(struct seq_file *seq, void *v) |
diff --git a/net/unix/diag.c b/net/unix/diag.c index 47d3002737f5..7e8a24bff34a 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c | |||
@@ -195,7 +195,9 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
195 | num = s_num = cb->args[1]; | 195 | num = s_num = cb->args[1]; |
196 | 196 | ||
197 | spin_lock(&unix_table_lock); | 197 | spin_lock(&unix_table_lock); |
198 | for (slot = s_slot; slot <= UNIX_HASH_SIZE; s_num = 0, slot++) { | 198 | for (slot = s_slot; |
199 | slot < ARRAY_SIZE(unix_socket_table); | ||
200 | s_num = 0, slot++) { | ||
199 | struct sock *sk; | 201 | struct sock *sk; |
200 | struct hlist_node *node; | 202 | struct hlist_node *node; |
201 | 203 | ||
@@ -228,7 +230,7 @@ static struct sock *unix_lookup_by_ino(int ino) | |||
228 | struct sock *sk; | 230 | struct sock *sk; |
229 | 231 | ||
230 | spin_lock(&unix_table_lock); | 232 | spin_lock(&unix_table_lock); |
231 | for (i = 0; i <= UNIX_HASH_SIZE; i++) { | 233 | for (i = 0; i < ARRAY_SIZE(unix_socket_table); i++) { |
232 | struct hlist_node *node; | 234 | struct hlist_node *node; |
233 | 235 | ||
234 | sk_for_each(sk, node, &unix_socket_table[i]) | 236 | sk_for_each(sk, node, &unix_socket_table[i]) |