diff options
author | Eric Dumazet <dada1@cosmosbay.com> | 2008-11-26 00:17:14 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2008-11-26 00:17:14 -0500 |
commit | dd24c00191d5e4a1ae896aafe33c6b8095ab4bd1 (patch) | |
tree | e955c09e0b288e50c706b6ee409229d5a930c80c | |
parent | 1748376b6626acf59c24e9592ac67b3fe2a0e026 (diff) |
net: Use a percpu_counter for orphan_count
Instead of using one atomic_t per protocol, use a percpu_counter
for "orphan_count", to reduce cache line contention on
heavy-duty network servers.
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/sock.h | 2 | ||||
-rw-r--r-- | include/net/tcp.h | 2 | ||||
-rw-r--r-- | net/dccp/dccp.h | 2 | ||||
-rw-r--r-- | net/dccp/proto.c | 16 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 4 | ||||
-rw-r--r-- | net/ipv4/proc.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 12 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 2 |
8 files changed, 24 insertions, 18 deletions
diff --git a/include/net/sock.h b/include/net/sock.h index a2a3890959c4..5a3a151bd730 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -666,7 +666,7 @@ struct proto { | |||
666 | unsigned int obj_size; | 666 | unsigned int obj_size; |
667 | int slab_flags; | 667 | int slab_flags; |
668 | 668 | ||
669 | atomic_t *orphan_count; | 669 | struct percpu_counter *orphan_count; |
670 | 670 | ||
671 | struct request_sock_ops *rsk_prot; | 671 | struct request_sock_ops *rsk_prot; |
672 | struct timewait_sock_ops *twsk_prot; | 672 | struct timewait_sock_ops *twsk_prot; |
diff --git a/include/net/tcp.h b/include/net/tcp.h index cbca3b8a133d..de1e91d959b8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h | |||
@@ -46,7 +46,7 @@ | |||
46 | 46 | ||
47 | extern struct inet_hashinfo tcp_hashinfo; | 47 | extern struct inet_hashinfo tcp_hashinfo; |
48 | 48 | ||
49 | extern atomic_t tcp_orphan_count; | 49 | extern struct percpu_counter tcp_orphan_count; |
50 | extern void tcp_time_wait(struct sock *sk, int state, int timeo); | 50 | extern void tcp_time_wait(struct sock *sk, int state, int timeo); |
51 | 51 | ||
52 | #define MAX_TCP_HEADER (128 + MAX_HEADER) | 52 | #define MAX_TCP_HEADER (128 + MAX_HEADER) |
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 031ce350d3c1..33a1127270c1 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h | |||
@@ -49,7 +49,7 @@ extern int dccp_debug; | |||
49 | 49 | ||
50 | extern struct inet_hashinfo dccp_hashinfo; | 50 | extern struct inet_hashinfo dccp_hashinfo; |
51 | 51 | ||
52 | extern atomic_t dccp_orphan_count; | 52 | extern struct percpu_counter dccp_orphan_count; |
53 | 53 | ||
54 | extern void dccp_time_wait(struct sock *sk, int state, int timeo); | 54 | extern void dccp_time_wait(struct sock *sk, int state, int timeo); |
55 | 55 | ||
diff --git a/net/dccp/proto.c b/net/dccp/proto.c index ea85c423cdbd..db225f93cd5a 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c | |||
@@ -40,8 +40,7 @@ DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly; | |||
40 | 40 | ||
41 | EXPORT_SYMBOL_GPL(dccp_statistics); | 41 | EXPORT_SYMBOL_GPL(dccp_statistics); |
42 | 42 | ||
43 | atomic_t dccp_orphan_count = ATOMIC_INIT(0); | 43 | struct percpu_counter dccp_orphan_count; |
44 | |||
45 | EXPORT_SYMBOL_GPL(dccp_orphan_count); | 44 | EXPORT_SYMBOL_GPL(dccp_orphan_count); |
46 | 45 | ||
47 | struct inet_hashinfo dccp_hashinfo; | 46 | struct inet_hashinfo dccp_hashinfo; |
@@ -1000,7 +999,7 @@ adjudge_to_death: | |||
1000 | state = sk->sk_state; | 999 | state = sk->sk_state; |
1001 | sock_hold(sk); | 1000 | sock_hold(sk); |
1002 | sock_orphan(sk); | 1001 | sock_orphan(sk); |
1003 | atomic_inc(sk->sk_prot->orphan_count); | 1002 | percpu_counter_inc(sk->sk_prot->orphan_count); |
1004 | 1003 | ||
1005 | /* | 1004 | /* |
1006 | * It is the last release_sock in its life. It will remove backlog. | 1005 | * It is the last release_sock in its life. It will remove backlog. |
@@ -1064,18 +1063,21 @@ static int __init dccp_init(void) | |||
1064 | { | 1063 | { |
1065 | unsigned long goal; | 1064 | unsigned long goal; |
1066 | int ehash_order, bhash_order, i; | 1065 | int ehash_order, bhash_order, i; |
1067 | int rc = -ENOBUFS; | 1066 | int rc; |
1068 | 1067 | ||
1069 | BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > | 1068 | BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > |
1070 | FIELD_SIZEOF(struct sk_buff, cb)); | 1069 | FIELD_SIZEOF(struct sk_buff, cb)); |
1071 | 1070 | rc = percpu_counter_init(&dccp_orphan_count, 0); | |
1071 | if (rc) | ||
1072 | goto out; | ||
1073 | rc = -ENOBUFS; | ||
1072 | inet_hashinfo_init(&dccp_hashinfo); | 1074 | inet_hashinfo_init(&dccp_hashinfo); |
1073 | dccp_hashinfo.bind_bucket_cachep = | 1075 | dccp_hashinfo.bind_bucket_cachep = |
1074 | kmem_cache_create("dccp_bind_bucket", | 1076 | kmem_cache_create("dccp_bind_bucket", |
1075 | sizeof(struct inet_bind_bucket), 0, | 1077 | sizeof(struct inet_bind_bucket), 0, |
1076 | SLAB_HWCACHE_ALIGN, NULL); | 1078 | SLAB_HWCACHE_ALIGN, NULL); |
1077 | if (!dccp_hashinfo.bind_bucket_cachep) | 1079 | if (!dccp_hashinfo.bind_bucket_cachep) |
1078 | goto out; | 1080 | goto out_free_percpu; |
1079 | 1081 | ||
1080 | /* | 1082 | /* |
1081 | * Size and allocate the main established and bind bucket | 1083 | * Size and allocate the main established and bind bucket |
@@ -1168,6 +1170,8 @@ out_free_dccp_ehash: | |||
1168 | out_free_bind_bucket_cachep: | 1170 | out_free_bind_bucket_cachep: |
1169 | kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); | 1171 | kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); |
1170 | dccp_hashinfo.bind_bucket_cachep = NULL; | 1172 | dccp_hashinfo.bind_bucket_cachep = NULL; |
1173 | out_free_percpu: | ||
1174 | percpu_counter_destroy(&dccp_orphan_count); | ||
1171 | goto out; | 1175 | goto out; |
1172 | } | 1176 | } |
1173 | 1177 | ||
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 05af807ca9b9..1ccdbba528be 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -561,7 +561,7 @@ void inet_csk_destroy_sock(struct sock *sk) | |||
561 | 561 | ||
562 | sk_refcnt_debug_release(sk); | 562 | sk_refcnt_debug_release(sk); |
563 | 563 | ||
564 | atomic_dec(sk->sk_prot->orphan_count); | 564 | percpu_counter_dec(sk->sk_prot->orphan_count); |
565 | sock_put(sk); | 565 | sock_put(sk); |
566 | } | 566 | } |
567 | 567 | ||
@@ -641,7 +641,7 @@ void inet_csk_listen_stop(struct sock *sk) | |||
641 | 641 | ||
642 | sock_orphan(child); | 642 | sock_orphan(child); |
643 | 643 | ||
644 | atomic_inc(sk->sk_prot->orphan_count); | 644 | percpu_counter_inc(sk->sk_prot->orphan_count); |
645 | 645 | ||
646 | inet_csk_destroy_sock(child); | 646 | inet_csk_destroy_sock(child); |
647 | 647 | ||
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 4944b47ad628..614958b7c276 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
@@ -54,7 +54,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) | |||
54 | socket_seq_show(seq); | 54 | socket_seq_show(seq); |
55 | seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", | 55 | seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", |
56 | sock_prot_inuse_get(net, &tcp_prot), | 56 | sock_prot_inuse_get(net, &tcp_prot), |
57 | atomic_read(&tcp_orphan_count), | 57 | (int)percpu_counter_sum_positive(&tcp_orphan_count), |
58 | tcp_death_row.tw_count, | 58 | tcp_death_row.tw_count, |
59 | (int)percpu_counter_sum_positive(&tcp_sockets_allocated), | 59 | (int)percpu_counter_sum_positive(&tcp_sockets_allocated), |
60 | atomic_read(&tcp_memory_allocated)); | 60 | atomic_read(&tcp_memory_allocated)); |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e6fade9ebf62..019243408623 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -277,8 +277,7 @@ | |||
277 | 277 | ||
278 | int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; | 278 | int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; |
279 | 279 | ||
280 | atomic_t tcp_orphan_count = ATOMIC_INIT(0); | 280 | struct percpu_counter tcp_orphan_count; |
281 | |||
282 | EXPORT_SYMBOL_GPL(tcp_orphan_count); | 281 | EXPORT_SYMBOL_GPL(tcp_orphan_count); |
283 | 282 | ||
284 | int sysctl_tcp_mem[3] __read_mostly; | 283 | int sysctl_tcp_mem[3] __read_mostly; |
@@ -1837,7 +1836,7 @@ adjudge_to_death: | |||
1837 | state = sk->sk_state; | 1836 | state = sk->sk_state; |
1838 | sock_hold(sk); | 1837 | sock_hold(sk); |
1839 | sock_orphan(sk); | 1838 | sock_orphan(sk); |
1840 | atomic_inc(sk->sk_prot->orphan_count); | 1839 | percpu_counter_inc(sk->sk_prot->orphan_count); |
1841 | 1840 | ||
1842 | /* It is the last release_sock in its life. It will remove backlog. */ | 1841 | /* It is the last release_sock in its life. It will remove backlog. */ |
1843 | release_sock(sk); | 1842 | release_sock(sk); |
@@ -1888,9 +1887,11 @@ adjudge_to_death: | |||
1888 | } | 1887 | } |
1889 | } | 1888 | } |
1890 | if (sk->sk_state != TCP_CLOSE) { | 1889 | if (sk->sk_state != TCP_CLOSE) { |
1890 | int orphan_count = percpu_counter_read_positive( | ||
1891 | sk->sk_prot->orphan_count); | ||
1892 | |||
1891 | sk_mem_reclaim(sk); | 1893 | sk_mem_reclaim(sk); |
1892 | if (tcp_too_many_orphans(sk, | 1894 | if (tcp_too_many_orphans(sk, orphan_count)) { |
1893 | atomic_read(sk->sk_prot->orphan_count))) { | ||
1894 | if (net_ratelimit()) | 1895 | if (net_ratelimit()) |
1895 | printk(KERN_INFO "TCP: too many of orphaned " | 1896 | printk(KERN_INFO "TCP: too many of orphaned " |
1896 | "sockets\n"); | 1897 | "sockets\n"); |
@@ -2689,6 +2690,7 @@ void __init tcp_init(void) | |||
2689 | BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); | 2690 | BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); |
2690 | 2691 | ||
2691 | percpu_counter_init(&tcp_sockets_allocated, 0); | 2692 | percpu_counter_init(&tcp_sockets_allocated, 0); |
2693 | percpu_counter_init(&tcp_orphan_count, 0); | ||
2692 | tcp_hashinfo.bind_bucket_cachep = | 2694 | tcp_hashinfo.bind_bucket_cachep = |
2693 | kmem_cache_create("tcp_bind_bucket", | 2695 | kmem_cache_create("tcp_bind_bucket", |
2694 | sizeof(struct inet_bind_bucket), 0, | 2696 | sizeof(struct inet_bind_bucket), 0, |
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 3df339e3e363..cc4e6d27dedc 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -65,7 +65,7 @@ static void tcp_write_err(struct sock *sk) | |||
65 | static int tcp_out_of_resources(struct sock *sk, int do_reset) | 65 | static int tcp_out_of_resources(struct sock *sk, int do_reset) |
66 | { | 66 | { |
67 | struct tcp_sock *tp = tcp_sk(sk); | 67 | struct tcp_sock *tp = tcp_sk(sk); |
68 | int orphans = atomic_read(&tcp_orphan_count); | 68 | int orphans = percpu_counter_read_positive(&tcp_orphan_count); |
69 | 69 | ||
70 | /* If peer does not open window for long time, or did not transmit | 70 | /* If peer does not open window for long time, or did not transmit |
71 | * anything for long time, penalize it. */ | 71 | * anything for long time, penalize it. */ |