about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Eric Dumazet <dada1@cosmosbay.com> 2008-11-26 00:17:14 -0500
committer: David S. Miller <davem@davemloft.net> 2008-11-26 00:17:14 -0500
commit: dd24c00191d5e4a1ae896aafe33c6b8095ab4bd1 (patch)
tree: e955c09e0b288e50c706b6ee409229d5a930c80c
parent: 1748376b6626acf59c24e9592ac67b3fe2a0e026 (diff)
net: Use a percpu_counter for orphan_count
Instead of using one atomic_t per protocol, use a percpu_counter for "orphan_count", to reduce cache line contention on heavy duty network servers.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/sock.h 2
-rw-r--r-- include/net/tcp.h 2
-rw-r--r-- net/dccp/dccp.h 2
-rw-r--r-- net/dccp/proto.c 16
-rw-r--r-- net/ipv4/inet_connection_sock.c 4
-rw-r--r-- net/ipv4/proc.c 2
-rw-r--r-- net/ipv4/tcp.c 12
-rw-r--r-- net/ipv4/tcp_timer.c 2
8 files changed, 24 insertions, 18 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index a2a3890959c4..5a3a151bd730 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -666,7 +666,7 @@ struct proto {
666 unsigned int obj_size; 666 unsigned int obj_size;
667 int slab_flags; 667 int slab_flags;
668 668
669 atomic_t *orphan_count; 669 struct percpu_counter *orphan_count;
670 670
671 struct request_sock_ops *rsk_prot; 671 struct request_sock_ops *rsk_prot;
672 struct timewait_sock_ops *twsk_prot; 672 struct timewait_sock_ops *twsk_prot;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index cbca3b8a133d..de1e91d959b8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -46,7 +46,7 @@
46 46
47extern struct inet_hashinfo tcp_hashinfo; 47extern struct inet_hashinfo tcp_hashinfo;
48 48
49extern atomic_t tcp_orphan_count; 49extern struct percpu_counter tcp_orphan_count;
50extern void tcp_time_wait(struct sock *sk, int state, int timeo); 50extern void tcp_time_wait(struct sock *sk, int state, int timeo);
51 51
52#define MAX_TCP_HEADER (128 + MAX_HEADER) 52#define MAX_TCP_HEADER (128 + MAX_HEADER)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 031ce350d3c1..33a1127270c1 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -49,7 +49,7 @@ extern int dccp_debug;
49 49
50extern struct inet_hashinfo dccp_hashinfo; 50extern struct inet_hashinfo dccp_hashinfo;
51 51
52extern atomic_t dccp_orphan_count; 52extern struct percpu_counter dccp_orphan_count;
53 53
54extern void dccp_time_wait(struct sock *sk, int state, int timeo); 54extern void dccp_time_wait(struct sock *sk, int state, int timeo);
55 55
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index ea85c423cdbd..db225f93cd5a 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -40,8 +40,7 @@ DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
40 40
41EXPORT_SYMBOL_GPL(dccp_statistics); 41EXPORT_SYMBOL_GPL(dccp_statistics);
42 42
43atomic_t dccp_orphan_count = ATOMIC_INIT(0); 43struct percpu_counter dccp_orphan_count;
44
45EXPORT_SYMBOL_GPL(dccp_orphan_count); 44EXPORT_SYMBOL_GPL(dccp_orphan_count);
46 45
47struct inet_hashinfo dccp_hashinfo; 46struct inet_hashinfo dccp_hashinfo;
@@ -1000,7 +999,7 @@ adjudge_to_death:
1000 state = sk->sk_state; 999 state = sk->sk_state;
1001 sock_hold(sk); 1000 sock_hold(sk);
1002 sock_orphan(sk); 1001 sock_orphan(sk);
1003 atomic_inc(sk->sk_prot->orphan_count); 1002 percpu_counter_inc(sk->sk_prot->orphan_count);
1004 1003
1005 /* 1004 /*
1006 * It is the last release_sock in its life. It will remove backlog. 1005 * It is the last release_sock in its life. It will remove backlog.
@@ -1064,18 +1063,21 @@ static int __init dccp_init(void)
1064{ 1063{
1065 unsigned long goal; 1064 unsigned long goal;
1066 int ehash_order, bhash_order, i; 1065 int ehash_order, bhash_order, i;
1067 int rc = -ENOBUFS; 1066 int rc;
1068 1067
1069 BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > 1068 BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1070 FIELD_SIZEOF(struct sk_buff, cb)); 1069 FIELD_SIZEOF(struct sk_buff, cb));
1071 1070 rc = percpu_counter_init(&dccp_orphan_count, 0);
1071 if (rc)
1072 goto out;
1073 rc = -ENOBUFS;
1072 inet_hashinfo_init(&dccp_hashinfo); 1074 inet_hashinfo_init(&dccp_hashinfo);
1073 dccp_hashinfo.bind_bucket_cachep = 1075 dccp_hashinfo.bind_bucket_cachep =
1074 kmem_cache_create("dccp_bind_bucket", 1076 kmem_cache_create("dccp_bind_bucket",
1075 sizeof(struct inet_bind_bucket), 0, 1077 sizeof(struct inet_bind_bucket), 0,
1076 SLAB_HWCACHE_ALIGN, NULL); 1078 SLAB_HWCACHE_ALIGN, NULL);
1077 if (!dccp_hashinfo.bind_bucket_cachep) 1079 if (!dccp_hashinfo.bind_bucket_cachep)
1078 goto out; 1080 goto out_free_percpu;
1079 1081
1080 /* 1082 /*
1081 * Size and allocate the main established and bind bucket 1083 * Size and allocate the main established and bind bucket
@@ -1168,6 +1170,8 @@ out_free_dccp_ehash:
1168out_free_bind_bucket_cachep: 1170out_free_bind_bucket_cachep:
1169 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); 1171 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1170 dccp_hashinfo.bind_bucket_cachep = NULL; 1172 dccp_hashinfo.bind_bucket_cachep = NULL;
1173out_free_percpu:
1174 percpu_counter_destroy(&dccp_orphan_count);
1171 goto out; 1175 goto out;
1172} 1176}
1173 1177
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 05af807ca9b9..1ccdbba528be 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -561,7 +561,7 @@ void inet_csk_destroy_sock(struct sock *sk)
561 561
562 sk_refcnt_debug_release(sk); 562 sk_refcnt_debug_release(sk);
563 563
564 atomic_dec(sk->sk_prot->orphan_count); 564 percpu_counter_dec(sk->sk_prot->orphan_count);
565 sock_put(sk); 565 sock_put(sk);
566} 566}
567 567
@@ -641,7 +641,7 @@ void inet_csk_listen_stop(struct sock *sk)
641 641
642 sock_orphan(child); 642 sock_orphan(child);
643 643
644 atomic_inc(sk->sk_prot->orphan_count); 644 percpu_counter_inc(sk->sk_prot->orphan_count);
645 645
646 inet_csk_destroy_sock(child); 646 inet_csk_destroy_sock(child);
647 647
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 4944b47ad628..614958b7c276 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -54,7 +54,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
54 socket_seq_show(seq); 54 socket_seq_show(seq);
55 seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", 55 seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
56 sock_prot_inuse_get(net, &tcp_prot), 56 sock_prot_inuse_get(net, &tcp_prot),
57 atomic_read(&tcp_orphan_count), 57 (int)percpu_counter_sum_positive(&tcp_orphan_count),
58 tcp_death_row.tw_count, 58 tcp_death_row.tw_count,
59 (int)percpu_counter_sum_positive(&tcp_sockets_allocated), 59 (int)percpu_counter_sum_positive(&tcp_sockets_allocated),
60 atomic_read(&tcp_memory_allocated)); 60 atomic_read(&tcp_memory_allocated));
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e6fade9ebf62..019243408623 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -277,8 +277,7 @@
277 277
278int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; 278int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
279 279
280atomic_t tcp_orphan_count = ATOMIC_INIT(0); 280struct percpu_counter tcp_orphan_count;
281
282EXPORT_SYMBOL_GPL(tcp_orphan_count); 281EXPORT_SYMBOL_GPL(tcp_orphan_count);
283 282
284int sysctl_tcp_mem[3] __read_mostly; 283int sysctl_tcp_mem[3] __read_mostly;
@@ -1837,7 +1836,7 @@ adjudge_to_death:
1837 state = sk->sk_state; 1836 state = sk->sk_state;
1838 sock_hold(sk); 1837 sock_hold(sk);
1839 sock_orphan(sk); 1838 sock_orphan(sk);
1840 atomic_inc(sk->sk_prot->orphan_count); 1839 percpu_counter_inc(sk->sk_prot->orphan_count);
1841 1840
1842 /* It is the last release_sock in its life. It will remove backlog. */ 1841 /* It is the last release_sock in its life. It will remove backlog. */
1843 release_sock(sk); 1842 release_sock(sk);
@@ -1888,9 +1887,11 @@ adjudge_to_death:
1888 } 1887 }
1889 } 1888 }
1890 if (sk->sk_state != TCP_CLOSE) { 1889 if (sk->sk_state != TCP_CLOSE) {
1890 int orphan_count = percpu_counter_read_positive(
1891 sk->sk_prot->orphan_count);
1892
1891 sk_mem_reclaim(sk); 1893 sk_mem_reclaim(sk);
1892 if (tcp_too_many_orphans(sk, 1894 if (tcp_too_many_orphans(sk, orphan_count)) {
1893 atomic_read(sk->sk_prot->orphan_count))) {
1894 if (net_ratelimit()) 1895 if (net_ratelimit())
1895 printk(KERN_INFO "TCP: too many of orphaned " 1896 printk(KERN_INFO "TCP: too many of orphaned "
1896 "sockets\n"); 1897 "sockets\n");
@@ -2689,6 +2690,7 @@ void __init tcp_init(void)
2689 BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); 2690 BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
2690 2691
2691 percpu_counter_init(&tcp_sockets_allocated, 0); 2692 percpu_counter_init(&tcp_sockets_allocated, 0);
2693 percpu_counter_init(&tcp_orphan_count, 0);
2692 tcp_hashinfo.bind_bucket_cachep = 2694 tcp_hashinfo.bind_bucket_cachep =
2693 kmem_cache_create("tcp_bind_bucket", 2695 kmem_cache_create("tcp_bind_bucket",
2694 sizeof(struct inet_bind_bucket), 0, 2696 sizeof(struct inet_bind_bucket), 0,
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 3df339e3e363..cc4e6d27dedc 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -65,7 +65,7 @@ static void tcp_write_err(struct sock *sk)
65static int tcp_out_of_resources(struct sock *sk, int do_reset) 65static int tcp_out_of_resources(struct sock *sk, int do_reset)
66{ 66{
67 struct tcp_sock *tp = tcp_sk(sk); 67 struct tcp_sock *tp = tcp_sk(sk);
68 int orphans = atomic_read(&tcp_orphan_count); 68 int orphans = percpu_counter_read_positive(&tcp_orphan_count);
69 69
70 /* If peer does not open window for long time, or did not transmit 70 /* If peer does not open window for long time, or did not transmit
71 * anything for long time, penalize it. */ 71 * anything for long time, penalize it. */