author    Eric Dumazet <edumazet@google.com>    2012-07-19 03:34:03 -0400
committer David S. Miller <davem@davemloft.net> 2012-07-19 13:35:30 -0400
commit    be9f4a44e7d41cee50ddb5f038fc2391cbbb4046 (patch)
tree      184e45a62fa0b4d15961427c0e8d5a496f0617a5 /net/ipv4/tcp_ipv4.c
parent    aee06da6726d4981c51928c2d6d1e2cabeec7a10 (diff)
ipv4: tcp: remove per net tcp_sock
tcp_v4_send_reset() and tcp_v4_send_ack() use a single socket per network namespace.

This leads to bad behavior on multiqueue NICs, because many cpus contend for the socket lock and, once the socket lock is acquired, extra false sharing on various socket fields slows down the operations.

To better resist attacks, we use a percpu socket. Each cpu can run without contention, using memory from its local NUMA node.

Additional features:

1) We also mirror the queue_mapping of the incoming skb, so that answers use the same queue if possible.

2) Setting the SOCK_USE_WRITE_QUEUE socket flag speeds up sock_wfree().

3) We now limit the number of in-flight RST/ACK [1] packets per cpu, instead of per namespace, and we honor the sysctl_wmem_default limit dynamically. (Prior to this patch, the sysctl_wmem_default value was copied at boot time, so any later change would not affect the tcp_sock limit.)

[1] These packets are only generated when no socket matched the incoming packet.

Reported-by: Bill Sommerfeld <wsommerfeld@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
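To illustrate the percpu approach described above, here is a condensed sketch of the per-cpu reply socket this commit introduces. The actual definition lives in the companion change to net/ipv4/ip_output.c, which the tcp_ipv4.c-limited diff below does not show, so treat the field initializers as illustrative rather than exhaustive:

static DEFINE_PER_CPU(struct inet_sock, unicast_sock) = {
	.sk = {
		.__sk_common = {
			.skc_refcnt = ATOMIC_INIT(1),
		},
		/* Hold one permanent wmem reference so the fake socket
		 * is never torn down by the last sock_wfree(). */
		.sk_wmem_alloc	= ATOMIC_INIT(1),
		.sk_allocation	= GFP_ATOMIC,
		/* Feature 2: sock_wfree() takes a fast path for sockets
		 * carrying SOCK_USE_WRITE_QUEUE. */
		.sk_flags	= (1UL << SOCK_USE_WRITE_QUEUE),
	},
	.pmtudisc = IP_PMTUDISC_WANT,
};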
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r--	net/ipv4/tcp_ipv4.c	8
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d9caf5c07aae..d7d2fa50f07f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -688,7 +688,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 
 	net = dev_net(skb_dst(skb)->dev);
 	arg.tos = ip_hdr(skb)->tos;
-	ip_send_unicast_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
+	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
 			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
 
 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
@@ -771,7 +771,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 	if (oif)
 		arg.bound_dev_if = oif;
 	arg.tos = tos;
-	ip_send_unicast_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
+	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
 			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
 
 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
@@ -2624,13 +2624,11 @@ EXPORT_SYMBOL(tcp_prot);
 
 static int __net_init tcp_sk_init(struct net *net)
 {
-	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
-				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
+	return 0;
 }
 
 static void __net_exit tcp_sk_exit(struct net *net)
 {
-	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
 }
 
 static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
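Finally, a minimal sketch of how the reworked ip_send_unicast_reply() (also in the ip_output.c part of this commit, not shown above) uses that per-cpu socket; route lookup, header construction and error handling are elided:

void ip_send_unicast_reply(struct net *net, struct sk_buff *skb,
			   __be32 daddr, __be32 saddr,
			   const struct ip_reply_arg *arg, unsigned int len)
{
	/* Each cpu owns a private reply socket: no lock contention,
	 * no cross-cpu false sharing on socket fields. */
	struct inet_sock *inet = &get_cpu_var(unicast_sock);
	struct sock *sk = &inet->sk;
	struct sk_buff *nskb;

	sock_net_set(sk, net);
	/* Feature 3: re-read sysctl_wmem_default on every reply, so the
	 * per-cpu RST/ACK in-flight limit follows the current sysctl. */
	sk->sk_sndbuf = sysctl_wmem_default;
	__skb_queue_head_init(&sk->sk_write_queue);

	/* ... ip_append_data() builds the RST/ACK reply here ... */

	nskb = skb_peek(&sk->sk_write_queue);
	if (nskb) {
		/* Feature 1: send the answer on the same queue the
		 * incoming packet used, if possible. */
		skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb));
		/* ... checksum fixup, then ip_push_pending_frames() ... */
	}
	put_cpu_var(unicast_sock);
}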