aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_ipv4.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-06-06 21:39:49 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2018-06-06 21:39:49 -0400
commit1c8c5a9d38f607c0b6fd12c91cbe1a4418762a21 (patch)
treedcc97181d4d187252e0cc8fdf29d9b365fa3ffd0 /net/ipv4/tcp_ipv4.c
parent285767604576148fc1be7fcd112e4a90eb0d6ad2 (diff)
parent7170e6045a6a8b33f4fa5753589dc77b16198e2d (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) Add Maglev hashing scheduler to IPVS, from Inju Song. 2) Lots of new TC subsystem tests from Roman Mashak. 3) Add TCP zero copy receive and fix delayed acks and autotuning with SO_RCVLOWAT, from Eric Dumazet. 4) Add XDP_REDIRECT support to mlx5 driver, from Jesper Dangaard Brouer. 5) Add ttl inherit support to vxlan, from Hangbin Liu. 6) Properly separate ipv6 routes into their logically independent components. fib6_info for the routing table, and fib6_nh for sets of nexthops, which thus can be shared. From David Ahern. 7) Add bpf_xdp_adjust_tail helper, which can be used to generate ICMP messages from XDP programs. From Nikita V. Shirokov. 8) Lots of long overdue cleanups to the r8169 driver, from Heiner Kallweit. 9) Add BTF ("BPF Type Format"), from Martin KaFai Lau. 10) Add traffic condition monitoring to iwlwifi, from Luca Coelho. 11) Plumb extack down into fib_rules, from Roopa Prabhu. 12) Add Flower classifier offload support to igb, from Vinicius Costa Gomes. 13) Add UDP GSO support, from Willem de Bruijn. 14) Add documentation for eBPF helpers, from Quentin Monnet. 15) Add TLS tx offload to mlx5, from Ilya Lesokhin. 16) Allow applications to be given the number of bytes available to read on a socket via a control message returned from recvmsg(), from Soheil Hassas Yeganeh. 17) Add x86_32 eBPF JIT compiler, from Wang YanQing. 18) Add AF_XDP sockets, with zerocopy support infrastructure as well. From Björn Töpel. 19) Remove indirect load support from all of the BPF JITs and handle these operations in the verifier by translating them into native BPF instead. From Daniel Borkmann. 20) Add GRO support to ipv6 gre tunnels, from Eran Ben Elisha. 21) Allow XDP programs to do lookups in the main kernel routing tables for forwarding. From David Ahern. 22) Allow drivers to store hardware state into an ELF section of kernel dump vmcore files, and use it in cxgb4. From Rahul Lakkireddy. 
23) Various RACK and loss detection improvements in TCP, from Yuchung Cheng. 24) Add TCP SACK compression, from Eric Dumazet. 25) Add User Mode Helper support and basic bpfilter infrastructure, from Alexei Starovoitov. 26) Support ports and protocol values in RTM_GETROUTE, from Roopa Prabhu. 27) Support bulking in ->ndo_xdp_xmit() API, from Jesper Dangaard Brouer. 28) Add lots of forwarding selftests, from Petr Machata. 29) Add generic network device failover driver, from Sridhar Samudrala. * ra.kernel.org:/pub/scm/linux/kernel/git/davem/net-next: (1959 commits) strparser: Add __strp_unpause and use it in ktls. rxrpc: Fix terminal retransmission connection ID to include the channel net: hns3: Optimize PF CMDQ interrupt switching process net: hns3: Fix for VF mailbox receiving unknown message net: hns3: Fix for VF mailbox cannot receiving PF response bnx2x: use the right constant Revert "net: sched: cls: Fix offloading when ingress dev is vxlan" net: dsa: b53: Fix for brcm tag issue in Cygnus SoC enic: fix UDP rss bits netdev-FAQ: clarify DaveM's position for stable backports rtnetlink: validate attributes in do_setlink() mlxsw: Add extack messages for port_{un, }split failures netdevsim: Add extack error message for devlink reload devlink: Add extack to reload and port_{un, }split operations net: metrics: add proper netlink validation ipmr: fix error path when ipmr_new_table fails ip6mr: only set ip6mr_table from setsockopt when ip6mr_new_table succeeds net: hns3: remove unused hclgevf_cfg_func_mta_filter netfilter: provide udp*_lib_lookup for nf_tproxy qed*: Utilize FW 8.37.2.0 ...
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r--net/ipv4/tcp_ipv4.c55
1 files changed, 49 insertions, 6 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2c970626b398..fed3f1c66167 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -110,8 +110,38 @@ static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
110 110
111int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) 111int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
112{ 112{
113 const struct inet_timewait_sock *tw = inet_twsk(sktw);
113 const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); 114 const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
114 struct tcp_sock *tp = tcp_sk(sk); 115 struct tcp_sock *tp = tcp_sk(sk);
116 int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse;
117
118 if (reuse == 2) {
119 /* Still does not detect *everything* that goes through
120 * lo, since we require a loopback src or dst address
121 * or direct binding to 'lo' interface.
122 */
123 bool loopback = false;
124 if (tw->tw_bound_dev_if == LOOPBACK_IFINDEX)
125 loopback = true;
126#if IS_ENABLED(CONFIG_IPV6)
127 if (tw->tw_family == AF_INET6) {
128 if (ipv6_addr_loopback(&tw->tw_v6_daddr) ||
129 (ipv6_addr_v4mapped(&tw->tw_v6_daddr) &&
130 (tw->tw_v6_daddr.s6_addr[12] == 127)) ||
131 ipv6_addr_loopback(&tw->tw_v6_rcv_saddr) ||
132 (ipv6_addr_v4mapped(&tw->tw_v6_rcv_saddr) &&
133 (tw->tw_v6_rcv_saddr.s6_addr[12] == 127)))
134 loopback = true;
135 } else
136#endif
137 {
138 if (ipv4_is_loopback(tw->tw_daddr) ||
139 ipv4_is_loopback(tw->tw_rcv_saddr))
140 loopback = true;
141 }
142 if (!loopback)
143 reuse = 0;
144 }
115 145
116 /* With PAWS, it is safe from the viewpoint 146 /* With PAWS, it is safe from the viewpoint
117 of data integrity. Even without PAWS it is safe provided sequence 147 of data integrity. Even without PAWS it is safe provided sequence
@@ -125,8 +155,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
125 and use initial timestamp retrieved from peer table. 155 and use initial timestamp retrieved from peer table.
126 */ 156 */
127 if (tcptw->tw_ts_recent_stamp && 157 if (tcptw->tw_ts_recent_stamp &&
128 (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse && 158 (!twp || (reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
129 get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
130 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; 159 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
131 if (tp->write_seq == 0) 160 if (tp->write_seq == 0)
132 tp->write_seq = 1; 161 tp->write_seq = 1;
@@ -621,6 +650,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
621 struct sock *sk1 = NULL; 650 struct sock *sk1 = NULL;
622#endif 651#endif
623 struct net *net; 652 struct net *net;
653 struct sock *ctl_sk;
624 654
625 /* Never send a reset in response to a reset. */ 655 /* Never send a reset in response to a reset. */
626 if (th->rst) 656 if (th->rst)
@@ -723,11 +753,16 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
723 arg.tos = ip_hdr(skb)->tos; 753 arg.tos = ip_hdr(skb)->tos;
724 arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); 754 arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
725 local_bh_disable(); 755 local_bh_disable();
726 ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), 756 ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
757 if (sk)
758 ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
759 inet_twsk(sk)->tw_mark : sk->sk_mark;
760 ip_send_unicast_reply(ctl_sk,
727 skb, &TCP_SKB_CB(skb)->header.h4.opt, 761 skb, &TCP_SKB_CB(skb)->header.h4.opt,
728 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, 762 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
729 &arg, arg.iov[0].iov_len); 763 &arg, arg.iov[0].iov_len);
730 764
765 ctl_sk->sk_mark = 0;
731 __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); 766 __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
732 __TCP_INC_STATS(net, TCP_MIB_OUTRSTS); 767 __TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
733 local_bh_enable(); 768 local_bh_enable();
@@ -759,6 +794,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
759 } rep; 794 } rep;
760 struct net *net = sock_net(sk); 795 struct net *net = sock_net(sk);
761 struct ip_reply_arg arg; 796 struct ip_reply_arg arg;
797 struct sock *ctl_sk;
762 798
763 memset(&rep.th, 0, sizeof(struct tcphdr)); 799 memset(&rep.th, 0, sizeof(struct tcphdr));
764 memset(&arg, 0, sizeof(arg)); 800 memset(&arg, 0, sizeof(arg));
@@ -809,11 +845,16 @@ static void tcp_v4_send_ack(const struct sock *sk,
809 arg.tos = tos; 845 arg.tos = tos;
810 arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL); 846 arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
811 local_bh_disable(); 847 local_bh_disable();
812 ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), 848 ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
849 if (sk)
850 ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
851 inet_twsk(sk)->tw_mark : sk->sk_mark;
852 ip_send_unicast_reply(ctl_sk,
813 skb, &TCP_SKB_CB(skb)->header.h4.opt, 853 skb, &TCP_SKB_CB(skb)->header.h4.opt,
814 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, 854 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
815 &arg, arg.iov[0].iov_len); 855 &arg, arg.iov[0].iov_len);
816 856
857 ctl_sk->sk_mark = 0;
817 __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); 858 __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
818 local_bh_enable(); 859 local_bh_enable();
819} 860}
@@ -1474,7 +1515,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1474 sk->sk_rx_dst = NULL; 1515 sk->sk_rx_dst = NULL;
1475 } 1516 }
1476 } 1517 }
1477 tcp_rcv_established(sk, skb, tcp_hdr(skb)); 1518 tcp_rcv_established(sk, skb);
1478 return 0; 1519 return 0;
1479 } 1520 }
1480 1521
@@ -2481,7 +2522,7 @@ static int __net_init tcp_sk_init(struct net *net)
2481 net->ipv4.sysctl_tcp_orphan_retries = 0; 2522 net->ipv4.sysctl_tcp_orphan_retries = 0;
2482 net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; 2523 net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
2483 net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; 2524 net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
2484 net->ipv4.sysctl_tcp_tw_reuse = 0; 2525 net->ipv4.sysctl_tcp_tw_reuse = 2;
2485 2526
2486 cnt = tcp_hashinfo.ehash_mask + 1; 2527 cnt = tcp_hashinfo.ehash_mask + 1;
2487 net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2; 2528 net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2;
@@ -2524,6 +2565,8 @@ static int __net_init tcp_sk_init(struct net *net)
2524 init_net.ipv4.sysctl_tcp_wmem, 2565 init_net.ipv4.sysctl_tcp_wmem,
2525 sizeof(init_net.ipv4.sysctl_tcp_wmem)); 2566 sizeof(init_net.ipv4.sysctl_tcp_wmem));
2526 } 2567 }
2568 net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC;
2569 net->ipv4.sysctl_tcp_comp_sack_nr = 44;
2527 net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; 2570 net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
2528 spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); 2571 spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
2529 net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60; 2572 net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;