aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_output.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2018-12-09 18:12:33 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2018-12-09 18:12:33 -0500
commit d48f782e4fb20dc7ec935ca0ca41ae31e4a69362 (patch)
tree 482270b85d4ab9b1284e07e4cb439b4dc7af919f /net/ipv4/tcp_output.c
parent 8586ca8a214471e4573d76356aabe890bfecdc8a (diff)
parent 35cc3cefc4de90001c9137e2d01dd9d06b11acfb (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller: "A decent batch of fixes here. I'd say about half are for problems that have existed for a while, and half are for new regressions added in the 4.20 merge window. 1) Fix 10G SFP phy module detection in mvpp2, from Baruch Siach. 2) Revert bogus emac driver change, from Benjamin Herrenschmidt. 3) Handle BPF exported data structure with pointers when building 32-bit userland, from Daniel Borkmann. 4) Memory leak fix in act_police, from Davide Caratti. 5) Check RX checksum offload in RX descriptors properly in aquantia driver, from Dmitry Bogdanov. 6) SKB unlink fix in various spots, from Edward Cree. 7) ndo_dflt_fdb_dump() only works with ethernet, enforce this, from Eric Dumazet. 8) Fix FID leak in mlxsw driver, from Ido Schimmel. 9) IOTLB locking fix in vhost, from Jean-Philippe Brucker. 10) Fix SKB truesize accounting in ipv4/ipv6/netfilter frag memory limits otherwise namespace exit can hang. From Jiri Wiesner. 11) Address block parsing length fixes in x25 from Martin Schiller. 12) IRQ and ring accounting fixes in bnxt_en, from Michael Chan. 13) For tun interfaces, only iface delete works with rtnl ops, enforce this by disallowing add. From Nicolas Dichtel. 14) Use after free in liquidio, from Pan Bian. 15) Fix SKB use after passing to netif_receive_skb(), from Prashant Bhole. 16) Static key accounting and other fixes in XPS from Sabrina Dubroca. 17) Partially initialized flow key passed to ip6_route_output(), from Shmulik Ladkani. 18) Fix RTNL deadlock during reset in ibmvnic driver, from Thomas Falcon. 19) Several small TCP fixes (off-by-one on window probe abort, NULL deref in tail loss probe, SNMP mis-estimations) from Yuchung Cheng" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (93 commits) net/sched: cls_flower: Reject duplicated rules also under skip_sw bnxt_en: Fix _bnxt_get_max_rings() for 57500 chips. bnxt_en: Fix NQ/CP rings accounting on the new 57500 chips. bnxt_en: Keep track of reserved IRQs. 
bnxt_en: Fix CNP CoS queue regression. net/mlx4_core: Correctly set PFC param if global pause is turned off. Revert "net/ibm/emac: wrong bit is used for STA control" neighbour: Avoid writing before skb->head in neigh_hh_output() ipv6: Check available headroom in ip6_xmit() even without options tcp: lack of available data can also cause TSO defer ipv6: sr: properly initialize flowi6 prior passing to ip6_route_output mlxsw: spectrum_switchdev: Fix VLAN device deletion via ioctl mlxsw: spectrum_router: Relax GRE decap matching check mlxsw: spectrum_switchdev: Avoid leaking FID's reference count mlxsw: spectrum_nve: Remove easily triggerable warnings ipv4: ipv6: netfilter: Adjust the frag mem limit when truesize changes sctp: frag_point sanity check tcp: fix NULL ref in tail loss probe tcp: Do not underestimate rwnd_limited net: use skb_list_del_init() to remove from RX sublists ...
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r-- net/ipv4/tcp_output.c 45
1 file changed, 32 insertions, 13 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3f510cad0b3e..d1676d8a6ed7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1904,7 +1904,9 @@ static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
1904 * This algorithm is from John Heffner. 1904 * This algorithm is from John Heffner.
1905 */ 1905 */
1906static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, 1906static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
1907 bool *is_cwnd_limited, u32 max_segs) 1907 bool *is_cwnd_limited,
1908 bool *is_rwnd_limited,
1909 u32 max_segs)
1908{ 1910{
1909 const struct inet_connection_sock *icsk = inet_csk(sk); 1911 const struct inet_connection_sock *icsk = inet_csk(sk);
1910 u32 age, send_win, cong_win, limit, in_flight; 1912 u32 age, send_win, cong_win, limit, in_flight;
@@ -1912,9 +1914,6 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
1912 struct sk_buff *head; 1914 struct sk_buff *head;
1913 int win_divisor; 1915 int win_divisor;
1914 1916
1915 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1916 goto send_now;
1917
1918 if (icsk->icsk_ca_state >= TCP_CA_Recovery) 1917 if (icsk->icsk_ca_state >= TCP_CA_Recovery)
1919 goto send_now; 1918 goto send_now;
1920 1919
@@ -1973,10 +1972,27 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
1973 if (age < (tp->srtt_us >> 4)) 1972 if (age < (tp->srtt_us >> 4))
1974 goto send_now; 1973 goto send_now;
1975 1974
1976 /* Ok, it looks like it is advisable to defer. */ 1975 /* Ok, it looks like it is advisable to defer.
1976 * Three cases are tracked :
1977 * 1) We are cwnd-limited
1978 * 2) We are rwnd-limited
1979 * 3) We are application limited.
1980 */
1981 if (cong_win < send_win) {
1982 if (cong_win <= skb->len) {
1983 *is_cwnd_limited = true;
1984 return true;
1985 }
1986 } else {
1987 if (send_win <= skb->len) {
1988 *is_rwnd_limited = true;
1989 return true;
1990 }
1991 }
1977 1992
1978 if (cong_win < send_win && cong_win <= skb->len) 1993 /* If this packet won't get more data, do not wait. */
1979 *is_cwnd_limited = true; 1994 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1995 goto send_now;
1980 1996
1981 return true; 1997 return true;
1982 1998
@@ -2356,7 +2372,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
2356 } else { 2372 } else {
2357 if (!push_one && 2373 if (!push_one &&
2358 tcp_tso_should_defer(sk, skb, &is_cwnd_limited, 2374 tcp_tso_should_defer(sk, skb, &is_cwnd_limited,
2359 max_segs)) 2375 &is_rwnd_limited, max_segs))
2360 break; 2376 break;
2361 } 2377 }
2362 2378
@@ -2494,15 +2510,18 @@ void tcp_send_loss_probe(struct sock *sk)
2494 goto rearm_timer; 2510 goto rearm_timer;
2495 } 2511 }
2496 skb = skb_rb_last(&sk->tcp_rtx_queue); 2512 skb = skb_rb_last(&sk->tcp_rtx_queue);
2513 if (unlikely(!skb)) {
2514 WARN_ONCE(tp->packets_out,
2515 "invalid inflight: %u state %u cwnd %u mss %d\n",
2516 tp->packets_out, sk->sk_state, tp->snd_cwnd, mss);
2517 inet_csk(sk)->icsk_pending = 0;
2518 return;
2519 }
2497 2520
2498 /* At most one outstanding TLP retransmission. */ 2521 /* At most one outstanding TLP retransmission. */
2499 if (tp->tlp_high_seq) 2522 if (tp->tlp_high_seq)
2500 goto rearm_timer; 2523 goto rearm_timer;
2501 2524
2502 /* Retransmit last segment. */
2503 if (WARN_ON(!skb))
2504 goto rearm_timer;
2505
2506 if (skb_still_in_host_queue(sk, skb)) 2525 if (skb_still_in_host_queue(sk, skb))
2507 goto rearm_timer; 2526 goto rearm_timer;
2508 2527
@@ -2920,7 +2939,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
2920 TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; 2939 TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
2921 trace_tcp_retransmit_skb(sk, skb); 2940 trace_tcp_retransmit_skb(sk, skb);
2922 } else if (err != -EBUSY) { 2941 } else if (err != -EBUSY) {
2923 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); 2942 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs);
2924 } 2943 }
2925 return err; 2944 return err;
2926} 2945}