aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_input.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--net/ipv4/tcp_input.c45
1 files changed, 28 insertions, 17 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eda4f4a233f3..b54d9d37b636 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -66,6 +66,7 @@
66#include <linux/mm.h> 66#include <linux/mm.h>
67#include <linux/module.h> 67#include <linux/module.h>
68#include <linux/sysctl.h> 68#include <linux/sysctl.h>
69#include <net/dst.h>
69#include <net/tcp.h> 70#include <net/tcp.h>
70#include <net/inet_common.h> 71#include <net/inet_common.h>
71#include <linux/ipsec.h> 72#include <linux/ipsec.h>
@@ -113,8 +114,6 @@ int sysctl_tcp_abc __read_mostly;
113#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) 114#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)
114#define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED) 115#define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED)
115 116
116#define IsSackFrto() (sysctl_tcp_frto == 0x2)
117
118#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) 117#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
119#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) 118#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
120 119
@@ -605,7 +604,7 @@ static u32 tcp_rto_min(struct sock *sk)
605 u32 rto_min = TCP_RTO_MIN; 604 u32 rto_min = TCP_RTO_MIN;
606 605
607 if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) 606 if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
608 rto_min = dst->metrics[RTAX_RTO_MIN - 1]; 607 rto_min = dst_metric(dst, RTAX_RTO_MIN);
609 return rto_min; 608 return rto_min;
610} 609}
611 610
@@ -769,7 +768,7 @@ void tcp_update_metrics(struct sock *sk)
769 dst->metrics[RTAX_RTTVAR - 1] = m; 768 dst->metrics[RTAX_RTTVAR - 1] = m;
770 else 769 else
771 dst->metrics[RTAX_RTTVAR-1] -= 770 dst->metrics[RTAX_RTTVAR-1] -=
772 (dst->metrics[RTAX_RTTVAR-1] - m)>>2; 771 (dst_metric(dst, RTAX_RTTVAR) - m)>>2;
773 } 772 }
774 773
775 if (tp->snd_ssthresh >= 0xFFFF) { 774 if (tp->snd_ssthresh >= 0xFFFF) {
@@ -788,21 +787,21 @@ void tcp_update_metrics(struct sock *sk)
788 dst->metrics[RTAX_SSTHRESH-1] = 787 dst->metrics[RTAX_SSTHRESH-1] =
789 max(tp->snd_cwnd >> 1, tp->snd_ssthresh); 788 max(tp->snd_cwnd >> 1, tp->snd_ssthresh);
790 if (!dst_metric_locked(dst, RTAX_CWND)) 789 if (!dst_metric_locked(dst, RTAX_CWND))
791 dst->metrics[RTAX_CWND-1] = (dst->metrics[RTAX_CWND-1] + tp->snd_cwnd) >> 1; 790 dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_cwnd) >> 1;
792 } else { 791 } else {
793 /* Else slow start did not finish, cwnd is non-sense, 792 /* Else slow start did not finish, cwnd is non-sense,
794 ssthresh may be also invalid. 793 ssthresh may be also invalid.
795 */ 794 */
796 if (!dst_metric_locked(dst, RTAX_CWND)) 795 if (!dst_metric_locked(dst, RTAX_CWND))
797 dst->metrics[RTAX_CWND-1] = (dst->metrics[RTAX_CWND-1] + tp->snd_ssthresh) >> 1; 796 dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_ssthresh) >> 1;
798 if (dst->metrics[RTAX_SSTHRESH-1] && 797 if (dst_metric(dst, RTAX_SSTHRESH) &&
799 !dst_metric_locked(dst, RTAX_SSTHRESH) && 798 !dst_metric_locked(dst, RTAX_SSTHRESH) &&
800 tp->snd_ssthresh > dst->metrics[RTAX_SSTHRESH-1]) 799 tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH))
801 dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh; 800 dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh;
802 } 801 }
803 802
804 if (!dst_metric_locked(dst, RTAX_REORDERING)) { 803 if (!dst_metric_locked(dst, RTAX_REORDERING)) {
805 if (dst->metrics[RTAX_REORDERING-1] < tp->reordering && 804 if (dst_metric(dst, RTAX_REORDERING) < tp->reordering &&
806 tp->reordering != sysctl_tcp_reordering) 805 tp->reordering != sysctl_tcp_reordering)
807 dst->metrics[RTAX_REORDERING-1] = tp->reordering; 806 dst->metrics[RTAX_REORDERING-1] = tp->reordering;
808 } 807 }
@@ -1685,6 +1684,11 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
1685 tp->sacked_out = 0; 1684 tp->sacked_out = 0;
1686} 1685}
1687 1686
1687static int tcp_is_sackfrto(const struct tcp_sock *tp)
1688{
1689 return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp);
1690}
1691
1688/* F-RTO can only be used if TCP has never retransmitted anything other than 1692/* F-RTO can only be used if TCP has never retransmitted anything other than
1689 * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here) 1693 * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here)
1690 */ 1694 */
@@ -1701,7 +1705,7 @@ int tcp_use_frto(struct sock *sk)
1701 if (icsk->icsk_mtup.probe_size) 1705 if (icsk->icsk_mtup.probe_size)
1702 return 0; 1706 return 0;
1703 1707
1704 if (IsSackFrto()) 1708 if (tcp_is_sackfrto(tp))
1705 return 1; 1709 return 1;
1706 1710
1707 /* Avoid expensive walking of rexmit queue if possible */ 1711 /* Avoid expensive walking of rexmit queue if possible */
@@ -1791,7 +1795,7 @@ void tcp_enter_frto(struct sock *sk)
1791 /* Earlier loss recovery underway (see RFC4138; Appendix B). 1795 /* Earlier loss recovery underway (see RFC4138; Appendix B).
1792 * The last condition is necessary at least in tp->frto_counter case. 1796 * The last condition is necessary at least in tp->frto_counter case.
1793 */ 1797 */
1794 if (IsSackFrto() && (tp->frto_counter || 1798 if (tcp_is_sackfrto(tp) && (tp->frto_counter ||
1795 ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) && 1799 ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
1796 after(tp->high_seq, tp->snd_una)) { 1800 after(tp->high_seq, tp->snd_una)) {
1797 tp->frto_highmark = tp->high_seq; 1801 tp->frto_highmark = tp->high_seq;
@@ -1838,9 +1842,16 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
1838 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1842 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1839 } 1843 }
1840 1844
1841 /* Don't lost mark skbs that were fwd transmitted after RTO */ 1845 /* Marking forward transmissions that were made after RTO lost
1842 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) && 1846 * can cause unnecessary retransmissions in some scenarios,
1843 !after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) { 1847 * SACK blocks will mitigate that in some but not in all cases.
1848 * We used to not mark them but it was causing break-ups with
1849 * receivers that do only in-order receival.
1850 *
1851 * TODO: we could detect presence of such receiver and select
1852 * different behavior per flow.
1853 */
1854 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
1844 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1855 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1845 tp->lost_out += tcp_skb_pcount(skb); 1856 tp->lost_out += tcp_skb_pcount(skb);
1846 } 1857 }
@@ -1856,7 +1867,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
1856 tp->reordering = min_t(unsigned int, tp->reordering, 1867 tp->reordering = min_t(unsigned int, tp->reordering,
1857 sysctl_tcp_reordering); 1868 sysctl_tcp_reordering);
1858 tcp_set_ca_state(sk, TCP_CA_Loss); 1869 tcp_set_ca_state(sk, TCP_CA_Loss);
1859 tp->high_seq = tp->frto_highmark; 1870 tp->high_seq = tp->snd_nxt;
1860 TCP_ECN_queue_cwr(tp); 1871 TCP_ECN_queue_cwr(tp);
1861 1872
1862 tcp_clear_retrans_hints_partial(tp); 1873 tcp_clear_retrans_hints_partial(tp);
@@ -2478,7 +2489,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)
2478 2489
2479 tcp_verify_left_out(tp); 2490 tcp_verify_left_out(tp);
2480 2491
2481 if (tp->retrans_out == 0) 2492 if (!tp->frto_counter && tp->retrans_out == 0)
2482 tp->retrans_stamp = 0; 2493 tp->retrans_stamp = 0;
2483 2494
2484 if (flag & FLAG_ECE) 2495 if (flag & FLAG_ECE)
@@ -3123,7 +3134,7 @@ static int tcp_process_frto(struct sock *sk, int flag)
3123 return 1; 3134 return 1;
3124 } 3135 }
3125 3136
3126 if (!IsSackFrto() || tcp_is_reno(tp)) { 3137 if (!tcp_is_sackfrto(tp)) {
3127 /* RFC4138 shortcoming in step 2; should also have case c): 3138 /* RFC4138 shortcoming in step 2; should also have case c):
3128 * ACK isn't duplicate nor advances window, e.g., opposite dir 3139 * ACK isn't duplicate nor advances window, e.g., opposite dir
3129 * data, winupdate 3140 * data, winupdate