Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--	net/ipv4/tcp_input.c	194
1 file changed, 117 insertions, 77 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3a4d9b34bed4..684f095d196e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -359,7 +359,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
 	/* Check #1 */
 	if (tp->rcv_ssthresh < tp->window_clamp &&
 	    (int)tp->rcv_ssthresh < tcp_space(sk) &&
-	    !sk_under_memory_pressure(sk)) {
+	    !tcp_under_memory_pressure(sk)) {
 		int incr;
 
 		/* Check #2. Increase window, if skb with such overhead
@@ -446,7 +446,7 @@ static void tcp_clamp_window(struct sock *sk)
 
 	if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
 	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
-	    !sk_under_memory_pressure(sk) &&
+	    !tcp_under_memory_pressure(sk) &&
 	    sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
 		sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
 				    sysctl_tcp_rmem[2]);
@@ -1130,7 +1130,12 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
 struct tcp_sacktag_state {
 	int reord;
 	int fack_count;
-	long rtt_us; /* RTT measured by SACKing never-retransmitted data */
+	/* Timestamps for earliest and latest never-retransmitted segment
+	 * that was SACKed. RTO needs the earliest RTT to stay conservative,
+	 * but congestion control should still get an accurate delay signal.
+	 */
+	struct skb_mstamp first_sackt;
+	struct skb_mstamp last_sackt;
 	int flag;
 };
 
@@ -1233,14 +1238,9 @@ static u8 tcp_sacktag_one(struct sock *sk,
 						   state->reord);
 			if (!after(end_seq, tp->high_seq))
 				state->flag |= FLAG_ORIG_SACK_ACKED;
-			/* Pick the earliest sequence sacked for RTT */
-			if (state->rtt_us < 0) {
-				struct skb_mstamp now;
-
-				skb_mstamp_get(&now);
-				state->rtt_us = skb_mstamp_us_delta(&now,
-								    xmit_time);
-			}
+			if (state->first_sackt.v64 == 0)
+				state->first_sackt = *xmit_time;
+			state->last_sackt = *xmit_time;
 		}
 
 		if (sacked & TCPCB_LOST) {
@@ -1316,16 +1316,12 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 	 * code can come after this skb later on it's better to keep
 	 * setting gso_size to something.
 	 */
-	if (!skb_shinfo(prev)->gso_size) {
-		skb_shinfo(prev)->gso_size = mss;
-		skb_shinfo(prev)->gso_type = sk->sk_gso_type;
-	}
+	if (!TCP_SKB_CB(prev)->tcp_gso_size)
+		TCP_SKB_CB(prev)->tcp_gso_size = mss;
 
 	/* CHECKME: To clear or not to clear? Mimics normal skb currently */
-	if (tcp_skb_pcount(skb) <= 1) {
-		skb_shinfo(skb)->gso_size = 0;
-		skb_shinfo(skb)->gso_type = 0;
-	}
+	if (tcp_skb_pcount(skb) <= 1)
+		TCP_SKB_CB(skb)->tcp_gso_size = 0;
 
 	/* Difference in this won't matter, both ACKed by the same cumul. ACK */
 	TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
@@ -1634,7 +1630,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl
 
 static int
 tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
-			u32 prior_snd_una, long *sack_rtt_us)
+			u32 prior_snd_una, struct tcp_sacktag_state *state)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	const unsigned char *ptr = (skb_transport_header(ack_skb) +
@@ -1642,7 +1638,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 	struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
 	struct tcp_sack_block sp[TCP_NUM_SACKS];
 	struct tcp_sack_block *cache;
-	struct tcp_sacktag_state state;
 	struct sk_buff *skb;
 	int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
 	int used_sacks;
@@ -1650,9 +1645,8 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 	int i, j;
 	int first_sack_index;
 
-	state.flag = 0;
-	state.reord = tp->packets_out;
-	state.rtt_us = -1L;
+	state->flag = 0;
+	state->reord = tp->packets_out;
 
 	if (!tp->sacked_out) {
 		if (WARN_ON(tp->fackets_out))
@@ -1663,7 +1657,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 	found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
 					 num_sacks, prior_snd_una);
 	if (found_dup_sack)
-		state.flag |= FLAG_DSACKING_ACK;
+		state->flag |= FLAG_DSACKING_ACK;
 
 	/* Eliminate too old ACKs, but take into
 	 * account more or less fresh ones, they can
@@ -1728,7 +1722,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 	}
 
 	skb = tcp_write_queue_head(sk);
-	state.fack_count = 0;
+	state->fack_count = 0;
 	i = 0;
 
 	if (!tp->sacked_out) {
@@ -1762,10 +1756,10 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 
 		/* Head todo? */
 		if (before(start_seq, cache->start_seq)) {
-			skb = tcp_sacktag_skip(skb, sk, &state,
+			skb = tcp_sacktag_skip(skb, sk, state,
 					       start_seq);
 			skb = tcp_sacktag_walk(skb, sk, next_dup,
-					       &state,
+					       state,
 					       start_seq,
 					       cache->start_seq,
 					       dup_sack);
@@ -1776,7 +1770,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 				goto advance_sp;
 
 			skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
-						       &state,
+						       state,
 						       cache->end_seq);
 
 			/* ...tail remains todo... */
@@ -1785,12 +1779,12 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 				skb = tcp_highest_sack(sk);
 				if (!skb)
 					break;
-				state.fack_count = tp->fackets_out;
+				state->fack_count = tp->fackets_out;
 				cache++;
 				goto walk;
 			}
 
-		skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq);
+		skb = tcp_sacktag_skip(skb, sk, state, cache->end_seq);
 		/* Check overlap against next cached too (past this one already) */
 		cache++;
 		continue;
@@ -1800,12 +1794,12 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 			skb = tcp_highest_sack(sk);
 			if (!skb)
 				break;
-			state.fack_count = tp->fackets_out;
+			state->fack_count = tp->fackets_out;
 		}
-		skb = tcp_sacktag_skip(skb, sk, &state, start_seq);
+		skb = tcp_sacktag_skip(skb, sk, state, start_seq);
 
 walk:
-		skb = tcp_sacktag_walk(skb, sk, next_dup, &state,
+		skb = tcp_sacktag_walk(skb, sk, next_dup, state,
 				       start_seq, end_seq, dup_sack);
 
 advance_sp:
@@ -1820,14 +1814,12 @@ advance_sp:
 	for (j = 0; j < used_sacks; j++)
 		tp->recv_sack_cache[i++] = sp[j];
 
-	tcp_mark_lost_retrans(sk);
-
-	tcp_verify_left_out(tp);
-
-	if ((state.reord < tp->fackets_out) &&
+	if ((state->reord < tp->fackets_out) &&
 	    ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
-		tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
+		tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
 
+	tcp_mark_lost_retrans(sk);
+	tcp_verify_left_out(tp);
 out:
 
 #if FASTRETRANS_DEBUG > 0
@@ -1836,8 +1828,7 @@ out:
 	WARN_ON((int)tp->retrans_out < 0);
 	WARN_ON((int)tcp_packets_in_flight(tp) < 0);
 #endif
-	*sack_rtt_us = state.rtt_us;
-	return state.flag;
+	return state->flag;
 }
 
 /* Limits sacked_out so that sum with lost_out isn't ever larger than
@@ -2257,7 +2248,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 		    (oldcnt >= packets))
 			break;
 
-		mss = skb_shinfo(skb)->gso_size;
+		mss = tcp_skb_mss(skb);
 		err = tcp_fragment(sk, skb, (packets - oldcnt) * mss,
 				   mss, GFP_ATOMIC);
 		if (err < 0)
@@ -2557,6 +2548,7 @@ void tcp_enter_cwr(struct sock *sk)
 		tcp_set_ca_state(sk, TCP_CA_CWR);
 	}
 }
+EXPORT_SYMBOL(tcp_enter_cwr);
 
 static void tcp_try_keep_open(struct sock *sk)
 {
@@ -2700,16 +2692,21 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
 	struct tcp_sock *tp = tcp_sk(sk);
 	bool recovered = !before(tp->snd_una, tp->high_seq);
 
+	if ((flag & FLAG_SND_UNA_ADVANCED) &&
+	    tcp_try_undo_loss(sk, false))
+		return;
+
 	if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
 		/* Step 3.b. A timeout is spurious if not all data are
 		 * lost, i.e., never-retransmitted data are (s)acked.
 		 */
-		if (tcp_try_undo_loss(sk, flag & FLAG_ORIG_SACK_ACKED))
+		if ((flag & FLAG_ORIG_SACK_ACKED) &&
+		    tcp_try_undo_loss(sk, true))
 			return;
 
-		if (after(tp->snd_nxt, tp->high_seq) &&
-		    (flag & FLAG_DATA_SACKED || is_dupack)) {
-			tp->frto = 0; /* Loss was real: 2nd part of step 3.a */
+		if (after(tp->snd_nxt, tp->high_seq)) {
+			if (flag & FLAG_DATA_SACKED || is_dupack)
+				tp->frto = 0; /* Step 3.a. loss was real */
 		} else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
 			tp->high_seq = tp->snd_nxt;
 			__tcp_push_pending_frames(sk, tcp_current_mss(sk),
@@ -2734,8 +2731,6 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
 		else if (flag & FLAG_SND_UNA_ADVANCED)
 			tcp_reset_reno_sack(tp);
 	}
-	if (tcp_try_undo_loss(sk, false))
-		return;
 	tcp_xmit_retransmit_queue(sk);
 }
 
@@ -3054,7 +3049,8 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
  * arrived at the other end.
  */
 static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
-			       u32 prior_snd_una, long sack_rtt_us)
+			       u32 prior_snd_una,
+			       struct tcp_sacktag_state *sack)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct skb_mstamp first_ackt, last_ackt, now;
@@ -3062,8 +3058,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 	u32 prior_sacked = tp->sacked_out;
 	u32 reord = tp->packets_out;
 	bool fully_acked = true;
-	long ca_seq_rtt_us = -1L;
+	long sack_rtt_us = -1L;
 	long seq_rtt_us = -1L;
+	long ca_rtt_us = -1L;
 	struct sk_buff *skb;
 	u32 pkts_acked = 0;
 	bool rtt_update;
@@ -3152,15 +3149,16 @@
 	skb_mstamp_get(&now);
 	if (likely(first_ackt.v64)) {
 		seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
-		ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
+		ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
+	}
+	if (sack->first_sackt.v64) {
+		sack_rtt_us = skb_mstamp_us_delta(&now, &sack->first_sackt);
+		ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt);
 	}
 
 	rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us);
 
 	if (flag & FLAG_ACKED) {
-		const struct tcp_congestion_ops *ca_ops
-			= inet_csk(sk)->icsk_ca_ops;
-
 		tcp_rearm_rto(sk);
 		if (unlikely(icsk->icsk_mtup.probe_size &&
 			     !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
@@ -3183,11 +3181,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
 		tp->fackets_out -= min(pkts_acked, tp->fackets_out);
 
-		if (ca_ops->pkts_acked) {
-			long rtt_us = min_t(ulong, ca_seq_rtt_us, sack_rtt_us);
-			ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
-		}
-
 	} else if (skb && rtt_update && sack_rtt_us >= 0 &&
 		   sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
 		/* Do not re-arm RTO if the sack RTT is measured from data sent
@@ -3197,6 +3190,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 		tcp_rearm_rto(sk);
 	}
 
+	if (icsk->icsk_ca_ops->pkts_acked)
+		icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked, ca_rtt_us);
+
 #if FASTRETRANS_DEBUG > 0
 	WARN_ON((int)tp->sacked_out < 0);
 	WARN_ON((int)tp->lost_out < 0);
@@ -3237,7 +3233,7 @@ static void tcp_ack_probe(struct sock *sk)
 	 * This function is not for random using!
 	 */
 	} else {
-		unsigned long when = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
+		unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX);
 
 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
 					  when, TCP_RTO_MAX);
@@ -3280,6 +3276,28 @@ static inline bool tcp_may_update_window(const struct tcp_sock *tp,
 		(ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
 }
 
+/* If we update tp->snd_una, also update tp->bytes_acked */
+static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
+{
+	u32 delta = ack - tp->snd_una;
+
+	u64_stats_update_begin(&tp->syncp);
+	tp->bytes_acked += delta;
+	u64_stats_update_end(&tp->syncp);
+	tp->snd_una = ack;
+}
+
+/* If we update tp->rcv_nxt, also update tp->bytes_received */
+static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
+{
+	u32 delta = seq - tp->rcv_nxt;
+
+	u64_stats_update_begin(&tp->syncp);
+	tp->bytes_received += delta;
+	u64_stats_update_end(&tp->syncp);
+	tp->rcv_nxt = seq;
+}
+
 /* Update our send window.
  *
  * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
@@ -3315,7 +3333,7 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
 		}
 	}
 
-	tp->snd_una = ack;
+	tcp_snd_una_update(tp, ack);
 
 	return flag;
 }
@@ -3443,6 +3461,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_sacktag_state sack_state;
 	u32 prior_snd_una = tp->snd_una;
 	u32 ack_seq = TCP_SKB_CB(skb)->seq;
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
@@ -3451,7 +3470,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	int prior_packets = tp->packets_out;
 	const int prior_unsacked = tp->packets_out - tp->sacked_out;
 	int acked = 0; /* Number of packets newly acked */
-	long sack_rtt_us = -1L;
+
+	sack_state.first_sackt.v64 = 0;
 
 	/* We very likely will need to access write queue head. */
 	prefetchw(sk->sk_write_queue.next);
@@ -3497,7 +3517,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 		 * Note, we use the fact that SND.UNA>=SND.WL2.
 		 */
 		tcp_update_wl(tp, ack_seq);
-		tp->snd_una = ack;
+		tcp_snd_una_update(tp, ack);
 		flag |= FLAG_WIN_UPDATE;
 
 		tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
@@ -3515,7 +3535,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	if (TCP_SKB_CB(skb)->sacked)
 		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
-						&sack_rtt_us);
+						&sack_state);
 
 	if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
 		flag |= FLAG_ECE;
@@ -3540,7 +3560,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	/* See if we can take anything off of the retransmit queue. */
 	acked = tp->packets_out;
 	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
-				    sack_rtt_us);
+				    &sack_state);
 	acked -= tp->packets_out;
 
 	/* Advance cwnd if state allows */
@@ -3592,7 +3612,7 @@ old_ack:
 	 */
 	if (TCP_SKB_CB(skb)->sacked) {
 		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
-						&sack_rtt_us);
+						&sack_state);
 		tcp_fastretrans_alert(sk, acked, prior_unsacked,
 				      is_dupack, flag);
 	}
@@ -4236,7 +4256,7 @@ static void tcp_ofo_queue(struct sock *sk)
 
 		tail = skb_peek_tail(&sk->sk_receive_queue);
 		eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
-		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 		if (!eaten)
 			__skb_queue_tail(&sk->sk_receive_queue, skb);
 		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -4404,7 +4424,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
 	__skb_pull(skb, hdrlen);
 	eaten = (tail &&
 		 tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
-	tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+	tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
 	if (!eaten) {
 		__skb_queue_tail(&sk->sk_receive_queue, skb);
 		skb_set_owner_r(skb, sk);
@@ -4491,13 +4511,15 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 
 	if (eaten <= 0) {
 queue_and_out:
-		if (eaten < 0 &&
-		    tcp_try_rmem_schedule(sk, skb, skb->truesize))
-			goto drop;
-
+		if (eaten < 0) {
+			if (skb_queue_len(&sk->sk_receive_queue) == 0)
+				sk_forced_mem_schedule(sk, skb->truesize);
+			else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
+				goto drop;
+		}
 		eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
 	}
-	tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+	tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 	if (skb->len)
 		tcp_event_data_recv(sk, skb);
 	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -4765,7 +4787,7 @@ static int tcp_prune_queue(struct sock *sk)
 
 	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
 		tcp_clamp_window(sk);
-	else if (sk_under_memory_pressure(sk))
+	else if (tcp_under_memory_pressure(sk))
 		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
 	tcp_collapse_ofo_queue(sk);
@@ -4809,7 +4831,7 @@ static bool tcp_should_expand_sndbuf(const struct sock *sk)
 		return false;
 
 	/* If we are under global TCP memory pressure, do not expand. */
-	if (sk_under_memory_pressure(sk))
+	if (tcp_under_memory_pressure(sk))
 		return false;
 
 	/* If we are under soft global TCP memory pressure, do not expand. */
@@ -5245,7 +5267,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			tcp_rcv_rtt_measure_ts(sk, skb);
 
 			__skb_pull(skb, tcp_header_len);
-			tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+			tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
 			eaten = 1;
 		}
@@ -6044,6 +6066,23 @@ static bool tcp_syn_flood_action(struct sock *sk,
 	return want_cookie;
 }
 
+static void tcp_reqsk_record_syn(const struct sock *sk,
+				 struct request_sock *req,
+				 const struct sk_buff *skb)
+{
+	if (tcp_sk(sk)->save_syn) {
+		u32 len = skb_network_header_len(skb) + tcp_hdrlen(skb);
+		u32 *copy;
+
+		copy = kmalloc(len + sizeof(u32), GFP_ATOMIC);
+		if (copy) {
+			copy[0] = len;
+			memcpy(&copy[1], skb_network_header(skb), len);
+			req->saved_syn = copy;
+		}
+	}
+}
+
 int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		     const struct tcp_request_sock_ops *af_ops,
 		     struct sock *sk, struct sk_buff *skb)
@@ -6176,6 +6215,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		tcp_rsk(req)->tfo_listener = false;
 		af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
 	}
+	tcp_reqsk_record_syn(sk, req, skb);
 
 	return 0;
 