Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--	net/ipv4/tcp_input.c	194
1 file changed, 117 insertions(+), 77 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3a4d9b34bed4..684f095d196e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -359,7 +359,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
 	/* Check #1 */
 	if (tp->rcv_ssthresh < tp->window_clamp &&
 	    (int)tp->rcv_ssthresh < tcp_space(sk) &&
-	    !sk_under_memory_pressure(sk)) {
+	    !tcp_under_memory_pressure(sk)) {
 		int incr;
 
 		/* Check #2. Increase window, if skb with such overhead
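
Note: tcp_under_memory_pressure() is not defined in this file; it comes from the companion include/net/tcp.h change. A sketch of its assumed shape, preferring the per-memcg pressure state when cgroup socket accounting is active and falling back to the global flag otherwise:

/* Assumed shape of the helper (companion include/net/tcp.h change,
 * not part of this diff).
 */
static inline bool tcp_under_memory_pressure(const struct sock *sk)
{
	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
		return !!sk->sk_cgrp->memory_pressure;

	return tcp_memory_pressure;
}
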
@@ -446,7 +446,7 @@ static void tcp_clamp_window(struct sock *sk)
 
 	if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
 	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
-	    !sk_under_memory_pressure(sk) &&
+	    !tcp_under_memory_pressure(sk) &&
 	    sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
 		sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
 				    sysctl_tcp_rmem[2]);
@@ -1130,7 +1130,12 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
 struct tcp_sacktag_state {
 	int reord;
 	int fack_count;
-	long rtt_us; /* RTT measured by SACKing never-retransmitted data */
+	/* Timestamps for earliest and latest never-retransmitted segment
+	 * that was SACKed. RTO needs the earliest RTT to stay conservative,
+	 * but congestion control should still get an accurate delay signal.
+	 */
+	struct skb_mstamp first_sackt;
+	struct skb_mstamp last_sackt;
 	int flag;
 };
 
@@ -1233,14 +1238,9 @@ static u8 tcp_sacktag_one(struct sock *sk,
 						   state->reord);
 			if (!after(end_seq, tp->high_seq))
 				state->flag |= FLAG_ORIG_SACK_ACKED;
-			/* Pick the earliest sequence sacked for RTT */
-			if (state->rtt_us < 0) {
-				struct skb_mstamp now;
-
-				skb_mstamp_get(&now);
-				state->rtt_us = skb_mstamp_us_delta(&now,
-								    xmit_time);
-			}
+			if (state->first_sackt.v64 == 0)
+				state->first_sackt = *xmit_time;
+			state->last_sackt = *xmit_time;
 		}
 
 		if (sacked & TCPCB_LOST) {
@@ -1316,16 +1316,12 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 	 * code can come after this skb later on it's better to keep
 	 * setting gso_size to something.
 	 */
-	if (!skb_shinfo(prev)->gso_size) {
-		skb_shinfo(prev)->gso_size = mss;
-		skb_shinfo(prev)->gso_type = sk->sk_gso_type;
-	}
+	if (!TCP_SKB_CB(prev)->tcp_gso_size)
+		TCP_SKB_CB(prev)->tcp_gso_size = mss;
 
 	/* CHECKME: To clear or not to clear? Mimics normal skb currently */
-	if (tcp_skb_pcount(skb) <= 1) {
-		skb_shinfo(skb)->gso_size = 0;
-		skb_shinfo(skb)->gso_type = 0;
-	}
+	if (tcp_skb_pcount(skb) <= 1)
+		TCP_SKB_CB(skb)->tcp_gso_size = 0;
 
 	/* Difference in this won't matter, both ACKed by the same cumul. ACK */
 	TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
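
Note: this hunk and the tcp_mark_head_lost() hunk further down stop touching skb_shinfo(skb)->gso_size directly and instead keep the segment size in the TCP control block, read back through tcp_skb_mss(). A sketch of the assumed accessor from the companion include/net/tcp.h change:

/* Assumed accessor (companion include/net/tcp.h change, not part of
 * this diff): tcp_skb_mss() now reads the per-skb segment size from
 * the TCP control block instead of skb_shinfo(skb)->gso_size.
 */
static inline int tcp_skb_mss(const struct sk_buff *skb)
{
	return TCP_SKB_CB(skb)->tcp_gso_size;
}
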
@@ -1634,7 +1630,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl
 
 static int
 tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
-			u32 prior_snd_una, long *sack_rtt_us)
+			u32 prior_snd_una, struct tcp_sacktag_state *state)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	const unsigned char *ptr = (skb_transport_header(ack_skb) +
@@ -1642,7 +1638,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 	struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
 	struct tcp_sack_block sp[TCP_NUM_SACKS];
 	struct tcp_sack_block *cache;
-	struct tcp_sacktag_state state;
 	struct sk_buff *skb;
 	int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
 	int used_sacks;
@@ -1650,9 +1645,8 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 	int i, j;
 	int first_sack_index;
 
-	state.flag = 0;
-	state.reord = tp->packets_out;
-	state.rtt_us = -1L;
+	state->flag = 0;
+	state->reord = tp->packets_out;
 
 	if (!tp->sacked_out) {
 		if (WARN_ON(tp->fackets_out))
@@ -1663,7 +1657,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 	found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
 					 num_sacks, prior_snd_una);
 	if (found_dup_sack)
-		state.flag |= FLAG_DSACKING_ACK;
+		state->flag |= FLAG_DSACKING_ACK;
 
 	/* Eliminate too old ACKs, but take into
 	 * account more or less fresh ones, they can
@@ -1728,7 +1722,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 	}
 
 	skb = tcp_write_queue_head(sk);
-	state.fack_count = 0;
+	state->fack_count = 0;
 	i = 0;
 
 	if (!tp->sacked_out) {
@@ -1762,10 +1756,10 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 
 		/* Head todo? */
 		if (before(start_seq, cache->start_seq)) {
-			skb = tcp_sacktag_skip(skb, sk, &state,
+			skb = tcp_sacktag_skip(skb, sk, state,
 					       start_seq);
 			skb = tcp_sacktag_walk(skb, sk, next_dup,
-					       &state,
+					       state,
 					       start_seq,
 					       cache->start_seq,
 					       dup_sack);
@@ -1776,7 +1770,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 				goto advance_sp;
 
 			skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
-						       &state,
+						       state,
 						       cache->end_seq);
 
 			/* ...tail remains todo... */
@@ -1785,12 +1779,12 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 				skb = tcp_highest_sack(sk);
 				if (!skb)
 					break;
-				state.fack_count = tp->fackets_out;
+				state->fack_count = tp->fackets_out;
 				cache++;
 				goto walk;
 			}
 
-			skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq);
+			skb = tcp_sacktag_skip(skb, sk, state, cache->end_seq);
 			/* Check overlap against next cached too (past this one already) */
 			cache++;
 			continue;
@@ -1800,12 +1794,12 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 			skb = tcp_highest_sack(sk);
 			if (!skb)
 				break;
-			state.fack_count = tp->fackets_out;
+			state->fack_count = tp->fackets_out;
 		}
-		skb = tcp_sacktag_skip(skb, sk, &state, start_seq);
+		skb = tcp_sacktag_skip(skb, sk, state, start_seq);
 
 walk:
-		skb = tcp_sacktag_walk(skb, sk, next_dup, &state,
+		skb = tcp_sacktag_walk(skb, sk, next_dup, state,
 				       start_seq, end_seq, dup_sack);
 
 advance_sp:
@@ -1820,14 +1814,12 @@ advance_sp:
 	for (j = 0; j < used_sacks; j++)
 		tp->recv_sack_cache[i++] = sp[j];
 
-	tcp_mark_lost_retrans(sk);
-
-	tcp_verify_left_out(tp);
-
-	if ((state.reord < tp->fackets_out) &&
+	if ((state->reord < tp->fackets_out) &&
 	    ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
-		tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
+		tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
 
+	tcp_mark_lost_retrans(sk);
+	tcp_verify_left_out(tp);
 out:
 
 #if FASTRETRANS_DEBUG > 0
@@ -1836,8 +1828,7 @@ out:
 	WARN_ON((int)tp->retrans_out < 0);
 	WARN_ON((int)tcp_packets_in_flight(tp) < 0);
 #endif
-	*sack_rtt_us = state.rtt_us;
-	return state.flag;
+	return state->flag;
 }
 
 /* Limits sacked_out so that sum with lost_out isn't ever larger than
@@ -2257,7 +2248,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 			    (oldcnt >= packets))
 				break;
 
-			mss = skb_shinfo(skb)->gso_size;
+			mss = tcp_skb_mss(skb);
 			err = tcp_fragment(sk, skb, (packets - oldcnt) * mss,
 					   mss, GFP_ATOMIC);
 			if (err < 0)
@@ -2557,6 +2548,7 @@ void tcp_enter_cwr(struct sock *sk)
 		tcp_set_ca_state(sk, TCP_CA_CWR);
 	}
 }
+EXPORT_SYMBOL(tcp_enter_cwr);
 
 static void tcp_try_keep_open(struct sock *sk)
 {
@@ -2700,16 +2692,21 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
 	struct tcp_sock *tp = tcp_sk(sk);
 	bool recovered = !before(tp->snd_una, tp->high_seq);
 
+	if ((flag & FLAG_SND_UNA_ADVANCED) &&
+	    tcp_try_undo_loss(sk, false))
+		return;
+
 	if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
 		/* Step 3.b. A timeout is spurious if not all data are
 		 * lost, i.e., never-retransmitted data are (s)acked.
 		 */
-		if (tcp_try_undo_loss(sk, flag & FLAG_ORIG_SACK_ACKED))
+		if ((flag & FLAG_ORIG_SACK_ACKED) &&
+		    tcp_try_undo_loss(sk, true))
 			return;
 
-		if (after(tp->snd_nxt, tp->high_seq) &&
-		    (flag & FLAG_DATA_SACKED || is_dupack)) {
-			tp->frto = 0; /* Loss was real: 2nd part of step 3.a */
+		if (after(tp->snd_nxt, tp->high_seq)) {
+			if (flag & FLAG_DATA_SACKED || is_dupack)
+				tp->frto = 0; /* Step 3.a. loss was real */
 		} else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
 			tp->high_seq = tp->snd_nxt;
 			__tcp_push_pending_frames(sk, tcp_current_mss(sk),
@@ -2734,8 +2731,6 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
 		else if (flag & FLAG_SND_UNA_ADVANCED)
 			tcp_reset_reno_sack(tp);
 	}
-	if (tcp_try_undo_loss(sk, false))
-		return;
 	tcp_xmit_retransmit_queue(sk);
 }
 
@@ -3054,7 +3049,8 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
  * arrived at the other end.
  */
 static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
-			       u32 prior_snd_una, long sack_rtt_us)
+			       u32 prior_snd_una,
+			       struct tcp_sacktag_state *sack)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct skb_mstamp first_ackt, last_ackt, now;
@@ -3062,8 +3058,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 	u32 prior_sacked = tp->sacked_out;
 	u32 reord = tp->packets_out;
 	bool fully_acked = true;
-	long ca_seq_rtt_us = -1L;
+	long sack_rtt_us = -1L;
 	long seq_rtt_us = -1L;
+	long ca_rtt_us = -1L;
 	struct sk_buff *skb;
 	u32 pkts_acked = 0;
 	bool rtt_update;
@@ -3152,15 +3149,16 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 	skb_mstamp_get(&now);
 	if (likely(first_ackt.v64)) {
 		seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
-		ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
+		ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
+	}
+	if (sack->first_sackt.v64) {
+		sack_rtt_us = skb_mstamp_us_delta(&now, &sack->first_sackt);
+		ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt);
 	}
 
 	rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us);
 
 	if (flag & FLAG_ACKED) {
-		const struct tcp_congestion_ops *ca_ops
-			= inet_csk(sk)->icsk_ca_ops;
-
 		tcp_rearm_rto(sk);
 		if (unlikely(icsk->icsk_mtup.probe_size &&
 			     !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
@@ -3183,11 +3181,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
 		tp->fackets_out -= min(pkts_acked, tp->fackets_out);
 
-		if (ca_ops->pkts_acked) {
-			long rtt_us = min_t(ulong, ca_seq_rtt_us, sack_rtt_us);
-			ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
-		}
-
 	} else if (skb && rtt_update && sack_rtt_us >= 0 &&
 		   sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
 		/* Do not re-arm RTO if the sack RTT is measured from data sent
@@ -3197,6 +3190,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 		tcp_rearm_rto(sk);
 	}
 
+	if (icsk->icsk_ca_ops->pkts_acked)
+		icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked, ca_rtt_us);
+
 #if FASTRETRANS_DEBUG > 0
 	WARN_ON((int)tp->sacked_out < 0);
 	WARN_ON((int)tp->lost_out < 0);
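
Note: with this change pkts_acked() is invoked outside the FLAG_ACKED branch and always receives ca_rtt_us, the RTT measured to the most recently (s)acked, never-retransmitted segment. A hypothetical delay-based module could consume it roughly as below; example_ca and example_pkts_acked are illustrative names, and the three-argument callback signature is assumed from struct tcp_congestion_ops of this period.

/* Illustrative private state and callback; not part of this patch. */
struct example_ca {
	u32 min_rtt_us;
};

static void example_pkts_acked(struct sock *sk, u32 num_acked, s32 rtt_us)
{
	struct example_ca *ca = inet_csk_ca(sk);

	if (rtt_us <= 0)
		return;		/* no valid delay sample on this ACK */

	if (!ca->min_rtt_us || rtt_us < (s32)ca->min_rtt_us)
		ca->min_rtt_us = rtt_us;
}
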
@@ -3237,7 +3233,7 @@ static void tcp_ack_probe(struct sock *sk)
 		 * This function is not for random using!
 		 */
 	} else {
-		unsigned long when = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
+		unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX);
 
 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
 					  when, TCP_RTO_MAX);
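
Note: tcp_probe0_when() is not part of this file; it is assumed to come from the companion include/net/tcp.h change that rebases zero-window probe backoff on a safer floor than the raw RTO. A sketch of that assumed shape:

/* Assumed helpers (companion include/net/tcp.h change, not part of
 * this diff): back the probe0 timer off from a floor of TCP_RTO_MIN
 * and clamp the shifted value to max_when.
 */
static inline unsigned long tcp_probe0_base(const struct sock *sk)
{
	return max_t(unsigned long, inet_csk(sk)->icsk_rto, TCP_RTO_MIN);
}

static inline unsigned long tcp_probe0_when(const struct sock *sk,
					    unsigned long max_when)
{
	u64 when = (u64)tcp_probe0_base(sk) << inet_csk(sk)->icsk_backoff;

	return (unsigned long)min_t(u64, when, max_when);
}
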
@@ -3280,6 +3276,28 @@ static inline bool tcp_may_update_window(const struct tcp_sock *tp,
 		(ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
 }
 
+/* If we update tp->snd_una, also update tp->bytes_acked */
+static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
+{
+	u32 delta = ack - tp->snd_una;
+
+	u64_stats_update_begin(&tp->syncp);
+	tp->bytes_acked += delta;
+	u64_stats_update_end(&tp->syncp);
+	tp->snd_una = ack;
+}
+
+/* If we update tp->rcv_nxt, also update tp->bytes_received */
+static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
+{
+	u32 delta = seq - tp->rcv_nxt;
+
+	u64_stats_update_begin(&tp->syncp);
+	tp->bytes_received += delta;
+	u64_stats_update_end(&tp->syncp);
+	tp->rcv_nxt = seq;
+}
+
 /* Update our send window.
  *
  * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
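
Note: the u64_stats_update_begin()/u64_stats_update_end() pairs in the helpers above exist so a reader can snapshot the 64-bit byte counters consistently on 32-bit hosts. A minimal reader sketch, assuming the standard u64_stats API; example_read_byte_counters is an illustrative name, the expected real consumer being tcp_get_info() in a companion patch:

/* Minimal reader sketch: snapshot bytes_acked/bytes_received under the
 * same tp->syncp sequence counter the writers above take.
 */
static void example_read_byte_counters(const struct tcp_sock *tp,
				       u64 *bytes_acked, u64 *bytes_received)
{
	unsigned int start;

	do {
		start = u64_stats_fetch_begin_irq(&tp->syncp);
		*bytes_acked = tp->bytes_acked;
		*bytes_received = tp->bytes_received;
	} while (u64_stats_fetch_retry_irq(&tp->syncp, start));
}
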
@@ -3315,7 +3333,7 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
 		}
 	}
 
-	tp->snd_una = ack;
+	tcp_snd_una_update(tp, ack);
 
 	return flag;
 }
@@ -3443,6 +3461,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_sacktag_state sack_state;
 	u32 prior_snd_una = tp->snd_una;
 	u32 ack_seq = TCP_SKB_CB(skb)->seq;
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
@@ -3451,7 +3470,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	int prior_packets = tp->packets_out;
 	const int prior_unsacked = tp->packets_out - tp->sacked_out;
 	int acked = 0; /* Number of packets newly acked */
-	long sack_rtt_us = -1L;
+
+	sack_state.first_sackt.v64 = 0;
 
 	/* We very likely will need to access write queue head. */
 	prefetchw(sk->sk_write_queue.next);
@@ -3497,7 +3517,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 		 * Note, we use the fact that SND.UNA>=SND.WL2.
 		 */
 		tcp_update_wl(tp, ack_seq);
-		tp->snd_una = ack;
+		tcp_snd_una_update(tp, ack);
 		flag |= FLAG_WIN_UPDATE;
 
 		tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
@@ -3515,7 +3535,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	if (TCP_SKB_CB(skb)->sacked)
 		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
-						&sack_rtt_us);
+						&sack_state);
 
 	if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
 		flag |= FLAG_ECE;
@@ -3540,7 +3560,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	/* See if we can take anything off of the retransmit queue. */
 	acked = tp->packets_out;
 	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
-				    sack_rtt_us);
+				    &sack_state);
 	acked -= tp->packets_out;
 
 	/* Advance cwnd if state allows */
@@ -3592,7 +3612,7 @@ old_ack:
 	 */
 	if (TCP_SKB_CB(skb)->sacked) {
 		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
-						&sack_rtt_us);
+						&sack_state);
 		tcp_fastretrans_alert(sk, acked, prior_unsacked,
 				      is_dupack, flag);
 	}
@@ -4236,7 +4256,7 @@ static void tcp_ofo_queue(struct sock *sk)
 
 		tail = skb_peek_tail(&sk->sk_receive_queue);
 		eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
-		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 		if (!eaten)
 			__skb_queue_tail(&sk->sk_receive_queue, skb);
 		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -4404,7 +4424,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
 	__skb_pull(skb, hdrlen);
 	eaten = (tail &&
 		 tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
-	tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+	tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
 	if (!eaten) {
 		__skb_queue_tail(&sk->sk_receive_queue, skb);
 		skb_set_owner_r(skb, sk);
@@ -4491,13 +4511,15 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 
 	if (eaten <= 0) {
 queue_and_out:
-		if (eaten < 0 &&
-		    tcp_try_rmem_schedule(sk, skb, skb->truesize))
-			goto drop;
-
+		if (eaten < 0) {
+			if (skb_queue_len(&sk->sk_receive_queue) == 0)
+				sk_forced_mem_schedule(sk, skb->truesize);
+			else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
+				goto drop;
+		}
 		eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
 	}
-	tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+	tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 	if (skb->len)
 		tcp_event_data_recv(sk, skb);
 	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
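
Note: sk_forced_mem_schedule() is not defined in this file. It lets the first skb queued to an empty receive queue be charged to the socket even when tcp_try_rmem_schedule() would fail under memory pressure, so every connection can still make forward progress. An assumed sketch of the helper (the real one is expected to live in net/ipv4/tcp_output.c):

/* Assumed sketch: charge the pages unconditionally instead of failing
 * the way sk_rmem_schedule() can under memory pressure.
 */
void sk_forced_mem_schedule(struct sock *sk, int size)
{
	int amt, status;

	if (size <= sk->sk_forward_alloc)
		return;
	amt = sk_mem_pages(size);
	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
	sk_memory_allocated_add(sk, amt, &status);
}
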
@@ -4765,7 +4787,7 @@ static int tcp_prune_queue(struct sock *sk)
 
 	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
 		tcp_clamp_window(sk);
-	else if (sk_under_memory_pressure(sk))
+	else if (tcp_under_memory_pressure(sk))
 		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
 	tcp_collapse_ofo_queue(sk);
@@ -4809,7 +4831,7 @@ static bool tcp_should_expand_sndbuf(const struct sock *sk)
 		return false;
 
 	/* If we are under global TCP memory pressure, do not expand. */
-	if (sk_under_memory_pressure(sk))
+	if (tcp_under_memory_pressure(sk))
 		return false;
 
 	/* If we are under soft global TCP memory pressure, do not expand. */
@@ -5245,7 +5267,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			tcp_rcv_rtt_measure_ts(sk, skb);
 
 			__skb_pull(skb, tcp_header_len);
-			tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+			tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
 			eaten = 1;
 		}
@@ -6044,6 +6066,23 @@ static bool tcp_syn_flood_action(struct sock *sk,
 	return want_cookie;
 }
 
+static void tcp_reqsk_record_syn(const struct sock *sk,
+				 struct request_sock *req,
+				 const struct sk_buff *skb)
+{
+	if (tcp_sk(sk)->save_syn) {
+		u32 len = skb_network_header_len(skb) + tcp_hdrlen(skb);
+		u32 *copy;
+
+		copy = kmalloc(len + sizeof(u32), GFP_ATOMIC);
+		if (copy) {
+			copy[0] = len;
+			memcpy(&copy[1], skb_network_header(skb), len);
+			req->saved_syn = copy;
+		}
+	}
+}
+
 int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		     const struct tcp_request_sock_ops *af_ops,
 		     struct sock *sk, struct sk_buff *skb)
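
Note: tcp_reqsk_record_syn() only stashes the SYN's network and TCP headers when the listener has tp->save_syn set. The knobs that set and read this state are assumed to be the TCP_SAVE_SYN and TCP_SAVED_SYN socket options from the companion patches (not shown in this diff); on older libc headers the constants may need to come from <linux/tcp.h>. A userspace usage sketch:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

/* Call on the listening socket before connections arrive. */
static int enable_save_syn(int listen_fd)
{
	int one = 1;

	return setsockopt(listen_fd, IPPROTO_TCP, TCP_SAVE_SYN,
			  &one, sizeof(one));
}

/* Call on a socket returned by accept(); returns header length or -1. */
static int read_saved_syn(int conn_fd, char *buf, socklen_t buflen)
{
	socklen_t len = buflen;

	if (getsockopt(conn_fd, IPPROTO_TCP, TCP_SAVED_SYN, buf, &len) < 0)
		return -1;
	return (int)len;
}
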
@@ -6176,6 +6215,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		tcp_rsk(req)->tfo_listener = false;
 		af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
 	}
+	tcp_reqsk_record_syn(sk, req, skb);
 
 	return 0;
 