Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--   net/ipv4/tcp_input.c   157
1 file changed, 109 insertions(+), 48 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 19c449f62672..bbb7d88a16b4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1367,7 +1367,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
  * a normal way
  */
 static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
-                                        u32 skip_to_seq)
+                                        u32 skip_to_seq, int *fack_count)
 {
         tcp_for_write_queue_from(skb, sk) {
                 if (skb == tcp_send_head(sk))
@@ -1375,6 +1375,8 @@ static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
 
                 if (!before(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
                         break;
+
+                *fack_count += tcp_skb_pcount(skb);
         }
         return skb;
 }
@@ -1390,7 +1392,7 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
                 return skb;
 
         if (before(next_dup->start_seq, skip_to_seq)) {
-                skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq);
+                skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq, fack_count);
                 tcp_sacktag_walk(skb, sk, NULL,
                                  next_dup->start_seq, next_dup->end_seq,
                                  1, fack_count, reord, flag);
@@ -1537,7 +1539,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
 
                         /* Head todo? */
                         if (before(start_seq, cache->start_seq)) {
-                                skb = tcp_sacktag_skip(skb, sk, start_seq);
+                                skb = tcp_sacktag_skip(skb, sk, start_seq,
+                                                       &fack_count);
                                 skb = tcp_sacktag_walk(skb, sk, next_dup,
                                                        start_seq,
                                                        cache->start_seq,
@@ -1565,7 +1568,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
                                 goto walk;
                         }
 
-                        skb = tcp_sacktag_skip(skb, sk, cache->end_seq);
+                        skb = tcp_sacktag_skip(skb, sk, cache->end_seq,
+                                               &fack_count);
                         /* Check overlap against next cached too (past this one already) */
                         cache++;
                         continue;
@@ -1577,7 +1581,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
                                 break;
                         fack_count = tp->fackets_out;
                 }
-                skb = tcp_sacktag_skip(skb, sk, start_seq);
+                skb = tcp_sacktag_skip(skb, sk, start_seq, &fack_count);
 
 walk:
                 skb = tcp_sacktag_walk(skb, sk, next_dup, start_seq, end_seq,
@@ -1621,13 +1625,11 @@ out:
         return flag;
 }
 
-/* If we receive more dupacks than we expected counting segments
- * in assumption of absent reordering, interpret this as reordering.
- * The only another reason could be bug in receiver TCP.
+/* Limits sacked_out so that sum with lost_out isn't ever larger than
+ * packets_out. Returns zero if sacked_out adjustement wasn't necessary.
  */
-static void tcp_check_reno_reordering(struct sock *sk, const int addend)
+int tcp_limit_reno_sacked(struct tcp_sock *tp)
 {
-        struct tcp_sock *tp = tcp_sk(sk);
         u32 holes;
 
         holes = max(tp->lost_out, 1U);
@@ -1635,8 +1637,20 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
 
         if ((tp->sacked_out + holes) > tp->packets_out) {
                 tp->sacked_out = tp->packets_out - holes;
-                tcp_update_reordering(sk, tp->packets_out + addend, 0);
+                return 1;
         }
+        return 0;
+}
+
+/* If we receive more dupacks than we expected counting segments
+ * in assumption of absent reordering, interpret this as reordering.
+ * The only another reason could be bug in receiver TCP.
+ */
+static void tcp_check_reno_reordering(struct sock *sk, const int addend)
+{
+        struct tcp_sock *tp = tcp_sk(sk);
+        if (tcp_limit_reno_sacked(tp))
+                tcp_update_reordering(sk, tp->packets_out + addend, 0);
 }
 
 /* Emulate SACKs for SACKless connection: account for a new dupack. */
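
Note: the hunk above splits the old tcp_check_reno_reordering() into a reusable clamp, tcp_limit_reno_sacked(), plus a wrapper that still triggers the reordering update when the clamp fired. A stand-alone user-space sketch of the invariant being enforced follows; the struct and the numbers are invented for illustration and this is not the kernel code itself.

#include <stdio.h>

/* Hypothetical stand-in for the handful of tcp_sock fields involved. */
struct fake_tp {
        unsigned int packets_out;       /* segments currently in flight */
        unsigned int sacked_out;        /* dupack-emulated "SACKed" count (NewReno) */
        unsigned int lost_out;          /* segments already marked lost */
};

/* Same clamp as the patch: keep sacked_out + max(lost_out, 1) <= packets_out
 * and report whether an adjustment was needed.
 */
static int limit_reno_sacked(struct fake_tp *tp)
{
        unsigned int holes = tp->lost_out > 1 ? tp->lost_out : 1;

        if (tp->sacked_out + holes > tp->packets_out) {
                tp->sacked_out = tp->packets_out - holes;
                return 1;
        }
        return 0;
}

int main(void)
{
        /* 9 dupacks against 10 in-flight segments with 3 already marked lost
         * cannot all be genuine: sacked_out gets clamped to 10 - 3 = 7.
         */
        struct fake_tp tp = { .packets_out = 10, .sacked_out = 9, .lost_out = 3 };
        int clamped = limit_reno_sacked(&tp);

        printf("clamped=%d sacked_out=%u\n", clamped, tp.sacked_out);
        return 0;
}
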
@@ -1677,11 +1691,16 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
 int tcp_use_frto(struct sock *sk)
 {
         const struct tcp_sock *tp = tcp_sk(sk);
+        const struct inet_connection_sock *icsk = inet_csk(sk);
         struct sk_buff *skb;
 
         if (!sysctl_tcp_frto)
                 return 0;
 
+        /* MTU probe and F-RTO won't really play nicely along currently */
+        if (icsk->icsk_mtup.probe_size)
+                return 0;
+
         if (IsSackFrto())
                 return 1;
 
@@ -2130,11 +2149,13 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
 /* Mark head of queue up as lost. With RFC3517 SACK, the packets is
  * is against sacked "cnt", otherwise it's against facked "cnt"
  */
-static void tcp_mark_head_lost(struct sock *sk, int packets, int fast_rexmit)
+static void tcp_mark_head_lost(struct sock *sk, int packets)
 {
         struct tcp_sock *tp = tcp_sk(sk);
         struct sk_buff *skb;
-        int cnt;
+        int cnt, oldcnt;
+        int err;
+        unsigned int mss;
 
         BUG_TRAP(packets <= tp->packets_out);
         if (tp->lost_skb_hint) {
@@ -2153,13 +2174,25 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int fast_rexmit)
                 tp->lost_skb_hint = skb;
                 tp->lost_cnt_hint = cnt;
 
+                if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
+                        break;
+
+                oldcnt = cnt;
                 if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
                     (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
                         cnt += tcp_skb_pcount(skb);
 
-                if (((!fast_rexmit || (tp->lost_out > 0)) && (cnt > packets)) ||
-                    after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
-                        break;
+                if (cnt > packets) {
+                        if (tcp_is_sack(tp) || (oldcnt >= packets))
+                                break;
+
+                        mss = skb_shinfo(skb)->gso_size;
+                        err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss);
+                        if (err < 0)
+                                break;
+                        cnt = packets;
+                }
+
                 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
                         TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
                         tp->lost_out += tcp_skb_pcount(skb);
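
Note: for non-SACK (NewReno) connections the new `cnt > packets` branch no longer bails out of an oversized GSO skb; it asks tcp_fragment() to split the skb so that the head part still fitting the loss budget can be marked. A back-of-the-envelope illustration of that split arithmetic follows, with invented numbers and tcp_fragment() modelled as a plain byte split.

#include <stdio.h>

int main(void)
{
        unsigned int mss     = 1448;    /* gso_size of the big skb          */
        unsigned int pcount  = 5;       /* MSS-sized segments it carries    */
        unsigned int packets = 3;       /* budget of segments to mark lost  */
        unsigned int oldcnt  = 1;       /* segments counted before this skb */
        unsigned int cnt     = oldcnt + pcount;  /* 6 > 3, so we must split */

        if (cnt > packets) {
                /* Same expression as the patch: split off the part that still
                 * fits in the budget and leave the tail unmarked.
                 */
                unsigned int split_bytes = (packets - oldcnt) * mss;

                printf("split %u-byte skb after %u bytes (%u of %u segments marked lost)\n",
                       pcount * mss, split_bytes, packets - oldcnt, pcount);
        }
        return 0;
}
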
@@ -2176,17 +2209,17 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
         struct tcp_sock *tp = tcp_sk(sk);
 
         if (tcp_is_reno(tp)) {
-                tcp_mark_head_lost(sk, 1, fast_rexmit);
+                tcp_mark_head_lost(sk, 1);
         } else if (tcp_is_fack(tp)) {
                 int lost = tp->fackets_out - tp->reordering;
                 if (lost <= 0)
                         lost = 1;
-                tcp_mark_head_lost(sk, lost, fast_rexmit);
+                tcp_mark_head_lost(sk, lost);
         } else {
                 int sacked_upto = tp->sacked_out - tp->reordering;
-                if (sacked_upto < 0)
-                        sacked_upto = 0;
-                tcp_mark_head_lost(sk, sacked_upto, fast_rexmit);
+                if (sacked_upto < fast_rexmit)
+                        sacked_upto = fast_rexmit;
+                tcp_mark_head_lost(sk, sacked_upto);
         }
 
         /* New heuristics: it is possible only after we switched
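
Note: in the non-FACK RFC3517 branch the lower bound moves from 0 to fast_rexmit, which appears to compensate for the fast_rexmit argument removed from tcp_mark_head_lost(): when a fast retransmit has been triggered, at least one head segment is still marked lost even if almost nothing has been SACKed yet. A small worked example with invented values:

#include <stdio.h>

/* Mirrors only the arithmetic of the else-branch above. */
static int segments_to_mark(int sacked_out, int reordering, int fast_rexmit)
{
        int sacked_upto = sacked_out - reordering;

        if (sacked_upto < fast_rexmit)
                sacked_upto = fast_rexmit;
        return sacked_upto;
}

int main(void)
{
        printf("%d\n", segments_to_mark(5, 3, 1));  /* 2: plenty already SACKed      */
        printf("%d\n", segments_to_mark(2, 3, 1));  /* 1: clamp keeps fast rexmit alive */
        printf("%d\n", segments_to_mark(2, 3, 0));  /* 0: no fast rexmit, mark nothing  */
        return 0;
}
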
@@ -2520,7 +2553,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
             before(tp->snd_una, tp->high_seq) &&
             icsk->icsk_ca_state != TCP_CA_Open &&
             tp->fackets_out > tp->reordering) {
-                tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0);
+                tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering);
                 NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
         }
 
@@ -2582,6 +2615,8 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
         case TCP_CA_Loss:
                 if (flag & FLAG_DATA_ACKED)
                         icsk->icsk_retransmits = 0;
+                if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED)
+                        tcp_reset_reno_sack(tp);
                 if (!tcp_try_undo_loss(sk)) {
                         tcp_moderate_cwnd(tp);
                         tcp_xmit_retransmit_queue(sk);
@@ -3806,8 +3841,28 @@ static void tcp_ofo_queue(struct sock *sk)
         }
 }
 
+static int tcp_prune_ofo_queue(struct sock *sk);
 static int tcp_prune_queue(struct sock *sk);
 
+static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
+{
+        if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
+            !sk_rmem_schedule(sk, size)) {
+
+                if (tcp_prune_queue(sk) < 0)
+                        return -1;
+
+                if (!sk_rmem_schedule(sk, size)) {
+                        if (!tcp_prune_ofo_queue(sk))
+                                return -1;
+
+                        if (!sk_rmem_schedule(sk, size))
+                                return -1;
+                }
+        }
+        return 0;
+}
+
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
         struct tcphdr *th = tcp_hdr(skb);
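
Note: tcp_try_rmem_schedule() factors out the receive-buffer fallback that the two call sites further down previously open-coded, and adds one extra stage: regular pruning first, then purging the out-of-order queue, then a final re-check before the caller drops the skb. A rough stand-alone sketch of that decision order follows; the predicates are hypothetical stand-ins for the real socket-accounting calls (sk_rmem_schedule(), tcp_prune_queue(), tcp_prune_ofo_queue()), not their actual implementations.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical probes with fixed answers, for illustration only. */
static bool over_rcvbuf(void)        { return true;  }  /* sk_rmem_alloc > sk_rcvbuf?   */
static bool mem_fits(void)           { return false; }  /* sk_rmem_schedule() succeeds? */
static bool prune_queue_ok(void)     { return true;  }  /* tcp_prune_queue() >= 0?      */
static bool prune_ofo_queue(void)    { return true;  }  /* did the OFO purge free skbs? */
static bool mem_fits_after_ofo(void) { return true;  }

static int try_rmem_schedule(void)
{
        if (!over_rcvbuf() && mem_fits())
                return 0;                       /* fast path: memory already available */

        if (!prune_queue_ok())
                return -1;                      /* gentle reclaim failed outright */

        if (mem_fits())
                return 0;                       /* collapsing the receive queue was enough */

        if (!prune_ofo_queue())
                return -1;                      /* nothing left to throw away */

        return mem_fits_after_ofo() ? 0 : -1;   /* on -1 the caller drops the skb */
}

int main(void)
{
        printf("queue the skb? %s\n", try_rmem_schedule() == 0 ? "yes" : "no, drop it");
        return 0;
}
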
@@ -3857,12 +3912,9 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
         if (eaten <= 0) {
 queue_and_out:
                 if (eaten < 0 &&
-                    (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-                     !sk_rmem_schedule(sk, skb->truesize))) {
-                        if (tcp_prune_queue(sk) < 0 ||
-                            !sk_rmem_schedule(sk, skb->truesize))
-                                goto drop;
-                }
+                    tcp_try_rmem_schedule(sk, skb->truesize))
+                        goto drop;
+
                 skb_set_owner_r(skb, sk);
                 __skb_queue_tail(&sk->sk_receive_queue, skb);
         }
@@ -3931,12 +3983,8 @@ drop:
 
         TCP_ECN_check_ce(tp, skb);
 
-        if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-            !sk_rmem_schedule(sk, skb->truesize)) {
-                if (tcp_prune_queue(sk) < 0 ||
-                    !sk_rmem_schedule(sk, skb->truesize))
-                        goto drop;
-        }
+        if (tcp_try_rmem_schedule(sk, skb->truesize))
+                goto drop;
 
         /* Disable header prediction. */
         tp->pred_flags = 0;
@@ -4163,6 +4211,32 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
         }
 }
 
+/*
+ * Purge the out-of-order queue.
+ * Return true if queue was pruned.
+ */
+static int tcp_prune_ofo_queue(struct sock *sk)
+{
+        struct tcp_sock *tp = tcp_sk(sk);
+        int res = 0;
+
+        if (!skb_queue_empty(&tp->out_of_order_queue)) {
+                NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
+                __skb_queue_purge(&tp->out_of_order_queue);
+
+                /* Reset SACK state. A conforming SACK implementation will
+                 * do the same at a timeout based retransmit. When a connection
+                 * is in a sad state like this, we care only about integrity
+                 * of the connection not performance.
+                 */
+                if (tp->rx_opt.sack_ok)
+                        tcp_sack_reset(&tp->rx_opt);
+                sk_mem_reclaim(sk);
+                res = 1;
+        }
+        return res;
+}
+
 /* Reduce allocated memory if we can, trying to get
  * the socket within its memory limits again.
  *
@@ -4196,20 +4270,7 @@ static int tcp_prune_queue(struct sock *sk)
         /* Collapsing did not help, destructive actions follow.
          * This must not ever occur. */
 
-        /* First, purge the out_of_order queue. */
-        if (!skb_queue_empty(&tp->out_of_order_queue)) {
-                NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
-                __skb_queue_purge(&tp->out_of_order_queue);
-
-                /* Reset SACK state. A conforming SACK implementation will
-                 * do the same at a timeout based retransmit. When a connection
-                 * is in a sad state like this, we care only about integrity
-                 * of the connection not performance.
-                 */
-                if (tcp_is_sack(tp))
-                        tcp_sack_reset(&tp->rx_opt);
-                sk_mem_reclaim(sk);
-        }
+        tcp_prune_ofo_queue(sk);
 
         if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
                 return 0;