Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--	net/ipv4/tcp_input.c	157
1 file changed, 109 insertions(+), 48 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 19c449f62672..bbb7d88a16b4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1367,7 +1367,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
  * a normal way
  */
 static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
-					u32 skip_to_seq)
+					u32 skip_to_seq, int *fack_count)
 {
 	tcp_for_write_queue_from(skb, sk) {
 		if (skb == tcp_send_head(sk))
@@ -1375,6 +1375,8 @@ static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
 
 		if (!before(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
 			break;
+
+		*fack_count += tcp_skb_pcount(skb);
 	}
 	return skb;
 }
@@ -1390,7 +1392,7 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
 		return skb;
 
 	if (before(next_dup->start_seq, skip_to_seq)) {
-		skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq);
+		skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq, fack_count);
 		tcp_sacktag_walk(skb, sk, NULL,
 				 next_dup->start_seq, next_dup->end_seq,
 				 1, fack_count, reord, flag);
@@ -1537,7 +1539,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
 
 		/* Head todo? */
 		if (before(start_seq, cache->start_seq)) {
-			skb = tcp_sacktag_skip(skb, sk, start_seq);
+			skb = tcp_sacktag_skip(skb, sk, start_seq,
+					       &fack_count);
 			skb = tcp_sacktag_walk(skb, sk, next_dup,
 					       start_seq,
 					       cache->start_seq,
@@ -1565,7 +1568,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
 			goto walk;
 		}
 
-		skb = tcp_sacktag_skip(skb, sk, cache->end_seq);
+		skb = tcp_sacktag_skip(skb, sk, cache->end_seq,
+				       &fack_count);
 		/* Check overlap against next cached too (past this one already) */
 		cache++;
 		continue;
@@ -1577,7 +1581,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
 			break;
 		fack_count = tp->fackets_out;
 	}
-	skb = tcp_sacktag_skip(skb, sk, start_seq);
+	skb = tcp_sacktag_skip(skb, sk, start_seq, &fack_count);
 
 walk:
 	skb = tcp_sacktag_walk(skb, sk, next_dup, start_seq, end_seq,
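
The hunks above all feed one small change: segments that tcp_sacktag_skip() walks past now add their pcount to fack_count, so the callers' forward-ACK accounting covers every segment sitting ahead of the SACKed range, not only the ones the tagging loop itself visits. A toy illustration of that accumulation (standalone C, not kernel code; the pcount values are invented):

/* Skipped skbs now contribute their segment count, mirroring
 * "*fack_count += tcp_skb_pcount(skb)" in tcp_sacktag_skip(). */
#include <stdio.h>

int main(void)
{
	unsigned int pcounts[] = { 2, 1, 4 };	/* pretend GSO segment counts */
	unsigned int fack_count = 0;

	for (unsigned int i = 0; i < 3; i++)
		fack_count += pcounts[i];

	printf("fack_count after skipping: %u\n", fack_count);	/* 7 */
	return 0;
}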
@@ -1621,13 +1625,11 @@ out:
 	return flag;
 }
 
-/* If we receive more dupacks than we expected counting segments
- * in assumption of absent reordering, interpret this as reordering.
- * The only another reason could be bug in receiver TCP.
+/* Limits sacked_out so that sum with lost_out isn't ever larger than
+ * packets_out. Returns zero if sacked_out adjustement wasn't necessary.
  */
-static void tcp_check_reno_reordering(struct sock *sk, const int addend)
+int tcp_limit_reno_sacked(struct tcp_sock *tp)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	u32 holes;
 
 	holes = max(tp->lost_out, 1U);
@@ -1635,8 +1637,20 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
 
 	if ((tp->sacked_out + holes) > tp->packets_out) {
 		tp->sacked_out = tp->packets_out - holes;
-		tcp_update_reordering(sk, tp->packets_out + addend, 0);
+		return 1;
 	}
+	return 0;
+}
+
+/* If we receive more dupacks than we expected counting segments
+ * in assumption of absent reordering, interpret this as reordering.
+ * The only another reason could be bug in receiver TCP.
+ */
+static void tcp_check_reno_reordering(struct sock *sk, const int addend)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	if (tcp_limit_reno_sacked(tp))
+		tcp_update_reordering(sk, tp->packets_out + addend, 0);
 }
 
 /* Emulate SACKs for SACKless connection: account for a new dupack. */
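
The clamp that used to live inside tcp_check_reno_reordering() is split out into a non-static tcp_limit_reno_sacked(), presumably so other Reno/NewReno paths can reuse the invariant that sacked_out plus lost_out never exceeds packets_out. A minimal standalone sketch of that invariant (the struct and values are stand-ins, not the kernel's):

#include <stdio.h>

struct fake_tp {				/* stand-in for struct tcp_sock */
	unsigned int packets_out, sacked_out, lost_out;
};

/* Mirrors tcp_limit_reno_sacked(): clamp sacked_out so that
 * sacked_out + holes can never exceed packets_out. */
static int limit_reno_sacked(struct fake_tp *tp)
{
	unsigned int holes = tp->lost_out ? tp->lost_out : 1;

	if (holes > tp->packets_out)
		holes = tp->packets_out;

	if (tp->sacked_out + holes > tp->packets_out) {
		tp->sacked_out = tp->packets_out - holes;
		return 1;			/* adjustment was needed */
	}
	return 0;
}

int main(void)
{
	struct fake_tp tp = { .packets_out = 10, .sacked_out = 9, .lost_out = 3 };

	if (limit_reno_sacked(&tp))
		printf("sacked_out clamped to %u\n", tp.sacked_out);	/* 7 */
	return 0;
}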
@@ -1677,11 +1691,16 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
 int tcp_use_frto(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct sk_buff *skb;
 
 	if (!sysctl_tcp_frto)
 		return 0;
 
+	/* MTU probe and F-RTO won't really play nicely along currently */
+	if (icsk->icsk_mtup.probe_size)
+		return 0;
+
 	if (IsSackFrto())
 		return 1;
 
@@ -2130,11 +2149,13 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
 /* Mark head of queue up as lost. With RFC3517 SACK, the packets is
  * is against sacked "cnt", otherwise it's against facked "cnt"
  */
-static void tcp_mark_head_lost(struct sock *sk, int packets, int fast_rexmit)
+static void tcp_mark_head_lost(struct sock *sk, int packets)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-	int cnt;
+	int cnt, oldcnt;
+	int err;
+	unsigned int mss;
 
 	BUG_TRAP(packets <= tp->packets_out);
 	if (tp->lost_skb_hint) {
@@ -2153,13 +2174,25 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int fast_rexmit)
 		tp->lost_skb_hint = skb;
 		tp->lost_cnt_hint = cnt;
 
+		if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
+			break;
+
+		oldcnt = cnt;
 		if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
 		    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
 			cnt += tcp_skb_pcount(skb);
 
-		if (((!fast_rexmit || (tp->lost_out > 0)) && (cnt > packets)) ||
-		    after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
-			break;
+		if (cnt > packets) {
+			if (tcp_is_sack(tp) || (oldcnt >= packets))
+				break;
+
+			mss = skb_shinfo(skb)->gso_size;
+			err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss);
+			if (err < 0)
+				break;
+			cnt = packets;
+		}
+
 		if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
 			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
 			tp->lost_out += tcp_skb_pcount(skb);
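
The rewritten loop above drops the fast_rexmit special case and instead, for non-SACK marking, fragments a GSO skb that straddles the threshold so only its first (packets - oldcnt) MSS-sized segments are marked lost. A rough sketch of that split-point arithmetic (standalone, values invented):

#include <stdio.h>

int main(void)
{
	unsigned int packets = 5;	/* segments we are allowed to mark lost */
	unsigned int oldcnt  = 3;	/* segments counted before this skb     */
	unsigned int mss     = 1448;	/* skb_shinfo(skb)->gso_size            */

	/* corresponds to tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss) */
	unsigned int split = (packets - oldcnt) * mss;

	printf("fragment the skb at %u bytes (%u segments)\n",
	       split, packets - oldcnt);	/* 2896 bytes, 2 segments */
	return 0;
}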
@@ -2176,17 +2209,17 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	if (tcp_is_reno(tp)) {
-		tcp_mark_head_lost(sk, 1, fast_rexmit);
+		tcp_mark_head_lost(sk, 1);
 	} else if (tcp_is_fack(tp)) {
 		int lost = tp->fackets_out - tp->reordering;
 		if (lost <= 0)
 			lost = 1;
-		tcp_mark_head_lost(sk, lost, fast_rexmit);
+		tcp_mark_head_lost(sk, lost);
 	} else {
 		int sacked_upto = tp->sacked_out - tp->reordering;
-		if (sacked_upto < 0)
-			sacked_upto = 0;
-		tcp_mark_head_lost(sk, sacked_upto, fast_rexmit);
+		if (sacked_upto < fast_rexmit)
+			sacked_upto = fast_rexmit;
+		tcp_mark_head_lost(sk, sacked_upto);
 	}
 
 	/* New heuristics: it is possible only after we switched
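
With the fast_rexmit argument gone from tcp_mark_head_lost(), the "mark at least one segment on fast retransmit" behaviour moves into the clamp above: sacked_upto is now bounded below by fast_rexmit rather than by 0. A worked example of the equivalence (standalone, not kernel code):

#include <stdio.h>

int main(void)
{
	int sacked_out = 2, reordering = 3, fast_rexmit = 1;

	int old_upto = sacked_out - reordering;
	if (old_upto < 0)
		old_upto = 0;		/* old: 0, head marked only via the special case */

	int new_upto = sacked_out - reordering;
	if (new_upto < fast_rexmit)
		new_upto = fast_rexmit;	/* new: 1, head marked by the generic loop */

	printf("old packets=%d, new packets=%d\n", old_upto, new_upto);
	return 0;
}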
@@ -2520,7 +2553,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 	    before(tp->snd_una, tp->high_seq) &&
 	    icsk->icsk_ca_state != TCP_CA_Open &&
 	    tp->fackets_out > tp->reordering) {
-		tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0);
+		tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering);
 		NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
 	}
 
@@ -2582,6 +2615,8 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 	case TCP_CA_Loss:
 		if (flag & FLAG_DATA_ACKED)
 			icsk->icsk_retransmits = 0;
+		if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED)
+			tcp_reset_reno_sack(tp);
 		if (!tcp_try_undo_loss(sk)) {
 			tcp_moderate_cwnd(tp);
 			tcp_xmit_retransmit_queue(sk);
@@ -3806,8 +3841,28 @@ static void tcp_ofo_queue(struct sock *sk)
 	}
 }
 
+static int tcp_prune_ofo_queue(struct sock *sk);
 static int tcp_prune_queue(struct sock *sk);
 
+static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
+{
+	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
+	    !sk_rmem_schedule(sk, size)) {
+
+		if (tcp_prune_queue(sk) < 0)
+			return -1;
+
+		if (!sk_rmem_schedule(sk, size)) {
+			if (!tcp_prune_ofo_queue(sk))
+				return -1;
+
+			if (!sk_rmem_schedule(sk, size))
+				return -1;
+		}
+	}
+	return 0;
+}
+
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcphdr *th = tcp_hdr(skb);
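
The new tcp_try_rmem_schedule() helper centralizes the receive-memory admission ladder used at two call sites further down: try to charge the skb, prune/collapse the receive queue and retry, purge the out-of-order queue and retry once more, and only then tell the caller to drop. A toy model of that ordering (standalone C; the budget numbers are invented and the rcvbuf-overrun trigger of the real helper is omitted):

#include <stdio.h>

static int budget = 1000;			/* pretend receive-buffer headroom */

static int rmem_schedule(unsigned int size)	/* stand-in for sk_rmem_schedule() */
{
	return size <= (unsigned int)budget;
}

static void prune_queue(void)     { budget += 300; }	/* collapse/trim receive queue */
static void prune_ofo_queue(void) { budget += 700; }	/* purge out-of-order queue    */

static int try_rmem_schedule(unsigned int size)
{
	if (!rmem_schedule(size)) {
		prune_queue();
		if (!rmem_schedule(size)) {
			prune_ofo_queue();
			if (!rmem_schedule(size))
				return -1;	/* caller drops the skb */
		}
	}
	return 0;
}

int main(void)
{
	printf("1500-byte skb admitted: %s\n",
	       try_rmem_schedule(1500) == 0 ? "yes" : "no");	/* yes, after both prunes */
	return 0;
}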
@@ -3857,12 +3912,9 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 	if (eaten <= 0) {
 queue_and_out:
 		if (eaten < 0 &&
-		    (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-		     !sk_rmem_schedule(sk, skb->truesize))) {
-			if (tcp_prune_queue(sk) < 0 ||
-			    !sk_rmem_schedule(sk, skb->truesize))
-				goto drop;
-		}
+		    tcp_try_rmem_schedule(sk, skb->truesize))
+			goto drop;
+
 		skb_set_owner_r(skb, sk);
 		__skb_queue_tail(&sk->sk_receive_queue, skb);
 	}
@@ -3931,12 +3983,8 @@ drop:
 
 	TCP_ECN_check_ce(tp, skb);
 
-	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-	    !sk_rmem_schedule(sk, skb->truesize)) {
-		if (tcp_prune_queue(sk) < 0 ||
-		    !sk_rmem_schedule(sk, skb->truesize))
-			goto drop;
-	}
+	if (tcp_try_rmem_schedule(sk, skb->truesize))
+		goto drop;
 
 	/* Disable header prediction. */
 	tp->pred_flags = 0;
@@ -4163,6 +4211,32 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
 	}
 }
 
+/*
+ * Purge the out-of-order queue.
+ * Return true if queue was pruned.
+ */
+static int tcp_prune_ofo_queue(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int res = 0;
+
+	if (!skb_queue_empty(&tp->out_of_order_queue)) {
+		NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
+		__skb_queue_purge(&tp->out_of_order_queue);
+
+		/* Reset SACK state.  A conforming SACK implementation will
+		 * do the same at a timeout based retransmit.  When a connection
+		 * is in a sad state like this, we care only about integrity
+		 * of the connection not performance.
+		 */
+		if (tp->rx_opt.sack_ok)
+			tcp_sack_reset(&tp->rx_opt);
+		sk_mem_reclaim(sk);
+		res = 1;
+	}
+	return res;
+}
+
 /* Reduce allocated memory if we can, trying to get
  * the socket within its memory limits again.
  *
@@ -4196,20 +4270,7 @@ static int tcp_prune_queue(struct sock *sk)
 	/* Collapsing did not help, destructive actions follow.
 	 * This must not ever occur. */
 
-	/* First, purge the out_of_order queue. */
-	if (!skb_queue_empty(&tp->out_of_order_queue)) {
-		NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
-		__skb_queue_purge(&tp->out_of_order_queue);
-
-		/* Reset SACK state.  A conforming SACK implementation will
-		 * do the same at a timeout based retransmit.  When a connection
-		 * is in a sad state like this, we care only about integrity
-		 * of the connection not performance.
-		 */
-		if (tcp_is_sack(tp))
-			tcp_sack_reset(&tp->rx_opt);
-		sk_mem_reclaim(sk);
-	}
+	tcp_prune_ofo_queue(sk);
 
 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
 		return 0;
