Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--	net/ipv4/tcp_input.c	133
1 file changed, 71 insertions(+), 62 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 72c4732ae2da..d0682ce2a5d6 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1142,6 +1142,7 @@ struct tcp_sacktag_state {
 	u64	last_sackt;
 	struct rate_sample *rate;
 	int	flag;
+	unsigned int mss_now;
 };
 
 /* Check if skb is fully within the SACK block. In presence of GSO skbs,
@@ -1191,7 +1192,8 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 		if (pkt_len >= skb->len && !in_sack)
 			return 0;
 
-		err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
+		err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+				   pkt_len, mss, GFP_ATOMIC);
 		if (err < 0)
 			return err;
 	}
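Note on the new tcp_fragment() argument above: skbs can now live on either the write queue or the retransmit rbtree, so callers name the queue holding the skb being split. A sketch of the selector and updated prototype this series adds in include/net/tcp.h (illustrative, not part of this file's diff):

	/* Sketch (assumed, from include/net/tcp.h in this series): tell
	 * tcp_fragment() which queue holds @skb, so the second half of a
	 * split segment can be linked back into the right structure.
	 */
	enum tcp_queue {
		TCP_FRAG_IN_WRITE_QUEUE,	/* skb still on sk->sk_write_queue */
		TCP_FRAG_IN_RTX_QUEUE,		/* skb already in sk->tcp_rtx_queue */
	};

	int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
			 struct sk_buff *skb, u32 len,
			 unsigned int mss_now, gfp_t gfp);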
@@ -1363,8 +1365,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
 	if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
 		TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;
 
-	tcp_unlink_write_queue(skb, sk);
-	sk_wmem_free_skb(sk, skb);
+	tcp_rtx_queue_unlink_and_free(skb, sk);
 
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);
 
@@ -1414,9 +1415,9 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 		goto fallback;
 
 	/* Can only happen with delayed DSACK + discard craziness */
-	if (unlikely(skb == tcp_write_queue_head(sk)))
+	prev = skb_rb_prev(skb);
+	if (!prev)
 		goto fallback;
-	prev = tcp_write_queue_prev(sk, skb);
 
 	if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
 		goto fallback;
@@ -1501,12 +1502,11 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 	/* Hole filled allows collapsing with the next as well, this is very
 	 * useful when hole on every nth skb pattern happens
 	 */
-	if (prev == tcp_write_queue_tail(sk))
+	skb = skb_rb_next(prev);
+	if (!skb)
 		goto out;
-	skb = tcp_write_queue_next(sk, prev);
 
 	if (!skb_can_shift(skb) ||
-	    (skb == tcp_send_head(sk)) ||
 	    ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
 	    (mss != tcp_skb_seglen(skb)))
 		goto out;
@@ -1539,13 +1539,10 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *tmp;
 
-	tcp_for_write_queue_from(skb, sk) {
+	skb_rbtree_walk_from(skb) {
 		int in_sack = 0;
 		bool dup_sack = dup_sack_in;
 
-		if (skb == tcp_send_head(sk))
-			break;
-
 		/* queue is in-order => we can short-circuit the walk early */
 		if (!before(TCP_SKB_CB(skb)->seq, end_seq))
 			break;
@@ -1607,23 +1604,44 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 	return skb;
 }
 
-/* Avoid all extra work that is being done by sacktag while walking in
- * a normal way
- */
+static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk,
+					   struct tcp_sacktag_state *state,
+					   u32 seq)
+{
+	struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
+	struct sk_buff *skb;
+	int unack_bytes;
+
+	while (*p) {
+		parent = *p;
+		skb = rb_to_skb(parent);
+		if (before(seq, TCP_SKB_CB(skb)->seq)) {
+			p = &parent->rb_left;
+			continue;
+		}
+		if (!before(seq, TCP_SKB_CB(skb)->end_seq)) {
+			p = &parent->rb_right;
+			continue;
+		}
+
+		state->fack_count = 0;
+		unack_bytes = TCP_SKB_CB(skb)->seq - tcp_sk(sk)->snd_una;
+		if (state->mss_now && unack_bytes > 0)
+			state->fack_count = unack_bytes / state->mss_now;
+
+		return skb;
+	}
+	return NULL;
+}
+
 static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
 					struct tcp_sacktag_state *state,
 					u32 skip_to_seq)
 {
-	tcp_for_write_queue_from(skb, sk) {
-		if (skb == tcp_send_head(sk))
-			break;
-
-		if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
-			break;
+	if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq))
+		return skb;
 
-		state->fack_count += tcp_skb_pcount(skb);
-	}
-	return skb;
+	return tcp_sacktag_bsearch(sk, state, skip_to_seq);
 }
 
 static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
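The rbtree walks and the binary search above lean on small skb/rb_node accessors added in include/net/tcp.h by the same series. A sketch of the assumed helpers, for readers following the conversion:

	/* Assumed helpers (include/net/tcp.h): map rb_node <-> sk_buff and
	 * walk the retransmit rbtree in sequence order.
	 */
	#define rb_to_skb(__node)	rb_entry_safe(__node, struct sk_buff, rbnode)

	#define skb_rb_first(root)	rb_to_skb(rb_first(root))
	#define skb_rb_next(skb)	rb_to_skb(rb_next(&(skb)->rbnode))
	#define skb_rb_prev(skb)	rb_to_skb(rb_prev(&(skb)->rbnode))

	#define skb_rbtree_walk(skb, root)				\
		for (skb = skb_rb_first(root); skb; skb = skb_rb_next(skb))

	#define skb_rbtree_walk_from(skb)				\
		for (; skb; skb = skb_rb_next(skb))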
@@ -1745,8 +1763,9 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 		}
 	}
 
-	skb = tcp_write_queue_head(sk);
+	state->mss_now = tcp_current_mss(sk);
 	state->fack_count = 0;
+	skb = NULL;
 	i = 0;
 
 	if (!tp->sacked_out) {
@@ -1970,7 +1989,7 @@ void tcp_enter_loss(struct sock *sk)
 	if (tcp_is_reno(tp))
 		tcp_reset_reno_sack(tp);
 
-	skb = tcp_write_queue_head(sk);
+	skb = tcp_rtx_queue_head(sk);
 	is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED);
 	if (is_reneg) {
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
@@ -1979,10 +1998,7 @@ void tcp_enter_loss(struct sock *sk)
 	}
 	tcp_clear_all_retrans_hints(tp);
 
-	tcp_for_write_queue(skb, sk) {
-		if (skb == tcp_send_head(sk))
-			break;
-
+	skb_rbtree_walk_from(skb) {
 		mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
 			     is_reneg);
 		if (mark_lost)
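tcp_enter_loss() now seeds its walk with tcp_rtx_queue_head() rather than tcp_write_queue_head(). A minimal sketch of that helper, assuming the accessors sketched earlier:

	/* Assumed helper (include/net/tcp.h): lowest-sequence skb still
	 * awaiting an ACK, i.e. the leftmost node of the retransmit rbtree.
	 */
	static inline struct sk_buff *tcp_rtx_queue_head(const struct sock *sk)
	{
		return skb_rb_first(&sk->tcp_rtx_queue);
	}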
@@ -2215,13 +2231,11 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 			return;
 		cnt = tp->lost_cnt_hint;
 	} else {
-		skb = tcp_write_queue_head(sk);
+		skb = tcp_rtx_queue_head(sk);
 		cnt = 0;
 	}
 
-	tcp_for_write_queue_from(skb, sk) {
-		if (skb == tcp_send_head(sk))
-			break;
+	skb_rbtree_walk_from(skb) {
 		/* TODO: do this better */
 		/* this is not the most efficient way to do this... */
 		tp->lost_skb_hint = skb;
@@ -2245,7 +2259,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 			/* If needed, chop off the prefix to mark as lost. */
 			lost = (packets - oldcnt) * mss;
 			if (lost < skb->len &&
-			    tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0)
+			    tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+					 lost, mss, GFP_ATOMIC) < 0)
 				break;
 			cnt = packets;
 		}
@@ -2329,7 +2344,7 @@ static bool tcp_any_retrans_done(const struct sock *sk)
 	if (tp->retrans_out)
 		return true;
 
-	skb = tcp_write_queue_head(sk);
+	skb = tcp_rtx_queue_head(sk);
 	if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
 		return true;
 
@@ -2370,9 +2385,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
 	if (unmark_loss) {
 		struct sk_buff *skb;
 
-		tcp_for_write_queue(skb, sk) {
-			if (skb == tcp_send_head(sk))
-				break;
+		skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
 		}
 		tp->lost_out = 0;
@@ -2617,9 +2630,7 @@ void tcp_simple_retransmit(struct sock *sk)
 	unsigned int mss = tcp_current_mss(sk);
 	u32 prior_lost = tp->lost_out;
 
-	tcp_for_write_queue(skb, sk) {
-		if (skb == tcp_send_head(sk))
-			break;
+	skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
 		if (tcp_skb_seglen(skb) > mss &&
 		    !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
 			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
@@ -2713,7 +2724,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
 	 * is updated in tcp_ack()). Otherwise fall back to
 	 * the conventional recovery.
 	 */
-	if (tcp_send_head(sk) &&
+	if (!tcp_write_queue_empty(sk) &&
 	    after(tcp_wnd_end(tp), tp->snd_nxt)) {
 		*rexmit = REXMIT_NEW;
 		return;
@@ -3077,11 +3088,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 prior_sacked = tp->sacked_out;
 	u32 reord = tp->packets_out;
+	struct sk_buff *skb, *next;
 	bool fully_acked = true;
 	long sack_rtt_us = -1L;
 	long seq_rtt_us = -1L;
 	long ca_rtt_us = -1L;
-	struct sk_buff *skb;
 	u32 pkts_acked = 0;
 	u32 last_in_flight = 0;
 	bool rtt_update;
@@ -3089,7 +3100,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
 	first_ackt = 0;
 
-	while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
+	for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
 		u8 sacked = scb->sacked;
 		u32 acked_pcount;
@@ -3107,8 +3118,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 				break;
 			fully_acked = false;
 		} else {
-			/* Speedup tcp_unlink_write_queue() and next loop */
-			prefetchw(skb->next);
 			acked_pcount = tcp_skb_pcount(skb);
 		}
 
@@ -3160,12 +3169,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 		if (!fully_acked)
 			break;
 
-		tcp_unlink_write_queue(skb, sk);
-		sk_wmem_free_skb(sk, skb);
+		next = skb_rb_next(skb);
 		if (unlikely(skb == tp->retransmit_skb_hint))
 			tp->retransmit_skb_hint = NULL;
 		if (unlikely(skb == tp->lost_skb_hint))
 			tp->lost_skb_hint = NULL;
+		tcp_rtx_queue_unlink_and_free(skb, sk);
 	}
 
 	if (!skb)
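The cleanup loop above saves skb_rb_next(skb) before unlinking, since rb_erase() invalidates the node's links. A simplified sketch of the unlink-and-free helper it calls (assumed; the real helper in include/net/tcp.h may also do extra bookkeeping, e.g. for RACK's time-sorted list):

	/* Assumed helpers (include/net/tcp.h): remove a fully-acked skb from
	 * the retransmit rbtree and release its memory charge on the socket.
	 */
	static inline void tcp_rtx_queue_unlink(struct sk_buff *skb, struct sock *sk)
	{
		rb_erase(&skb->rbnode, &sk->tcp_rtx_queue);
	}

	static inline void tcp_rtx_queue_unlink_and_free(struct sk_buff *skb,
							 struct sock *sk)
	{
		tcp_rtx_queue_unlink(skb, sk);
		sk_wmem_free_skb(sk, skb);
	}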
@@ -3257,12 +3266,14 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
 static void tcp_ack_probe(struct sock *sk)
 {
-	const struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct sk_buff *head = tcp_send_head(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 
 	/* Was it a usable window open? */
-
-	if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
+	if (!head)
+		return;
+	if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
 		icsk->icsk_backoff = 0;
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
 		/* Socket must be waked up by subsequent tcp_data_snd_check().
@@ -3382,7 +3393,7 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
 		tp->pred_flags = 0;
 		tcp_fast_path_check(sk);
 
-		if (tcp_send_head(sk))
+		if (!tcp_write_queue_empty(sk))
 			tcp_slow_start_after_idle_check(sk);
 
 		if (nwin > tp->max_window) {
@@ -3567,8 +3578,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	sack_state.first_sackt = 0;
 	sack_state.rate = &rs;
 
-	/* We very likely will need to access write queue head. */
-	prefetchw(sk->sk_write_queue.next);
+	/* We very likely will need to access rtx queue. */
+	prefetch(sk->tcp_rtx_queue.rb_node);
 
 	/* If the ack is older than previous acks
 	 * then we can probably ignore it.
@@ -3682,8 +3693,7 @@ no_queue:
 	 * being used to time the probes, and is probably far higher than
 	 * it needs to be for normal retransmission.
 	 */
-	if (tcp_send_head(sk))
-		tcp_ack_probe(sk);
+	tcp_ack_probe(sk);
 
 	if (tp->tlp_high_seq)
 		tcp_process_tlp_ack(sk, ack, flag);
@@ -4726,7 +4736,7 @@ static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
 }
 
 /* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */
-static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
+void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
 {
 	struct rb_node **p = &root->rb_node;
 	struct rb_node *parent = NULL;
@@ -5530,7 +5540,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 				  struct tcp_fastopen_cookie *cookie)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL;
+	struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL;
 	u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
 	bool syn_drop = false;
 
@@ -5565,9 +5575,8 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 	tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
 
 	if (data) { /* Retransmit unacked data in SYN */
-		tcp_for_write_queue_from(data, sk) {
-			if (data == tcp_send_head(sk) ||
-			    __tcp_retransmit_skb(sk, data, 1))
+		skb_rbtree_walk_from(data) {
+			if (__tcp_retransmit_skb(sk, data, 1))
 				break;
 		}
 		tcp_rearm_rto(sk);