Diffstat (limited to 'net/ipv4/tcp_input.c')
 -rw-r--r--  net/ipv4/tcp_input.c | 334
 1 file changed, 189 insertions, 145 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2877c3e09587..e886e2f7fa8d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -61,6 +61,8 @@
  *		Pasi Sarolahti:		F-RTO for dealing with spurious RTOs
  */
 
+#define pr_fmt(fmt) "TCP: " fmt
+
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/module.h>
@@ -105,7 +107,6 @@ int sysctl_tcp_abc __read_mostly;
 #define FLAG_SYN_ACKED		0x10 /* This ACK acknowledged SYN. */
 #define FLAG_DATA_SACKED	0x20 /* New SACK. */
 #define FLAG_ECE		0x40 /* ECE in this ACK */
-#define FLAG_DATA_LOST		0x80 /* SACK detected data lossage. */
 #define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
 #define FLAG_ONLY_ORIG_SACKED	0x200 /* SACKs only non-rexmit sent before RTO */
 #define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
@@ -1040,13 +1041,11 @@ static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
  * These 6 states form finite state machine, controlled by the following events:
  * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue())
  * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue())
- * 3. Loss detection event of one of three flavors:
+ * 3. Loss detection event of two flavors:
  *	A. Scoreboard estimator decided the packet is lost.
  *	   A'. Reno "three dupacks" marks head of queue lost.
- *	   A''. Its FACK modfication, head until snd.fack is lost.
- *	B. SACK arrives sacking data transmitted after never retransmitted
- *	   hole was sent out.
- *	C. SACK arrives sacking SND.NXT at the moment, when the
+ *	   A''. Its FACK modification, head until snd.fack is lost.
+ *	B. SACK arrives sacking SND.NXT at the moment, when the
  *	   segment was retransmitted.
  * 4. D-SACK added new rule: D-SACK changes any tag to S.
  *
@@ -1153,7 +1152,7 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
 }
 
 /* Check for lost retransmit. This superb idea is borrowed from "ratehalving".
- * Event "C". Later note: FACK people cheated me again 8), we have to account
+ * Event "B". Later note: FACK people cheated me again 8), we have to account
  * for reordering! Ugly, but should help.
  *
  * Search retransmitted skbs from write_queue that were sent when snd_nxt was
@@ -1310,25 +1309,26 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 	return in_sack;
 }
 
-static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk,
-			  struct tcp_sacktag_state *state,
+/* Mark the given newly-SACKed range as such, adjusting counters and hints. */
+static u8 tcp_sacktag_one(struct sock *sk,
+			  struct tcp_sacktag_state *state, u8 sacked,
+			  u32 start_seq, u32 end_seq,
 			  int dup_sack, int pcount)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	u8 sacked = TCP_SKB_CB(skb)->sacked;
 	int fack_count = state->fack_count;
 
 	/* Account D-SACK for retransmitted packet. */
 	if (dup_sack && (sacked & TCPCB_RETRANS)) {
 		if (tp->undo_marker && tp->undo_retrans &&
-		    after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
+		    after(end_seq, tp->undo_marker))
 			tp->undo_retrans--;
 		if (sacked & TCPCB_SACKED_ACKED)
 			state->reord = min(fack_count, state->reord);
 	}
 
 	/* Nothing to do; acked frame is about to be dropped (was ACKed). */
-	if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
+	if (!after(end_seq, tp->snd_una))
 		return sacked;
 
 	if (!(sacked & TCPCB_SACKED_ACKED)) {
@@ -1347,13 +1347,13 @@ static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk,
 				/* New sack for not retransmitted frame,
 				 * which was in hole. It is reordering.
 				 */
-				if (before(TCP_SKB_CB(skb)->seq,
+				if (before(start_seq,
 					   tcp_highest_sack_seq(tp)))
 					state->reord = min(fack_count,
 							   state->reord);
 
 				/* SACK enhanced F-RTO (RFC4138; Appendix B) */
-				if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark))
+				if (!after(end_seq, tp->frto_highmark))
 					state->flag |= FLAG_ONLY_ORIG_SACKED;
 			}
 
@@ -1371,8 +1371,7 @@ static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk,
 
 	/* Lost marker hint past SACKed? Tweak RFC3517 cnt */
 	if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
-	    before(TCP_SKB_CB(skb)->seq,
-		   TCP_SKB_CB(tp->lost_skb_hint)->seq))
+	    before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
 		tp->lost_cnt_hint += pcount;
 
 	if (fack_count > tp->fackets_out)
@@ -1391,6 +1390,9 @@ static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk,
 	return sacked;
 }
 
+/* Shift newly-SACKed bytes from this skb to the immediately previous
+ * already-SACKed sk_buff. Mark the newly-SACKed bytes as such.
+ */
 static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 			   struct tcp_sacktag_state *state,
 			   unsigned int pcount, int shifted, int mss,
@@ -1398,9 +1400,20 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
+	u32 start_seq = TCP_SKB_CB(skb)->seq;	/* start of newly-SACKed */
+	u32 end_seq = start_seq + shifted;	/* end of newly-SACKed */
 
 	BUG_ON(!pcount);
 
+	/* Adjust counters and hints for the newly sacked sequence
+	 * range but discard the return value since prev is already
+	 * marked. We must tag the range first because the seq
+	 * advancement below implicitly advances
+	 * tcp_highest_sack_seq() when skb is highest_sack.
+	 */
+	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
+			start_seq, end_seq, dup_sack, pcount);
+
 	if (skb == tp->lost_skb_hint)
 		tp->lost_cnt_hint += pcount;
 
@@ -1427,9 +1440,6 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 		skb_shinfo(skb)->gso_type = 0;
 	}
 
-	/* We discard results */
-	tcp_sacktag_one(skb, sk, state, dup_sack, pcount);
-
 	/* Difference in this won't matter, both ACKed by the same cumul. ACK */
 	TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
 
@@ -1577,6 +1587,10 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 		}
 	}
 
+	/* tcp_sacktag_one() won't SACK-tag ranges below snd_una */
+	if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
+		goto fallback;
+
 	if (!skb_shift(prev, skb, len))
 		goto fallback;
 	if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
@@ -1667,10 +1681,14 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 			break;
 
 		if (in_sack) {
-			TCP_SKB_CB(skb)->sacked = tcp_sacktag_one(skb, sk,
-								  state,
-								  dup_sack,
-								  tcp_skb_pcount(skb));
+			TCP_SKB_CB(skb)->sacked =
+				tcp_sacktag_one(sk,
+						state,
+						TCP_SKB_CB(skb)->sacked,
+						TCP_SKB_CB(skb)->seq,
+						TCP_SKB_CB(skb)->end_seq,
+						dup_sack,
+						tcp_skb_pcount(skb));
 
 			if (!before(TCP_SKB_CB(skb)->seq,
 				    tcp_highest_sack_seq(tp)))
@@ -1844,10 +1862,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 		if (found_dup_sack && ((i + 1) == first_sack_index))
 			next_dup = &sp[i + 1];
 
-		/* Event "B" in the comment above. */
-		if (after(end_seq, tp->high_seq))
-			state.flag |= FLAG_DATA_LOST;
-
 		/* Skip too early cached blocks */
 		while (tcp_sack_cache_ok(tp, cache) &&
 		       !before(start_seq, cache->end_seq))
@@ -2515,8 +2529,11 @@ static void tcp_timeout_skbs(struct sock *sk)
 	tcp_verify_left_out(tp);
 }
 
-/* Mark head of queue up as lost. With RFC3517 SACK, the packets is
- * is against sacked "cnt", otherwise it's against facked "cnt"
+/* Detect loss in event "A" above by marking head of queue up as lost.
+ * For FACK or non-SACK(Reno) senders, the first "packets" number of segments
+ * are considered lost. For RFC3517 SACK, a segment is considered lost if it
+ * has at least tp->reordering SACKed seqments above it; "packets" refers to
+ * the maximum SACKed segments to pass before reaching this limit.
  */
 static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 {
@@ -2525,6 +2542,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 	int cnt, oldcnt;
 	int err;
 	unsigned int mss;
+	/* Use SACK to deduce losses of new sequences sent during recovery */
+	const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
 
 	WARN_ON(packets > tp->packets_out);
 	if (tp->lost_skb_hint) {
@@ -2546,7 +2565,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 		tp->lost_skb_hint = skb;
 		tp->lost_cnt_hint = cnt;
 
-		if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
+		if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
 			break;
 
 		oldcnt = cnt;
@@ -2556,6 +2575,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 
 		if (cnt > packets) {
 			if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
+			    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
 			    (oldcnt >= packets))
 				break;
 
@@ -3033,19 +3053,10 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 	if (tcp_check_sack_reneging(sk, flag))
 		return;
 
-	/* C. Process data loss notification, provided it is valid. */
-	if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) &&
-	    before(tp->snd_una, tp->high_seq) &&
-	    icsk->icsk_ca_state != TCP_CA_Open &&
-	    tp->fackets_out > tp->reordering) {
-		tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0);
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS);
-	}
-
-	/* D. Check consistency of the current state. */
+	/* C. Check consistency of the current state. */
 	tcp_verify_left_out(tp);
 
-	/* E. Check state exit conditions. State can be terminated
+	/* D. Check state exit conditions. State can be terminated
 	 * when high_seq is ACKed. */
 	if (icsk->icsk_ca_state == TCP_CA_Open) {
 		WARN_ON(tp->retrans_out != 0);
@@ -3077,7 +3088,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 		}
 	}
 
-	/* F. Process state. */
+	/* E. Process state. */
 	switch (icsk->icsk_ca_state) {
 	case TCP_CA_Recovery:
 		if (!(flag & FLAG_SND_UNA_ADVANCED)) {
@@ -3858,9 +3869,9 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o
 				opt_rx->wscale_ok = 1;
 				if (snd_wscale > 14) {
 					if (net_ratelimit())
-						printk(KERN_INFO "tcp_parse_options: Illegal window "
-						       "scaling value %d >14 received.\n",
-						       snd_wscale);
+						pr_info("%s: Illegal window scaling value %d >14 received\n",
+							__func__,
+							snd_wscale);
 					snd_wscale = 14;
 				}
 				opt_rx->snd_wscale = snd_wscale;
@@ -4182,7 +4193,7 @@ static void tcp_fin(struct sock *sk)
 		/* Only TCP_LISTEN and TCP_CLOSE are left, in these
 		 * cases we should never reach this piece of code.
 		 */
-		printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
+		pr_err("%s: Impossible, sk->sk_state=%d\n",
 		       __func__, sk->sk_state);
 		break;
 	}
@@ -4435,6 +4446,137 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
 	return 0;
 }
 
+static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb1;
+	u32 seq, end_seq;
+
+	TCP_ECN_check_ce(tp, skb);
+
+	if (tcp_try_rmem_schedule(sk, skb->truesize)) {
+		/* TODO: should increment a counter */
+		__kfree_skb(skb);
+		return;
+	}
+
+	/* Disable header prediction. */
+	tp->pred_flags = 0;
+	inet_csk_schedule_ack(sk);
+
+	SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
+		   tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+
+	skb1 = skb_peek_tail(&tp->out_of_order_queue);
+	if (!skb1) {
+		/* Initial out of order segment, build 1 SACK. */
+		if (tcp_is_sack(tp)) {
+			tp->rx_opt.num_sacks = 1;
+			tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
+			tp->selective_acks[0].end_seq =
+						TCP_SKB_CB(skb)->end_seq;
+		}
+		__skb_queue_head(&tp->out_of_order_queue, skb);
+		goto end;
+	}
+
+	seq = TCP_SKB_CB(skb)->seq;
+	end_seq = TCP_SKB_CB(skb)->end_seq;
+
+	if (seq == TCP_SKB_CB(skb1)->end_seq) {
+		/* Packets in ofo can stay in queue a long time.
+		 * Better try to coalesce them right now
+		 * to avoid future tcp_collapse_ofo_queue(),
+		 * probably the most expensive function in tcp stack.
+		 */
+		if (skb->len <= skb_tailroom(skb1) && !tcp_hdr(skb)->fin) {
+			NET_INC_STATS_BH(sock_net(sk),
+					 LINUX_MIB_TCPRCVCOALESCE);
+			BUG_ON(skb_copy_bits(skb, 0,
+					     skb_put(skb1, skb->len),
+					     skb->len));
+			TCP_SKB_CB(skb1)->end_seq = end_seq;
+			TCP_SKB_CB(skb1)->ack_seq = TCP_SKB_CB(skb)->ack_seq;
+			__kfree_skb(skb);
+			skb = NULL;
+		} else {
+			__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+		}
+
+		if (!tp->rx_opt.num_sacks ||
+		    tp->selective_acks[0].end_seq != seq)
+			goto add_sack;
+
+		/* Common case: data arrive in order after hole. */
+		tp->selective_acks[0].end_seq = end_seq;
+		goto end;
+	}
+
+	/* Find place to insert this segment. */
+	while (1) {
+		if (!after(TCP_SKB_CB(skb1)->seq, seq))
+			break;
+		if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
+			skb1 = NULL;
+			break;
+		}
+		skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
+	}
+
+	/* Do skb overlap to previous one? */
+	if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
+		if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+			/* All the bits are present. Drop. */
+			__kfree_skb(skb);
+			skb = NULL;
+			tcp_dsack_set(sk, seq, end_seq);
+			goto add_sack;
+		}
+		if (after(seq, TCP_SKB_CB(skb1)->seq)) {
+			/* Partial overlap. */
+			tcp_dsack_set(sk, seq,
+				      TCP_SKB_CB(skb1)->end_seq);
+		} else {
+			if (skb_queue_is_first(&tp->out_of_order_queue,
+					       skb1))
+				skb1 = NULL;
+			else
+				skb1 = skb_queue_prev(
+					&tp->out_of_order_queue,
+					skb1);
+		}
+	}
+	if (!skb1)
+		__skb_queue_head(&tp->out_of_order_queue, skb);
+	else
+		__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+
+	/* And clean segments covered by new one as whole. */
+	while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
+		skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
+
+		if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
+			break;
+		if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+			tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
+					 end_seq);
+			break;
+		}
+		__skb_unlink(skb1, &tp->out_of_order_queue);
+		tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
+				 TCP_SKB_CB(skb1)->end_seq);
+		__kfree_skb(skb1);
+	}
+
+add_sack:
+	if (tcp_is_sack(tp))
+		tcp_sack_new_ofo_skb(sk, seq, end_seq);
+end:
+	if (skb)
+		skb_set_owner_r(skb, sk);
+}
+
+
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
 	const struct tcphdr *th = tcp_hdr(skb);
@@ -4550,105 +4692,7 @@ drop:
 		goto queue_and_out;
 	}
 
-	TCP_ECN_check_ce(tp, skb);
-
-	if (tcp_try_rmem_schedule(sk, skb->truesize))
-		goto drop;
-
-	/* Disable header prediction. */
-	tp->pred_flags = 0;
-	inet_csk_schedule_ack(sk);
-
-	SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
-		   tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
-
-	skb_set_owner_r(skb, sk);
-
-	if (!skb_peek(&tp->out_of_order_queue)) {
-		/* Initial out of order segment, build 1 SACK. */
-		if (tcp_is_sack(tp)) {
-			tp->rx_opt.num_sacks = 1;
-			tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
-			tp->selective_acks[0].end_seq =
-						TCP_SKB_CB(skb)->end_seq;
-		}
-		__skb_queue_head(&tp->out_of_order_queue, skb);
-	} else {
-		struct sk_buff *skb1 = skb_peek_tail(&tp->out_of_order_queue);
-		u32 seq = TCP_SKB_CB(skb)->seq;
-		u32 end_seq = TCP_SKB_CB(skb)->end_seq;
-
-		if (seq == TCP_SKB_CB(skb1)->end_seq) {
-			__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
-
-			if (!tp->rx_opt.num_sacks ||
-			    tp->selective_acks[0].end_seq != seq)
-				goto add_sack;
-
-			/* Common case: data arrive in order after hole. */
-			tp->selective_acks[0].end_seq = end_seq;
-			return;
-		}
-
-		/* Find place to insert this segment. */
-		while (1) {
-			if (!after(TCP_SKB_CB(skb1)->seq, seq))
-				break;
-			if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
-				skb1 = NULL;
-				break;
-			}
-			skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
-		}
-
-		/* Do skb overlap to previous one? */
-		if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
-			if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
-				/* All the bits are present. Drop. */
-				__kfree_skb(skb);
-				tcp_dsack_set(sk, seq, end_seq);
-				goto add_sack;
-			}
-			if (after(seq, TCP_SKB_CB(skb1)->seq)) {
-				/* Partial overlap. */
-				tcp_dsack_set(sk, seq,
-					      TCP_SKB_CB(skb1)->end_seq);
-			} else {
-				if (skb_queue_is_first(&tp->out_of_order_queue,
-						       skb1))
-					skb1 = NULL;
-				else
-					skb1 = skb_queue_prev(
-						&tp->out_of_order_queue,
-						skb1);
-			}
-		}
-		if (!skb1)
-			__skb_queue_head(&tp->out_of_order_queue, skb);
-		else
-			__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
-
-		/* And clean segments covered by new one as whole. */
-		while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
-			skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
-
-			if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
-				break;
-			if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
-				tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
-						 end_seq);
-				break;
-			}
-			__skb_unlink(skb1, &tp->out_of_order_queue);
-			tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
-					 TCP_SKB_CB(skb1)->end_seq);
-			__kfree_skb(skb1);
-		}
-
-add_sack:
-		if (tcp_is_sack(tp))
-			tcp_sack_new_ofo_skb(sk, seq, end_seq);
-	}
+	tcp_data_queue_ofo(sk, skb);
 }
 
 static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,