Diffstat (limited to 'net/ipv4/tcp_input.c')
 -rw-r--r--  net/ipv4/tcp_input.c | 264
 1 file changed, 153 insertions(+), 111 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 53c8ce4046b2..e886e2f7fa8d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -61,6 +61,8 @@
  *		Pasi Sarolahti:	F-RTO for dealing with spurious RTOs
  */
 
+#define pr_fmt(fmt) "TCP: " fmt
+
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/module.h>
@@ -1403,8 +1405,16 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 
 	BUG_ON(!pcount);
 
-	/* Adjust hint for FACK. Non-FACK is handled in tcp_sacktag_one(). */
-	if (tcp_is_fack(tp) && (skb == tp->lost_skb_hint))
+	/* Adjust counters and hints for the newly sacked sequence
+	 * range but discard the return value since prev is already
+	 * marked. We must tag the range first because the seq
+	 * advancement below implicitly advances
+	 * tcp_highest_sack_seq() when skb is highest_sack.
+	 */
+	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
+			start_seq, end_seq, dup_sack, pcount);
+
+	if (skb == tp->lost_skb_hint)
 		tp->lost_cnt_hint += pcount;
 
 	TCP_SKB_CB(prev)->end_seq += shifted;
@@ -1430,12 +1440,6 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 		skb_shinfo(skb)->gso_type = 0;
 	}
 
-	/* Adjust counters and hints for the newly sacked sequence range but
-	 * discard the return value since prev is already marked.
-	 */
-	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
-			start_seq, end_seq, dup_sack, pcount);
-
 	/* Difference in this won't matter, both ACKed by the same cumul. ACK */
 	TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
 
@@ -1583,6 +1587,10 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 		}
 	}
 
+	/* tcp_sacktag_one() won't SACK-tag ranges below snd_una */
+	if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
+		goto fallback;
+
 	if (!skb_shift(prev, skb, len))
 		goto fallback;
 	if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
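
The guard added in this hunk leans on TCP's wrap-safe sequence arithmetic: before() and after() in include/net/tcp.h compare 32-bit sequence numbers through a signed 32-bit difference, so a range whose end does not sit after snd_una is already covered by the cumulative ACK even across sequence wrap. The following standalone userspace sketch approximates that comparison; seq_before()/seq_after() are local stand-ins for the kernel helpers and the snd_una/seq/len values are made-up examples.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Userspace approximation of the kernel helpers: s1 is "before" s2 when
 * the signed 32-bit difference is negative, which stays correct across
 * the 2^32 wrap of TCP sequence numbers. */
static bool seq_before(uint32_t s1, uint32_t s2)
{
	return (int32_t)(s1 - s2) < 0;
}

static bool seq_after(uint32_t s1, uint32_t s2)
{
	return seq_before(s2, s1);
}

int main(void)
{
	uint32_t snd_una = 0xfffffff0u;	/* hypothetical cumulative ACK point */
	uint32_t seq = 0xffffffe0u;	/* hypothetical start of shifted range */
	uint32_t len = 0x10;

	/* Mirrors the added guard: if the shifted range does not extend
	 * past snd_una, there is nothing left worth SACK-tagging. */
	if (!seq_after(seq + len, snd_una))
		printf("range [%#x, %#x) is below snd_una, fall back\n",
		       (unsigned)seq, (unsigned)(seq + len));

	/* Wrap-around still compares correctly: 0x00000010 is "after"
	 * 0xfffffff0 even though it is numerically smaller. */
	printf("seq_after(0x10, 0xfffffff0) = %d\n",
	       seq_after(0x00000010u, 0xfffffff0u));
	return 0;
}
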
@@ -2567,6 +2575,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 
 		if (cnt > packets) {
 			if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
+			    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
 			    (oldcnt >= packets))
 				break;
 
@@ -3860,9 +3869,9 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o
 					opt_rx->wscale_ok = 1;
 					if (snd_wscale > 14) {
 						if (net_ratelimit())
-							printk(KERN_INFO "tcp_parse_options: Illegal window "
-							       "scaling value %d >14 received.\n",
-							       snd_wscale);
+							pr_info("%s: Illegal window scaling value %d >14 received\n",
+								__func__,
+								snd_wscale);
 						snd_wscale = 14;
 					}
 					opt_rx->snd_wscale = snd_wscale;
@@ -4184,7 +4193,7 @@ static void tcp_fin(struct sock *sk)
 		/* Only TCP_LISTEN and TCP_CLOSE are left, in these
 		 * cases we should never reach this piece of code.
 		 */
-		printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
+		pr_err("%s: Impossible, sk->sk_state=%d\n",
 		       __func__, sk->sk_state);
 		break;
 	}
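
The two printk() conversions above pick up the pr_fmt() definition added in the first hunk: pr_info() and pr_err() expand to printk() with pr_fmt() applied to the format string, so every message from tcp_input.c now carries the "TCP: " prefix without repeating it at each call site. A rough userspace illustration of the macro mechanism follows; printf/fprintf stand in for the kernel's printk machinery.

#include <stdio.h>

/* Stand-in for the per-file prefix; in the kernel, pr_info()/pr_err()
 * expand to printk() with pr_fmt() applied to the format string. */
#define pr_fmt(fmt) "TCP: " fmt

#define pr_info(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)
#define pr_err(fmt, ...)  fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__)

int main(void)
{
	int snd_wscale = 15;

	/* Prints: TCP: tcp_parse_options: Illegal window scaling value 15 >14 received */
	pr_info("%s: Illegal window scaling value %d >14 received\n",
		"tcp_parse_options", snd_wscale);
	return 0;
}
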
@@ -4437,6 +4446,137 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
 	return 0;
 }
 
+static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb1;
+	u32 seq, end_seq;
+
+	TCP_ECN_check_ce(tp, skb);
+
+	if (tcp_try_rmem_schedule(sk, skb->truesize)) {
+		/* TODO: should increment a counter */
+		__kfree_skb(skb);
+		return;
+	}
+
+	/* Disable header prediction. */
+	tp->pred_flags = 0;
+	inet_csk_schedule_ack(sk);
+
+	SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
+		   tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+
+	skb1 = skb_peek_tail(&tp->out_of_order_queue);
+	if (!skb1) {
+		/* Initial out of order segment, build 1 SACK. */
+		if (tcp_is_sack(tp)) {
+			tp->rx_opt.num_sacks = 1;
+			tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
+			tp->selective_acks[0].end_seq =
+						TCP_SKB_CB(skb)->end_seq;
+		}
+		__skb_queue_head(&tp->out_of_order_queue, skb);
+		goto end;
+	}
+
+	seq = TCP_SKB_CB(skb)->seq;
+	end_seq = TCP_SKB_CB(skb)->end_seq;
+
+	if (seq == TCP_SKB_CB(skb1)->end_seq) {
+		/* Packets in ofo can stay in queue a long time.
+		 * Better try to coalesce them right now
+		 * to avoid future tcp_collapse_ofo_queue(),
+		 * probably the most expensive function in tcp stack.
+		 */
+		if (skb->len <= skb_tailroom(skb1) && !tcp_hdr(skb)->fin) {
+			NET_INC_STATS_BH(sock_net(sk),
+					 LINUX_MIB_TCPRCVCOALESCE);
+			BUG_ON(skb_copy_bits(skb, 0,
+					     skb_put(skb1, skb->len),
+					     skb->len));
+			TCP_SKB_CB(skb1)->end_seq = end_seq;
+			TCP_SKB_CB(skb1)->ack_seq = TCP_SKB_CB(skb)->ack_seq;
+			__kfree_skb(skb);
+			skb = NULL;
+		} else {
+			__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+		}
+
+		if (!tp->rx_opt.num_sacks ||
+		    tp->selective_acks[0].end_seq != seq)
+			goto add_sack;
+
+		/* Common case: data arrive in order after hole. */
+		tp->selective_acks[0].end_seq = end_seq;
+		goto end;
+	}
+
+	/* Find place to insert this segment. */
+	while (1) {
+		if (!after(TCP_SKB_CB(skb1)->seq, seq))
+			break;
+		if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
+			skb1 = NULL;
+			break;
+		}
+		skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
+	}
+
+	/* Do skb overlap to previous one? */
+	if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
+		if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+			/* All the bits are present. Drop. */
+			__kfree_skb(skb);
+			skb = NULL;
+			tcp_dsack_set(sk, seq, end_seq);
+			goto add_sack;
+		}
+		if (after(seq, TCP_SKB_CB(skb1)->seq)) {
+			/* Partial overlap. */
+			tcp_dsack_set(sk, seq,
+				      TCP_SKB_CB(skb1)->end_seq);
+		} else {
+			if (skb_queue_is_first(&tp->out_of_order_queue,
+					       skb1))
+				skb1 = NULL;
+			else
+				skb1 = skb_queue_prev(
+					&tp->out_of_order_queue,
+					skb1);
+		}
+	}
+	if (!skb1)
+		__skb_queue_head(&tp->out_of_order_queue, skb);
+	else
+		__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+
+	/* And clean segments covered by new one as whole. */
+	while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
+		skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
+
+		if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
+			break;
+		if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+			tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
+					 end_seq);
+			break;
+		}
+		__skb_unlink(skb1, &tp->out_of_order_queue);
+		tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
+				 TCP_SKB_CB(skb1)->end_seq);
+		__kfree_skb(skb1);
+	}
+
+add_sack:
+	if (tcp_is_sack(tp))
+		tcp_sack_new_ofo_skb(sk, seq, end_seq);
+end:
+	if (skb)
+		skb_set_owner_r(skb, sk);
+}
+
+
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
 	const struct tcphdr *th = tcp_hdr(skb);
@@ -4552,105 +4692,7 @@ drop:
 		goto queue_and_out;
 	}
 
-	TCP_ECN_check_ce(tp, skb);
-
-	if (tcp_try_rmem_schedule(sk, skb->truesize))
-		goto drop;
-
-	/* Disable header prediction. */
-	tp->pred_flags = 0;
-	inet_csk_schedule_ack(sk);
-
-	SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
-		   tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
-
-	skb_set_owner_r(skb, sk);
-
-	if (!skb_peek(&tp->out_of_order_queue)) {
-		/* Initial out of order segment, build 1 SACK. */
-		if (tcp_is_sack(tp)) {
-			tp->rx_opt.num_sacks = 1;
-			tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
-			tp->selective_acks[0].end_seq =
-						TCP_SKB_CB(skb)->end_seq;
-		}
-		__skb_queue_head(&tp->out_of_order_queue, skb);
-	} else {
-		struct sk_buff *skb1 = skb_peek_tail(&tp->out_of_order_queue);
-		u32 seq = TCP_SKB_CB(skb)->seq;
-		u32 end_seq = TCP_SKB_CB(skb)->end_seq;
-
-		if (seq == TCP_SKB_CB(skb1)->end_seq) {
-			__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
-
-			if (!tp->rx_opt.num_sacks ||
-			    tp->selective_acks[0].end_seq != seq)
-				goto add_sack;
-
-			/* Common case: data arrive in order after hole. */
-			tp->selective_acks[0].end_seq = end_seq;
-			return;
-		}
-
-		/* Find place to insert this segment. */
-		while (1) {
-			if (!after(TCP_SKB_CB(skb1)->seq, seq))
-				break;
-			if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
-				skb1 = NULL;
-				break;
-			}
-			skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
-		}
-
-		/* Do skb overlap to previous one? */
-		if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
-			if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
-				/* All the bits are present. Drop. */
-				__kfree_skb(skb);
-				tcp_dsack_set(sk, seq, end_seq);
-				goto add_sack;
-			}
-			if (after(seq, TCP_SKB_CB(skb1)->seq)) {
-				/* Partial overlap. */
-				tcp_dsack_set(sk, seq,
-					      TCP_SKB_CB(skb1)->end_seq);
-			} else {
-				if (skb_queue_is_first(&tp->out_of_order_queue,
-						       skb1))
-					skb1 = NULL;
-				else
-					skb1 = skb_queue_prev(
-						&tp->out_of_order_queue,
-						skb1);
-			}
-		}
-		if (!skb1)
-			__skb_queue_head(&tp->out_of_order_queue, skb);
-		else
-			__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
-
-		/* And clean segments covered by new one as whole. */
-		while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
-			skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
-
-			if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
-				break;
-			if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
-				tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
-						 end_seq);
-				break;
-			}
-			__skb_unlink(skb1, &tp->out_of_order_queue);
-			tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
-					 TCP_SKB_CB(skb1)->end_seq);
-			__kfree_skb(skb1);
-		}
-
-add_sack:
-		if (tcp_is_sack(tp))
-			tcp_sack_new_ofo_skb(sk, seq, end_seq);
-	}
+	tcp_data_queue_ofo(sk, skb);
 }
 
 static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
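
Aside from the new tail-coalescing fast path (the skb_tailroom() copy counted by LINUX_MIB_TCPRCVCOALESCE) and the deferred skb_set_owner_r()/skb = NULL handling, the queue-insertion walk in tcp_data_queue_ofo() is the code removed from tcp_data_queue() in the final hunk. For readers who want that insertion logic in isolation, here is a self-contained userspace sketch of the same idea; the struct seg type, the queue helpers, and the example sequence ranges are invented for illustration, and the kernel additionally does D-SACK bookkeeping and duplicate-drop handling that this sketch omits. The walk goes backwards from the tail to the first queued segment that does not start after the new one, inserts after it, then discards any following segments the new range fully covers.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Invented stand-in for an out-of-order segment; the kernel keeps
 * sk_buffs with TCP_SKB_CB(skb)->seq / end_seq on tp->out_of_order_queue. */
struct seg {
	uint32_t seq, end_seq;
	struct seg *prev, *next;
};

struct seg_queue { struct seg *head, *tail; };

static bool before(uint32_t a, uint32_t b) { return (int32_t)(a - b) < 0; }
static bool after(uint32_t a, uint32_t b)  { return before(b, a); }

static void unlink_seg(struct seg_queue *q, struct seg *s)
{
	if (s->prev) s->prev->next = s->next; else q->head = s->next;
	if (s->next) s->next->prev = s->prev; else q->tail = s->prev;
	free(s);
}

static void insert_after(struct seg_queue *q, struct seg *pos, struct seg *s)
{
	s->prev = pos;
	if (pos) {
		s->next = pos->next;
		pos->next = s;
	} else {			/* no predecessor: insert at head */
		s->next = q->head;
		q->head = s;
	}
	if (s->next) s->next->prev = s; else q->tail = s;
}

/* Mirrors the walk in tcp_data_queue_ofo(): find the last queued segment
 * whose start is not after the new seq, insert after it, then drop any
 * later segments fully covered by [seq, end_seq). */
static void ofo_insert(struct seg_queue *q, uint32_t seq, uint32_t end_seq)
{
	struct seg *s = malloc(sizeof(*s));
	struct seg *pos, *next;

	if (!s)
		return;
	s->seq = seq;
	s->end_seq = end_seq;

	pos = q->tail;
	while (pos && after(pos->seq, seq))
		pos = pos->prev;
	insert_after(q, pos, s);

	for (pos = s->next; pos; pos = next) {
		next = pos->next;
		if (!after(end_seq, pos->seq))
			break;			/* no overlap with later segments */
		if (before(end_seq, pos->end_seq))
			break;			/* partial overlap, keep it */
		unlink_seg(q, pos);		/* fully covered, drop it */
	}
}

int main(void)
{
	struct seg_queue q = { NULL, NULL };
	struct seg *s;

	ofo_insert(&q, 2000, 2500);
	ofo_insert(&q, 3000, 3500);
	ofo_insert(&q, 1000, 1500);	/* lands at the head */
	ofo_insert(&q, 1800, 3200);	/* covers [2000,2500), overlaps [3000,3500) */

	for (s = q.head; s; s = s->next)
		printf("[%u, %u)\n", (unsigned)s->seq, (unsigned)s->end_seq);
	return 0;
}

Compiled and run, the four insertions leave [1000, 1500), [1800, 3200), [3000, 3500) on the queue, mirroring how a wide out-of-order segment can swallow earlier queued segments it fully covers while leaving a partially overlapped one in place.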