Diffstat (limited to 'net/ipv4/tcp_input.c')

 -rw-r--r--  net/ipv4/tcp_input.c | 264 ++++++++++++++++++++++++----------------
 1 file changed, 153 insertions(+), 111 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 53c8ce4046b2..e886e2f7fa8d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -61,6 +61,8 @@
  * Pasi Sarolahti: F-RTO for dealing with spurious RTOs
  */
 
+#define pr_fmt(fmt) "TCP: " fmt
+
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/module.h>
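
The new `pr_fmt` define makes every later `pr_info()`/`pr_err()` in this file carry a "TCP: " prefix without touching each call site. A minimal user-space sketch of the mechanism (the `printf` stand-in is illustrative; in the kernel, `pr_info()` expands to `printk(KERN_INFO pr_fmt(fmt), ...)`):

```c
#include <stdio.h>

/* pr_fmt() is pasted around the format string at compile time, so the
 * prefix is applied uniformly across the whole translation unit. */
#define pr_fmt(fmt) "TCP: " fmt
#define pr_info(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)

int main(void)
{
	pr_info("%s: Illegal window scaling value %d >14 received\n",
		"tcp_parse_options", 15);
	/* Output: TCP: tcp_parse_options: Illegal window scaling value 15 >14 received */
	return 0;
}
```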
@@ -1403,8 +1405,16 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 
 	BUG_ON(!pcount);
 
-	/* Adjust hint for FACK. Non-FACK is handled in tcp_sacktag_one(). */
-	if (tcp_is_fack(tp) && (skb == tp->lost_skb_hint))
+	/* Adjust counters and hints for the newly sacked sequence
+	 * range but discard the return value since prev is already
+	 * marked. We must tag the range first because the seq
+	 * advancement below implicitly advances
+	 * tcp_highest_sack_seq() when skb is highest_sack.
+	 */
+	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
+			start_seq, end_seq, dup_sack, pcount);
+
+	if (skb == tp->lost_skb_hint)
 		tp->lost_cnt_hint += pcount;
 
 	TCP_SKB_CB(prev)->end_seq += shifted;
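
The ordering constraint in the new comment is subtle: `tcp_highest_sack_seq()` is derived from the `seq` of the current highest-SACKed skb, so the sequence advancement performed by the shift moves the very reference point that `tcp_sacktag_one()` compares newly tagged ranges against. A toy model of that reference point moving (hypothetical names, not kernel code):

```c
#include <stdio.h>
#include <stdint.h>

/* Stand-in for an skb's control block. */
struct seg { uint32_t seq, end_seq; };

/* Mirrors tcp_highest_sack_seq(): the seq of the highest SACKed skb. */
static uint32_t highest_sack_seq(const struct seg *highest)
{
	return highest->seq;
}

int main(void)
{
	struct seg skb = { 2000, 3000 };	/* skb being shifted into prev */
	uint32_t shifted = 1000;

	/* Tagging first sees the pre-shift reference point... */
	printf("before shift: highest_sack_seq = %u\n", highest_sack_seq(&skb));

	/* ...the shift then advances skb->seq, and with it the value that
	 * any later comparison against tcp_highest_sack_seq() would use. */
	skb.seq += shifted;
	printf("after shift:  highest_sack_seq = %u\n", highest_sack_seq(&skb));
	return 0;
}
```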
@@ -1430,12 +1440,6 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 		skb_shinfo(skb)->gso_type = 0;
 	}
 
-	/* Adjust counters and hints for the newly sacked sequence range but
-	 * discard the return value since prev is already marked.
-	 */
-	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
-			start_seq, end_seq, dup_sack, pcount);
-
 	/* Difference in this won't matter, both ACKed by the same cumul. ACK */
 	TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
 
@@ -1583,6 +1587,10 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 		}
 	}
 
+	/* tcp_sacktag_one() won't SACK-tag ranges below snd_una */
+	if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
+		goto fallback;
+
 	if (!skb_shift(prev, skb, len))
 		goto fallback;
 	if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
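
The new guard relies on the stack's wraparound-safe sequence comparisons: `after(a, b)` is true when `a` comes later than `b` in 32-bit sequence space, even across the wrap. A self-contained sketch of those helpers, mirroring their definitions in include/net/tcp.h:

```c
#include <stdio.h>
#include <stdint.h>

/* Signed subtraction keeps the comparison correct across the 2^32 wrap. */
static int before(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) < 0;
}
#define after(seq2, seq1)	before(seq1, seq2)

int main(void)
{
	uint32_t snd_una = 0xfffffff0u;	/* just before the sequence wrap */
	uint32_t end     = 0x00000010u;	/* 32 bytes later, after the wrap */

	/* end is logically beyond snd_una despite being numerically smaller,
	 * so a range ending there would still be SACK-taggable. */
	printf("after(end, snd_una) = %d\n", after(end, snd_una));	/* 1 */
	printf("after(snd_una, end) = %d\n", after(snd_una, end));	/* 0 */
	return 0;
}
```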
@@ -2567,6 +2575,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 
 		if (cnt > packets) {
 			if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
+			    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
 			    (oldcnt >= packets))
 				break;
 
@@ -3860,9 +3869,9 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o
 				opt_rx->wscale_ok = 1;
 				if (snd_wscale > 14) {
 					if (net_ratelimit())
-						printk(KERN_INFO "tcp_parse_options: Illegal window "
-						       "scaling value %d >14 received.\n",
-						       snd_wscale);
+						pr_info("%s: Illegal window scaling value %d >14 received\n",
+							__func__,
+							snd_wscale);
 					snd_wscale = 14;
 				}
 				opt_rx->snd_wscale = snd_wscale;
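
The clamp to 14 follows RFC 1323 (now RFC 7323): the 16-bit window field shifted by the scale factor must stay below 2^30 so that signed sequence-space comparisons like the ones above remain unambiguous, and 14 is the largest shift that satisfies that bound. The arithmetic:

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t max_field = 65535;	/* largest 16-bit window field value */

	printf("65535 << 14 = %u  (< 2^30 = %u)\n",
	       max_field << 14, 1u << 30);	/* 1073725440 < 1073741824 */
	printf("65535 << 15 = %u  (>= 2^30)\n",
	       max_field << 15);		/* 2147450880: too large */
	return 0;
}
```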
@@ -4184,7 +4193,7 @@ static void tcp_fin(struct sock *sk)
 		/* Only TCP_LISTEN and TCP_CLOSE are left, in these
 		 * cases we should never reach this piece of code.
 		 */
-		printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
+		pr_err("%s: Impossible, sk->sk_state=%d\n",
 		       __func__, sk->sk_state);
 		break;
 	}
@@ -4437,6 +4446,137 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
 	return 0;
 }
 
+static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb1;
+	u32 seq, end_seq;
+
+	TCP_ECN_check_ce(tp, skb);
+
+	if (tcp_try_rmem_schedule(sk, skb->truesize)) {
+		/* TODO: should increment a counter */
+		__kfree_skb(skb);
+		return;
+	}
+
+	/* Disable header prediction. */
+	tp->pred_flags = 0;
+	inet_csk_schedule_ack(sk);
+
+	SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
+		   tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+
+	skb1 = skb_peek_tail(&tp->out_of_order_queue);
+	if (!skb1) {
+		/* Initial out of order segment, build 1 SACK. */
+		if (tcp_is_sack(tp)) {
+			tp->rx_opt.num_sacks = 1;
+			tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
+			tp->selective_acks[0].end_seq =
+						TCP_SKB_CB(skb)->end_seq;
+		}
+		__skb_queue_head(&tp->out_of_order_queue, skb);
+		goto end;
+	}
+
+	seq = TCP_SKB_CB(skb)->seq;
+	end_seq = TCP_SKB_CB(skb)->end_seq;
+
+	if (seq == TCP_SKB_CB(skb1)->end_seq) {
+		/* Packets in ofo can stay in queue a long time.
+		 * Better try to coalesce them right now
+		 * to avoid future tcp_collapse_ofo_queue(),
+		 * probably the most expensive function in tcp stack.
+		 */
+		if (skb->len <= skb_tailroom(skb1) && !tcp_hdr(skb)->fin) {
+			NET_INC_STATS_BH(sock_net(sk),
+					 LINUX_MIB_TCPRCVCOALESCE);
+			BUG_ON(skb_copy_bits(skb, 0,
+					     skb_put(skb1, skb->len),
+					     skb->len));
+			TCP_SKB_CB(skb1)->end_seq = end_seq;
+			TCP_SKB_CB(skb1)->ack_seq = TCP_SKB_CB(skb)->ack_seq;
+			__kfree_skb(skb);
+			skb = NULL;
+		} else {
+			__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+		}
+
+		if (!tp->rx_opt.num_sacks ||
+		    tp->selective_acks[0].end_seq != seq)
+			goto add_sack;
+
+		/* Common case: data arrive in order after hole. */
+		tp->selective_acks[0].end_seq = end_seq;
+		goto end;
+	}
+
+	/* Find place to insert this segment. */
+	while (1) {
+		if (!after(TCP_SKB_CB(skb1)->seq, seq))
+			break;
+		if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
+			skb1 = NULL;
+			break;
+		}
+		skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
+	}
+
+	/* Do skb overlap to previous one? */
+	if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
+		if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+			/* All the bits are present. Drop. */
+			__kfree_skb(skb);
+			skb = NULL;
+			tcp_dsack_set(sk, seq, end_seq);
+			goto add_sack;
+		}
+		if (after(seq, TCP_SKB_CB(skb1)->seq)) {
+			/* Partial overlap. */
+			tcp_dsack_set(sk, seq,
+				      TCP_SKB_CB(skb1)->end_seq);
+		} else {
+			if (skb_queue_is_first(&tp->out_of_order_queue,
+					       skb1))
+				skb1 = NULL;
+			else
+				skb1 = skb_queue_prev(
+					&tp->out_of_order_queue,
+					skb1);
+		}
+	}
+	if (!skb1)
+		__skb_queue_head(&tp->out_of_order_queue, skb);
+	else
+		__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+
+	/* And clean segments covered by new one as whole. */
+	while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
+		skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
+
+		if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
+			break;
+		if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+			tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
+					 end_seq);
+			break;
+		}
+		__skb_unlink(skb1, &tp->out_of_order_queue);
+		tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
+				 TCP_SKB_CB(skb1)->end_seq);
+		__kfree_skb(skb1);
+	}
+
+add_sack:
+	if (tcp_is_sack(tp))
+		tcp_sack_new_ofo_skb(sk, seq, end_seq);
+end:
+	if (skb)
+		skb_set_owner_r(skb, sk);
+}
+
+
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
 	const struct tcphdr *th = tcp_hdr(skb);
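
The insertion and cleanup bookkeeping that `tcp_data_queue_ofo()` now owns can be modeled compactly in user space. The sketch below is illustrative and simplified: a sorted array of `[seq, end_seq)` ranges stands in for the sk_buff list, a `printf` replaces the D-SACK reporting, and the tailroom-coalescing fast path is omitted. It reproduces the sorted insert, the drop of fully covered arrivals, and the removal of successors wholly covered by the new range:

```c
#include <stdio.h>
#include <stdint.h>

/* Wraparound-safe comparisons, as in include/net/tcp.h. */
static int before(uint32_t a, uint32_t b) { return (int32_t)(a - b) < 0; }
static int after(uint32_t a, uint32_t b)  { return before(b, a); }

struct range { uint32_t seq, end_seq; };

static struct range ofo[16];
static int nr_ofo;

static void ofo_insert(uint32_t seq, uint32_t end_seq)
{
	int i = nr_ofo;

	/* Find place to insert: first entry whose seq is not after ours. */
	while (i > 0 && after(ofo[i - 1].seq, seq))
		i--;

	/* Fully covered by the previous range? Report the duplicate
	 * (the kernel would send a D-SACK here) and drop. */
	if (i > 0 && before(seq, ofo[i - 1].end_seq) &&
	    !after(end_seq, ofo[i - 1].end_seq)) {
		printf("dup: [%u,%u) already queued\n", seq, end_seq);
		return;
	}

	/* Shift the tail up and insert in sequence order. */
	for (int j = nr_ofo; j > i; j--)
		ofo[j] = ofo[j - 1];
	ofo[i] = (struct range){ seq, end_seq };
	nr_ofo++;

	/* Remove successors wholly covered by the new range. */
	while (i + 1 < nr_ofo && !before(end_seq, ofo[i + 1].end_seq)) {
		printf("dup: [%u,%u) covered\n",
		       ofo[i + 1].seq, ofo[i + 1].end_seq);
		for (int j = i + 1; j < nr_ofo - 1; j++)
			ofo[j] = ofo[j + 1];
		nr_ofo--;
	}
}

int main(void)
{
	ofo_insert(3000, 4000);
	ofo_insert(1000, 2000);
	ofo_insert(3200, 3800);	/* fully covered -> reported, dropped */
	ofo_insert(500, 5000);	/* covers both queued ranges -> removed */
	for (int i = 0; i < nr_ofo; i++)
		printf("[%u,%u)\n", ofo[i].seq, ofo[i].end_seq);
	return 0;
}
```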
@@ -4552,105 +4692,7 @@ drop:
 		goto queue_and_out;
 	}
 
-	TCP_ECN_check_ce(tp, skb);
-
-	if (tcp_try_rmem_schedule(sk, skb->truesize))
-		goto drop;
-
-	/* Disable header prediction. */
-	tp->pred_flags = 0;
-	inet_csk_schedule_ack(sk);
-
-	SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
-		   tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
-
-	skb_set_owner_r(skb, sk);
-
-	if (!skb_peek(&tp->out_of_order_queue)) {
-		/* Initial out of order segment, build 1 SACK. */
-		if (tcp_is_sack(tp)) {
-			tp->rx_opt.num_sacks = 1;
-			tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
-			tp->selective_acks[0].end_seq =
-						TCP_SKB_CB(skb)->end_seq;
-		}
-		__skb_queue_head(&tp->out_of_order_queue, skb);
-	} else {
-		struct sk_buff *skb1 = skb_peek_tail(&tp->out_of_order_queue);
-		u32 seq = TCP_SKB_CB(skb)->seq;
-		u32 end_seq = TCP_SKB_CB(skb)->end_seq;
-
-		if (seq == TCP_SKB_CB(skb1)->end_seq) {
-			__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
-
-			if (!tp->rx_opt.num_sacks ||
-			    tp->selective_acks[0].end_seq != seq)
-				goto add_sack;
-
-			/* Common case: data arrive in order after hole. */
-			tp->selective_acks[0].end_seq = end_seq;
-			return;
-		}
-
-		/* Find place to insert this segment. */
-		while (1) {
-			if (!after(TCP_SKB_CB(skb1)->seq, seq))
-				break;
-			if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
-				skb1 = NULL;
-				break;
-			}
-			skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
-		}
-
-		/* Do skb overlap to previous one? */
-		if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
-			if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
-				/* All the bits are present. Drop. */
-				__kfree_skb(skb);
-				tcp_dsack_set(sk, seq, end_seq);
-				goto add_sack;
-			}
-			if (after(seq, TCP_SKB_CB(skb1)->seq)) {
-				/* Partial overlap. */
-				tcp_dsack_set(sk, seq,
-					      TCP_SKB_CB(skb1)->end_seq);
-			} else {
-				if (skb_queue_is_first(&tp->out_of_order_queue,
-						       skb1))
-					skb1 = NULL;
-				else
-					skb1 = skb_queue_prev(
-						&tp->out_of_order_queue,
-						skb1);
-			}
-		}
-		if (!skb1)
-			__skb_queue_head(&tp->out_of_order_queue, skb);
-		else
-			__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
-
-		/* And clean segments covered by new one as whole. */
-		while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
-			skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
-
-			if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
-				break;
-			if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
-				tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
-						 end_seq);
-				break;
-			}
-			__skb_unlink(skb1, &tp->out_of_order_queue);
-			tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
-					 TCP_SKB_CB(skb1)->end_seq);
-			__kfree_skb(skb1);
-		}
-
-add_sack:
-		if (tcp_is_sack(tp))
-			tcp_sack_new_ofo_skb(sk, seq, end_seq);
-	}
+	tcp_data_queue_ofo(sk, skb);
 }
 
 static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
