Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r--	net/ipv4/tcp_ipv4.c	132
1 file changed, 109 insertions(+), 23 deletions(-)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index de47038afdf0..efc6fef692ff 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -423,7 +423,7 @@ EXPORT_SYMBOL(tcp_req_err);
  *
  */
 
-void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
+int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 {
         const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
         struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
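
The handler now reports a result instead of returning void. This file shows only the TCP side of the conversion; presumably it lands together with a matching prototype change for the err_handler member of struct net_protocol in include/net/protocol.h, along these lines (a sketch, not part of this diff):

        struct net_protocol {
                int     (*handler)(struct sk_buff *skb);
                /* was: void (*err_handler)(struct sk_buff *skb, u32 info); */
                int     (*err_handler)(struct sk_buff *skb, u32 info);
                /* remaining members unchanged and omitted here */
        };
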
@@ -446,20 +446,21 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
                                        inet_iif(icmp_skb), 0);
         if (!sk) {
                 __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
-                return;
+                return -ENOENT;
         }
         if (sk->sk_state == TCP_TIME_WAIT) {
                 inet_twsk_put(inet_twsk(sk));
-                return;
+                return 0;
         }
         seq = ntohl(th->seq);
-        if (sk->sk_state == TCP_NEW_SYN_RECV)
-                return tcp_req_err(sk, seq,
-                                   type == ICMP_PARAMETERPROB ||
-                                   type == ICMP_TIME_EXCEEDED ||
-                                   (type == ICMP_DEST_UNREACH &&
-                                    (code == ICMP_NET_UNREACH ||
-                                     code == ICMP_HOST_UNREACH)));
+        if (sk->sk_state == TCP_NEW_SYN_RECV) {
+                tcp_req_err(sk, seq, type == ICMP_PARAMETERPROB ||
+                                     type == ICMP_TIME_EXCEEDED ||
+                                     (type == ICMP_DEST_UNREACH &&
+                                      (code == ICMP_NET_UNREACH ||
+                                       code == ICMP_HOST_UNREACH)));
+                return 0;
+        }
 
         bh_lock_sock(sk);
         /* If too many ICMPs get dropped on busy
@@ -541,7 +542,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
                 icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
 
                 skb = tcp_rtx_queue_head(sk);
-                BUG_ON(!skb);
 
                 tcp_mstamp_refresh(tp);
                 delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb));
@@ -613,6 +613,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 out:
         bh_unlock_sock(sk);
         sock_put(sk);
+        return 0;
 }
 
 void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
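
With the final return 0 at the out: label, every path now yields a status: -ENOENT when no socket matched the ICMP payload, 0 otherwise. A hypothetical dispatch-site sketch showing what the int return enables (the actual call site in net/ipv4/icmp.c is outside this diff):

        /* Sketch: the ICMP layer can now tell whether the transport
         * handler resolved the error to a socket.
         */
        if (ipprot->err_handler && ipprot->err_handler(skb, info) < 0) {
                /* no matching socket; the error may still be useful
                 * elsewhere, e.g. for a lookup in another context.
                 */
        }
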
@@ -969,10 +970,13 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
  * We need to maintain these in the sk structure.
  */
 
+struct static_key tcp_md5_needed __read_mostly;
+EXPORT_SYMBOL(tcp_md5_needed);
+
 /* Find the Key structure for an address. */
-struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
-                                         const union tcp_md5_addr *addr,
-                                         int family)
+struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk,
+                                           const union tcp_md5_addr *addr,
+                                           int family)
 {
         const struct tcp_sock *tp = tcp_sk(sk);
         struct tcp_md5sig_key *key;
@@ -1010,7 +1014,7 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
         }
         return best_match;
 }
-EXPORT_SYMBOL(tcp_md5_do_lookup);
+EXPORT_SYMBOL(__tcp_md5_do_lookup);
 
 static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
                                                       const union tcp_md5_addr *addr,
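
The rename from tcp_md5_do_lookup to __tcp_md5_do_lookup, paired with the new tcp_md5_needed static key, implies the old name lives on as an inline fast-path wrapper in a header (include/net/tcp.h in mainline, not shown here): hosts that never configure a TCP-MD5 key then pay only a patched-out branch per packet instead of a hash lookup. A sketch of the assumed wrapper:

        extern struct static_key tcp_md5_needed;

        static inline struct tcp_md5sig_key *
        tcp_md5_do_lookup(const struct sock *sk,
                          const union tcp_md5_addr *addr, int family)
        {
                /* jump label: false until the first MD5 key is installed */
                if (!static_key_false(&tcp_md5_needed))
                        return NULL;
                return __tcp_md5_do_lookup(sk, addr, family);
        }
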
@@ -1618,12 +1622,14 @@ int tcp_v4_early_demux(struct sk_buff *skb)
 bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
         u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
-
-        /* Only socket owner can try to collapse/prune rx queues
-         * to reduce memory overhead, so add a little headroom here.
-         * Few sockets backlog are possibly concurrently non empty.
-         */
-        limit += 64*1024;
+        struct skb_shared_info *shinfo;
+        const struct tcphdr *th;
+        struct tcphdr *thtail;
+        struct sk_buff *tail;
+        unsigned int hdrlen;
+        bool fragstolen;
+        u32 gso_segs;
+        int delta;
 
         /* In case all data was pulled from skb frags (in __pskb_pull_tail()),
          * we can fix skb->truesize to its real value to avoid future drops.
@@ -1633,6 +1639,86 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
          */
         skb_condense(skb);
 
+        skb_dst_drop(skb);
+
+        if (unlikely(tcp_checksum_complete(skb))) {
+                bh_unlock_sock(sk);
+                __TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
+                __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
+                return true;
+        }
+
+        /* Attempt coalescing to last skb in backlog, even if we are
+         * above the limits.
+         * This is okay because skb capacity is limited to MAX_SKB_FRAGS.
+         */
+        th = (const struct tcphdr *)skb->data;
+        hdrlen = th->doff * 4;
+        shinfo = skb_shinfo(skb);
+
+        if (!shinfo->gso_size)
+                shinfo->gso_size = skb->len - hdrlen;
+
+        if (!shinfo->gso_segs)
+                shinfo->gso_segs = 1;
+
+        tail = sk->sk_backlog.tail;
+        if (!tail)
+                goto no_coalesce;
+        thtail = (struct tcphdr *)tail->data;
+
+        if (TCP_SKB_CB(tail)->end_seq != TCP_SKB_CB(skb)->seq ||
+            TCP_SKB_CB(tail)->ip_dsfield != TCP_SKB_CB(skb)->ip_dsfield ||
+            ((TCP_SKB_CB(tail)->tcp_flags |
+              TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_URG) ||
+            ((TCP_SKB_CB(tail)->tcp_flags ^
+              TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) ||
+#ifdef CONFIG_TLS_DEVICE
+            tail->decrypted != skb->decrypted ||
+#endif
+            thtail->doff != th->doff ||
+            memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th)))
+                goto no_coalesce;
+
+        __skb_pull(skb, hdrlen);
+        if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) {
+                thtail->window = th->window;
+
+                TCP_SKB_CB(tail)->end_seq = TCP_SKB_CB(skb)->end_seq;
+
+                if (after(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq))
+                        TCP_SKB_CB(tail)->ack_seq = TCP_SKB_CB(skb)->ack_seq;
+
+                TCP_SKB_CB(tail)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
+
+                if (TCP_SKB_CB(skb)->has_rxtstamp) {
+                        TCP_SKB_CB(tail)->has_rxtstamp = true;
+                        tail->tstamp = skb->tstamp;
+                        skb_hwtstamps(tail)->hwtstamp = skb_hwtstamps(skb)->hwtstamp;
+                }
+
+                /* Not as strict as GRO. We only need to carry mss max value */
+                skb_shinfo(tail)->gso_size = max(shinfo->gso_size,
+                                                 skb_shinfo(tail)->gso_size);
+
+                gso_segs = skb_shinfo(tail)->gso_segs + shinfo->gso_segs;
+                skb_shinfo(tail)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
+
+                sk->sk_backlog.len += delta;
+                __NET_INC_STATS(sock_net(sk),
+                                LINUX_MIB_TCPBACKLOGCOALESCE);
+                kfree_skb_partial(skb, fragstolen);
+                return false;
+        }
+        __skb_push(skb, hdrlen);
+
+no_coalesce:
+        /* Only socket owner can try to collapse/prune rx queues
+         * to reduce memory overhead, so add a little headroom here.
+         * Few sockets backlog are possibly concurrently non empty.
+         */
+        limit += 64*1024;
+
         if (unlikely(sk_add_backlog(sk, skb, limit))) {
                 bh_unlock_sock(sk);
                 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
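
Two details worth calling out. First, gso_segs is clamped to 0xFFFF because skb_shared_info stores it in a 16-bit field. Second, the coalescing attempt deliberately happens before the limit check: merging into the existing tail grows the backlog by only delta bytes rather than a whole new skb, and capacity is naturally bounded by MAX_SKB_FRAGS. The limit itself is enforced inside sk_add_backlog(); paraphrased from include/net/sock.h of this era (a sketch, not part of the diff), the gate looks like:

        static inline bool sk_rcvqueues_full(const struct sock *sk,
                                             unsigned int limit)
        {
                unsigned int qsize = sk->sk_backlog.len +
                                     atomic_read(&sk->sk_rmem_alloc);

                return qsize > limit;
        }
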
@@ -2573,8 +2659,8 @@ static int __net_init tcp_sk_init(struct net *net)
          * which are too large can cause TCP streams to be bursty.
          */
         net->ipv4.sysctl_tcp_tso_win_divisor = 3;
-        /* Default TSQ limit of four TSO segments */
-        net->ipv4.sysctl_tcp_limit_output_bytes = 262144;
+        /* Default TSQ limit of 16 TSO segments */
+        net->ipv4.sysctl_tcp_limit_output_bytes = 16 * 65536;
         /* rfc5961 challenge ack rate limiting */
         net->ipv4.sysctl_tcp_challenge_ack_limit = 1000;
         net->ipv4.sysctl_tcp_min_tso_segs = 2;
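
For scale: the old default of 262144 bytes is 4 * 65536, i.e. four maximally-sized 64 KB TSO segments, which is what the old comment described. The new 16 * 65536 works out to 1048576 bytes (1 MB), or sixteen such segments allowed in the qdisc and device queues before TCP Small Queues throttles the flow.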