Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--  net/ipv4/tcp_output.c | 310
 1 file changed, 180 insertions(+), 130 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5a7c41fbc6d3..65caf8b95e17 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -318,36 +318,56 @@ static u16 tcp_select_window(struct sock *sk) | |||
318 | } | 318 | } |
319 | 319 | ||
320 | /* Packet ECN state for a SYN-ACK */ | 320 | /* Packet ECN state for a SYN-ACK */ |
321 | static inline void TCP_ECN_send_synack(const struct tcp_sock *tp, struct sk_buff *skb) | 321 | static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb) |
322 | { | 322 | { |
323 | const struct tcp_sock *tp = tcp_sk(sk); | ||
324 | |||
323 | TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR; | 325 | TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR; |
324 | if (!(tp->ecn_flags & TCP_ECN_OK)) | 326 | if (!(tp->ecn_flags & TCP_ECN_OK)) |
325 | TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE; | 327 | TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE; |
328 | else if (tcp_ca_needs_ecn(sk)) | ||
329 | INET_ECN_xmit(sk); | ||
326 | } | 330 | } |
327 | 331 | ||
328 | /* Packet ECN state for a SYN. */ | 332 | /* Packet ECN state for a SYN. */ |
329 | static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) | 333 | static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) |
330 | { | 334 | { |
331 | struct tcp_sock *tp = tcp_sk(sk); | 335 | struct tcp_sock *tp = tcp_sk(sk); |
336 | bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 || | ||
337 | tcp_ca_needs_ecn(sk); | ||
338 | |||
339 | if (!use_ecn) { | ||
340 | const struct dst_entry *dst = __sk_dst_get(sk); | ||
341 | |||
342 | if (dst && dst_feature(dst, RTAX_FEATURE_ECN)) | ||
343 | use_ecn = true; | ||
344 | } | ||
332 | 345 | ||
333 | tp->ecn_flags = 0; | 346 | tp->ecn_flags = 0; |
334 | if (sock_net(sk)->ipv4.sysctl_tcp_ecn == 1) { | 347 | |
348 | if (use_ecn) { | ||
335 | TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; | 349 | TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; |
336 | tp->ecn_flags = TCP_ECN_OK; | 350 | tp->ecn_flags = TCP_ECN_OK; |
351 | if (tcp_ca_needs_ecn(sk)) | ||
352 | INET_ECN_xmit(sk); | ||
337 | } | 353 | } |
338 | } | 354 | } |
339 | 355 | ||
340 | static __inline__ void | 356 | static void |
341 | TCP_ECN_make_synack(const struct request_sock *req, struct tcphdr *th) | 357 | tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th, |
358 | struct sock *sk) | ||
342 | { | 359 | { |
343 | if (inet_rsk(req)->ecn_ok) | 360 | if (inet_rsk(req)->ecn_ok) { |
344 | th->ece = 1; | 361 | th->ece = 1; |
362 | if (tcp_ca_needs_ecn(sk)) | ||
363 | INET_ECN_xmit(sk); | ||
364 | } | ||
345 | } | 365 | } |
346 | 366 | ||
347 | /* Set up ECN state for a packet on a ESTABLISHED socket that is about to | 367 | /* Set up ECN state for a packet on a ESTABLISHED socket that is about to |
348 | * be sent. | 368 | * be sent. |
349 | */ | 369 | */ |
350 | static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb, | 370 | static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, |
351 | int tcp_header_len) | 371 | int tcp_header_len) |
352 | { | 372 | { |
353 | struct tcp_sock *tp = tcp_sk(sk); | 373 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -362,7 +382,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb, | |||
362 | tcp_hdr(skb)->cwr = 1; | 382 | tcp_hdr(skb)->cwr = 1; |
363 | skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; | 383 | skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; |
364 | } | 384 | } |
365 | } else { | 385 | } else if (!tcp_ca_needs_ecn(sk)) { |
366 | /* ACK or retransmitted segment: clear ECT|CE */ | 386 | /* ACK or retransmitted segment: clear ECT|CE */ |
367 | INET_ECN_dontxmit(sk); | 387 | INET_ECN_dontxmit(sk); |
368 | } | 388 | } |
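The hunk above renames the TCP_ECN_* helpers to tcp_ecn_*() and threads the socket through them so that a congestion-control module can demand ECN via tcp_ca_needs_ecn() (as DCTCP does), independently of the tcp_ecn sysctl or a per-route RTAX_FEATURE_ECN flag. Below is a minimal userspace sketch of just the SYN-time decision; the parameter names (ecn_sysctl, route_requests_ecn, ca_needs_ecn) are stand-ins for kernel state, not real APIs.

/* Minimal model of the SYN-time ECN decision in tcp_ecn_send_syn(). */
#include <stdbool.h>
#include <stdio.h>

static bool syn_should_request_ecn(int ecn_sysctl, bool route_requests_ecn,
                                   bool ca_needs_ecn)
{
        bool use_ecn = (ecn_sysctl == 1) || ca_needs_ecn;

        if (!use_ecn && route_requests_ecn)
                use_ecn = true;
        return use_ecn;
}

int main(void)
{
        printf("sysctl=0 route=0 ca=0 -> %d\n", syn_should_request_ecn(0, false, false));
        printf("sysctl=0 route=1 ca=0 -> %d\n", syn_should_request_ecn(0, true, false));
        printf("sysctl=0 route=0 ca=1 -> %d\n", syn_should_request_ecn(0, false, true));
        printf("sysctl=1 route=0 ca=0 -> %d\n", syn_should_request_ecn(1, false, false));
        return 0;
}

When use_ecn holds, the SYN carries ECE|CWR and the socket is allowed to transmit ECT-marked segments, exactly as the hunk sets tp->ecn_flags and calls INET_ECN_xmit().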
@@ -384,7 +404,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) | |||
384 | TCP_SKB_CB(skb)->tcp_flags = flags; | 404 | TCP_SKB_CB(skb)->tcp_flags = flags; |
385 | TCP_SKB_CB(skb)->sacked = 0; | 405 | TCP_SKB_CB(skb)->sacked = 0; |
386 | 406 | ||
387 | shinfo->gso_segs = 1; | 407 | tcp_skb_pcount_set(skb, 1); |
388 | shinfo->gso_size = 0; | 408 | shinfo->gso_size = 0; |
389 | shinfo->gso_type = 0; | 409 | shinfo->gso_type = 0; |
390 | 410 | ||
@@ -550,7 +570,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, | |||
550 | 570 | ||
551 | if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { | 571 | if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { |
552 | opts->options |= OPTION_TS; | 572 | opts->options |= OPTION_TS; |
553 | opts->tsval = TCP_SKB_CB(skb)->when + tp->tsoffset; | 573 | opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset; |
554 | opts->tsecr = tp->rx_opt.ts_recent; | 574 | opts->tsecr = tp->rx_opt.ts_recent; |
555 | remaining -= TCPOLEN_TSTAMP_ALIGNED; | 575 | remaining -= TCPOLEN_TSTAMP_ALIGNED; |
556 | } | 576 | } |
@@ -618,7 +638,7 @@ static unsigned int tcp_synack_options(struct sock *sk, | |||
618 | } | 638 | } |
619 | if (likely(ireq->tstamp_ok)) { | 639 | if (likely(ireq->tstamp_ok)) { |
620 | opts->options |= OPTION_TS; | 640 | opts->options |= OPTION_TS; |
621 | opts->tsval = TCP_SKB_CB(skb)->when; | 641 | opts->tsval = tcp_skb_timestamp(skb); |
622 | opts->tsecr = req->ts_recent; | 642 | opts->tsecr = req->ts_recent; |
623 | remaining -= TCPOLEN_TSTAMP_ALIGNED; | 643 | remaining -= TCPOLEN_TSTAMP_ALIGNED; |
624 | } | 644 | } |
@@ -647,7 +667,6 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb | |||
647 | struct tcp_out_options *opts, | 667 | struct tcp_out_options *opts, |
648 | struct tcp_md5sig_key **md5) | 668 | struct tcp_md5sig_key **md5) |
649 | { | 669 | { |
650 | struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; | ||
651 | struct tcp_sock *tp = tcp_sk(sk); | 670 | struct tcp_sock *tp = tcp_sk(sk); |
652 | unsigned int size = 0; | 671 | unsigned int size = 0; |
653 | unsigned int eff_sacks; | 672 | unsigned int eff_sacks; |
@@ -666,7 +685,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb | |||
666 | 685 | ||
667 | if (likely(tp->rx_opt.tstamp_ok)) { | 686 | if (likely(tp->rx_opt.tstamp_ok)) { |
668 | opts->options |= OPTION_TS; | 687 | opts->options |= OPTION_TS; |
669 | opts->tsval = tcb ? tcb->when + tp->tsoffset : 0; | 688 | opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0; |
670 | opts->tsecr = tp->rx_opt.ts_recent; | 689 | opts->tsecr = tp->rx_opt.ts_recent; |
671 | size += TCPOLEN_TSTAMP_ALIGNED; | 690 | size += TCPOLEN_TSTAMP_ALIGNED; |
672 | } | 691 | } |
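The three timestamp hunks above belong to the wider conversion in this range from the jiffies-based TCP_SKB_CB(skb)->when field to the microsecond skb->skb_mstamp, read back through tcp_skb_timestamp() when building tsval. A hedged userspace analogue of the pattern — take a monotonic stamp at transmit time, derive a coarser tick value from it later — is sketched below; the 1 ms tick is an illustrative stand-in for a jiffy, not the kernel's definition.

#define _POSIX_C_SOURCE 199309L
#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Monotonic microsecond clock, loosely mirroring skb_mstamp_get(). */
static uint64_t mono_usecs(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000ull + ts.tv_nsec / 1000;
}

/* Coarse tick derived from the stored stamp, standing in for the
 * jiffies-based tsval/retrans_stamp users (assumes 1 ms ticks). */
static uint32_t stamp_to_ticks(uint64_t stamp_us)
{
        return (uint32_t)(stamp_us / 1000);
}

int main(void)
{
        uint64_t sent = mono_usecs();   /* would be stored per skb */

        printf("stamp=%llu us, tick=%u\n",
               (unsigned long long)sent, (unsigned)stamp_to_ticks(sent));
        return 0;
}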
@@ -829,26 +848,38 @@ void tcp_wfree(struct sk_buff *skb) | |||
829 | { | 848 | { |
830 | struct sock *sk = skb->sk; | 849 | struct sock *sk = skb->sk; |
831 | struct tcp_sock *tp = tcp_sk(sk); | 850 | struct tcp_sock *tp = tcp_sk(sk); |
851 | int wmem; | ||
852 | |||
853 | /* Keep one reference on sk_wmem_alloc. | ||
854 | * Will be released by sk_free() from here or tcp_tasklet_func() | ||
855 | */ | ||
856 | wmem = atomic_sub_return(skb->truesize - 1, &sk->sk_wmem_alloc); | ||
857 | |||
858 | /* If this softirq is serviced by ksoftirqd, we are likely under stress. | ||
859 | * Wait until our queues (qdisc + devices) are drained. | ||
860 | * This gives : | ||
861 | * - less callbacks to tcp_write_xmit(), reducing stress (batches) | ||
862 | * - chance for incoming ACK (processed by another cpu maybe) | ||
863 | * to migrate this flow (skb->ooo_okay will be eventually set) | ||
864 | */ | ||
865 | if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current) | ||
866 | goto out; | ||
832 | 867 | ||
833 | if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) && | 868 | if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) && |
834 | !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) { | 869 | !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) { |
835 | unsigned long flags; | 870 | unsigned long flags; |
836 | struct tsq_tasklet *tsq; | 871 | struct tsq_tasklet *tsq; |
837 | 872 | ||
838 | /* Keep a ref on socket. | ||
839 | * This last ref will be released in tcp_tasklet_func() | ||
840 | */ | ||
841 | atomic_sub(skb->truesize - 1, &sk->sk_wmem_alloc); | ||
842 | |||
843 | /* queue this socket to tasklet queue */ | 873 | /* queue this socket to tasklet queue */ |
844 | local_irq_save(flags); | 874 | local_irq_save(flags); |
845 | tsq = &__get_cpu_var(tsq_tasklet); | 875 | tsq = this_cpu_ptr(&tsq_tasklet); |
846 | list_add(&tp->tsq_node, &tsq->head); | 876 | list_add(&tp->tsq_node, &tsq->head); |
847 | tasklet_schedule(&tsq->tasklet); | 877 | tasklet_schedule(&tsq->tasklet); |
848 | local_irq_restore(flags); | 878 | local_irq_restore(flags); |
849 | } else { | 879 | return; |
850 | sock_wfree(skb); | ||
851 | } | 880 | } |
881 | out: | ||
882 | sk_free(sk); | ||
852 | } | 883 | } |
853 | 884 | ||
854 | /* This routine actually transmits TCP packets queued in by | 885 | /* This routine actually transmits TCP packets queued in by |
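In the rewritten tcp_wfree() above, all but one unit of the skb's truesize is dropped from sk_wmem_alloc immediately; the remaining reference is then either handed to the per-CPU TSQ tasklet or released through sk_free(), and the new ksoftirqd check defers further transmits while the CPU is saturated with softirq work. The fragment below models only that reference arithmetic with a plain counter; sub_return() and the boolean flags are stand-ins, not kernel interfaces.

#include <stdbool.h>
#include <stdio.h>

/* Plain-integer stand-in for atomic_sub_return() on sk_wmem_alloc. */
static int sub_return(int *counter, int val)
{
        *counter -= val;
        return *counter;
}

int main(void)
{
        int wmem_alloc   = 4096;        /* one queued skb's worth */
        int skb_truesize = 4096;
        bool softirq_stressed = false;  /* this_cpu_ksoftirqd() == current */
        bool throttled = true;          /* TSQ_THROTTLED was set */
        bool already_queued = false;    /* TSQ_QUEUED */
        int wmem;

        /* Drop everything except one unit: the "last reference". */
        wmem = sub_return(&wmem_alloc, skb_truesize - 1);

        if (wmem >= 1 && softirq_stressed)
                goto out;               /* wait until the queues drain */

        if (throttled && !already_queued) {
                puts("hand the last reference to the per-cpu TSQ tasklet");
                return 0;
        }
out:
        puts("release the last reference here (sk_free equivalent)");
        return 0;
}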
@@ -886,8 +917,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
886 | skb = skb_clone(skb, gfp_mask); | 917 | skb = skb_clone(skb, gfp_mask); |
887 | if (unlikely(!skb)) | 918 | if (unlikely(!skb)) |
888 | return -ENOBUFS; | 919 | return -ENOBUFS; |
889 | /* Our usage of tstamp should remain private */ | ||
890 | skb->tstamp.tv64 = 0; | ||
891 | } | 920 | } |
892 | 921 | ||
893 | inet = inet_sk(sk); | 922 | inet = inet_sk(sk); |
@@ -906,9 +935,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
906 | tcp_ca_event(sk, CA_EVENT_TX_START); | 935 | tcp_ca_event(sk, CA_EVENT_TX_START); |
907 | 936 | ||
908 | /* if no packet is in qdisc/device queue, then allow XPS to select | 937 | /* if no packet is in qdisc/device queue, then allow XPS to select |
909 | * another queue. | 938 | * another queue. We can be called from tcp_tsq_handler() |
939 | * which holds one reference to sk_wmem_alloc. | ||
940 | * | ||
941 | * TODO: Ideally, in-flight pure ACK packets should not matter here. | ||
942 | * One way to get this would be to set skb->truesize = 2 on them. | ||
910 | */ | 943 | */ |
911 | skb->ooo_okay = sk_wmem_alloc_get(sk) == 0; | 944 | skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1); |
912 | 945 | ||
913 | skb_push(skb, tcp_header_size); | 946 | skb_push(skb, tcp_header_size); |
914 | skb_reset_transport_header(skb); | 947 | skb_reset_transport_header(skb); |
@@ -952,7 +985,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
952 | 985 | ||
953 | tcp_options_write((__be32 *)(th + 1), tp, &opts); | 986 | tcp_options_write((__be32 *)(th + 1), tp, &opts); |
954 | if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0)) | 987 | if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0)) |
955 | TCP_ECN_send(sk, skb, tcp_header_size); | 988 | tcp_ecn_send(sk, skb, tcp_header_size); |
956 | 989 | ||
957 | #ifdef CONFIG_TCP_MD5SIG | 990 | #ifdef CONFIG_TCP_MD5SIG |
958 | /* Calculate the MD5 hash, as we have all we need now */ | 991 | /* Calculate the MD5 hash, as we have all we need now */ |
@@ -975,7 +1008,18 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
975 | TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, | 1008 | TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, |
976 | tcp_skb_pcount(skb)); | 1009 | tcp_skb_pcount(skb)); |
977 | 1010 | ||
1011 | /* OK, its time to fill skb_shinfo(skb)->gso_segs */ | ||
1012 | skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb); | ||
1013 | |||
1014 | /* Our usage of tstamp should remain private */ | ||
1015 | skb->tstamp.tv64 = 0; | ||
1016 | |||
1017 | /* Cleanup our debris for IP stacks */ | ||
1018 | memset(skb->cb, 0, max(sizeof(struct inet_skb_parm), | ||
1019 | sizeof(struct inet6_skb_parm))); | ||
1020 | |||
978 | err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); | 1021 | err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); |
1022 | |||
979 | if (likely(err <= 0)) | 1023 | if (likely(err <= 0)) |
980 | return err; | 1024 | return err; |
981 | 1025 | ||
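Right before queue_xmit(), tcp_transmit_skb() now also copies the segment count into gso_segs, clears the private skb->tstamp and wipes TCP's scratch data from skb->cb so that the IPv4/IPv6 output paths start from a clean control block. The snippet below only illustrates the "clear enough for either user" sizing idiom; the v4_parm/v6_parm layouts are invented for the example and do not match inet_skb_parm/inet6_skb_parm.

#include <stdio.h>
#include <string.h>

/* Stand-ins for inet_skb_parm / inet6_skb_parm (layouts are made up). */
struct v4_parm { unsigned int flags; unsigned short frag_off; };
struct v6_parm { unsigned int flags; unsigned char hop_limit; int iif; };

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
        unsigned char cb[48];           /* skb->cb is 48 bytes */
        size_t len = MAX2(sizeof(struct v4_parm), sizeof(struct v6_parm));

        memset(cb, 0xaa, sizeof(cb));   /* pretend TCP scribbled on it */
        memset(cb, 0, len);             /* what the hunk adds */
        printf("cleared %zu of %zu bytes\n", len, sizeof(cb));
        return 0;
}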
@@ -995,7 +1039,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) | |||
995 | 1039 | ||
996 | /* Advance write_seq and place onto the write_queue. */ | 1040 | /* Advance write_seq and place onto the write_queue. */ |
997 | tp->write_seq = TCP_SKB_CB(skb)->end_seq; | 1041 | tp->write_seq = TCP_SKB_CB(skb)->end_seq; |
998 | skb_header_release(skb); | 1042 | __skb_header_release(skb); |
999 | tcp_add_write_queue_tail(sk, skb); | 1043 | tcp_add_write_queue_tail(sk, skb); |
1000 | sk->sk_wmem_queued += skb->truesize; | 1044 | sk->sk_wmem_queued += skb->truesize; |
1001 | sk_mem_charge(sk, skb->truesize); | 1045 | sk_mem_charge(sk, skb->truesize); |
@@ -1014,11 +1058,11 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, | |||
1014 | /* Avoid the costly divide in the normal | 1058 | /* Avoid the costly divide in the normal |
1015 | * non-TSO case. | 1059 | * non-TSO case. |
1016 | */ | 1060 | */ |
1017 | shinfo->gso_segs = 1; | 1061 | tcp_skb_pcount_set(skb, 1); |
1018 | shinfo->gso_size = 0; | 1062 | shinfo->gso_size = 0; |
1019 | shinfo->gso_type = 0; | 1063 | shinfo->gso_type = 0; |
1020 | } else { | 1064 | } else { |
1021 | shinfo->gso_segs = DIV_ROUND_UP(skb->len, mss_now); | 1065 | tcp_skb_pcount_set(skb, DIV_ROUND_UP(skb->len, mss_now)); |
1022 | shinfo->gso_size = mss_now; | 1066 | shinfo->gso_size = mss_now; |
1023 | shinfo->gso_type = sk->sk_gso_type; | 1067 | shinfo->gso_type = sk->sk_gso_type; |
1024 | } | 1068 | } |
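tcp_skb_pcount_set() stores the segment count in the TCP control block rather than writing skb_shinfo(skb)->gso_segs directly; tcp_transmit_skb() now copies it into gso_segs at send time, as seen earlier. The helper below reproduces just the DIV_ROUND_UP arithmetic used here; it is an ordinary function, not the kernel macro.

#include <stdio.h>

/* Segments needed to carry len payload bytes at the current MSS,
 * i.e. DIV_ROUND_UP(len, mss) as used by tcp_set_skb_tso_segs(). */
static unsigned int tcp_pcount(unsigned int len, unsigned int mss)
{
        if (len <= mss)
                return 1;               /* the cheap non-TSO case */
        return (len + mss - 1) / mss;
}

int main(void)
{
        printf("%u\n", tcp_pcount(1000, 1448));   /* 1 */
        printf("%u\n", tcp_pcount(64000, 1448));  /* 45 */
        return 0;
}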
@@ -1146,10 +1190,6 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, | |||
1146 | 1190 | ||
1147 | buff->ip_summed = skb->ip_summed; | 1191 | buff->ip_summed = skb->ip_summed; |
1148 | 1192 | ||
1149 | /* Looks stupid, but our code really uses when of | ||
1150 | * skbs, which it never sent before. --ANK | ||
1151 | */ | ||
1152 | TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when; | ||
1153 | buff->tstamp = skb->tstamp; | 1193 | buff->tstamp = skb->tstamp; |
1154 | tcp_fragment_tstamp(skb, buff); | 1194 | tcp_fragment_tstamp(skb, buff); |
1155 | 1195 | ||
@@ -1171,7 +1211,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, | |||
1171 | } | 1211 | } |
1172 | 1212 | ||
1173 | /* Link BUFF into the send queue. */ | 1213 | /* Link BUFF into the send queue. */ |
1174 | skb_header_release(buff); | 1214 | __skb_header_release(buff); |
1175 | tcp_insert_write_queue_after(skb, buff, sk); | 1215 | tcp_insert_write_queue_after(skb, buff, sk); |
1176 | 1216 | ||
1177 | return 0; | 1217 | return 0; |
@@ -1484,6 +1524,27 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp, | |||
1484 | ((nonagle & TCP_NAGLE_CORK) || | 1524 | ((nonagle & TCP_NAGLE_CORK) || |
1485 | (!nonagle && tp->packets_out && tcp_minshall_check(tp))); | 1525 | (!nonagle && tp->packets_out && tcp_minshall_check(tp))); |
1486 | } | 1526 | } |
1527 | |||
1528 | /* Return how many segs we'd like on a TSO packet, | ||
1529 | * to send one TSO packet per ms | ||
1530 | */ | ||
1531 | static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now) | ||
1532 | { | ||
1533 | u32 bytes, segs; | ||
1534 | |||
1535 | bytes = min(sk->sk_pacing_rate >> 10, | ||
1536 | sk->sk_gso_max_size - 1 - MAX_TCP_HEADER); | ||
1537 | |||
1538 | /* Goal is to send at least one packet per ms, | ||
1539 | * not one big TSO packet every 100 ms. | ||
1540 | * This preserves ACK clocking and is consistent | ||
1541 | * with tcp_tso_should_defer() heuristic. | ||
1542 | */ | ||
1543 | segs = max_t(u32, bytes / mss_now, sysctl_tcp_min_tso_segs); | ||
1544 | |||
1545 | return min_t(u32, segs, sk->sk_gso_max_segs); | ||
1546 | } | ||
1547 | |||
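tcp_tso_autosize() sizes TSO bursts from the pacing rate so the stack emits roughly one TSO packet per millisecond instead of one large burst every 100 ms, which keeps the ACK clock ticking. The standalone version below uses the same arithmetic with every limit passed in explicitly; the values fed in from main() (a MAX_TCP_HEADER of 320, min_tso_segs of 2, and the gso caps) are illustrative assumptions, not the kernel's configuration.

#include <stdint.h>
#include <stdio.h>

static uint32_t min_u32(uint32_t a, uint32_t b) { return a < b ? a : b; }
static uint32_t max_u32(uint32_t a, uint32_t b) { return a > b ? a : b; }

/* Segments per TSO packet, aiming at ~1 ms of data at the pacing rate
 * (pacing_rate >> 10 is roughly one millisecond's worth of bytes). */
static uint32_t tso_autosize(uint64_t pacing_rate, uint32_t mss,
                             uint32_t gso_max_size, uint32_t max_tcp_header,
                             uint32_t min_tso_segs, uint32_t gso_max_segs)
{
        uint32_t bytes, segs;

        bytes = min_u32((uint32_t)(pacing_rate >> 10),
                        gso_max_size - 1 - max_tcp_header);
        segs = max_u32(bytes / mss, min_tso_segs);
        return min_u32(segs, gso_max_segs);
}

int main(void)
{
        /* 1 Gbit/s pacing, 1448-byte MSS, illustrative limits: 45 segs. */
        printf("%u segs\n", tso_autosize(125000000ull, 1448,
                                         65536, 320, 2, 65535));
        return 0;
}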
1487 | /* Returns the portion of skb which can be sent right away */ | 1548 | /* Returns the portion of skb which can be sent right away */ |
1488 | static unsigned int tcp_mss_split_point(const struct sock *sk, | 1549 | static unsigned int tcp_mss_split_point(const struct sock *sk, |
1489 | const struct sk_buff *skb, | 1550 | const struct sk_buff *skb, |
@@ -1522,7 +1583,7 @@ static unsigned int tcp_mss_split_point(const struct sock *sk, | |||
1522 | static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp, | 1583 | static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp, |
1523 | const struct sk_buff *skb) | 1584 | const struct sk_buff *skb) |
1524 | { | 1585 | { |
1525 | u32 in_flight, cwnd; | 1586 | u32 in_flight, cwnd, halfcwnd; |
1526 | 1587 | ||
1527 | /* Don't be strict about the congestion window for the final FIN. */ | 1588 | /* Don't be strict about the congestion window for the final FIN. */ |
1528 | if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) && | 1589 | if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) && |
@@ -1531,10 +1592,14 @@ static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp, | |||
1531 | 1592 | ||
1532 | in_flight = tcp_packets_in_flight(tp); | 1593 | in_flight = tcp_packets_in_flight(tp); |
1533 | cwnd = tp->snd_cwnd; | 1594 | cwnd = tp->snd_cwnd; |
1534 | if (in_flight < cwnd) | 1595 | if (in_flight >= cwnd) |
1535 | return (cwnd - in_flight); | 1596 | return 0; |
1536 | 1597 | ||
1537 | return 0; | 1598 | /* For better scheduling, ensure we have at least |
1599 | * 2 GSO packets in flight. | ||
1600 | */ | ||
1601 | halfcwnd = max(cwnd >> 1, 1U); | ||
1602 | return min(halfcwnd, cwnd - in_flight); | ||
1538 | } | 1603 | } |
1539 | 1604 | ||
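tcp_cwnd_test() now caps the per-call quota at half the congestion window (never less than one segment), spreading transmissions across the round trip instead of releasing everything the moment a large window opens. A plain arithmetic rendition of the new quota:

#include <stdio.h>

/* Sendable packet quota given cwnd and packets already in flight. */
static unsigned int cwnd_quota(unsigned int cwnd, unsigned int in_flight)
{
        unsigned int halfcwnd;

        if (in_flight >= cwnd)
                return 0;

        halfcwnd = cwnd / 2 > 1 ? cwnd / 2 : 1;
        return halfcwnd < cwnd - in_flight ? halfcwnd : cwnd - in_flight;
}

int main(void)
{
        printf("%u\n", cwnd_quota(10, 0));   /* 5, not 10 */
        printf("%u\n", cwnd_quota(10, 8));   /* 2 */
        printf("%u\n", cwnd_quota(1, 0));    /* 1 */
        return 0;
}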
1540 | /* Initialize TSO state of a skb. | 1605 | /* Initialize TSO state of a skb. |
@@ -1675,7 +1740,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, | |||
1675 | tcp_set_skb_tso_segs(sk, buff, mss_now); | 1740 | tcp_set_skb_tso_segs(sk, buff, mss_now); |
1676 | 1741 | ||
1677 | /* Link BUFF into the send queue. */ | 1742 | /* Link BUFF into the send queue. */ |
1678 | skb_header_release(buff); | 1743 | __skb_header_release(buff); |
1679 | tcp_insert_write_queue_after(skb, buff, sk); | 1744 | tcp_insert_write_queue_after(skb, buff, sk); |
1680 | 1745 | ||
1681 | return 0; | 1746 | return 0; |
@@ -1687,7 +1752,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, | |||
1687 | * This algorithm is from John Heffner. | 1752 | * This algorithm is from John Heffner. |
1688 | */ | 1753 | */ |
1689 | static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, | 1754 | static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, |
1690 | bool *is_cwnd_limited) | 1755 | bool *is_cwnd_limited, u32 max_segs) |
1691 | { | 1756 | { |
1692 | struct tcp_sock *tp = tcp_sk(sk); | 1757 | struct tcp_sock *tp = tcp_sk(sk); |
1693 | const struct inet_connection_sock *icsk = inet_csk(sk); | 1758 | const struct inet_connection_sock *icsk = inet_csk(sk); |
@@ -1717,8 +1782,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, | |||
1717 | limit = min(send_win, cong_win); | 1782 | limit = min(send_win, cong_win); |
1718 | 1783 | ||
1719 | /* If a full-sized TSO skb can be sent, do it. */ | 1784 | /* If a full-sized TSO skb can be sent, do it. */ |
1720 | if (limit >= min_t(unsigned int, sk->sk_gso_max_size, | 1785 | if (limit >= max_segs * tp->mss_cache) |
1721 | tp->xmit_size_goal_segs * tp->mss_cache)) | ||
1722 | goto send_now; | 1786 | goto send_now; |
1723 | 1787 | ||
1724 | /* Middle in queue won't get any more data, full sendable already? */ | 1788 | /* Middle in queue won't get any more data, full sendable already? */ |
@@ -1874,8 +1938,8 @@ static int tcp_mtu_probe(struct sock *sk) | |||
1874 | tcp_init_tso_segs(sk, nskb, nskb->len); | 1938 | tcp_init_tso_segs(sk, nskb, nskb->len); |
1875 | 1939 | ||
1876 | /* We're ready to send. If this fails, the probe will | 1940 | /* We're ready to send. If this fails, the probe will |
1877 | * be resegmented into mss-sized pieces by tcp_write_xmit(). */ | 1941 | * be resegmented into mss-sized pieces by tcp_write_xmit(). |
1878 | TCP_SKB_CB(nskb)->when = tcp_time_stamp; | 1942 | */ |
1879 | if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) { | 1943 | if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) { |
1880 | /* Decrement cwnd here because we are sending | 1944 | /* Decrement cwnd here because we are sending |
1881 | * effectively two packets. */ | 1945 | * effectively two packets. */ |
@@ -1915,6 +1979,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | |||
1915 | int cwnd_quota; | 1979 | int cwnd_quota; |
1916 | int result; | 1980 | int result; |
1917 | bool is_cwnd_limited = false; | 1981 | bool is_cwnd_limited = false; |
1982 | u32 max_segs; | ||
1918 | 1983 | ||
1919 | sent_pkts = 0; | 1984 | sent_pkts = 0; |
1920 | 1985 | ||
@@ -1928,6 +1993,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | |||
1928 | } | 1993 | } |
1929 | } | 1994 | } |
1930 | 1995 | ||
1996 | max_segs = tcp_tso_autosize(sk, mss_now); | ||
1931 | while ((skb = tcp_send_head(sk))) { | 1997 | while ((skb = tcp_send_head(sk))) { |
1932 | unsigned int limit; | 1998 | unsigned int limit; |
1933 | 1999 | ||
@@ -1935,8 +2001,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | |||
1935 | BUG_ON(!tso_segs); | 2001 | BUG_ON(!tso_segs); |
1936 | 2002 | ||
1937 | if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { | 2003 | if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { |
1938 | /* "when" is used as a start point for the retransmit timer */ | 2004 | /* "skb_mstamp" is used as a start point for the retransmit timer */ |
1939 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2005 | skb_mstamp_get(&skb->skb_mstamp); |
1940 | goto repair; /* Skip network transmission */ | 2006 | goto repair; /* Skip network transmission */ |
1941 | } | 2007 | } |
1942 | 2008 | ||
@@ -1953,17 +2019,30 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | |||
1953 | if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) | 2019 | if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) |
1954 | break; | 2020 | break; |
1955 | 2021 | ||
1956 | if (tso_segs == 1) { | 2022 | if (tso_segs == 1 || !max_segs) { |
1957 | if (unlikely(!tcp_nagle_test(tp, skb, mss_now, | 2023 | if (unlikely(!tcp_nagle_test(tp, skb, mss_now, |
1958 | (tcp_skb_is_last(sk, skb) ? | 2024 | (tcp_skb_is_last(sk, skb) ? |
1959 | nonagle : TCP_NAGLE_PUSH)))) | 2025 | nonagle : TCP_NAGLE_PUSH)))) |
1960 | break; | 2026 | break; |
1961 | } else { | 2027 | } else { |
1962 | if (!push_one && | 2028 | if (!push_one && |
1963 | tcp_tso_should_defer(sk, skb, &is_cwnd_limited)) | 2029 | tcp_tso_should_defer(sk, skb, &is_cwnd_limited, |
2030 | max_segs)) | ||
1964 | break; | 2031 | break; |
1965 | } | 2032 | } |
1966 | 2033 | ||
2034 | limit = mss_now; | ||
2035 | if (tso_segs > 1 && max_segs && !tcp_urg_mode(tp)) | ||
2036 | limit = tcp_mss_split_point(sk, skb, mss_now, | ||
2037 | min_t(unsigned int, | ||
2038 | cwnd_quota, | ||
2039 | max_segs), | ||
2040 | nonagle); | ||
2041 | |||
2042 | if (skb->len > limit && | ||
2043 | unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) | ||
2044 | break; | ||
2045 | |||
1967 | /* TCP Small Queues : | 2046 | /* TCP Small Queues : |
1968 | * Control number of packets in qdisc/devices to two packets / or ~1 ms. | 2047 | * Control number of packets in qdisc/devices to two packets / or ~1 ms. |
1969 | * This allows for : | 2048 | * This allows for : |
@@ -1974,8 +2053,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | |||
1974 | * of queued bytes to ensure line rate. | 2053 | * of queued bytes to ensure line rate. |
1975 | * One example is wifi aggregation (802.11 AMPDU) | 2054 | * One example is wifi aggregation (802.11 AMPDU) |
1976 | */ | 2055 | */ |
1977 | limit = max_t(unsigned int, sysctl_tcp_limit_output_bytes, | 2056 | limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10); |
1978 | sk->sk_pacing_rate >> 10); | 2057 | limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes); |
1979 | 2058 | ||
1980 | if (atomic_read(&sk->sk_wmem_alloc) > limit) { | 2059 | if (atomic_read(&sk->sk_wmem_alloc) > limit) { |
1981 | set_bit(TSQ_THROTTLED, &tp->tsq_flags); | 2060 | set_bit(TSQ_THROTTLED, &tp->tsq_flags); |
@@ -1988,20 +2067,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | |||
1988 | break; | 2067 | break; |
1989 | } | 2068 | } |
1990 | 2069 | ||
1991 | limit = mss_now; | ||
1992 | if (tso_segs > 1 && !tcp_urg_mode(tp)) | ||
1993 | limit = tcp_mss_split_point(sk, skb, mss_now, | ||
1994 | min_t(unsigned int, | ||
1995 | cwnd_quota, | ||
1996 | sk->sk_gso_max_segs), | ||
1997 | nonagle); | ||
1998 | |||
1999 | if (skb->len > limit && | ||
2000 | unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) | ||
2001 | break; | ||
2002 | |||
2003 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | ||
2004 | |||
2005 | if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) | 2070 | if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) |
2006 | break; | 2071 | break; |
2007 | 2072 | ||
@@ -2097,10 +2162,7 @@ bool tcp_schedule_loss_probe(struct sock *sk) | |||
2097 | static bool skb_still_in_host_queue(const struct sock *sk, | 2162 | static bool skb_still_in_host_queue(const struct sock *sk, |
2098 | const struct sk_buff *skb) | 2163 | const struct sk_buff *skb) |
2099 | { | 2164 | { |
2100 | const struct sk_buff *fclone = skb + 1; | 2165 | if (unlikely(skb_fclone_busy(sk, skb))) { |
2101 | |||
2102 | if (unlikely(skb->fclone == SKB_FCLONE_ORIG && | ||
2103 | fclone->fclone == SKB_FCLONE_CLONE)) { | ||
2104 | NET_INC_STATS_BH(sock_net(sk), | 2166 | NET_INC_STATS_BH(sock_net(sk), |
2105 | LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); | 2167 | LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); |
2106 | return true; | 2168 | return true; |
@@ -2499,7 +2561,6 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
2499 | /* Make a copy, if the first transmission SKB clone we made | 2561 | /* Make a copy, if the first transmission SKB clone we made |
2500 | * is still in somebody's hands, else make a clone. | 2562 | * is still in somebody's hands, else make a clone. |
2501 | */ | 2563 | */ |
2502 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | ||
2503 | 2564 | ||
2504 | /* make sure skb->data is aligned on arches that require it | 2565 | /* make sure skb->data is aligned on arches that require it |
2505 | * and check if ack-trimming & collapsing extended the headroom | 2566 | * and check if ack-trimming & collapsing extended the headroom |
@@ -2544,7 +2605,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
2544 | 2605 | ||
2545 | /* Save stamp of the first retransmit. */ | 2606 | /* Save stamp of the first retransmit. */ |
2546 | if (!tp->retrans_stamp) | 2607 | if (!tp->retrans_stamp) |
2547 | tp->retrans_stamp = TCP_SKB_CB(skb)->when; | 2608 | tp->retrans_stamp = tcp_skb_timestamp(skb); |
2548 | 2609 | ||
2549 | /* snd_nxt is stored to detect loss of retransmitted segment, | 2610 | /* snd_nxt is stored to detect loss of retransmitted segment, |
2550 | * see tcp_input.c tcp_sacktag_write_queue(). | 2611 | * see tcp_input.c tcp_sacktag_write_queue(). |
@@ -2752,7 +2813,6 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) | |||
2752 | tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk), | 2813 | tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk), |
2753 | TCPHDR_ACK | TCPHDR_RST); | 2814 | TCPHDR_ACK | TCPHDR_RST); |
2754 | /* Send it off. */ | 2815 | /* Send it off. */ |
2755 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | ||
2756 | if (tcp_transmit_skb(sk, skb, 0, priority)) | 2816 | if (tcp_transmit_skb(sk, skb, 0, priority)) |
2757 | NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); | 2817 | NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); |
2758 | 2818 | ||
@@ -2780,7 +2840,7 @@ int tcp_send_synack(struct sock *sk) | |||
2780 | if (nskb == NULL) | 2840 | if (nskb == NULL) |
2781 | return -ENOMEM; | 2841 | return -ENOMEM; |
2782 | tcp_unlink_write_queue(skb, sk); | 2842 | tcp_unlink_write_queue(skb, sk); |
2783 | skb_header_release(nskb); | 2843 | __skb_header_release(nskb); |
2784 | __tcp_add_write_queue_head(sk, nskb); | 2844 | __tcp_add_write_queue_head(sk, nskb); |
2785 | sk_wmem_free_skb(sk, skb); | 2845 | sk_wmem_free_skb(sk, skb); |
2786 | sk->sk_wmem_queued += nskb->truesize; | 2846 | sk->sk_wmem_queued += nskb->truesize; |
@@ -2789,9 +2849,8 @@ int tcp_send_synack(struct sock *sk) | |||
2789 | } | 2849 | } |
2790 | 2850 | ||
2791 | TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK; | 2851 | TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK; |
2792 | TCP_ECN_send_synack(tcp_sk(sk), skb); | 2852 | tcp_ecn_send_synack(sk, skb); |
2793 | } | 2853 | } |
2794 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | ||
2795 | return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); | 2854 | return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); |
2796 | } | 2855 | } |
2797 | 2856 | ||
@@ -2835,10 +2894,10 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2835 | memset(&opts, 0, sizeof(opts)); | 2894 | memset(&opts, 0, sizeof(opts)); |
2836 | #ifdef CONFIG_SYN_COOKIES | 2895 | #ifdef CONFIG_SYN_COOKIES |
2837 | if (unlikely(req->cookie_ts)) | 2896 | if (unlikely(req->cookie_ts)) |
2838 | TCP_SKB_CB(skb)->when = cookie_init_timestamp(req); | 2897 | skb->skb_mstamp.stamp_jiffies = cookie_init_timestamp(req); |
2839 | else | 2898 | else |
2840 | #endif | 2899 | #endif |
2841 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2900 | skb_mstamp_get(&skb->skb_mstamp); |
2842 | tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5, | 2901 | tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5, |
2843 | foc) + sizeof(*th); | 2902 | foc) + sizeof(*th); |
2844 | 2903 | ||
@@ -2849,7 +2908,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2849 | memset(th, 0, sizeof(struct tcphdr)); | 2908 | memset(th, 0, sizeof(struct tcphdr)); |
2850 | th->syn = 1; | 2909 | th->syn = 1; |
2851 | th->ack = 1; | 2910 | th->ack = 1; |
2852 | TCP_ECN_make_synack(req, th); | 2911 | tcp_ecn_make_synack(req, th, sk); |
2853 | th->source = htons(ireq->ir_num); | 2912 | th->source = htons(ireq->ir_num); |
2854 | th->dest = ireq->ir_rmt_port; | 2913 | th->dest = ireq->ir_rmt_port; |
2855 | /* Setting of flags are superfluous here for callers (and ECE is | 2914 | /* Setting of flags are superfluous here for callers (and ECE is |
@@ -2956,7 +3015,7 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb) | |||
2956 | struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); | 3015 | struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); |
2957 | 3016 | ||
2958 | tcb->end_seq += skb->len; | 3017 | tcb->end_seq += skb->len; |
2959 | skb_header_release(skb); | 3018 | __skb_header_release(skb); |
2960 | __tcp_add_write_queue_tail(sk, skb); | 3019 | __tcp_add_write_queue_tail(sk, skb); |
2961 | sk->sk_wmem_queued += skb->truesize; | 3020 | sk->sk_wmem_queued += skb->truesize; |
2962 | sk_mem_charge(sk, skb->truesize); | 3021 | sk_mem_charge(sk, skb->truesize); |
@@ -2975,9 +3034,9 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) | |||
2975 | { | 3034 | { |
2976 | struct tcp_sock *tp = tcp_sk(sk); | 3035 | struct tcp_sock *tp = tcp_sk(sk); |
2977 | struct tcp_fastopen_request *fo = tp->fastopen_req; | 3036 | struct tcp_fastopen_request *fo = tp->fastopen_req; |
2978 | int syn_loss = 0, space, i, err = 0, iovlen = fo->data->msg_iovlen; | 3037 | int syn_loss = 0, space, err = 0; |
2979 | struct sk_buff *syn_data = NULL, *data; | ||
2980 | unsigned long last_syn_loss = 0; | 3038 | unsigned long last_syn_loss = 0; |
3039 | struct sk_buff *syn_data; | ||
2981 | 3040 | ||
2982 | tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */ | 3041 | tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */ |
2983 | tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie, | 3042 | tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie, |
@@ -3008,48 +3067,40 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) | |||
3008 | /* limit to order-0 allocations */ | 3067 | /* limit to order-0 allocations */ |
3009 | space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER)); | 3068 | space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER)); |
3010 | 3069 | ||
3011 | syn_data = skb_copy_expand(syn, MAX_TCP_HEADER, space, | 3070 | syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation); |
3012 | sk->sk_allocation); | 3071 | if (!syn_data) |
3013 | if (syn_data == NULL) | ||
3014 | goto fallback; | 3072 | goto fallback; |
3073 | syn_data->ip_summed = CHECKSUM_PARTIAL; | ||
3074 | memcpy(syn_data->cb, syn->cb, sizeof(syn->cb)); | ||
3075 | if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space), | ||
3076 | fo->data->msg_iter.iov, 0, space))) { | ||
3077 | kfree_skb(syn_data); | ||
3078 | goto fallback; | ||
3079 | } | ||
3015 | 3080 | ||
3016 | for (i = 0; i < iovlen && syn_data->len < space; ++i) { | 3081 | /* No more data pending in inet_wait_for_connect() */ |
3017 | struct iovec *iov = &fo->data->msg_iov[i]; | 3082 | if (space == fo->size) |
3018 | unsigned char __user *from = iov->iov_base; | 3083 | fo->data = NULL; |
3019 | int len = iov->iov_len; | 3084 | fo->copied = space; |
3020 | 3085 | ||
3021 | if (syn_data->len + len > space) | 3086 | tcp_connect_queue_skb(sk, syn_data); |
3022 | len = space - syn_data->len; | ||
3023 | else if (i + 1 == iovlen) | ||
3024 | /* No more data pending in inet_wait_for_connect() */ | ||
3025 | fo->data = NULL; | ||
3026 | 3087 | ||
3027 | if (skb_add_data(syn_data, from, len)) | 3088 | err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation); |
3028 | goto fallback; | ||
3029 | } | ||
3030 | 3089 | ||
3031 | /* Queue a data-only packet after the regular SYN for retransmission */ | 3090 | syn->skb_mstamp = syn_data->skb_mstamp; |
3032 | data = pskb_copy(syn_data, sk->sk_allocation); | ||
3033 | if (data == NULL) | ||
3034 | goto fallback; | ||
3035 | TCP_SKB_CB(data)->seq++; | ||
3036 | TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN; | ||
3037 | TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH); | ||
3038 | tcp_connect_queue_skb(sk, data); | ||
3039 | fo->copied = data->len; | ||
3040 | |||
3041 | /* syn_data is about to be sent, we need to take current time stamps | ||
3042 | * for the packets that are in write queue : SYN packet and DATA | ||
3043 | */ | ||
3044 | skb_mstamp_get(&syn->skb_mstamp); | ||
3045 | data->skb_mstamp = syn->skb_mstamp; | ||
3046 | 3091 | ||
3047 | if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) { | 3092 | /* Now full SYN+DATA was cloned and sent (or not), |
3093 | * remove the SYN from the original skb (syn_data) | ||
3094 | * we keep in write queue in case of a retransmit, as we | ||
3095 | * also have the SYN packet (with no data) in the same queue. | ||
3096 | */ | ||
3097 | TCP_SKB_CB(syn_data)->seq++; | ||
3098 | TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH; | ||
3099 | if (!err) { | ||
3048 | tp->syn_data = (fo->copied > 0); | 3100 | tp->syn_data = (fo->copied > 0); |
3049 | NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT); | 3101 | NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT); |
3050 | goto done; | 3102 | goto done; |
3051 | } | 3103 | } |
3052 | syn_data = NULL; | ||
3053 | 3104 | ||
3054 | fallback: | 3105 | fallback: |
3055 | /* Send a regular SYN with Fast Open cookie request option */ | 3106 | /* Send a regular SYN with Fast Open cookie request option */ |
@@ -3058,7 +3109,6 @@ fallback: | |||
3058 | err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation); | 3109 | err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation); |
3059 | if (err) | 3110 | if (err) |
3060 | tp->syn_fastopen = 0; | 3111 | tp->syn_fastopen = 0; |
3061 | kfree_skb(syn_data); | ||
3062 | done: | 3112 | done: |
3063 | fo->cookie.len = -1; /* Exclude Fast Open option for SYN retries */ | 3113 | fo->cookie.len = -1; /* Exclude Fast Open option for SYN retries */ |
3064 | return err; | 3114 | return err; |
@@ -3078,17 +3128,14 @@ int tcp_connect(struct sock *sk) | |||
3078 | return 0; | 3128 | return 0; |
3079 | } | 3129 | } |
3080 | 3130 | ||
3081 | buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation); | 3131 | buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); |
3082 | if (unlikely(buff == NULL)) | 3132 | if (unlikely(!buff)) |
3083 | return -ENOBUFS; | 3133 | return -ENOBUFS; |
3084 | 3134 | ||
3085 | /* Reserve space for headers. */ | ||
3086 | skb_reserve(buff, MAX_TCP_HEADER); | ||
3087 | |||
3088 | tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); | 3135 | tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); |
3089 | tp->retrans_stamp = TCP_SKB_CB(buff)->when = tcp_time_stamp; | 3136 | tp->retrans_stamp = tcp_time_stamp; |
3090 | tcp_connect_queue_skb(sk, buff); | 3137 | tcp_connect_queue_skb(sk, buff); |
3091 | TCP_ECN_send_syn(sk, buff); | 3138 | tcp_ecn_send_syn(sk, buff); |
3092 | 3139 | ||
3093 | /* Send off SYN; include data in Fast Open. */ | 3140 | /* Send off SYN; include data in Fast Open. */ |
3094 | err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) : | 3141 | err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) : |
@@ -3120,6 +3167,8 @@ void tcp_send_delayed_ack(struct sock *sk) | |||
3120 | int ato = icsk->icsk_ack.ato; | 3167 | int ato = icsk->icsk_ack.ato; |
3121 | unsigned long timeout; | 3168 | unsigned long timeout; |
3122 | 3169 | ||
3170 | tcp_ca_event(sk, CA_EVENT_DELAYED_ACK); | ||
3171 | |||
3123 | if (ato > TCP_DELACK_MIN) { | 3172 | if (ato > TCP_DELACK_MIN) { |
3124 | const struct tcp_sock *tp = tcp_sk(sk); | 3173 | const struct tcp_sock *tp = tcp_sk(sk); |
3125 | int max_ato = HZ / 2; | 3174 | int max_ato = HZ / 2; |
@@ -3176,6 +3225,8 @@ void tcp_send_ack(struct sock *sk) | |||
3176 | if (sk->sk_state == TCP_CLOSE) | 3225 | if (sk->sk_state == TCP_CLOSE) |
3177 | return; | 3226 | return; |
3178 | 3227 | ||
3228 | tcp_ca_event(sk, CA_EVENT_NON_DELAYED_ACK); | ||
3229 | |||
3179 | /* We are not putting this on the write queue, so | 3230 | /* We are not putting this on the write queue, so |
3180 | * tcp_transmit_skb() will set the ownership to this | 3231 | * tcp_transmit_skb() will set the ownership to this |
3181 | * sock. | 3232 | * sock. |
@@ -3194,9 +3245,10 @@ void tcp_send_ack(struct sock *sk) | |||
3194 | tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK); | 3245 | tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK); |
3195 | 3246 | ||
3196 | /* Send it off, this clears delayed acks for us. */ | 3247 | /* Send it off, this clears delayed acks for us. */ |
3197 | TCP_SKB_CB(buff)->when = tcp_time_stamp; | 3248 | skb_mstamp_get(&buff->skb_mstamp); |
3198 | tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); | 3249 | tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); |
3199 | } | 3250 | } |
3251 | EXPORT_SYMBOL_GPL(tcp_send_ack); | ||
3200 | 3252 | ||
3201 | /* This routine sends a packet with an out of date sequence | 3253 | /* This routine sends a packet with an out of date sequence |
3202 | * number. It assumes the other end will try to ack it. | 3254 | * number. It assumes the other end will try to ack it. |
@@ -3226,7 +3278,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) | |||
3226 | * send it. | 3278 | * send it. |
3227 | */ | 3279 | */ |
3228 | tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); | 3280 | tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); |
3229 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 3281 | skb_mstamp_get(&skb->skb_mstamp); |
3230 | return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); | 3282 | return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); |
3231 | } | 3283 | } |
3232 | 3284 | ||
@@ -3270,7 +3322,6 @@ int tcp_write_wakeup(struct sock *sk) | |||
3270 | tcp_set_skb_tso_segs(sk, skb, mss); | 3322 | tcp_set_skb_tso_segs(sk, skb, mss); |
3271 | 3323 | ||
3272 | TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; | 3324 | TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; |
3273 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | ||
3274 | err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); | 3325 | err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); |
3275 | if (!err) | 3326 | if (!err) |
3276 | tcp_event_new_data_sent(sk, skb); | 3327 | tcp_event_new_data_sent(sk, skb); |
@@ -3289,6 +3340,7 @@ void tcp_send_probe0(struct sock *sk) | |||
3289 | { | 3340 | { |
3290 | struct inet_connection_sock *icsk = inet_csk(sk); | 3341 | struct inet_connection_sock *icsk = inet_csk(sk); |
3291 | struct tcp_sock *tp = tcp_sk(sk); | 3342 | struct tcp_sock *tp = tcp_sk(sk); |
3343 | unsigned long probe_max; | ||
3292 | int err; | 3344 | int err; |
3293 | 3345 | ||
3294 | err = tcp_write_wakeup(sk); | 3346 | err = tcp_write_wakeup(sk); |
@@ -3304,9 +3356,7 @@ void tcp_send_probe0(struct sock *sk) | |||
3304 | if (icsk->icsk_backoff < sysctl_tcp_retries2) | 3356 | if (icsk->icsk_backoff < sysctl_tcp_retries2) |
3305 | icsk->icsk_backoff++; | 3357 | icsk->icsk_backoff++; |
3306 | icsk->icsk_probes_out++; | 3358 | icsk->icsk_probes_out++; |
3307 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, | 3359 | probe_max = TCP_RTO_MAX; |
3308 | min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX), | ||
3309 | TCP_RTO_MAX); | ||
3310 | } else { | 3360 | } else { |
3311 | /* If packet was not sent due to local congestion, | 3361 | /* If packet was not sent due to local congestion, |
3312 | * do not backoff and do not remember icsk_probes_out. | 3362 | * do not backoff and do not remember icsk_probes_out. |
@@ -3316,11 +3366,11 @@ void tcp_send_probe0(struct sock *sk) | |||
3316 | */ | 3366 | */ |
3317 | if (!icsk->icsk_probes_out) | 3367 | if (!icsk->icsk_probes_out) |
3318 | icsk->icsk_probes_out = 1; | 3368 | icsk->icsk_probes_out = 1; |
3319 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, | 3369 | probe_max = TCP_RESOURCE_PROBE_INTERVAL; |
3320 | min(icsk->icsk_rto << icsk->icsk_backoff, | ||
3321 | TCP_RESOURCE_PROBE_INTERVAL), | ||
3322 | TCP_RTO_MAX); | ||
3323 | } | 3370 | } |
3371 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, | ||
3372 | inet_csk_rto_backoff(icsk, probe_max), | ||
3373 | TCP_RTO_MAX); | ||
3324 | } | 3374 | } |
3325 | 3375 | ||
3326 | int tcp_rtx_synack(struct sock *sk, struct request_sock *req) | 3376 | int tcp_rtx_synack(struct sock *sk, struct request_sock *req) |
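Finally, tcp_send_probe0() collapses its two inet_csk_reset_xmit_timer() calls into one: only the cap differs (TCP_RTO_MAX after a successfully sent probe, TCP_RESOURCE_PROBE_INTERVAL when the probe was dropped locally), and the exponential part is delegated to inet_csk_rto_backoff(). A plain-integer rendition of that backoff, with millisecond stand-ins for the kernel's jiffies-based constants:

#include <stdio.h>

/* rto << backoff, clamped to `max_ms`, as inet_csk_rto_backoff() does. */
static unsigned long rto_backoff(unsigned long rto_ms, unsigned int backoff,
                                 unsigned long max_ms)
{
        unsigned long when = rto_ms << backoff;

        return when > max_ms ? max_ms : when;
}

int main(void)
{
        /* Illustrative values: 200 ms RTO, 120 s cap (TCP_RTO_MAX-like). */
        for (unsigned int b = 0; b <= 10; b++)
                printf("backoff %u -> %lu ms\n", b, rto_backoff(200, b, 120000));
        return 0;
}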