Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r-- | net/ipv4/tcp_output.c | 198 |
1 file changed, 130 insertions, 68 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 179b51e6bda3..3af21296d967 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -318,36 +318,47 @@ static u16 tcp_select_window(struct sock *sk) | |||
318 | } | 318 | } |
319 | 319 | ||
320 | /* Packet ECN state for a SYN-ACK */ | 320 | /* Packet ECN state for a SYN-ACK */ |
321 | static inline void TCP_ECN_send_synack(const struct tcp_sock *tp, struct sk_buff *skb) | 321 | static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb) |
322 | { | 322 | { |
323 | const struct tcp_sock *tp = tcp_sk(sk); | ||
324 | |||
323 | TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR; | 325 | TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR; |
324 | if (!(tp->ecn_flags & TCP_ECN_OK)) | 326 | if (!(tp->ecn_flags & TCP_ECN_OK)) |
325 | TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE; | 327 | TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE; |
328 | else if (tcp_ca_needs_ecn(sk)) | ||
329 | INET_ECN_xmit(sk); | ||
326 | } | 330 | } |
327 | 331 | ||
328 | /* Packet ECN state for a SYN. */ | 332 | /* Packet ECN state for a SYN. */ |
329 | static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) | 333 | static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) |
330 | { | 334 | { |
331 | struct tcp_sock *tp = tcp_sk(sk); | 335 | struct tcp_sock *tp = tcp_sk(sk); |
332 | 336 | ||
333 | tp->ecn_flags = 0; | 337 | tp->ecn_flags = 0; |
334 | if (sock_net(sk)->ipv4.sysctl_tcp_ecn == 1) { | 338 | if (sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 || |
339 | tcp_ca_needs_ecn(sk)) { | ||
335 | TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; | 340 | TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; |
336 | tp->ecn_flags = TCP_ECN_OK; | 341 | tp->ecn_flags = TCP_ECN_OK; |
342 | if (tcp_ca_needs_ecn(sk)) | ||
343 | INET_ECN_xmit(sk); | ||
337 | } | 344 | } |
338 | } | 345 | } |
339 | 346 | ||
340 | static __inline__ void | 347 | static void |
341 | TCP_ECN_make_synack(const struct request_sock *req, struct tcphdr *th) | 348 | tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th, |
349 | struct sock *sk) | ||
342 | { | 350 | { |
343 | if (inet_rsk(req)->ecn_ok) | 351 | if (inet_rsk(req)->ecn_ok) { |
344 | th->ece = 1; | 352 | th->ece = 1; |
353 | if (tcp_ca_needs_ecn(sk)) | ||
354 | INET_ECN_xmit(sk); | ||
355 | } | ||
345 | } | 356 | } |
346 | 357 | ||
347 | /* Set up ECN state for a packet on a ESTABLISHED socket that is about to | 358 | /* Set up ECN state for a packet on a ESTABLISHED socket that is about to |
348 | * be sent. | 359 | * be sent. |
349 | */ | 360 | */ |
350 | static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb, | 361 | static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, |
351 | int tcp_header_len) | 362 | int tcp_header_len) |
352 | { | 363 | { |
353 | struct tcp_sock *tp = tcp_sk(sk); | 364 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -362,7 +373,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb, | |||
362 | tcp_hdr(skb)->cwr = 1; | 373 | tcp_hdr(skb)->cwr = 1; |
363 | skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; | 374 | skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; |
364 | } | 375 | } |
365 | } else { | 376 | } else if (!tcp_ca_needs_ecn(sk)) { |
366 | /* ACK or retransmitted segment: clear ECT|CE */ | 377 | /* ACK or retransmitted segment: clear ECT|CE */ |
367 | INET_ECN_dontxmit(sk); | 378 | INET_ECN_dontxmit(sk); |
368 | } | 379 | } |
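For context: tcp_ca_needs_ecn(), used throughout the ECN hunks above, only asks whether the attached congestion-control module has requested ECN signalling (as DCTCP does). A rough sketch of the helper, which is not part of this file; the exact form and the TCP_CONG_NEEDS_ECN flag are recalled from the same patch series:

/* true when the current CC module wants ECT marked on its segments */
static inline bool tcp_ca_needs_ecn(const struct sock *sk)
{
	return inet_csk(sk)->icsk_ca_ops->flags & TCP_CONG_NEEDS_ECN;
}

INET_ECN_xmit(sk), called when that test succeeds, sets the ECT(0) codepoint in the socket's TOS so outgoing segments carry it.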
@@ -384,7 +395,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) | |||
384 | TCP_SKB_CB(skb)->tcp_flags = flags; | 395 | TCP_SKB_CB(skb)->tcp_flags = flags; |
385 | TCP_SKB_CB(skb)->sacked = 0; | 396 | TCP_SKB_CB(skb)->sacked = 0; |
386 | 397 | ||
387 | shinfo->gso_segs = 1; | 398 | tcp_skb_pcount_set(skb, 1); |
388 | shinfo->gso_size = 0; | 399 | shinfo->gso_size = 0; |
389 | shinfo->gso_type = 0; | 400 | shinfo->gso_type = 0; |
390 | 401 | ||
@@ -550,7 +561,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, | |||
550 | 561 | ||
551 | if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { | 562 | if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { |
552 | opts->options |= OPTION_TS; | 563 | opts->options |= OPTION_TS; |
553 | opts->tsval = TCP_SKB_CB(skb)->when + tp->tsoffset; | 564 | opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset; |
554 | opts->tsecr = tp->rx_opt.ts_recent; | 565 | opts->tsecr = tp->rx_opt.ts_recent; |
555 | remaining -= TCPOLEN_TSTAMP_ALIGNED; | 566 | remaining -= TCPOLEN_TSTAMP_ALIGNED; |
556 | } | 567 | } |
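The tcp_skb_timestamp() conversions in this and the later hunks read the jiffies half of the per-skb skb_mstamp field, which replaces the old TCP_SKB_CB(skb)->when. A minimal sketch of what is involved (field names as in include/linux/skbuff.h of this period; treat as approximate):

struct skb_mstamp {
	union {
		u64		v64;
		struct {
			u32	stamp_us;	/* microsecond clock, used for RTT sampling */
			u32	stamp_jiffies;	/* jiffies clock, the old ->when equivalent */
		};
	};
};

/* read back the jiffies part where the code previously used ->when */
static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
{
	return skb->skb_mstamp.stamp_jiffies;
}

skb_mstamp_get(&skb->skb_mstamp), seen in later hunks, fills both halves at transmit time, which is why the explicit "when = tcp_time_stamp" assignments can be dropped.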
@@ -618,7 +629,7 @@ static unsigned int tcp_synack_options(struct sock *sk, | |||
618 | } | 629 | } |
619 | if (likely(ireq->tstamp_ok)) { | 630 | if (likely(ireq->tstamp_ok)) { |
620 | opts->options |= OPTION_TS; | 631 | opts->options |= OPTION_TS; |
621 | opts->tsval = TCP_SKB_CB(skb)->when; | 632 | opts->tsval = tcp_skb_timestamp(skb); |
622 | opts->tsecr = req->ts_recent; | 633 | opts->tsecr = req->ts_recent; |
623 | remaining -= TCPOLEN_TSTAMP_ALIGNED; | 634 | remaining -= TCPOLEN_TSTAMP_ALIGNED; |
624 | } | 635 | } |
@@ -647,7 +658,6 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb | |||
647 | struct tcp_out_options *opts, | 658 | struct tcp_out_options *opts, |
648 | struct tcp_md5sig_key **md5) | 659 | struct tcp_md5sig_key **md5) |
649 | { | 660 | { |
650 | struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; | ||
651 | struct tcp_sock *tp = tcp_sk(sk); | 661 | struct tcp_sock *tp = tcp_sk(sk); |
652 | unsigned int size = 0; | 662 | unsigned int size = 0; |
653 | unsigned int eff_sacks; | 663 | unsigned int eff_sacks; |
@@ -666,7 +676,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb | |||
666 | 676 | ||
667 | if (likely(tp->rx_opt.tstamp_ok)) { | 677 | if (likely(tp->rx_opt.tstamp_ok)) { |
668 | opts->options |= OPTION_TS; | 678 | opts->options |= OPTION_TS; |
669 | opts->tsval = tcb ? tcb->when + tp->tsoffset : 0; | 679 | opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0; |
670 | opts->tsecr = tp->rx_opt.ts_recent; | 680 | opts->tsecr = tp->rx_opt.ts_recent; |
671 | size += TCPOLEN_TSTAMP_ALIGNED; | 681 | size += TCPOLEN_TSTAMP_ALIGNED; |
672 | } | 682 | } |
@@ -800,7 +810,7 @@ void tcp_release_cb(struct sock *sk) | |||
800 | __sock_put(sk); | 810 | __sock_put(sk); |
801 | } | 811 | } |
802 | if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) { | 812 | if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) { |
803 | sk->sk_prot->mtu_reduced(sk); | 813 | inet_csk(sk)->icsk_af_ops->mtu_reduced(sk); |
804 | __sock_put(sk); | 814 | __sock_put(sk); |
805 | } | 815 | } |
806 | } | 816 | } |
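Dispatching mtu_reduced through icsk_af_ops instead of sk->sk_prot means a connection on an IPv6 socket that is actually speaking IPv4 ends up in the handler matching the packets being sent. Illustration of where the callback now lives (assignments paraphrased from tcp_ipv4.c, not part of this diff):

const struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	= ip_queue_xmit,
	/* ... */
	.mtu_reduced	= tcp_v4_mtu_reduced,	/* ipv6_specific points at tcp_v6_mtu_reduced */
};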
@@ -829,26 +839,38 @@ void tcp_wfree(struct sk_buff *skb) | |||
829 | { | 839 | { |
830 | struct sock *sk = skb->sk; | 840 | struct sock *sk = skb->sk; |
831 | struct tcp_sock *tp = tcp_sk(sk); | 841 | struct tcp_sock *tp = tcp_sk(sk); |
842 | int wmem; | ||
843 | |||
844 | /* Keep one reference on sk_wmem_alloc. | ||
845 | * Will be released by sk_free() from here or tcp_tasklet_func() | ||
846 | */ | ||
847 | wmem = atomic_sub_return(skb->truesize - 1, &sk->sk_wmem_alloc); | ||
848 | |||
849 | /* If this softirq is serviced by ksoftirqd, we are likely under stress. | ||
850 | * Wait until our queues (qdisc + devices) are drained. | ||
851 | * This gives : | ||
852 | * - less callbacks to tcp_write_xmit(), reducing stress (batches) | ||
853 | * - chance for incoming ACK (processed by another cpu maybe) | ||
854 | * to migrate this flow (skb->ooo_okay will be eventually set) | ||
855 | */ | ||
856 | if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current) | ||
857 | goto out; | ||
832 | 858 | ||
833 | if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) && | 859 | if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) && |
834 | !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) { | 860 | !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) { |
835 | unsigned long flags; | 861 | unsigned long flags; |
836 | struct tsq_tasklet *tsq; | 862 | struct tsq_tasklet *tsq; |
837 | 863 | ||
838 | /* Keep a ref on socket. | ||
839 | * This last ref will be released in tcp_tasklet_func() | ||
840 | */ | ||
841 | atomic_sub(skb->truesize - 1, &sk->sk_wmem_alloc); | ||
842 | |||
843 | /* queue this socket to tasklet queue */ | 864 | /* queue this socket to tasklet queue */ |
844 | local_irq_save(flags); | 865 | local_irq_save(flags); |
845 | tsq = &__get_cpu_var(tsq_tasklet); | 866 | tsq = this_cpu_ptr(&tsq_tasklet); |
846 | list_add(&tp->tsq_node, &tsq->head); | 867 | list_add(&tp->tsq_node, &tsq->head); |
847 | tasklet_schedule(&tsq->tasklet); | 868 | tasklet_schedule(&tsq->tasklet); |
848 | local_irq_restore(flags); | 869 | local_irq_restore(flags); |
849 | } else { | 870 | return; |
850 | sock_wfree(skb); | ||
851 | } | 871 | } |
872 | out: | ||
873 | sk_free(sk); | ||
852 | } | 874 | } |
853 | 875 | ||
854 | /* This routine actually transmits TCP packets queued in by | 876 | /* This routine actually transmits TCP packets queued in by |
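In the new tcp_wfree(), all but one unit of the skb's truesize is released immediately; the remaining unit keeps sk_wmem_alloc non-zero so the socket survives until sk_free() at the end of the function (or until the TSQ tasklet runs). The SKB_TRUESIZE(1) threshold used here, and again in tcp_transmit_skb() below, essentially means "more than a bare skb's overhead is still queued". Sketch of the macro from include/linux/skbuff.h:

/* truesize of an skb carrying X bytes: payload plus sk_buff and shared_info overhead */
#define SKB_TRUESIZE(X) ((X) +						\
			 SKB_DATA_ALIGN(sizeof(struct sk_buff)) +	\
			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))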
@@ -886,8 +908,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
886 | skb = skb_clone(skb, gfp_mask); | 908 | skb = skb_clone(skb, gfp_mask); |
887 | if (unlikely(!skb)) | 909 | if (unlikely(!skb)) |
888 | return -ENOBUFS; | 910 | return -ENOBUFS; |
889 | /* Our usage of tstamp should remain private */ | ||
890 | skb->tstamp.tv64 = 0; | ||
891 | } | 911 | } |
892 | 912 | ||
893 | inet = inet_sk(sk); | 913 | inet = inet_sk(sk); |
@@ -906,9 +926,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
906 | tcp_ca_event(sk, CA_EVENT_TX_START); | 926 | tcp_ca_event(sk, CA_EVENT_TX_START); |
907 | 927 | ||
908 | /* if no packet is in qdisc/device queue, then allow XPS to select | 928 | /* if no packet is in qdisc/device queue, then allow XPS to select |
909 | * another queue. | 929 | * another queue. We can be called from tcp_tsq_handler() |
930 | * which holds one reference to sk_wmem_alloc. | ||
931 | * | ||
932 | * TODO: Ideally, in-flight pure ACK packets should not matter here. | ||
933 | * One way to get this would be to set skb->truesize = 2 on them. | ||
910 | */ | 934 | */ |
911 | skb->ooo_okay = sk_wmem_alloc_get(sk) == 0; | 935 | skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1); |
912 | 936 | ||
913 | skb_push(skb, tcp_header_size); | 937 | skb_push(skb, tcp_header_size); |
914 | skb_reset_transport_header(skb); | 938 | skb_reset_transport_header(skb); |
@@ -916,6 +940,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
916 | skb_orphan(skb); | 940 | skb_orphan(skb); |
917 | skb->sk = sk; | 941 | skb->sk = sk; |
918 | skb->destructor = tcp_wfree; | 942 | skb->destructor = tcp_wfree; |
943 | skb_set_hash_from_sk(skb, sk); | ||
919 | atomic_add(skb->truesize, &sk->sk_wmem_alloc); | 944 | atomic_add(skb->truesize, &sk->sk_wmem_alloc); |
920 | 945 | ||
921 | /* Build TCP header and checksum it. */ | 946 | /* Build TCP header and checksum it. */ |
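skb_set_hash_from_sk() copies the socket's precomputed transmit hash onto the skb so that XPS and flow steering see a stable per-flow value. Approximate form, recalled from the sk_txhash series rather than from this patch; the exact guard condition may differ:

static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)
{
	if (sk->sk_txhash) {
		skb->l4_hash = 1;		/* hash is L4-based, safe for steering decisions */
		skb->hash = sk->sk_txhash;
	}
}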
@@ -951,7 +976,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
951 | 976 | ||
952 | tcp_options_write((__be32 *)(th + 1), tp, &opts); | 977 | tcp_options_write((__be32 *)(th + 1), tp, &opts); |
953 | if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0)) | 978 | if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0)) |
954 | TCP_ECN_send(sk, skb, tcp_header_size); | 979 | tcp_ecn_send(sk, skb, tcp_header_size); |
955 | 980 | ||
956 | #ifdef CONFIG_TCP_MD5SIG | 981 | #ifdef CONFIG_TCP_MD5SIG |
957 | /* Calculate the MD5 hash, as we have all we need now */ | 982 | /* Calculate the MD5 hash, as we have all we need now */ |
@@ -974,11 +999,22 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
974 | TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, | 999 | TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, |
975 | tcp_skb_pcount(skb)); | 1000 | tcp_skb_pcount(skb)); |
976 | 1001 | ||
1002 | /* OK, its time to fill skb_shinfo(skb)->gso_segs */ | ||
1003 | skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb); | ||
1004 | |||
1005 | /* Our usage of tstamp should remain private */ | ||
1006 | skb->tstamp.tv64 = 0; | ||
1007 | |||
1008 | /* Cleanup our debris for IP stacks */ | ||
1009 | memset(skb->cb, 0, max(sizeof(struct inet_skb_parm), | ||
1010 | sizeof(struct inet6_skb_parm))); | ||
1011 | |||
977 | err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); | 1012 | err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); |
1013 | |||
978 | if (likely(err <= 0)) | 1014 | if (likely(err <= 0)) |
979 | return err; | 1015 | return err; |
980 | 1016 | ||
981 | tcp_enter_cwr(sk, 1); | 1017 | tcp_enter_cwr(sk); |
982 | 1018 | ||
983 | return net_xmit_eval(err); | 1019 | return net_xmit_eval(err); |
984 | } | 1020 | } |
@@ -994,7 +1030,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) | |||
994 | 1030 | ||
995 | /* Advance write_seq and place onto the write_queue. */ | 1031 | /* Advance write_seq and place onto the write_queue. */ |
996 | tp->write_seq = TCP_SKB_CB(skb)->end_seq; | 1032 | tp->write_seq = TCP_SKB_CB(skb)->end_seq; |
997 | skb_header_release(skb); | 1033 | __skb_header_release(skb); |
998 | tcp_add_write_queue_tail(sk, skb); | 1034 | tcp_add_write_queue_tail(sk, skb); |
999 | sk->sk_wmem_queued += skb->truesize; | 1035 | sk->sk_wmem_queued += skb->truesize; |
1000 | sk_mem_charge(sk, skb->truesize); | 1036 | sk_mem_charge(sk, skb->truesize); |
@@ -1013,11 +1049,11 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, | |||
1013 | /* Avoid the costly divide in the normal | 1049 | /* Avoid the costly divide in the normal |
1014 | * non-TSO case. | 1050 | * non-TSO case. |
1015 | */ | 1051 | */ |
1016 | shinfo->gso_segs = 1; | 1052 | tcp_skb_pcount_set(skb, 1); |
1017 | shinfo->gso_size = 0; | 1053 | shinfo->gso_size = 0; |
1018 | shinfo->gso_type = 0; | 1054 | shinfo->gso_type = 0; |
1019 | } else { | 1055 | } else { |
1020 | shinfo->gso_segs = DIV_ROUND_UP(skb->len, mss_now); | 1056 | tcp_skb_pcount_set(skb, DIV_ROUND_UP(skb->len, mss_now)); |
1021 | shinfo->gso_size = mss_now; | 1057 | shinfo->gso_size = mss_now; |
1022 | shinfo->gso_type = sk->sk_gso_type; | 1058 | shinfo->gso_type = sk->sk_gso_type; |
1023 | } | 1059 | } |
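tcp_skb_pcount_set() and tcp_skb_pcount() now keep the segment count in TCP_SKB_CB(skb) instead of skb_shinfo(skb)->gso_segs, which is why tcp_transmit_skb() above copies the value into gso_segs only when the packet is actually handed to the IP layer. A minimal sketch of the accessors (the tcp_gso_segs field name is taken from the same series; treat as approximate):

static inline int tcp_skb_pcount(const struct sk_buff *skb)
{
	return TCP_SKB_CB(skb)->tcp_gso_segs;
}

static inline void tcp_skb_pcount_set(struct sk_buff *skb, int segs)
{
	TCP_SKB_CB(skb)->tcp_gso_segs = segs;
}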
@@ -1068,6 +1104,21 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de | |||
1068 | tcp_verify_left_out(tp); | 1104 | tcp_verify_left_out(tp); |
1069 | } | 1105 | } |
1070 | 1106 | ||
1107 | static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2) | ||
1108 | { | ||
1109 | struct skb_shared_info *shinfo = skb_shinfo(skb); | ||
1110 | |||
1111 | if (unlikely(shinfo->tx_flags & SKBTX_ANY_TSTAMP) && | ||
1112 | !before(shinfo->tskey, TCP_SKB_CB(skb2)->seq)) { | ||
1113 | struct skb_shared_info *shinfo2 = skb_shinfo(skb2); | ||
1114 | u8 tsflags = shinfo->tx_flags & SKBTX_ANY_TSTAMP; | ||
1115 | |||
1116 | shinfo->tx_flags &= ~tsflags; | ||
1117 | shinfo2->tx_flags |= tsflags; | ||
1118 | swap(shinfo->tskey, shinfo2->tskey); | ||
1119 | } | ||
1120 | } | ||
1121 | |||
1071 | /* Function to create two new TCP segments. Shrinks the given segment | 1122 | /* Function to create two new TCP segments. Shrinks the given segment |
1072 | * to the specified size and appends a new segment with the rest of the | 1123 | * to the specified size and appends a new segment with the rest of the |
1073 | * packet to the list. This won't be called frequently, I hope. | 1124 | * packet to the list. This won't be called frequently, I hope. |
@@ -1130,11 +1181,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, | |||
1130 | 1181 | ||
1131 | buff->ip_summed = skb->ip_summed; | 1182 | buff->ip_summed = skb->ip_summed; |
1132 | 1183 | ||
1133 | /* Looks stupid, but our code really uses when of | ||
1134 | * skbs, which it never sent before. --ANK | ||
1135 | */ | ||
1136 | TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when; | ||
1137 | buff->tstamp = skb->tstamp; | 1184 | buff->tstamp = skb->tstamp; |
1185 | tcp_fragment_tstamp(skb, buff); | ||
1138 | 1186 | ||
1139 | old_factor = tcp_skb_pcount(skb); | 1187 | old_factor = tcp_skb_pcount(skb); |
1140 | 1188 | ||
@@ -1154,7 +1202,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, | |||
1154 | } | 1202 | } |
1155 | 1203 | ||
1156 | /* Link BUFF into the send queue. */ | 1204 | /* Link BUFF into the send queue. */ |
1157 | skb_header_release(buff); | 1205 | __skb_header_release(buff); |
1158 | tcp_insert_write_queue_after(skb, buff, sk); | 1206 | tcp_insert_write_queue_after(skb, buff, sk); |
1159 | 1207 | ||
1160 | return 0; | 1208 | return 0; |
@@ -1651,13 +1699,14 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, | |||
1651 | 1699 | ||
1652 | buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL; | 1700 | buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL; |
1653 | skb_split(skb, buff, len); | 1701 | skb_split(skb, buff, len); |
1702 | tcp_fragment_tstamp(skb, buff); | ||
1654 | 1703 | ||
1655 | /* Fix up tso_factor for both original and new SKB. */ | 1704 | /* Fix up tso_factor for both original and new SKB. */ |
1656 | tcp_set_skb_tso_segs(sk, skb, mss_now); | 1705 | tcp_set_skb_tso_segs(sk, skb, mss_now); |
1657 | tcp_set_skb_tso_segs(sk, buff, mss_now); | 1706 | tcp_set_skb_tso_segs(sk, buff, mss_now); |
1658 | 1707 | ||
1659 | /* Link BUFF into the send queue. */ | 1708 | /* Link BUFF into the send queue. */ |
1660 | skb_header_release(buff); | 1709 | __skb_header_release(buff); |
1661 | tcp_insert_write_queue_after(skb, buff, sk); | 1710 | tcp_insert_write_queue_after(skb, buff, sk); |
1662 | 1711 | ||
1663 | return 0; | 1712 | return 0; |
@@ -1856,8 +1905,8 @@ static int tcp_mtu_probe(struct sock *sk) | |||
1856 | tcp_init_tso_segs(sk, nskb, nskb->len); | 1905 | tcp_init_tso_segs(sk, nskb, nskb->len); |
1857 | 1906 | ||
1858 | /* We're ready to send. If this fails, the probe will | 1907 | /* We're ready to send. If this fails, the probe will |
1859 | * be resegmented into mss-sized pieces by tcp_write_xmit(). */ | 1908 | * be resegmented into mss-sized pieces by tcp_write_xmit(). |
1860 | TCP_SKB_CB(nskb)->when = tcp_time_stamp; | 1909 | */ |
1861 | if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) { | 1910 | if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) { |
1862 | /* Decrement cwnd here because we are sending | 1911 | /* Decrement cwnd here because we are sending |
1863 | * effectively two packets. */ | 1912 | * effectively two packets. */ |
@@ -1916,8 +1965,11 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | |||
1916 | tso_segs = tcp_init_tso_segs(sk, skb, mss_now); | 1965 | tso_segs = tcp_init_tso_segs(sk, skb, mss_now); |
1917 | BUG_ON(!tso_segs); | 1966 | BUG_ON(!tso_segs); |
1918 | 1967 | ||
1919 | if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) | 1968 | if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { |
1969 | /* "skb_mstamp" is used as a start point for the retransmit timer */ | ||
1970 | skb_mstamp_get(&skb->skb_mstamp); | ||
1920 | goto repair; /* Skip network transmission */ | 1971 | goto repair; /* Skip network transmission */ |
1972 | } | ||
1921 | 1973 | ||
1922 | cwnd_quota = tcp_cwnd_test(tp, skb); | 1974 | cwnd_quota = tcp_cwnd_test(tp, skb); |
1923 | if (!cwnd_quota) { | 1975 | if (!cwnd_quota) { |
@@ -1979,8 +2031,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | |||
1979 | unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) | 2031 | unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) |
1980 | break; | 2032 | break; |
1981 | 2033 | ||
1982 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | ||
1983 | |||
1984 | if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) | 2034 | if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) |
1985 | break; | 2035 | break; |
1986 | 2036 | ||
@@ -2076,10 +2126,7 @@ bool tcp_schedule_loss_probe(struct sock *sk) | |||
2076 | static bool skb_still_in_host_queue(const struct sock *sk, | 2126 | static bool skb_still_in_host_queue(const struct sock *sk, |
2077 | const struct sk_buff *skb) | 2127 | const struct sk_buff *skb) |
2078 | { | 2128 | { |
2079 | const struct sk_buff *fclone = skb + 1; | 2129 | if (unlikely(skb_fclone_busy(skb))) { |
2080 | |||
2081 | if (unlikely(skb->fclone == SKB_FCLONE_ORIG && | ||
2082 | fclone->fclone == SKB_FCLONE_CLONE)) { | ||
2083 | NET_INC_STATS_BH(sock_net(sk), | 2130 | NET_INC_STATS_BH(sock_net(sk), |
2084 | LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); | 2131 | LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); |
2085 | return true; | 2132 | return true; |
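skb_fclone_busy() factors out the open-coded test removed on the left-hand side: the skb was allocated from the fclone cache and its companion clone is still in flight, i.e. still sitting in a qdisc or driver queue. Roughly equivalent to:

static inline bool skb_fclone_busy(const struct sk_buff *skb)
{
	const struct sk_buff *fclone = skb + 1;	/* companion clone is laid out right after the original */

	return skb->fclone == SKB_FCLONE_ORIG &&
	       fclone->fclone == SKB_FCLONE_CLONE;
}

The in-tree helper may express the layout through a dedicated struct, but the condition it evaluates is the one shown here.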
@@ -2478,7 +2525,6 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
2478 | /* Make a copy, if the first transmission SKB clone we made | 2525 | /* Make a copy, if the first transmission SKB clone we made |
2479 | * is still in somebody's hands, else make a clone. | 2526 | * is still in somebody's hands, else make a clone. |
2480 | */ | 2527 | */ |
2481 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | ||
2482 | 2528 | ||
2483 | /* make sure skb->data is aligned on arches that require it | 2529 | /* make sure skb->data is aligned on arches that require it |
2484 | * and check if ack-trimming & collapsing extended the headroom | 2530 | * and check if ack-trimming & collapsing extended the headroom |
@@ -2523,7 +2569,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
2523 | 2569 | ||
2524 | /* Save stamp of the first retransmit. */ | 2570 | /* Save stamp of the first retransmit. */ |
2525 | if (!tp->retrans_stamp) | 2571 | if (!tp->retrans_stamp) |
2526 | tp->retrans_stamp = TCP_SKB_CB(skb)->when; | 2572 | tp->retrans_stamp = tcp_skb_timestamp(skb); |
2527 | 2573 | ||
2528 | /* snd_nxt is stored to detect loss of retransmitted segment, | 2574 | /* snd_nxt is stored to detect loss of retransmitted segment, |
2529 | * see tcp_input.c tcp_sacktag_write_queue(). | 2575 | * see tcp_input.c tcp_sacktag_write_queue(). |
@@ -2731,7 +2777,6 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) | |||
2731 | tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk), | 2777 | tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk), |
2732 | TCPHDR_ACK | TCPHDR_RST); | 2778 | TCPHDR_ACK | TCPHDR_RST); |
2733 | /* Send it off. */ | 2779 | /* Send it off. */ |
2734 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | ||
2735 | if (tcp_transmit_skb(sk, skb, 0, priority)) | 2780 | if (tcp_transmit_skb(sk, skb, 0, priority)) |
2736 | NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); | 2781 | NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); |
2737 | 2782 | ||
@@ -2759,7 +2804,7 @@ int tcp_send_synack(struct sock *sk) | |||
2759 | if (nskb == NULL) | 2804 | if (nskb == NULL) |
2760 | return -ENOMEM; | 2805 | return -ENOMEM; |
2761 | tcp_unlink_write_queue(skb, sk); | 2806 | tcp_unlink_write_queue(skb, sk); |
2762 | skb_header_release(nskb); | 2807 | __skb_header_release(nskb); |
2763 | __tcp_add_write_queue_head(sk, nskb); | 2808 | __tcp_add_write_queue_head(sk, nskb); |
2764 | sk_wmem_free_skb(sk, skb); | 2809 | sk_wmem_free_skb(sk, skb); |
2765 | sk->sk_wmem_queued += nskb->truesize; | 2810 | sk->sk_wmem_queued += nskb->truesize; |
@@ -2768,9 +2813,8 @@ int tcp_send_synack(struct sock *sk) | |||
2768 | } | 2813 | } |
2769 | 2814 | ||
2770 | TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK; | 2815 | TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK; |
2771 | TCP_ECN_send_synack(tcp_sk(sk), skb); | 2816 | tcp_ecn_send_synack(sk, skb); |
2772 | } | 2817 | } |
2773 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | ||
2774 | return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); | 2818 | return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); |
2775 | } | 2819 | } |
2776 | 2820 | ||
@@ -2814,10 +2858,10 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2814 | memset(&opts, 0, sizeof(opts)); | 2858 | memset(&opts, 0, sizeof(opts)); |
2815 | #ifdef CONFIG_SYN_COOKIES | 2859 | #ifdef CONFIG_SYN_COOKIES |
2816 | if (unlikely(req->cookie_ts)) | 2860 | if (unlikely(req->cookie_ts)) |
2817 | TCP_SKB_CB(skb)->when = cookie_init_timestamp(req); | 2861 | skb->skb_mstamp.stamp_jiffies = cookie_init_timestamp(req); |
2818 | else | 2862 | else |
2819 | #endif | 2863 | #endif |
2820 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2864 | skb_mstamp_get(&skb->skb_mstamp); |
2821 | tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5, | 2865 | tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5, |
2822 | foc) + sizeof(*th); | 2866 | foc) + sizeof(*th); |
2823 | 2867 | ||
@@ -2828,7 +2872,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2828 | memset(th, 0, sizeof(struct tcphdr)); | 2872 | memset(th, 0, sizeof(struct tcphdr)); |
2829 | th->syn = 1; | 2873 | th->syn = 1; |
2830 | th->ack = 1; | 2874 | th->ack = 1; |
2831 | TCP_ECN_make_synack(req, th); | 2875 | tcp_ecn_make_synack(req, th, sk); |
2832 | th->source = htons(ireq->ir_num); | 2876 | th->source = htons(ireq->ir_num); |
2833 | th->dest = ireq->ir_rmt_port; | 2877 | th->dest = ireq->ir_rmt_port; |
2834 | /* Setting of flags are superfluous here for callers (and ECE is | 2878 | /* Setting of flags are superfluous here for callers (and ECE is |
@@ -2935,7 +2979,7 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb) | |||
2935 | struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); | 2979 | struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); |
2936 | 2980 | ||
2937 | tcb->end_seq += skb->len; | 2981 | tcb->end_seq += skb->len; |
2938 | skb_header_release(skb); | 2982 | __skb_header_release(skb); |
2939 | __tcp_add_write_queue_tail(sk, skb); | 2983 | __tcp_add_write_queue_tail(sk, skb); |
2940 | sk->sk_wmem_queued += skb->truesize; | 2984 | sk->sk_wmem_queued += skb->truesize; |
2941 | sk_mem_charge(sk, skb->truesize); | 2985 | sk_mem_charge(sk, skb->truesize); |
@@ -3065,9 +3109,9 @@ int tcp_connect(struct sock *sk) | |||
3065 | skb_reserve(buff, MAX_TCP_HEADER); | 3109 | skb_reserve(buff, MAX_TCP_HEADER); |
3066 | 3110 | ||
3067 | tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); | 3111 | tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); |
3068 | tp->retrans_stamp = TCP_SKB_CB(buff)->when = tcp_time_stamp; | 3112 | tp->retrans_stamp = tcp_time_stamp; |
3069 | tcp_connect_queue_skb(sk, buff); | 3113 | tcp_connect_queue_skb(sk, buff); |
3070 | TCP_ECN_send_syn(sk, buff); | 3114 | tcp_ecn_send_syn(sk, buff); |
3071 | 3115 | ||
3072 | /* Send off SYN; include data in Fast Open. */ | 3116 | /* Send off SYN; include data in Fast Open. */ |
3073 | err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) : | 3117 | err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) : |
@@ -3099,6 +3143,8 @@ void tcp_send_delayed_ack(struct sock *sk) | |||
3099 | int ato = icsk->icsk_ack.ato; | 3143 | int ato = icsk->icsk_ack.ato; |
3100 | unsigned long timeout; | 3144 | unsigned long timeout; |
3101 | 3145 | ||
3146 | tcp_ca_event(sk, CA_EVENT_DELAYED_ACK); | ||
3147 | |||
3102 | if (ato > TCP_DELACK_MIN) { | 3148 | if (ato > TCP_DELACK_MIN) { |
3103 | const struct tcp_sock *tp = tcp_sk(sk); | 3149 | const struct tcp_sock *tp = tcp_sk(sk); |
3104 | int max_ato = HZ / 2; | 3150 | int max_ato = HZ / 2; |
@@ -3155,6 +3201,8 @@ void tcp_send_ack(struct sock *sk) | |||
3155 | if (sk->sk_state == TCP_CLOSE) | 3201 | if (sk->sk_state == TCP_CLOSE) |
3156 | return; | 3202 | return; |
3157 | 3203 | ||
3204 | tcp_ca_event(sk, CA_EVENT_NON_DELAYED_ACK); | ||
3205 | |||
3158 | /* We are not putting this on the write queue, so | 3206 | /* We are not putting this on the write queue, so |
3159 | * tcp_transmit_skb() will set the ownership to this | 3207 | * tcp_transmit_skb() will set the ownership to this |
3160 | * sock. | 3208 | * sock. |
@@ -3173,9 +3221,10 @@ void tcp_send_ack(struct sock *sk) | |||
3173 | tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK); | 3221 | tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK); |
3174 | 3222 | ||
3175 | /* Send it off, this clears delayed acks for us. */ | 3223 | /* Send it off, this clears delayed acks for us. */ |
3176 | TCP_SKB_CB(buff)->when = tcp_time_stamp; | 3224 | skb_mstamp_get(&buff->skb_mstamp); |
3177 | tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); | 3225 | tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); |
3178 | } | 3226 | } |
3227 | EXPORT_SYMBOL_GPL(tcp_send_ack); | ||
3179 | 3228 | ||
3180 | /* This routine sends a packet with an out of date sequence | 3229 | /* This routine sends a packet with an out of date sequence |
3181 | * number. It assumes the other end will try to ack it. | 3230 | * number. It assumes the other end will try to ack it. |
@@ -3205,7 +3254,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) | |||
3205 | * send it. | 3254 | * send it. |
3206 | */ | 3255 | */ |
3207 | tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); | 3256 | tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); |
3208 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 3257 | skb_mstamp_get(&skb->skb_mstamp); |
3209 | return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); | 3258 | return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); |
3210 | } | 3259 | } |
3211 | 3260 | ||
@@ -3249,7 +3298,6 @@ int tcp_write_wakeup(struct sock *sk) | |||
3249 | tcp_set_skb_tso_segs(sk, skb, mss); | 3298 | tcp_set_skb_tso_segs(sk, skb, mss); |
3250 | 3299 | ||
3251 | TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; | 3300 | TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; |
3252 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | ||
3253 | err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); | 3301 | err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); |
3254 | if (!err) | 3302 | if (!err) |
3255 | tcp_event_new_data_sent(sk, skb); | 3303 | tcp_event_new_data_sent(sk, skb); |
@@ -3268,6 +3316,7 @@ void tcp_send_probe0(struct sock *sk) | |||
3268 | { | 3316 | { |
3269 | struct inet_connection_sock *icsk = inet_csk(sk); | 3317 | struct inet_connection_sock *icsk = inet_csk(sk); |
3270 | struct tcp_sock *tp = tcp_sk(sk); | 3318 | struct tcp_sock *tp = tcp_sk(sk); |
3319 | unsigned long probe_max; | ||
3271 | int err; | 3320 | int err; |
3272 | 3321 | ||
3273 | err = tcp_write_wakeup(sk); | 3322 | err = tcp_write_wakeup(sk); |
@@ -3283,9 +3332,7 @@ void tcp_send_probe0(struct sock *sk) | |||
3283 | if (icsk->icsk_backoff < sysctl_tcp_retries2) | 3332 | if (icsk->icsk_backoff < sysctl_tcp_retries2) |
3284 | icsk->icsk_backoff++; | 3333 | icsk->icsk_backoff++; |
3285 | icsk->icsk_probes_out++; | 3334 | icsk->icsk_probes_out++; |
3286 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, | 3335 | probe_max = TCP_RTO_MAX; |
3287 | min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX), | ||
3288 | TCP_RTO_MAX); | ||
3289 | } else { | 3336 | } else { |
3290 | /* If packet was not sent due to local congestion, | 3337 | /* If packet was not sent due to local congestion, |
3291 | * do not backoff and do not remember icsk_probes_out. | 3338 | * do not backoff and do not remember icsk_probes_out. |
@@ -3295,9 +3342,24 @@ void tcp_send_probe0(struct sock *sk) | |||
3295 | */ | 3342 | */ |
3296 | if (!icsk->icsk_probes_out) | 3343 | if (!icsk->icsk_probes_out) |
3297 | icsk->icsk_probes_out = 1; | 3344 | icsk->icsk_probes_out = 1; |
3298 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, | 3345 | probe_max = TCP_RESOURCE_PROBE_INTERVAL; |
3299 | min(icsk->icsk_rto << icsk->icsk_backoff, | 3346 | } |
3300 | TCP_RESOURCE_PROBE_INTERVAL), | 3347 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, |
3301 | TCP_RTO_MAX); | 3348 | inet_csk_rto_backoff(icsk, probe_max), |
3349 | TCP_RTO_MAX); | ||
3350 | } | ||
3351 | |||
3352 | int tcp_rtx_synack(struct sock *sk, struct request_sock *req) | ||
3353 | { | ||
3354 | const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific; | ||
3355 | struct flowi fl; | ||
3356 | int res; | ||
3357 | |||
3358 | res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL); | ||
3359 | if (!res) { | ||
3360 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); | ||
3361 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); | ||
3302 | } | 3362 | } |
3363 | return res; | ||
3303 | } | 3364 | } |
3365 | EXPORT_SYMBOL(tcp_rtx_synack); | ||
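tcp_send_probe0() now lets inet_csk_rto_backoff() compute the clamped, backed-off timeout instead of open-coding min(icsk_rto << icsk_backoff, max) in both branches. A sketch of that helper (from include/net/inet_connection_sock.h, form approximate); doing the shift in 64 bits avoids overflow at large backoff values:

static inline unsigned long
inet_csk_rto_backoff(const struct inet_connection_sock *icsk,
		     unsigned long max_when)
{
	u64 when = (u64)icsk->icsk_rto << icsk->icsk_backoff;

	return (unsigned long)min_t(u64, when, max_when);
}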