Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--	net/ipv4/tcp_output.c	198
1 file changed, 130 insertions(+), 68 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 179b51e6bda3..3af21296d967 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -318,36 +318,47 @@ static u16 tcp_select_window(struct sock *sk)
 }
 
 /* Packet ECN state for a SYN-ACK */
-static inline void TCP_ECN_send_synack(const struct tcp_sock *tp, struct sk_buff *skb)
+static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
 {
+	const struct tcp_sock *tp = tcp_sk(sk);
+
 	TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
 	if (!(tp->ecn_flags & TCP_ECN_OK))
 		TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
+	else if (tcp_ca_needs_ecn(sk))
+		INET_ECN_xmit(sk);
 }
 
 /* Packet ECN state for a SYN. */
-static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
+static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tp->ecn_flags = 0;
-	if (sock_net(sk)->ipv4.sysctl_tcp_ecn == 1) {
+	if (sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
+	    tcp_ca_needs_ecn(sk)) {
 		TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
 		tp->ecn_flags = TCP_ECN_OK;
+		if (tcp_ca_needs_ecn(sk))
+			INET_ECN_xmit(sk);
 	}
 }
 
-static __inline__ void
-TCP_ECN_make_synack(const struct request_sock *req, struct tcphdr *th)
+static void
+tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th,
+		    struct sock *sk)
 {
-	if (inet_rsk(req)->ecn_ok)
+	if (inet_rsk(req)->ecn_ok) {
 		th->ece = 1;
+		if (tcp_ca_needs_ecn(sk))
+			INET_ECN_xmit(sk);
+	}
 }
 
 /* Set up ECN state for a packet on a ESTABLISHED socket that is about to
  * be sent.
  */
-static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
-				int tcp_header_len)
+static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
+			 int tcp_header_len)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -362,7 +373,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
 				tcp_hdr(skb)->cwr = 1;
 				skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
 			}
-		} else {
+		} else if (!tcp_ca_needs_ecn(sk)) {
 			/* ACK or retransmitted segment: clear ECT|CE */
 			INET_ECN_dontxmit(sk);
 		}
@@ -384,7 +395,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
 	TCP_SKB_CB(skb)->tcp_flags = flags;
 	TCP_SKB_CB(skb)->sacked = 0;
 
-	shinfo->gso_segs = 1;
+	tcp_skb_pcount_set(skb, 1);
 	shinfo->gso_size = 0;
 	shinfo->gso_type = 0;
 
@@ -550,7 +561,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 
 	if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
 		opts->options |= OPTION_TS;
-		opts->tsval = TCP_SKB_CB(skb)->when + tp->tsoffset;
+		opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
 		opts->tsecr = tp->rx_opt.ts_recent;
 		remaining -= TCPOLEN_TSTAMP_ALIGNED;
 	}
@@ -618,7 +629,7 @@ static unsigned int tcp_synack_options(struct sock *sk,
 	}
 	if (likely(ireq->tstamp_ok)) {
 		opts->options |= OPTION_TS;
-		opts->tsval = TCP_SKB_CB(skb)->when;
+		opts->tsval = tcp_skb_timestamp(skb);
 		opts->tsecr = req->ts_recent;
 		remaining -= TCPOLEN_TSTAMP_ALIGNED;
 	}
@@ -647,7 +658,6 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
 					struct tcp_out_options *opts,
 					struct tcp_md5sig_key **md5)
 {
-	struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned int size = 0;
 	unsigned int eff_sacks;
@@ -666,7 +676,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
 
 	if (likely(tp->rx_opt.tstamp_ok)) {
 		opts->options |= OPTION_TS;
-		opts->tsval = tcb ? tcb->when + tp->tsoffset : 0;
+		opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0;
 		opts->tsecr = tp->rx_opt.ts_recent;
 		size += TCPOLEN_TSTAMP_ALIGNED;
 	}
@@ -800,7 +810,7 @@ void tcp_release_cb(struct sock *sk)
 		__sock_put(sk);
 	}
 	if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
-		sk->sk_prot->mtu_reduced(sk);
+		inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
 		__sock_put(sk);
 	}
 }
@@ -829,26 +839,38 @@ void tcp_wfree(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
 	struct tcp_sock *tp = tcp_sk(sk);
+	int wmem;
+
+	/* Keep one reference on sk_wmem_alloc.
+	 * Will be released by sk_free() from here or tcp_tasklet_func()
+	 */
+	wmem = atomic_sub_return(skb->truesize - 1, &sk->sk_wmem_alloc);
+
+	/* If this softirq is serviced by ksoftirqd, we are likely under stress.
+	 * Wait until our queues (qdisc + devices) are drained.
+	 * This gives :
+	 * - less callbacks to tcp_write_xmit(), reducing stress (batches)
+	 * - chance for incoming ACK (processed by another cpu maybe)
+	 *   to migrate this flow (skb->ooo_okay will be eventually set)
+	 */
+	if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
+		goto out;
 
 	if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) &&
 	    !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) {
 		unsigned long flags;
 		struct tsq_tasklet *tsq;
 
-		/* Keep a ref on socket.
-		 * This last ref will be released in tcp_tasklet_func()
-		 */
-		atomic_sub(skb->truesize - 1, &sk->sk_wmem_alloc);
-
 		/* queue this socket to tasklet queue */
 		local_irq_save(flags);
-		tsq = &__get_cpu_var(tsq_tasklet);
+		tsq = this_cpu_ptr(&tsq_tasklet);
 		list_add(&tp->tsq_node, &tsq->head);
 		tasklet_schedule(&tsq->tasklet);
 		local_irq_restore(flags);
-	} else {
-		sock_wfree(skb);
+		return;
 	}
+out:
+	sk_free(sk);
 }
 
 /* This routine actually transmits TCP packets queued in by
@@ -886,8 +908,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 			skb = skb_clone(skb, gfp_mask);
 		if (unlikely(!skb))
 			return -ENOBUFS;
-		/* Our usage of tstamp should remain private */
-		skb->tstamp.tv64 = 0;
 	}
 
 	inet = inet_sk(sk);
@@ -906,9 +926,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		tcp_ca_event(sk, CA_EVENT_TX_START);
 
 	/* if no packet is in qdisc/device queue, then allow XPS to select
-	 * another queue.
+	 * another queue. We can be called from tcp_tsq_handler()
+	 * which holds one reference to sk_wmem_alloc.
+	 *
+	 * TODO: Ideally, in-flight pure ACK packets should not matter here.
+	 * One way to get this would be to set skb->truesize = 2 on them.
 	 */
-	skb->ooo_okay = sk_wmem_alloc_get(sk) == 0;
+	skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1);
 
 	skb_push(skb, tcp_header_size);
 	skb_reset_transport_header(skb);
@@ -916,6 +940,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	skb_orphan(skb);
 	skb->sk = sk;
 	skb->destructor = tcp_wfree;
+	skb_set_hash_from_sk(skb, sk);
 	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
 
 	/* Build TCP header and checksum it. */
@@ -951,7 +976,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
 	tcp_options_write((__be32 *)(th + 1), tp, &opts);
 	if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0))
-		TCP_ECN_send(sk, skb, tcp_header_size);
+		tcp_ecn_send(sk, skb, tcp_header_size);
 
 #ifdef CONFIG_TCP_MD5SIG
 	/* Calculate the MD5 hash, as we have all we need now */
@@ -974,11 +999,22 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
 		      tcp_skb_pcount(skb));
 
+	/* OK, its time to fill skb_shinfo(skb)->gso_segs */
+	skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
+
+	/* Our usage of tstamp should remain private */
+	skb->tstamp.tv64 = 0;
+
+	/* Cleanup our debris for IP stacks */
+	memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
+			       sizeof(struct inet6_skb_parm)));
+
 	err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
+
 	if (likely(err <= 0))
 		return err;
 
-	tcp_enter_cwr(sk, 1);
+	tcp_enter_cwr(sk);
 
 	return net_xmit_eval(err);
 }
@@ -994,7 +1030,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 
 	/* Advance write_seq and place onto the write_queue. */
 	tp->write_seq = TCP_SKB_CB(skb)->end_seq;
-	skb_header_release(skb);
+	__skb_header_release(skb);
 	tcp_add_write_queue_tail(sk, skb);
 	sk->sk_wmem_queued += skb->truesize;
 	sk_mem_charge(sk, skb->truesize);
@@ -1013,11 +1049,11 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
 		/* Avoid the costly divide in the normal
 		 * non-TSO case.
 		 */
-		shinfo->gso_segs = 1;
+		tcp_skb_pcount_set(skb, 1);
 		shinfo->gso_size = 0;
 		shinfo->gso_type = 0;
 	} else {
-		shinfo->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
+		tcp_skb_pcount_set(skb, DIV_ROUND_UP(skb->len, mss_now));
 		shinfo->gso_size = mss_now;
 		shinfo->gso_type = sk->sk_gso_type;
 	}
@@ -1068,6 +1104,21 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de
 	tcp_verify_left_out(tp);
 }
 
+static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2)
+{
+	struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+	if (unlikely(shinfo->tx_flags & SKBTX_ANY_TSTAMP) &&
+	    !before(shinfo->tskey, TCP_SKB_CB(skb2)->seq)) {
+		struct skb_shared_info *shinfo2 = skb_shinfo(skb2);
+		u8 tsflags = shinfo->tx_flags & SKBTX_ANY_TSTAMP;
+
+		shinfo->tx_flags &= ~tsflags;
+		shinfo2->tx_flags |= tsflags;
+		swap(shinfo->tskey, shinfo2->tskey);
+	}
+}
+
 /* Function to create two new TCP segments. Shrinks the given segment
  * to the specified size and appends a new segment with the rest of the
  * packet to the list. This won't be called frequently, I hope.
@@ -1130,11 +1181,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 
 	buff->ip_summed = skb->ip_summed;
 
-	/* Looks stupid, but our code really uses when of
-	 * skbs, which it never sent before. --ANK
-	 */
-	TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
 	buff->tstamp = skb->tstamp;
+	tcp_fragment_tstamp(skb, buff);
 
 	old_factor = tcp_skb_pcount(skb);
 
@@ -1154,7 +1202,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 	}
 
 	/* Link BUFF into the send queue. */
-	skb_header_release(buff);
+	__skb_header_release(buff);
 	tcp_insert_write_queue_after(skb, buff, sk);
 
 	return 0;
@@ -1651,13 +1699,14 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 
 	buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
 	skb_split(skb, buff, len);
+	tcp_fragment_tstamp(skb, buff);
 
 	/* Fix up tso_factor for both original and new SKB. */
 	tcp_set_skb_tso_segs(sk, skb, mss_now);
 	tcp_set_skb_tso_segs(sk, buff, mss_now);
 
 	/* Link BUFF into the send queue. */
-	skb_header_release(buff);
+	__skb_header_release(buff);
 	tcp_insert_write_queue_after(skb, buff, sk);
 
 	return 0;
@@ -1856,8 +1905,8 @@ static int tcp_mtu_probe(struct sock *sk)
 	tcp_init_tso_segs(sk, nskb, nskb->len);
 
 	/* We're ready to send. If this fails, the probe will
-	 * be resegmented into mss-sized pieces by tcp_write_xmit(). */
-	TCP_SKB_CB(nskb)->when = tcp_time_stamp;
+	 * be resegmented into mss-sized pieces by tcp_write_xmit().
+	 */
 	if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
 		/* Decrement cwnd here because we are sending
 		 * effectively two packets. */
@@ -1916,8 +1965,11 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
 		BUG_ON(!tso_segs);
 
-		if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE)
+		if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
+			/* "skb_mstamp" is used as a start point for the retransmit timer */
+			skb_mstamp_get(&skb->skb_mstamp);
 			goto repair; /* Skip network transmission */
+		}
 
 		cwnd_quota = tcp_cwnd_test(tp, skb);
 		if (!cwnd_quota) {
@@ -1979,8 +2031,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
 			break;
 
-		TCP_SKB_CB(skb)->when = tcp_time_stamp;
-
 		if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
 			break;
 
@@ -2076,10 +2126,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 static bool skb_still_in_host_queue(const struct sock *sk,
 				    const struct sk_buff *skb)
 {
-	const struct sk_buff *fclone = skb + 1;
-
-	if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
-		     fclone->fclone == SKB_FCLONE_CLONE)) {
+	if (unlikely(skb_fclone_busy(skb))) {
 		NET_INC_STATS_BH(sock_net(sk),
 				 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
 		return true;
@@ -2478,7 +2525,6 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	/* Make a copy, if the first transmission SKB clone we made
 	 * is still in somebody's hands, else make a clone.
 	 */
-	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
 	/* make sure skb->data is aligned on arches that require it
 	 * and check if ack-trimming & collapsing extended the headroom
@@ -2523,7 +2569,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 
 	/* Save stamp of the first retransmit. */
 	if (!tp->retrans_stamp)
-		tp->retrans_stamp = TCP_SKB_CB(skb)->when;
+		tp->retrans_stamp = tcp_skb_timestamp(skb);
 
 	/* snd_nxt is stored to detect loss of retransmitted segment,
 	 * see tcp_input.c tcp_sacktag_write_queue().
@@ -2731,7 +2777,6 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 	tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
 			     TCPHDR_ACK | TCPHDR_RST);
 	/* Send it off. */
-	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	if (tcp_transmit_skb(sk, skb, 0, priority))
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
 
@@ -2759,7 +2804,7 @@ int tcp_send_synack(struct sock *sk)
 			if (nskb == NULL)
 				return -ENOMEM;
 			tcp_unlink_write_queue(skb, sk);
-			skb_header_release(nskb);
+			__skb_header_release(nskb);
 			__tcp_add_write_queue_head(sk, nskb);
 			sk_wmem_free_skb(sk, skb);
 			sk->sk_wmem_queued += nskb->truesize;
@@ -2768,9 +2813,8 @@ int tcp_send_synack(struct sock *sk)
 		}
 
 		TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK;
-		TCP_ECN_send_synack(tcp_sk(sk), skb);
+		tcp_ecn_send_synack(sk, skb);
 	}
-	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 }
 
@@ -2814,10 +2858,10 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	memset(&opts, 0, sizeof(opts));
 #ifdef CONFIG_SYN_COOKIES
 	if (unlikely(req->cookie_ts))
-		TCP_SKB_CB(skb)->when = cookie_init_timestamp(req);
+		skb->skb_mstamp.stamp_jiffies = cookie_init_timestamp(req);
 	else
 #endif
-		TCP_SKB_CB(skb)->when = tcp_time_stamp;
+		skb_mstamp_get(&skb->skb_mstamp);
 	tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5,
 					     foc) + sizeof(*th);
 
@@ -2828,7 +2872,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	memset(th, 0, sizeof(struct tcphdr));
 	th->syn = 1;
 	th->ack = 1;
-	TCP_ECN_make_synack(req, th);
+	tcp_ecn_make_synack(req, th, sk);
 	th->source = htons(ireq->ir_num);
 	th->dest = ireq->ir_rmt_port;
 	/* Setting of flags are superfluous here for callers (and ECE is
@@ -2935,7 +2979,7 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
 	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 
 	tcb->end_seq += skb->len;
-	skb_header_release(skb);
+	__skb_header_release(skb);
 	__tcp_add_write_queue_tail(sk, skb);
 	sk->sk_wmem_queued += skb->truesize;
 	sk_mem_charge(sk, skb->truesize);
@@ -3065,9 +3109,9 @@ int tcp_connect(struct sock *sk)
 	skb_reserve(buff, MAX_TCP_HEADER);
 
 	tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
-	tp->retrans_stamp = TCP_SKB_CB(buff)->when = tcp_time_stamp;
+	tp->retrans_stamp = tcp_time_stamp;
 	tcp_connect_queue_skb(sk, buff);
-	TCP_ECN_send_syn(sk, buff);
+	tcp_ecn_send_syn(sk, buff);
 
 	/* Send off SYN; include data in Fast Open. */
 	err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
@@ -3099,6 +3143,8 @@ void tcp_send_delayed_ack(struct sock *sk)
 	int ato = icsk->icsk_ack.ato;
 	unsigned long timeout;
 
+	tcp_ca_event(sk, CA_EVENT_DELAYED_ACK);
+
 	if (ato > TCP_DELACK_MIN) {
 		const struct tcp_sock *tp = tcp_sk(sk);
 		int max_ato = HZ / 2;
@@ -3155,6 +3201,8 @@ void tcp_send_ack(struct sock *sk)
 	if (sk->sk_state == TCP_CLOSE)
 		return;
 
+	tcp_ca_event(sk, CA_EVENT_NON_DELAYED_ACK);
+
 	/* We are not putting this on the write queue, so
 	 * tcp_transmit_skb() will set the ownership to this
 	 * sock.
@@ -3173,9 +3221,10 @@ void tcp_send_ack(struct sock *sk)
 	tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
 
 	/* Send it off, this clears delayed acks for us. */
-	TCP_SKB_CB(buff)->when = tcp_time_stamp;
+	skb_mstamp_get(&buff->skb_mstamp);
 	tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
 }
+EXPORT_SYMBOL_GPL(tcp_send_ack);
 
 /* This routine sends a packet with an out of date sequence
  * number. It assumes the other end will try to ack it.
@@ -3205,7 +3254,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
 	 * send it.
 	 */
 	tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
-	TCP_SKB_CB(skb)->when = tcp_time_stamp;
+	skb_mstamp_get(&skb->skb_mstamp);
 	return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
 }
 
@@ -3249,7 +3298,6 @@ int tcp_write_wakeup(struct sock *sk)
 		tcp_set_skb_tso_segs(sk, skb, mss);
 
 		TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
-		TCP_SKB_CB(skb)->when = tcp_time_stamp;
 		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 		if (!err)
 			tcp_event_new_data_sent(sk, skb);
@@ -3268,6 +3316,7 @@ void tcp_send_probe0(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
+	unsigned long probe_max;
 	int err;
 
 	err = tcp_write_wakeup(sk);
@@ -3283,9 +3332,7 @@ void tcp_send_probe0(struct sock *sk)
 		if (icsk->icsk_backoff < sysctl_tcp_retries2)
 			icsk->icsk_backoff++;
 		icsk->icsk_probes_out++;
-		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
-					  min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
-					  TCP_RTO_MAX);
+		probe_max = TCP_RTO_MAX;
 	} else {
 		/* If packet was not sent due to local congestion,
 		 * do not backoff and do not remember icsk_probes_out.
@@ -3295,9 +3342,24 @@ void tcp_send_probe0(struct sock *sk)
 		 */
 		if (!icsk->icsk_probes_out)
 			icsk->icsk_probes_out = 1;
-		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
-					  min(icsk->icsk_rto << icsk->icsk_backoff,
-					      TCP_RESOURCE_PROBE_INTERVAL),
-					  TCP_RTO_MAX);
+		probe_max = TCP_RESOURCE_PROBE_INTERVAL;
+	}
+	inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
+				  inet_csk_rto_backoff(icsk, probe_max),
+				  TCP_RTO_MAX);
+}
+
+int tcp_rtx_synack(struct sock *sk, struct request_sock *req)
+{
+	const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific;
+	struct flowi fl;
+	int res;
+
+	res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL);
+	if (!res) {
+		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
 	}
+	return res;
 }
+EXPORT_SYMBOL(tcp_rtx_synack);