Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--    net/ipv4/tcp_output.c    189
1 file changed, 120 insertions, 69 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7820f3a7dd70..17a11e65e57f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -363,15 +363,17 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
  */
 static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
 {
+        struct skb_shared_info *shinfo = skb_shinfo(skb);
+
         skb->ip_summed = CHECKSUM_PARTIAL;
         skb->csum = 0;
 
         TCP_SKB_CB(skb)->tcp_flags = flags;
         TCP_SKB_CB(skb)->sacked = 0;
 
-        skb_shinfo(skb)->gso_segs = 1;
-        skb_shinfo(skb)->gso_size = 0;
-        skb_shinfo(skb)->gso_type = 0;
+        shinfo->gso_segs = 1;
+        shinfo->gso_size = 0;
+        shinfo->gso_type = 0;
 
         TCP_SKB_CB(skb)->seq = seq;
         if (flags & (TCPHDR_SYN | TCPHDR_FIN))
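The hunk above (like the later ones touching tcp_set_skb_tso_segs() and __pskb_trim_head()) only caches the pointer returned by skb_shinfo() in a local variable, so the address derived from the skb is computed once instead of at every gso_* store. A minimal stand-alone C sketch of the same pattern, using made-up struct names rather than the kernel's:

/* Stand-alone sketch of the "cache the derived pointer" pattern above.
 * The struct names are illustrative, not the kernel's definitions.
 */
#include <stdio.h>

struct shared_info { int gso_segs, gso_size, gso_type; };
struct packet      { unsigned char *end; };

/* In the kernel, skb_shinfo(skb) is essentially a cast of skb->end;
 * calling it repeatedly recomputes that cast at every use. */
static struct shared_info *pkt_shinfo(struct packet *p)
{
        return (struct shared_info *)p->end;
}

static void init_nondata(struct packet *p)
{
        struct shared_info *shinfo = pkt_shinfo(p);     /* computed once */

        shinfo->gso_segs = 1;
        shinfo->gso_size = 0;
        shinfo->gso_type = 0;
}

int main(void)
{
        struct shared_info info = { 5, 5, 5 };
        struct packet pkt = { .end = (unsigned char *)&info };

        init_nondata(&pkt);
        printf("gso_segs=%d gso_size=%d gso_type=%d\n",
               info.gso_segs, info.gso_size, info.gso_type);
        return 0;
}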
@@ -406,7 +408,7 @@ struct tcp_out_options {
  * Beware: Something in the Internet is very sensitive to the ordering of
  * TCP options, we learned this through the hard way, so be careful here.
  * Luckily we can at least blame others for their non-compliance but from
- * inter-operatibility perspective it seems that we're somewhat stuck with
+ * inter-operability perspective it seems that we're somewhat stuck with
  * the ordering which we have been using if we want to keep working with
  * those broken things (not that it currently hurts anybody as there isn't
  * particular reason why the ordering would need to be changed).
@@ -679,7 +681,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
  *
  * Its important tcp_wfree() can be replaced by sock_wfree() in the event skb
  * needs to be reallocated in a driver.
- * The invariant being skb->truesize substracted from sk->sk_wmem_alloc
+ * The invariant being skb->truesize subtracted from sk->sk_wmem_alloc
  *
  * Since transmit from skb destructor is forbidden, we use a tasklet
  * to process all sockets that eventually need to send more skbs.
@@ -696,12 +698,13 @@ static void tcp_tsq_handler(struct sock *sk)
         if ((1 << sk->sk_state) &
             (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
              TCPF_CLOSE_WAIT  | TCPF_LAST_ACK))
-                tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC);
+                tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle,
+                               0, GFP_ATOMIC);
 }
 /*
- * One tasklest per cpu tries to send more skbs.
+ * One tasklet per cpu tries to send more skbs.
  * We run in tasklet context but need to disable irqs when
- * transfering tsq->head because tcp_wfree() might
+ * transferring tsq->head because tcp_wfree() might
  * interrupt us (non NAPI drivers)
  */
 static void tcp_tasklet_func(unsigned long data)
@@ -764,6 +767,17 @@ void tcp_release_cb(struct sock *sk)
         if (flags & (1UL << TCP_TSQ_DEFERRED))
                 tcp_tsq_handler(sk);
 
+        /* Here begins the tricky part :
+         * We are called from release_sock() with :
+         * 1) BH disabled
+         * 2) sk_lock.slock spinlock held
+         * 3) socket owned by us (sk->sk_lock.owned == 1)
+         *
+         * But following code is meant to be called from BH handlers,
+         * so we should keep BH disabled, but early release socket ownership
+         */
+        sock_release_ownership(sk);
+
         if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
                 tcp_write_timer_handler(sk);
                 __sock_put(sk);
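The comment added above describes the locking context: release_sock() calls this with BH disabled and the spinlock held, and socket ownership is dropped before the deferred handlers run so they execute under the same conditions as real BH handlers. A simplified user-space sketch of that defer-then-run-on-release idea, with invented names standing in for the kernel's socket and flag machinery:

/* Not kernel code: a toy model of deferring work while a socket is
 * owned and running it when the owner releases the socket.
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_sock {
        bool owned;             /* stands in for sk->sk_lock.owned */
        unsigned long deferred; /* stands in for tp->tsq_flags     */
};

enum { DEFER_WRITE_TIMER = 0 };

static void write_timer_work(struct fake_sock *sk)
{
        (void)sk;
        printf("running deferred write-timer work\n");
}

/* "Interrupt" context: cannot run the work while the socket is owned. */
static void timer_fires(struct fake_sock *sk)
{
        if (sk->owned)
                sk->deferred |= 1UL << DEFER_WRITE_TIMER;
        else
                write_timer_work(sk);
}

/* Owner is done: drop ownership first, then run whatever was deferred,
 * mirroring the sock_release_ownership() ordering in the hunk above. */
static void release_sock_cb(struct fake_sock *sk)
{
        unsigned long flags = sk->deferred;

        sk->deferred = 0;
        sk->owned = false;      /* early release of ownership */

        if (flags & (1UL << DEFER_WRITE_TIMER))
                write_timer_work(sk);
}

int main(void)
{
        struct fake_sock sk = { .owned = true, .deferred = 0 };

        timer_fires(&sk);       /* gets deferred */
        release_sock_cb(&sk);   /* runs it now   */
        return 0;
}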
@@ -795,7 +809,7 @@ void __init tcp_tasklet_init(void)
 
 /*
  * Write buffer destructor automatically called from kfree_skb.
- * We cant xmit new skbs from this context, as we might already
+ * We can't xmit new skbs from this context, as we might already
  * hold qdisc lock.
  */
 void tcp_wfree(struct sk_buff *skb)
@@ -861,8 +875,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
                 if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
                              fclone->fclone == SKB_FCLONE_CLONE))
-                        NET_INC_STATS_BH(sock_net(sk),
-                                         LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
+                        NET_INC_STATS(sock_net(sk),
+                                      LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
 
                 if (unlikely(skb_cloned(skb)))
                         skb = pskb_copy(skb, gfp_mask);
@@ -986,6 +1000,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
                                  unsigned int mss_now)
 {
+        struct skb_shared_info *shinfo = skb_shinfo(skb);
+
         /* Make sure we own this skb before messing gso_size/gso_segs */
         WARN_ON_ONCE(skb_cloned(skb));
 
@@ -993,13 +1009,13 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
                 /* Avoid the costly divide in the normal
                  * non-TSO case.
                  */
-                skb_shinfo(skb)->gso_segs = 1;
-                skb_shinfo(skb)->gso_size = 0;
-                skb_shinfo(skb)->gso_type = 0;
+                shinfo->gso_segs = 1;
+                shinfo->gso_size = 0;
+                shinfo->gso_type = 0;
         } else {
-                skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
-                skb_shinfo(skb)->gso_size = mss_now;
-                skb_shinfo(skb)->gso_type = sk->sk_gso_type;
+                shinfo->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
+                shinfo->gso_size = mss_now;
+                shinfo->gso_type = sk->sk_gso_type;
         }
 }
 
@@ -1146,6 +1162,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
  */
 static void __pskb_trim_head(struct sk_buff *skb, int len)
 {
+        struct skb_shared_info *shinfo;
         int i, k, eat;
 
         eat = min_t(int, len, skb_headlen(skb));
@@ -1157,23 +1174,24 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
         }
         eat = len;
         k = 0;
-        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-                int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
+        shinfo = skb_shinfo(skb);
+        for (i = 0; i < shinfo->nr_frags; i++) {
+                int size = skb_frag_size(&shinfo->frags[i]);
 
                 if (size <= eat) {
                         skb_frag_unref(skb, i);
                         eat -= size;
                 } else {
-                        skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
+                        shinfo->frags[k] = shinfo->frags[i];
                         if (eat) {
-                                skb_shinfo(skb)->frags[k].page_offset += eat;
-                                skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
+                                shinfo->frags[k].page_offset += eat;
+                                skb_frag_size_sub(&shinfo->frags[k], eat);
                                 eat = 0;
                         }
                         k++;
                 }
         }
-        skb_shinfo(skb)->nr_frags = k;
+        shinfo->nr_frags = k;
 
         skb_reset_tail_pointer(skb);
         skb->data_len -= len;
@@ -1378,23 +1396,51 @@ static void tcp_cwnd_validate(struct sock *sk)
         }
 }
 
-/* Returns the portion of skb which can be sent right away without
- * introducing MSS oddities to segment boundaries. In rare cases where
- * mss_now != mss_cache, we will request caller to create a small skb
- * per input skb which could be mostly avoided here (if desired).
- *
- * We explicitly want to create a request for splitting write queue tail
- * to a small skb for Nagle purposes while avoiding unnecessary modulos,
- * thus all the complexity (cwnd_len is always MSS multiple which we
- * return whenever allowed by the other factors). Basically we need the
- * modulo only when the receiver window alone is the limiting factor or
- * when we would be allowed to send the split-due-to-Nagle skb fully.
+/* Minshall's variant of the Nagle send check. */
+static bool tcp_minshall_check(const struct tcp_sock *tp)
+{
+        return after(tp->snd_sml, tp->snd_una) &&
+                !after(tp->snd_sml, tp->snd_nxt);
+}
+
+/* Update snd_sml if this skb is under mss
+ * Note that a TSO packet might end with a sub-mss segment
+ * The test is really :
+ * if ((skb->len % mss) != 0)
+ *        tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
+ * But we can avoid doing the divide again given we already have
+ *  skb_pcount = skb->len / mss_now
+ */
+static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
+                                const struct sk_buff *skb)
+{
+        if (skb->len < tcp_skb_pcount(skb) * mss_now)
+                tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
+}
+
+/* Return false, if packet can be sent now without violation Nagle's rules:
+ * 1. It is full sized. (provided by caller in %partial bool)
+ * 2. Or it contains FIN. (already checked by caller)
+ * 3. Or TCP_CORK is not set, and TCP_NODELAY is set.
+ * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
+ *    With Minshall's modification: all sent small packets are ACKed.
  */
-static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb,
-                                        unsigned int mss_now, unsigned int max_segs)
+static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
+                            unsigned int mss_now, int nonagle)
+{
+        return partial &&
+                ((nonagle & TCP_NAGLE_CORK) ||
+                 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
+}
+/* Returns the portion of skb which can be sent right away */
+static unsigned int tcp_mss_split_point(const struct sock *sk,
+                                        const struct sk_buff *skb,
+                                        unsigned int mss_now,
+                                        unsigned int max_segs,
+                                        int nonagle)
 {
         const struct tcp_sock *tp = tcp_sk(sk);
-        u32 needed, window, max_len;
+        u32 partial, needed, window, max_len;
 
         window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
         max_len = mss_now * max_segs;
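The comment in tcp_minshall_update() above relies on the fact that, with skb_pcount = DIV_ROUND_UP(skb->len, mss_now), testing skb->len < pcount * mss_now is equivalent to testing skb->len % mss_now != 0. A small stand-alone C check of that equivalence (ordinary user-space code, not kernel code):

/* Verifies the divide-avoidance identity used by tcp_minshall_update(). */
#include <assert.h>
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        unsigned int mss = 1448;

        for (unsigned int len = 1; len <= 10 * mss; len++) {
                unsigned int pcount = DIV_ROUND_UP(len, mss);
                int by_modulo = (len % mss) != 0;
                int by_pcount = len < pcount * mss;

                assert(by_modulo == by_pcount);
        }
        printf("equivalence holds for mss=%u\n", mss);
        return 0;
}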
@@ -1407,7 +1453,15 @@ static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_b
         if (max_len <= needed)
                 return max_len;
 
-        return needed - needed % mss_now;
+        partial = needed % mss_now;
+        /* If last segment is not a full MSS, check if Nagle rules allow us
+         * to include this last segment in this skb.
+         * Otherwise, we'll split the skb at last MSS boundary
+         */
+        if (tcp_nagle_check(partial != 0, tp, mss_now, nonagle))
+                return needed - partial;
+
+        return needed;
 }
 
 /* Can at least one segment of SKB be sent right now, according to the
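To see what the new tail handling in tcp_mss_split_point() does for a window-limited sender, here is a worked example with hypothetical numbers, as a stand-alone sketch rather than the kernel function: with 4000 bytes allowed and an MSS of 1448, deferring the partial tail sends up to the last MSS boundary (2896 bytes), while the non-deferred case sends all 4000.

/* Illustrative only: mirrors the partial-tail decision, not the kernel code. */
#include <stdbool.h>
#include <stdio.h>

static unsigned int split_point(unsigned int needed, unsigned int mss,
                                bool nagle_defers_partial)
{
        unsigned int partial = needed % mss;

        if (partial && nagle_defers_partial)
                return needed - partial;        /* stop at the last MSS boundary */
        return needed;                          /* include the partial tail      */
}

int main(void)
{
        unsigned int mss = 1448, needed = 4000; /* 2 full segments + 1104 bytes */

        printf("defer tail: send %u bytes\n", split_point(needed, mss, true));
        printf("send tail : send %u bytes\n", split_point(needed, mss, false));
        return 0;
}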
@@ -1447,28 +1501,6 @@ static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
         return tso_segs;
 }
 
-/* Minshall's variant of the Nagle send check. */
-static inline bool tcp_minshall_check(const struct tcp_sock *tp)
-{
-        return after(tp->snd_sml, tp->snd_una) &&
-                !after(tp->snd_sml, tp->snd_nxt);
-}
-
-/* Return false, if packet can be sent now without violation Nagle's rules:
- * 1. It is full sized.
- * 2. Or it contains FIN. (already checked by caller)
- * 3. Or TCP_CORK is not set, and TCP_NODELAY is set.
- * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
- *    With Minshall's modification: all sent small packets are ACKed.
- */
-static inline bool tcp_nagle_check(const struct tcp_sock *tp,
-                                   const struct sk_buff *skb,
-                                   unsigned int mss_now, int nonagle)
-{
-        return skb->len < mss_now &&
-                ((nonagle & TCP_NAGLE_CORK) ||
-                 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
-}
 
 /* Return true if the Nagle test allows this packet to be
  * sent now.
@@ -1489,7 +1521,7 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf
         if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
                 return true;
 
-        if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
+        if (!tcp_nagle_check(skb->len < cur_mss, tp, cur_mss, nonagle))
                 return true;
 
         return false;
@@ -1884,7 +1916,15 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 
                 if (atomic_read(&sk->sk_wmem_alloc) > limit) {
                         set_bit(TSQ_THROTTLED, &tp->tsq_flags);
-                        break;
+                        /* It is possible TX completion already happened
+                         * before we set TSQ_THROTTLED, so we must
+                         * test again the condition.
+                         * We abuse smp_mb__after_clear_bit() because
+                         * there is no smp_mb__after_set_bit() yet
+                         */
+                        smp_mb__after_clear_bit();
+                        if (atomic_read(&sk->sk_wmem_alloc) > limit)
+                                break;
                 }
 
                 limit = mss_now;
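The comment added above describes a classic lost-wakeup race: a TX completion that runs between the counter read and the flag store may have already drained the queue while seeing the flag still clear, so the condition has to be re-tested after a full barrier. A stand-alone sketch of that set-flag/barrier/re-test pattern, using C11 atomics in place of set_bit() and smp_mb__after_clear_bit():

/* Toy model of the re-test-after-setting-the-throttled-flag pattern. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_uint wmem_alloc = 200000;  /* bytes still queued below us (example) */
static atomic_bool throttled;

static bool try_stop_sending(unsigned int limit)
{
        if (atomic_load(&wmem_alloc) <= limit)
                return false;                       /* keep sending */

        atomic_store(&throttled, true);             /* like set_bit(TSQ_THROTTLED) */
        atomic_thread_fence(memory_order_seq_cst);  /* full barrier */

        /* A completion may have run between the first read and the store,
         * seeing the flag still clear; look at the counter once more. */
        if (atomic_load(&wmem_alloc) > limit)
                return true;                        /* really stop, wait for wakeup */

        return false;                               /* condition vanished, keep going */
}

int main(void)
{
        printf("stop=%d\n", try_stop_sending(131072));
        return 0;
}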
@@ -1892,7 +1932,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
                 limit = tcp_mss_split_point(sk, skb, mss_now,
                                             min_t(unsigned int,
                                                   cwnd_quota,
-                                                  sk->sk_gso_max_segs));
+                                                  sk->sk_gso_max_segs),
+                                            nonagle);
 
                 if (skb->len > limit &&
                     unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
@@ -1956,7 +1997,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
         /* Schedule a loss probe in 2*RTT for SACK capable connections
          * in Open state, that are either limited by cwnd or application.
          */
-        if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out ||
+        if (sysctl_tcp_early_retrans < 3 || !tp->srtt || !tp->packets_out ||
             !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
                 return false;
 
@@ -2307,6 +2348,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
         struct tcp_sock *tp = tcp_sk(sk);
         struct inet_connection_sock *icsk = inet_csk(sk);
         unsigned int cur_mss;
+        int err;
 
         /* Inconslusive MTU probe */
         if (icsk->icsk_mtup.probe_size) {
@@ -2370,11 +2412,15 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
                      skb_headroom(skb) >= 0xFFFF)) {
                 struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
                                                    GFP_ATOMIC);
-                return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
-                              -ENOBUFS;
+                err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
+                             -ENOBUFS;
         } else {
-                return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
+                err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
         }
+
+        if (likely(!err))
+                TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
+        return err;
 }
 
 int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
@@ -2756,7 +2802,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 EXPORT_SYMBOL(tcp_make_synack);
 
 /* Do all connect socket setups that can be done AF independent. */
-void tcp_connect_init(struct sock *sk)
+static void tcp_connect_init(struct sock *sk)
 {
         const struct dst_entry *dst = __sk_dst_get(sk);
         struct tcp_sock *tp = tcp_sk(sk);
@@ -2878,7 +2924,12 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
         space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
                 MAX_TCP_OPTION_SPACE;
 
-        syn_data = skb_copy_expand(syn, skb_headroom(syn), space,
+        space = min_t(size_t, space, fo->size);
+
+        /* limit to order-0 allocations */
+        space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
+
+        syn_data = skb_copy_expand(syn, MAX_TCP_HEADER, space,
                                    sk->sk_allocation);
         if (syn_data == NULL)
                 goto fallback;