| author | H. Peter Anvin <hpa@linux.intel.com> | 2014-02-07 14:27:30 -0500 |
|---|---|---|
| committer | H. Peter Anvin <hpa@linux.intel.com> | 2014-02-07 14:27:30 -0500 |
| commit | a3b072cd180c12e8fe0ece9487b9065808327640 | |
| tree | 62b982041be84748852d77cdf6ca5639ef40858f /net/ipv4/tcp_output.c | |
| parent | 75a1ba5b2c529db60ca49626bcaf0bddf4548438 | |
| parent | 081cd62a010f97b5bc1d2b0cd123c5abc692b68a | |
Merge tag 'efi-urgent' into x86/urgent
* Avoid WARN_ON() when mapping BGRT on Baytrail (EFI 32-bit).
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'net/ipv4/tcp_output.c')
| -rw-r--r-- | net/ipv4/tcp_output.c | 141 |
1 file changed, 81 insertions, 60 deletions
```diff
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7820f3a7dd70..03d26b85eab8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -363,15 +363,17 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
  */
 static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
 {
+	struct skb_shared_info *shinfo = skb_shinfo(skb);
+
 	skb->ip_summed = CHECKSUM_PARTIAL;
 	skb->csum = 0;
 
 	TCP_SKB_CB(skb)->tcp_flags = flags;
 	TCP_SKB_CB(skb)->sacked = 0;
 
-	skb_shinfo(skb)->gso_segs = 1;
-	skb_shinfo(skb)->gso_size = 0;
-	skb_shinfo(skb)->gso_type = 0;
+	shinfo->gso_segs = 1;
+	shinfo->gso_size = 0;
+	shinfo->gso_type = 0;
 
 	TCP_SKB_CB(skb)->seq = seq;
 	if (flags & (TCPHDR_SYN | TCPHDR_FIN))
```
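The hunk above caches the result of `skb_shinfo(skb)` in a local `shinfo` pointer instead of re-deriving it for every field store, and the same refactor recurs in several hunks below. A minimal userspace sketch of the pattern, using illustrative stand-in types rather than the real kernel structures:

```c
#include <stdio.h>

/* Stand-ins for the kernel types; the layouts are illustrative only. */
struct shared_info { int gso_segs, gso_size, gso_type; };
struct buf { struct shared_info info; };

/* Mimics skb_shinfo(): re-derives the pointer on every call. */
static struct shared_info *buf_shinfo(struct buf *b)
{
	return &b->info;
}

static void init_nondata(struct buf *b)
{
	/* One lookup, cached in a local, then reused for each store. */
	struct shared_info *shinfo = buf_shinfo(b);

	shinfo->gso_segs = 1;
	shinfo->gso_size = 0;
	shinfo->gso_type = 0;
}

int main(void)
{
	struct buf b = { { -1, -1, -1 } };

	init_nondata(&b);
	printf("gso_segs=%d gso_size=%d gso_type=%d\n",
	       b.info.gso_segs, b.info.gso_size, b.info.gso_type);
	return 0;
}
```

In the kernel, `skb_shinfo()` expands to a cast of `skb_end_pointer(skb)`, so caching it avoids recomputing that pointer on every access.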
```diff
@@ -406,7 +408,7 @@ struct tcp_out_options {
  * Beware: Something in the Internet is very sensitive to the ordering of
  * TCP options, we learned this through the hard way, so be careful here.
  * Luckily we can at least blame others for their non-compliance but from
- * inter-operatibility perspective it seems that we're somewhat stuck with
+ * inter-operability perspective it seems that we're somewhat stuck with
  * the ordering which we have been using if we want to keep working with
  * those broken things (not that it currently hurts anybody as there isn't
  * particular reason why the ordering would need to be changed).
@@ -679,7 +681,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
  *
  * Its important tcp_wfree() can be replaced by sock_wfree() in the event skb
  * needs to be reallocated in a driver.
- * The invariant being skb->truesize substracted from sk->sk_wmem_alloc
+ * The invariant being skb->truesize subtracted from sk->sk_wmem_alloc
  *
  * Since transmit from skb destructor is forbidden, we use a tasklet
  * to process all sockets that eventually need to send more skbs.
@@ -699,9 +701,9 @@ static void tcp_tsq_handler(struct sock *sk)
 		tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC);
 }
 /*
- * One tasklest per cpu tries to send more skbs.
+ * One tasklet per cpu tries to send more skbs.
  * We run in tasklet context but need to disable irqs when
- * transfering tsq->head because tcp_wfree() might
+ * transferring tsq->head because tcp_wfree() might
  * interrupt us (non NAPI drivers)
  */
 static void tcp_tasklet_func(unsigned long data)
@@ -795,7 +797,7 @@ void __init tcp_tasklet_init(void)
 
 /*
  * Write buffer destructor automatically called from kfree_skb.
- * We cant xmit new skbs from this context, as we might already
+ * We can't xmit new skbs from this context, as we might already
  * hold qdisc lock.
  */
 void tcp_wfree(struct sk_buff *skb)
@@ -986,6 +988,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
 				 unsigned int mss_now)
 {
+	struct skb_shared_info *shinfo = skb_shinfo(skb);
+
 	/* Make sure we own this skb before messing gso_size/gso_segs */
 	WARN_ON_ONCE(skb_cloned(skb));
 
@@ -993,13 +997,13 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
 		/* Avoid the costly divide in the normal
 		 * non-TSO case.
 		 */
-		skb_shinfo(skb)->gso_segs = 1;
-		skb_shinfo(skb)->gso_size = 0;
-		skb_shinfo(skb)->gso_type = 0;
+		shinfo->gso_segs = 1;
+		shinfo->gso_size = 0;
+		shinfo->gso_type = 0;
 	} else {
-		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
-		skb_shinfo(skb)->gso_size = mss_now;
-		skb_shinfo(skb)->gso_type = sk->sk_gso_type;
+		shinfo->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
+		shinfo->gso_size = mss_now;
+		shinfo->gso_type = sk->sk_gso_type;
 	}
 }
 
```
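In the TSO branch above, `gso_segs` counts how many MSS-sized segments the skb spans, computed with a rounding-up integer division; the sub-MSS case is special-cased to skip the divide entirely. A self-contained check of the arithmetic (the macro body matches the kernel's `DIV_ROUND_UP()` definition; the MSS value is illustrative):

```c
#include <assert.h>
#include <stdio.h>

/* Same rounding-up division as the kernel's DIV_ROUND_UP() macro. */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int mss = 1448;

	/* 4000 bytes at MSS 1448 span three segments, the last one short. */
	assert(DIV_ROUND_UP(4000u, mss) == 3);

	/* An exact multiple needs no partial trailing segment. */
	assert(DIV_ROUND_UP(2u * mss, mss) == 2);

	printf("gso_segs for 4000 bytes at mss %u: %u\n",
	       mss, DIV_ROUND_UP(4000u, mss));
	return 0;
}
```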
```diff
@@ -1146,6 +1150,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
  */
 static void __pskb_trim_head(struct sk_buff *skb, int len)
 {
+	struct skb_shared_info *shinfo;
 	int i, k, eat;
 
 	eat = min_t(int, len, skb_headlen(skb));
@@ -1157,23 +1162,24 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
 	}
 	eat = len;
 	k = 0;
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
+	shinfo = skb_shinfo(skb);
+	for (i = 0; i < shinfo->nr_frags; i++) {
+		int size = skb_frag_size(&shinfo->frags[i]);
 
 		if (size <= eat) {
 			skb_frag_unref(skb, i);
 			eat -= size;
 		} else {
-			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
+			shinfo->frags[k] = shinfo->frags[i];
 			if (eat) {
-				skb_shinfo(skb)->frags[k].page_offset += eat;
-				skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
+				shinfo->frags[k].page_offset += eat;
+				skb_frag_size_sub(&shinfo->frags[k], eat);
 				eat = 0;
 			}
 			k++;
 		}
 	}
-	skb_shinfo(skb)->nr_frags = k;
+	shinfo->nr_frags = k;
 
 	skb_reset_tail_pointer(skb);
 	skb->data_len -= len;
```
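The loop above consumes `eat` bytes from the front of the skb's page-fragment array and compacts the survivors toward index 0. A simplified userspace sketch of the same compaction, with fragments reduced to bare sizes and the kernel's page refcounting (`skb_frag_unref()`) and offset bookkeeping omitted:

```c
#include <stdio.h>

/* Trim 'eat' bytes from the front of a fragment-size array, compacting
 * the surviving entries; returns the new fragment count, mirroring the
 * shinfo->nr_frags = k assignment above.
 */
static int trim_front(int *size, int nr, int eat)
{
	int i, k = 0;

	for (i = 0; i < nr; i++) {
		if (size[i] <= eat) {
			eat -= size[i];		 /* fragment fully consumed */
		} else {
			size[k] = size[i] - eat; /* shrink partially eaten frag */
			eat = 0;		 /* later frags copy unchanged */
			k++;
		}
	}
	return k;
}

int main(void)
{
	int frags[] = { 100, 200, 300 };
	int k = trim_front(frags, 3, 250);

	/* 250 bytes eat the 100-byte frag plus 150 of the 200-byte one. */
	printf("nr_frags=%d frag0=%d frag1=%d\n", k, frags[0], frags[1]);
	return 0;
}
```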
```diff
@@ -1378,23 +1384,51 @@ static void tcp_cwnd_validate(struct sock *sk)
 	}
 }
 
-/* Returns the portion of skb which can be sent right away without
- * introducing MSS oddities to segment boundaries. In rare cases where
- * mss_now != mss_cache, we will request caller to create a small skb
- * per input skb which could be mostly avoided here (if desired).
- *
- * We explicitly want to create a request for splitting write queue tail
- * to a small skb for Nagle purposes while avoiding unnecessary modulos,
- * thus all the complexity (cwnd_len is always MSS multiple which we
- * return whenever allowed by the other factors). Basically we need the
- * modulo only when the receiver window alone is the limiting factor or
- * when we would be allowed to send the split-due-to-Nagle skb fully.
+/* Minshall's variant of the Nagle send check. */
+static bool tcp_minshall_check(const struct tcp_sock *tp)
+{
+	return after(tp->snd_sml, tp->snd_una) &&
+		!after(tp->snd_sml, tp->snd_nxt);
+}
+
+/* Update snd_sml if this skb is under mss
+ * Note that a TSO packet might end with a sub-mss segment
+ * The test is really :
+ * if ((skb->len % mss) != 0)
+ *        tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
+ * But we can avoid doing the divide again given we already have
+ *  skb_pcount = skb->len / mss_now
+ */
+static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
+				const struct sk_buff *skb)
+{
+	if (skb->len < tcp_skb_pcount(skb) * mss_now)
+		tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
+}
+
+/* Return false, if packet can be sent now without violation Nagle's rules:
+ * 1. It is full sized. (provided by caller in %partial bool)
+ * 2. Or it contains FIN. (already checked by caller)
+ * 3. Or TCP_CORK is not set, and TCP_NODELAY is set.
+ * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
+ *    With Minshall's modification: all sent small packets are ACKed.
  */
-static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb,
-					unsigned int mss_now, unsigned int max_segs)
+static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
+			    unsigned int mss_now, int nonagle)
+{
+	return partial &&
+		((nonagle & TCP_NAGLE_CORK) ||
+		 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
+}
+/* Returns the portion of skb which can be sent right away */
+static unsigned int tcp_mss_split_point(const struct sock *sk,
+					const struct sk_buff *skb,
+					unsigned int mss_now,
+					unsigned int max_segs,
+					int nonagle)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
-	u32 needed, window, max_len;
+	u32 partial, needed, window, max_len;
 
 	window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
 	max_len = mss_now * max_segs;
```
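The comment in `tcp_minshall_update()` claims the modulo test `(skb->len % mss) != 0` can be replaced by `skb->len < tcp_skb_pcount(skb) * mss_now`, because the packet count was already computed as `DIV_ROUND_UP(skb->len, mss_now)` for `gso_segs`. A brute-force verification of that equivalence, as a standalone sketch:

```c
#include <assert.h>
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	/* Check: len % mss != 0  <=>  len < DIV_ROUND_UP(len, mss) * mss */
	for (unsigned int mss = 1; mss <= 64; mss++)
		for (unsigned int len = 1; len <= 4096; len++) {
			unsigned int pcount = DIV_ROUND_UP(len, mss);

			assert((len % mss != 0) == (len < pcount * mss));
		}
	printf("equivalence holds for all tested (len, mss) pairs\n");
	return 0;
}
```

Both sides are true exactly when `mss_now` does not divide `skb->len`, since `DIV_ROUND_UP(len, mss) * mss` rounds `len` up to the next MSS boundary.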
```diff
@@ -1407,7 +1441,15 @@ static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_b
 	if (max_len <= needed)
 		return max_len;
 
-	return needed - needed % mss_now;
+	partial = needed % mss_now;
+	/* If last segment is not a full MSS, check if Nagle rules allow us
+	 * to include this last segment in this skb.
+	 * Otherwise, we'll split the skb at last MSS boundary
+	 */
+	if (tcp_nagle_check(partial != 0, tp, mss_now, nonagle))
+		return needed - partial;
+
+	return needed;
 }
 
 /* Can at least one segment of SKB be sent right now, according to the
```
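With the two hunks above, `tcp_mss_split_point()` applies the modulo only when `tcp_nagle_check()` says the sub-MSS tail must be held back; otherwise the partial tail is sent along with the full segments. A simplified userspace model of the resulting decision, with the window and queue handling reduced to plain parameters (the names here are ours, not the kernel's):

```c
#include <stdbool.h>
#include <stdio.h>

/* How many bytes of an skb to send right away (simplified). */
static unsigned int mss_split_point(unsigned int skb_len, unsigned int window,
				    unsigned int mss, unsigned int max_segs,
				    bool nagle_forbids_partial)
{
	unsigned int max_len = mss * max_segs;
	unsigned int needed = skb_len < window ? skb_len : window;
	unsigned int partial;

	if (max_len <= needed)
		return max_len;		/* segment-count limit dominates */

	partial = needed % mss;
	if (partial && nagle_forbids_partial)
		return needed - partial; /* split at the last MSS boundary */

	return needed;			/* include the sub-MSS tail */
}

int main(void)
{
	/* 4000-byte skb, ample window, MSS 1448: the tail is 1104 bytes. */
	printf("Nagle allows tail:  %u bytes\n",
	       mss_split_point(4000, 10000, 1448, 44, false));
	printf("Nagle forbids tail: %u bytes\n",
	       mss_split_point(4000, 10000, 1448, 44, true));
	return 0;
}
```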
```diff
@@ -1447,28 +1489,6 @@ static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
 	return tso_segs;
 }
 
-/* Minshall's variant of the Nagle send check. */
-static inline bool tcp_minshall_check(const struct tcp_sock *tp)
-{
-	return after(tp->snd_sml, tp->snd_una) &&
-	       !after(tp->snd_sml, tp->snd_nxt);
-}
-
-/* Return false, if packet can be sent now without violation Nagle's rules:
- * 1. It is full sized.
- * 2. Or it contains FIN. (already checked by caller)
- * 3. Or TCP_CORK is not set, and TCP_NODELAY is set.
- * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
- *    With Minshall's modification: all sent small packets are ACKed.
- */
-static inline bool tcp_nagle_check(const struct tcp_sock *tp,
-				   const struct sk_buff *skb,
-				   unsigned int mss_now, int nonagle)
-{
-	return skb->len < mss_now &&
-	       ((nonagle & TCP_NAGLE_CORK) ||
-	        (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
-}
 
 /* Return true if the Nagle test allows this packet to be
  * sent now.
@@ -1489,7 +1509,7 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf
 	if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
 		return true;
 
-	if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
+	if (!tcp_nagle_check(skb->len < cur_mss, tp, cur_mss, nonagle))
 		return true;
 
 	return false;
```
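Rule 3 of the Nagle check above ("TCP_CORK is not set, and TCP_NODELAY is set") is controlled from userspace: setting `TCP_NODELAY` puts the socket into `TCP_NAGLE_OFF` mode, so sub-MSS segments are sent immediately. A minimal example of the application-side toggle, using only the standard sockets API:

```c
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	int one = 1;

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	/* Disable Nagle: small writes are no longer coalesced. */
	if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)) < 0)
		perror("setsockopt(TCP_NODELAY)");

	close(fd);
	return 0;
}
```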
```diff
@@ -1892,7 +1912,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		limit = tcp_mss_split_point(sk, skb, mss_now,
 					    min_t(unsigned int,
 						  cwnd_quota,
-						  sk->sk_gso_max_segs));
+						  sk->sk_gso_max_segs),
+					    nonagle);
 
 		if (skb->len > limit &&
 		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
@@ -2756,7 +2777,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 EXPORT_SYMBOL(tcp_make_synack);
 
 /* Do all connect socket setups that can be done AF independent. */
-void tcp_connect_init(struct sock *sk)
+static void tcp_connect_init(struct sock *sk)
 {
 	const struct dst_entry *dst = __sk_dst_get(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
```
