Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 368 |
1 file changed, 171 insertions(+), 197 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a906e0200ff2..00a41499d52c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -73,7 +73,6 @@ | |||
73 | #include <net/inet_common.h> | 73 | #include <net/inet_common.h> |
74 | #include <linux/ipsec.h> | 74 | #include <linux/ipsec.h> |
75 | #include <asm/unaligned.h> | 75 | #include <asm/unaligned.h> |
76 | #include <net/netdma.h> | ||
77 | #include <linux/errqueue.h> | 76 | #include <linux/errqueue.h> |
78 | 77 | ||
79 | int sysctl_tcp_timestamps __read_mostly = 1; | 78 | int sysctl_tcp_timestamps __read_mostly = 1; |
@@ -201,28 +200,25 @@ static inline bool tcp_in_quickack_mode(const struct sock *sk) | |||
201 | return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong; | 200 | return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong; |
202 | } | 201 | } |
203 | 202 | ||
204 | static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp) | 203 | static void tcp_ecn_queue_cwr(struct tcp_sock *tp) |
205 | { | 204 | { |
206 | if (tp->ecn_flags & TCP_ECN_OK) | 205 | if (tp->ecn_flags & TCP_ECN_OK) |
207 | tp->ecn_flags |= TCP_ECN_QUEUE_CWR; | 206 | tp->ecn_flags |= TCP_ECN_QUEUE_CWR; |
208 | } | 207 | } |
209 | 208 | ||
210 | static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb) | 209 | static void tcp_ecn_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb) |
211 | { | 210 | { |
212 | if (tcp_hdr(skb)->cwr) | 211 | if (tcp_hdr(skb)->cwr) |
213 | tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; | 212 | tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; |
214 | } | 213 | } |
215 | 214 | ||
216 | static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp) | 215 | static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp) |
217 | { | 216 | { |
218 | tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; | 217 | tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; |
219 | } | 218 | } |
220 | 219 | ||
221 | static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *skb) | 220 | static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb) |
222 | { | 221 | { |
223 | if (!(tp->ecn_flags & TCP_ECN_OK)) | ||
224 | return; | ||
225 | |||
226 | switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) { | 222 | switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) { |
227 | case INET_ECN_NOT_ECT: | 223 | case INET_ECN_NOT_ECT: |
228 | /* Funny extension: if ECT is not set on a segment, | 224 | /* Funny extension: if ECT is not set on a segment, |
@@ -233,30 +229,43 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *s | |||
233 | tcp_enter_quickack_mode((struct sock *)tp); | 229 | tcp_enter_quickack_mode((struct sock *)tp); |
234 | break; | 230 | break; |
235 | case INET_ECN_CE: | 231 | case INET_ECN_CE: |
232 | if (tcp_ca_needs_ecn((struct sock *)tp)) | ||
233 | tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_IS_CE); | ||
234 | |||
236 | if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { | 235 | if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { |
237 | /* Better not delay acks, sender can have a very low cwnd */ | 236 | /* Better not delay acks, sender can have a very low cwnd */ |
238 | tcp_enter_quickack_mode((struct sock *)tp); | 237 | tcp_enter_quickack_mode((struct sock *)tp); |
239 | tp->ecn_flags |= TCP_ECN_DEMAND_CWR; | 238 | tp->ecn_flags |= TCP_ECN_DEMAND_CWR; |
240 | } | 239 | } |
241 | /* fallinto */ | 240 | tp->ecn_flags |= TCP_ECN_SEEN; |
241 | break; | ||
242 | default: | 242 | default: |
243 | if (tcp_ca_needs_ecn((struct sock *)tp)) | ||
244 | tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_NO_CE); | ||
243 | tp->ecn_flags |= TCP_ECN_SEEN; | 245 | tp->ecn_flags |= TCP_ECN_SEEN; |
246 | break; | ||
244 | } | 247 | } |
245 | } | 248 | } |
246 | 249 | ||
247 | static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th) | 250 | static void tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb) |
251 | { | ||
252 | if (tp->ecn_flags & TCP_ECN_OK) | ||
253 | __tcp_ecn_check_ce(tp, skb); | ||
254 | } | ||
255 | |||
256 | static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th) | ||
248 | { | 257 | { |
249 | if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr)) | 258 | if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr)) |
250 | tp->ecn_flags &= ~TCP_ECN_OK; | 259 | tp->ecn_flags &= ~TCP_ECN_OK; |
251 | } | 260 | } |
252 | 261 | ||
253 | static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th) | 262 | static void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th) |
254 | { | 263 | { |
255 | if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr)) | 264 | if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr)) |
256 | tp->ecn_flags &= ~TCP_ECN_OK; | 265 | tp->ecn_flags &= ~TCP_ECN_OK; |
257 | } | 266 | } |
258 | 267 | ||
259 | static bool TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th) | 268 | static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th) |
260 | { | 269 | { |
261 | if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK)) | 270 | if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK)) |
262 | return true; | 271 | return true; |
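Note: the hunks above rename the old TCP_ECN_* inline helpers to lower-case tcp_ecn_*() functions and split the CE check into a guarded wrapper plus a worker, so the TCP_ECN_OK test happens once and congestion-control modules that request ECN (tcp_ca_needs_ecn()) receive explicit CA_EVENT_ECN_IS_CE / CA_EVENT_ECN_NO_CE notifications. Reassembled from the right-hand (new) column, with the unchanged branches elided as comments, the resulting shape is:

static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
{
	switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
	case INET_ECN_NOT_ECT:
		/* ... enter quickack mode if a CE was seen recently ... */
		break;
	case INET_ECN_CE:
		if (tcp_ca_needs_ecn((struct sock *)tp))
			tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_IS_CE);
		/* ... demand CWR from the peer, enter quickack mode ... */
		tp->ecn_flags |= TCP_ECN_SEEN;
		break;
	default:
		if (tcp_ca_needs_ecn((struct sock *)tp))
			tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_NO_CE);
		tp->ecn_flags |= TCP_ECN_SEEN;
		break;
	}
}

static void tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
{
	if (tp->ecn_flags & TCP_ECN_OK)
		__tcp_ecn_check_ce(tp, skb);
}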
@@ -653,7 +662,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) | |||
653 | } | 662 | } |
654 | icsk->icsk_ack.lrcvtime = now; | 663 | icsk->icsk_ack.lrcvtime = now; |
655 | 664 | ||
656 | TCP_ECN_check_ce(tp, skb); | 665 | tcp_ecn_check_ce(tp, skb); |
657 | 666 | ||
658 | if (skb->len >= 128) | 667 | if (skb->len >= 128) |
659 | tcp_grow_window(sk, skb); | 668 | tcp_grow_window(sk, skb); |
@@ -1295,9 +1304,9 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, | |||
1295 | TCP_SKB_CB(prev)->end_seq += shifted; | 1304 | TCP_SKB_CB(prev)->end_seq += shifted; |
1296 | TCP_SKB_CB(skb)->seq += shifted; | 1305 | TCP_SKB_CB(skb)->seq += shifted; |
1297 | 1306 | ||
1298 | skb_shinfo(prev)->gso_segs += pcount; | 1307 | tcp_skb_pcount_add(prev, pcount); |
1299 | BUG_ON(skb_shinfo(skb)->gso_segs < pcount); | 1308 | BUG_ON(tcp_skb_pcount(skb) < pcount); |
1300 | skb_shinfo(skb)->gso_segs -= pcount; | 1309 | tcp_skb_pcount_add(skb, -pcount); |
1301 | 1310 | ||
1302 | /* When we're adding to gso_segs == 1, gso_size will be zero, | 1311 | /* When we're adding to gso_segs == 1, gso_size will be zero, |
1303 | * in theory this shouldn't be necessary but as long as DSACK | 1312 | * in theory this shouldn't be necessary but as long as DSACK |
@@ -1310,7 +1319,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, | |||
1310 | } | 1319 | } |
1311 | 1320 | ||
1312 | /* CHECKME: To clear or not to clear? Mimics normal skb currently */ | 1321 | /* CHECKME: To clear or not to clear? Mimics normal skb currently */ |
1313 | if (skb_shinfo(skb)->gso_segs <= 1) { | 1322 | if (tcp_skb_pcount(skb) <= 1) { |
1314 | skb_shinfo(skb)->gso_size = 0; | 1323 | skb_shinfo(skb)->gso_size = 0; |
1315 | skb_shinfo(skb)->gso_type = 0; | 1324 | skb_shinfo(skb)->gso_type = 0; |
1316 | } | 1325 | } |
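Note: tcp_shifted_skb() now goes through tcp_skb_pcount()/tcp_skb_pcount_add() instead of touching skb_shinfo(skb)->gso_segs directly. The accessors are defined elsewhere in this series, not in this file; a minimal sketch of the assumed pattern (centralizing where the segment count is stored so it can be moved out of the shared info) would be:

/* Sketch only: the real helpers live in include/net/tcp.h; the
 * tcp_gso_segs field in TCP_SKB_CB() is an assumption here, the point
 * is merely that callers stop hard-coding skb_shinfo(skb)->gso_segs. */
static inline int tcp_skb_pcount(const struct sk_buff *skb)
{
	return TCP_SKB_CB(skb)->tcp_gso_segs;
}

static inline void tcp_skb_pcount_add(struct sk_buff *skb, int segs)
{
	TCP_SKB_CB(skb)->tcp_gso_segs += segs;
}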
@@ -1888,21 +1897,21 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp) | |||
1888 | tp->sacked_out = 0; | 1897 | tp->sacked_out = 0; |
1889 | } | 1898 | } |
1890 | 1899 | ||
1891 | static void tcp_clear_retrans_partial(struct tcp_sock *tp) | 1900 | void tcp_clear_retrans(struct tcp_sock *tp) |
1892 | { | 1901 | { |
1893 | tp->retrans_out = 0; | 1902 | tp->retrans_out = 0; |
1894 | tp->lost_out = 0; | 1903 | tp->lost_out = 0; |
1895 | |||
1896 | tp->undo_marker = 0; | 1904 | tp->undo_marker = 0; |
1897 | tp->undo_retrans = -1; | 1905 | tp->undo_retrans = -1; |
1906 | tp->fackets_out = 0; | ||
1907 | tp->sacked_out = 0; | ||
1898 | } | 1908 | } |
1899 | 1909 | ||
1900 | void tcp_clear_retrans(struct tcp_sock *tp) | 1910 | static inline void tcp_init_undo(struct tcp_sock *tp) |
1901 | { | 1911 | { |
1902 | tcp_clear_retrans_partial(tp); | 1912 | tp->undo_marker = tp->snd_una; |
1903 | 1913 | /* Retransmission still in flight may cause DSACKs later. */ | |
1904 | tp->fackets_out = 0; | 1914 | tp->undo_retrans = tp->retrans_out ? : -1; |
1905 | tp->sacked_out = 0; | ||
1906 | } | 1915 | } |
1907 | 1916 | ||
1908 | /* Enter Loss state. If we detect SACK reneging, forget all SACK information | 1917 | /* Enter Loss state. If we detect SACK reneging, forget all SACK information |
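Note: tcp_clear_retrans_partial() disappears; tcp_clear_retrans() absorbs the fackets_out/sacked_out clearing, and the undo bookkeeping that used to be open-coded in two places is collected into tcp_init_undo(). Both tcp_enter_loss() and tcp_enter_recovery() (see their hunks below) now seed undo state the same way:

static inline void tcp_init_undo(struct tcp_sock *tp)
{
	tp->undo_marker = tp->snd_una;
	/* Retransmission still in flight may cause DSACKs later. */
	tp->undo_retrans = tp->retrans_out ? : -1;
}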
@@ -1925,18 +1934,18 @@ void tcp_enter_loss(struct sock *sk) | |||
1925 | tp->prior_ssthresh = tcp_current_ssthresh(sk); | 1934 | tp->prior_ssthresh = tcp_current_ssthresh(sk); |
1926 | tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); | 1935 | tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); |
1927 | tcp_ca_event(sk, CA_EVENT_LOSS); | 1936 | tcp_ca_event(sk, CA_EVENT_LOSS); |
1937 | tcp_init_undo(tp); | ||
1928 | } | 1938 | } |
1929 | tp->snd_cwnd = 1; | 1939 | tp->snd_cwnd = 1; |
1930 | tp->snd_cwnd_cnt = 0; | 1940 | tp->snd_cwnd_cnt = 0; |
1931 | tp->snd_cwnd_stamp = tcp_time_stamp; | 1941 | tp->snd_cwnd_stamp = tcp_time_stamp; |
1932 | 1942 | ||
1933 | tcp_clear_retrans_partial(tp); | 1943 | tp->retrans_out = 0; |
1944 | tp->lost_out = 0; | ||
1934 | 1945 | ||
1935 | if (tcp_is_reno(tp)) | 1946 | if (tcp_is_reno(tp)) |
1936 | tcp_reset_reno_sack(tp); | 1947 | tcp_reset_reno_sack(tp); |
1937 | 1948 | ||
1938 | tp->undo_marker = tp->snd_una; | ||
1939 | |||
1940 | skb = tcp_write_queue_head(sk); | 1949 | skb = tcp_write_queue_head(sk); |
1941 | is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED); | 1950 | is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED); |
1942 | if (is_reneg) { | 1951 | if (is_reneg) { |
@@ -1950,9 +1959,6 @@ void tcp_enter_loss(struct sock *sk) | |||
1950 | if (skb == tcp_send_head(sk)) | 1959 | if (skb == tcp_send_head(sk)) |
1951 | break; | 1960 | break; |
1952 | 1961 | ||
1953 | if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) | ||
1954 | tp->undo_marker = 0; | ||
1955 | |||
1956 | TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED; | 1962 | TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED; |
1957 | if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || is_reneg) { | 1963 | if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || is_reneg) { |
1958 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; | 1964 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; |
@@ -1972,7 +1978,7 @@ void tcp_enter_loss(struct sock *sk) | |||
1972 | sysctl_tcp_reordering); | 1978 | sysctl_tcp_reordering); |
1973 | tcp_set_ca_state(sk, TCP_CA_Loss); | 1979 | tcp_set_ca_state(sk, TCP_CA_Loss); |
1974 | tp->high_seq = tp->snd_nxt; | 1980 | tp->high_seq = tp->snd_nxt; |
1975 | TCP_ECN_queue_cwr(tp); | 1981 | tcp_ecn_queue_cwr(tp); |
1976 | 1982 | ||
1977 | /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous | 1983 | /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous |
1978 | * loss recovery is underway except recurring timeout(s) on | 1984 | * loss recovery is underway except recurring timeout(s) on |
@@ -2364,7 +2370,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) | |||
2364 | 2370 | ||
2365 | if (tp->prior_ssthresh > tp->snd_ssthresh) { | 2371 | if (tp->prior_ssthresh > tp->snd_ssthresh) { |
2366 | tp->snd_ssthresh = tp->prior_ssthresh; | 2372 | tp->snd_ssthresh = tp->prior_ssthresh; |
2367 | TCP_ECN_withdraw_cwr(tp); | 2373 | tcp_ecn_withdraw_cwr(tp); |
2368 | } | 2374 | } |
2369 | } else { | 2375 | } else { |
2370 | tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh); | 2376 | tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh); |
@@ -2494,7 +2500,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk) | |||
2494 | tp->prr_delivered = 0; | 2500 | tp->prr_delivered = 0; |
2495 | tp->prr_out = 0; | 2501 | tp->prr_out = 0; |
2496 | tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); | 2502 | tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); |
2497 | TCP_ECN_queue_cwr(tp); | 2503 | tcp_ecn_queue_cwr(tp); |
2498 | } | 2504 | } |
2499 | 2505 | ||
2500 | static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked, | 2506 | static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked, |
@@ -2671,8 +2677,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) | |||
2671 | NET_INC_STATS_BH(sock_net(sk), mib_idx); | 2677 | NET_INC_STATS_BH(sock_net(sk), mib_idx); |
2672 | 2678 | ||
2673 | tp->prior_ssthresh = 0; | 2679 | tp->prior_ssthresh = 0; |
2674 | tp->undo_marker = tp->snd_una; | 2680 | tcp_init_undo(tp); |
2675 | tp->undo_retrans = tp->retrans_out ? : -1; | ||
2676 | 2681 | ||
2677 | if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { | 2682 | if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { |
2678 | if (!ece_ack) | 2683 | if (!ece_ack) |
@@ -2971,7 +2976,8 @@ void tcp_rearm_rto(struct sock *sk) | |||
2971 | if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || | 2976 | if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || |
2972 | icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { | 2977 | icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { |
2973 | struct sk_buff *skb = tcp_write_queue_head(sk); | 2978 | struct sk_buff *skb = tcp_write_queue_head(sk); |
2974 | const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto; | 2979 | const u32 rto_time_stamp = |
2980 | tcp_skb_timestamp(skb) + rto; | ||
2975 | s32 delta = (s32)(rto_time_stamp - tcp_time_stamp); | 2981 | s32 delta = (s32)(rto_time_stamp - tcp_time_stamp); |
2976 | /* delta may not be positive if the socket is locked | 2982 | /* delta may not be positive if the socket is locked |
2977 | * when the retrans timer fires and is rescheduled. | 2983 | * when the retrans timer fires and is rescheduled. |
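Note: tcp_rearm_rto() stops reading TCP_SKB_CB(skb)->when and uses tcp_skb_timestamp(skb); elsewhere in this series the "when" field is retired in favour of the skb_mstamp-based send time. The helper is not defined in this file; it is assumed to reduce to something like:

/* Assumed shape of the helper (defined in include/net/tcp.h in this
 * series): returns the send time of the skb in jiffies. */
static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
{
	return skb->skb_mstamp.stamp_jiffies;
}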
@@ -3211,9 +3217,10 @@ static void tcp_ack_probe(struct sock *sk) | |||
3211 | * This function is not for random using! | 3217 | * This function is not for random using! |
3212 | */ | 3218 | */ |
3213 | } else { | 3219 | } else { |
3220 | unsigned long when = inet_csk_rto_backoff(icsk, TCP_RTO_MAX); | ||
3221 | |||
3214 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, | 3222 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, |
3215 | min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX), | 3223 | when, TCP_RTO_MAX); |
3216 | TCP_RTO_MAX); | ||
3217 | } | 3224 | } |
3218 | } | 3225 | } |
3219 | 3226 | ||
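Note: tcp_ack_probe() replaces the open-coded "min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX)" with inet_csk_rto_backoff(). The helper is defined in include/net/inet_connection_sock.h, not in this patch; the expected equivalence (note the widening to u64, which keeps the shift from overflowing at large backoff values) is roughly:

/* Assumed definition, shown for reference only */
static inline unsigned long
inet_csk_rto_backoff(const struct inet_connection_sock *icsk,
		     unsigned long max_when)
{
	u64 when = (u64)icsk->icsk_rto << icsk->icsk_backoff;

	return (unsigned long)min_t(u64, when, max_when);
}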
@@ -3364,6 +3371,14 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) | |||
3364 | } | 3371 | } |
3365 | } | 3372 | } |
3366 | 3373 | ||
3374 | static inline void tcp_in_ack_event(struct sock *sk, u32 flags) | ||
3375 | { | ||
3376 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
3377 | |||
3378 | if (icsk->icsk_ca_ops->in_ack_event) | ||
3379 | icsk->icsk_ca_ops->in_ack_event(sk, flags); | ||
3380 | } | ||
3381 | |||
3367 | /* This routine deals with incoming acks, but not outgoing ones. */ | 3382 | /* This routine deals with incoming acks, but not outgoing ones. */ |
3368 | static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | 3383 | static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) |
3369 | { | 3384 | { |
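Note: the generic CA_EVENT_FAST_ACK/CA_EVENT_SLOW_ACK congestion events are replaced by an optional in_ack_event() callback carrying CA_ACK_* flags (CA_ACK_WIN_UPDATE, CA_ACK_SLOWPATH and CA_ACK_ECE in the hunks below), so modules such as DCTCP get per-ACK information without burdening modules that do not care. An illustrative sketch of how a module might consume the new hook (the names and body are placeholders, not code from this patch, and a real module would still register itself with tcp_register_congestion_control()):

/* Hypothetical congestion-control module using the new hook */
static void toy_in_ack(struct sock *sk, u32 flags)
{
	if (flags & CA_ACK_ECE) {
		/* peer echoed a CE mark: update the module's own
		 * congestion estimate here */
	}
}

static struct tcp_congestion_ops toy_ecn_ops __read_mostly = {
	.name		= "toy_ecn",
	.ssthresh	= tcp_reno_ssthresh,
	.cong_avoid	= tcp_reno_cong_avoid,
	.in_ack_event	= toy_in_ack,
};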
@@ -3423,10 +3438,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
3423 | tp->snd_una = ack; | 3438 | tp->snd_una = ack; |
3424 | flag |= FLAG_WIN_UPDATE; | 3439 | flag |= FLAG_WIN_UPDATE; |
3425 | 3440 | ||
3426 | tcp_ca_event(sk, CA_EVENT_FAST_ACK); | 3441 | tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE); |
3427 | 3442 | ||
3428 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS); | 3443 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS); |
3429 | } else { | 3444 | } else { |
3445 | u32 ack_ev_flags = CA_ACK_SLOWPATH; | ||
3446 | |||
3430 | if (ack_seq != TCP_SKB_CB(skb)->end_seq) | 3447 | if (ack_seq != TCP_SKB_CB(skb)->end_seq) |
3431 | flag |= FLAG_DATA; | 3448 | flag |= FLAG_DATA; |
3432 | else | 3449 | else |
@@ -3438,10 +3455,15 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
3438 | flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, | 3455 | flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, |
3439 | &sack_rtt_us); | 3456 | &sack_rtt_us); |
3440 | 3457 | ||
3441 | if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb))) | 3458 | if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) { |
3442 | flag |= FLAG_ECE; | 3459 | flag |= FLAG_ECE; |
3460 | ack_ev_flags |= CA_ACK_ECE; | ||
3461 | } | ||
3443 | 3462 | ||
3444 | tcp_ca_event(sk, CA_EVENT_SLOW_ACK); | 3463 | if (flag & FLAG_WIN_UPDATE) |
3464 | ack_ev_flags |= CA_ACK_WIN_UPDATE; | ||
3465 | |||
3466 | tcp_in_ack_event(sk, ack_ev_flags); | ||
3445 | } | 3467 | } |
3446 | 3468 | ||
3447 | /* We passed data and got it acked, remove any soft error | 3469 | /* We passed data and got it acked, remove any soft error |
@@ -4063,6 +4085,44 @@ static void tcp_sack_remove(struct tcp_sock *tp) | |||
4063 | tp->rx_opt.num_sacks = num_sacks; | 4085 | tp->rx_opt.num_sacks = num_sacks; |
4064 | } | 4086 | } |
4065 | 4087 | ||
4088 | /** | ||
4089 | * tcp_try_coalesce - try to merge skb to prior one | ||
4090 | * @sk: socket | ||
4091 | * @to: prior buffer | ||
4092 | * @from: buffer to add in queue | ||
4093 | * @fragstolen: pointer to boolean | ||
4094 | * | ||
4095 | * Before queueing skb @from after @to, try to merge them | ||
4096 | * to reduce overall memory use and queue lengths, if cost is small. | ||
4097 | * Packets in ofo or receive queues can stay a long time. | ||
4098 | * Better try to coalesce them right now to avoid future collapses. | ||
4099 | * Returns true if caller should free @from instead of queueing it | ||
4100 | */ | ||
4101 | static bool tcp_try_coalesce(struct sock *sk, | ||
4102 | struct sk_buff *to, | ||
4103 | struct sk_buff *from, | ||
4104 | bool *fragstolen) | ||
4105 | { | ||
4106 | int delta; | ||
4107 | |||
4108 | *fragstolen = false; | ||
4109 | |||
4110 | /* Its possible this segment overlaps with prior segment in queue */ | ||
4111 | if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq) | ||
4112 | return false; | ||
4113 | |||
4114 | if (!skb_try_coalesce(to, from, fragstolen, &delta)) | ||
4115 | return false; | ||
4116 | |||
4117 | atomic_add(delta, &sk->sk_rmem_alloc); | ||
4118 | sk_mem_charge(sk, delta); | ||
4119 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE); | ||
4120 | TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq; | ||
4121 | TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq; | ||
4122 | TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags; | ||
4123 | return true; | ||
4124 | } | ||
4125 | |||
4066 | /* This one checks to see if we can put data from the | 4126 | /* This one checks to see if we can put data from the |
4067 | * out_of_order queue into the receive_queue. | 4127 | * out_of_order queue into the receive_queue. |
4068 | */ | 4128 | */ |
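Note: tcp_try_coalesce() moves above tcp_ofo_queue() (and is deleted from its old location further down) so the out-of-order drain can coalesce into the tail of the receive queue. Two behavioural details: the early "tcp_hdr(from)->fin" bail-out is gone, since the merged skb now inherits the source's flags via "TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags", and FIN is afterwards detected from TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN rather than by re-reading a TCP header that may no longer be present. The caller pattern added to tcp_ofo_queue() in the next hunk is:

	tail  = skb_peek_tail(&sk->sk_receive_queue);
	eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
	tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
	if (!eaten)
		__skb_queue_tail(&sk->sk_receive_queue, skb);
	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
		tcp_fin(sk);
	if (eaten)
		kfree_skb_partial(skb, fragstolen);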
@@ -4070,7 +4130,8 @@ static void tcp_ofo_queue(struct sock *sk) | |||
4070 | { | 4130 | { |
4071 | struct tcp_sock *tp = tcp_sk(sk); | 4131 | struct tcp_sock *tp = tcp_sk(sk); |
4072 | __u32 dsack_high = tp->rcv_nxt; | 4132 | __u32 dsack_high = tp->rcv_nxt; |
4073 | struct sk_buff *skb; | 4133 | struct sk_buff *skb, *tail; |
4134 | bool fragstolen, eaten; | ||
4074 | 4135 | ||
4075 | while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) { | 4136 | while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) { |
4076 | if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) | 4137 | if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) |
@@ -4083,9 +4144,9 @@ static void tcp_ofo_queue(struct sock *sk) | |||
4083 | tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack); | 4144 | tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack); |
4084 | } | 4145 | } |
4085 | 4146 | ||
4147 | __skb_unlink(skb, &tp->out_of_order_queue); | ||
4086 | if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { | 4148 | if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { |
4087 | SOCK_DEBUG(sk, "ofo packet was already received\n"); | 4149 | SOCK_DEBUG(sk, "ofo packet was already received\n"); |
4088 | __skb_unlink(skb, &tp->out_of_order_queue); | ||
4089 | __kfree_skb(skb); | 4150 | __kfree_skb(skb); |
4090 | continue; | 4151 | continue; |
4091 | } | 4152 | } |
@@ -4093,11 +4154,15 @@ static void tcp_ofo_queue(struct sock *sk) | |||
4093 | tp->rcv_nxt, TCP_SKB_CB(skb)->seq, | 4154 | tp->rcv_nxt, TCP_SKB_CB(skb)->seq, |
4094 | TCP_SKB_CB(skb)->end_seq); | 4155 | TCP_SKB_CB(skb)->end_seq); |
4095 | 4156 | ||
4096 | __skb_unlink(skb, &tp->out_of_order_queue); | 4157 | tail = skb_peek_tail(&sk->sk_receive_queue); |
4097 | __skb_queue_tail(&sk->sk_receive_queue, skb); | 4158 | eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen); |
4098 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | 4159 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; |
4099 | if (tcp_hdr(skb)->fin) | 4160 | if (!eaten) |
4161 | __skb_queue_tail(&sk->sk_receive_queue, skb); | ||
4162 | if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) | ||
4100 | tcp_fin(sk); | 4163 | tcp_fin(sk); |
4164 | if (eaten) | ||
4165 | kfree_skb_partial(skb, fragstolen); | ||
4101 | } | 4166 | } |
4102 | } | 4167 | } |
4103 | 4168 | ||
@@ -4124,53 +4189,13 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, | |||
4124 | return 0; | 4189 | return 0; |
4125 | } | 4190 | } |
4126 | 4191 | ||
4127 | /** | ||
4128 | * tcp_try_coalesce - try to merge skb to prior one | ||
4129 | * @sk: socket | ||
4130 | * @to: prior buffer | ||
4131 | * @from: buffer to add in queue | ||
4132 | * @fragstolen: pointer to boolean | ||
4133 | * | ||
4134 | * Before queueing skb @from after @to, try to merge them | ||
4135 | * to reduce overall memory use and queue lengths, if cost is small. | ||
4136 | * Packets in ofo or receive queues can stay a long time. | ||
4137 | * Better try to coalesce them right now to avoid future collapses. | ||
4138 | * Returns true if caller should free @from instead of queueing it | ||
4139 | */ | ||
4140 | static bool tcp_try_coalesce(struct sock *sk, | ||
4141 | struct sk_buff *to, | ||
4142 | struct sk_buff *from, | ||
4143 | bool *fragstolen) | ||
4144 | { | ||
4145 | int delta; | ||
4146 | |||
4147 | *fragstolen = false; | ||
4148 | |||
4149 | if (tcp_hdr(from)->fin) | ||
4150 | return false; | ||
4151 | |||
4152 | /* Its possible this segment overlaps with prior segment in queue */ | ||
4153 | if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq) | ||
4154 | return false; | ||
4155 | |||
4156 | if (!skb_try_coalesce(to, from, fragstolen, &delta)) | ||
4157 | return false; | ||
4158 | |||
4159 | atomic_add(delta, &sk->sk_rmem_alloc); | ||
4160 | sk_mem_charge(sk, delta); | ||
4161 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE); | ||
4162 | TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq; | ||
4163 | TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq; | ||
4164 | return true; | ||
4165 | } | ||
4166 | |||
4167 | static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) | 4192 | static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) |
4168 | { | 4193 | { |
4169 | struct tcp_sock *tp = tcp_sk(sk); | 4194 | struct tcp_sock *tp = tcp_sk(sk); |
4170 | struct sk_buff *skb1; | 4195 | struct sk_buff *skb1; |
4171 | u32 seq, end_seq; | 4196 | u32 seq, end_seq; |
4172 | 4197 | ||
4173 | TCP_ECN_check_ce(tp, skb); | 4198 | tcp_ecn_check_ce(tp, skb); |
4174 | 4199 | ||
4175 | if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { | 4200 | if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { |
4176 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP); | 4201 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP); |
@@ -4309,24 +4334,19 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int | |||
4309 | 4334 | ||
4310 | int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) | 4335 | int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) |
4311 | { | 4336 | { |
4312 | struct sk_buff *skb = NULL; | 4337 | struct sk_buff *skb; |
4313 | struct tcphdr *th; | ||
4314 | bool fragstolen; | 4338 | bool fragstolen; |
4315 | 4339 | ||
4316 | if (size == 0) | 4340 | if (size == 0) |
4317 | return 0; | 4341 | return 0; |
4318 | 4342 | ||
4319 | skb = alloc_skb(size + sizeof(*th), sk->sk_allocation); | 4343 | skb = alloc_skb(size, sk->sk_allocation); |
4320 | if (!skb) | 4344 | if (!skb) |
4321 | goto err; | 4345 | goto err; |
4322 | 4346 | ||
4323 | if (tcp_try_rmem_schedule(sk, skb, size + sizeof(*th))) | 4347 | if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) |
4324 | goto err_free; | 4348 | goto err_free; |
4325 | 4349 | ||
4326 | th = (struct tcphdr *)skb_put(skb, sizeof(*th)); | ||
4327 | skb_reset_transport_header(skb); | ||
4328 | memset(th, 0, sizeof(*th)); | ||
4329 | |||
4330 | if (memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size)) | 4350 | if (memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size)) |
4331 | goto err_free; | 4351 | goto err_free; |
4332 | 4352 | ||
@@ -4334,7 +4354,7 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) | |||
4334 | TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size; | 4354 | TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size; |
4335 | TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1; | 4355 | TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1; |
4336 | 4356 | ||
4337 | if (tcp_queue_rcv(sk, skb, sizeof(*th), &fragstolen)) { | 4357 | if (tcp_queue_rcv(sk, skb, 0, &fragstolen)) { |
4338 | WARN_ON_ONCE(fragstolen); /* should not happen */ | 4358 | WARN_ON_ONCE(fragstolen); /* should not happen */ |
4339 | __kfree_skb(skb); | 4359 | __kfree_skb(skb); |
4340 | } | 4360 | } |
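Note: tcp_send_rcvq() (the TCP_REPAIR receive-queue injection path) no longer fabricates a zeroed struct tcphdr in front of the payload: the skb is allocated for the payload alone, memory is accounted by skb->truesize, and tcp_queue_rcv() is called with a header length of 0, presumably because the receive path now relies on TCP_SKB_CB() state rather than on a header being present in the skb. For context, a userspace sketch of how this path is normally exercised (checkpoint/restore style; fd, buf and len are placeholders, and <netinet/tcp.h> provides the TCP_REPAIR* constants):

	int one = 1, q = TCP_RECV_QUEUE;

	setsockopt(fd, SOL_TCP, TCP_REPAIR, &one, sizeof(one));
	setsockopt(fd, SOL_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q));
	send(fd, buf, len, 0);	/* injected via tcp_send_rcvq() */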
@@ -4348,7 +4368,6 @@ err: | |||
4348 | 4368 | ||
4349 | static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) | 4369 | static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) |
4350 | { | 4370 | { |
4351 | const struct tcphdr *th = tcp_hdr(skb); | ||
4352 | struct tcp_sock *tp = tcp_sk(sk); | 4371 | struct tcp_sock *tp = tcp_sk(sk); |
4353 | int eaten = -1; | 4372 | int eaten = -1; |
4354 | bool fragstolen = false; | 4373 | bool fragstolen = false; |
@@ -4357,9 +4376,9 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) | |||
4357 | goto drop; | 4376 | goto drop; |
4358 | 4377 | ||
4359 | skb_dst_drop(skb); | 4378 | skb_dst_drop(skb); |
4360 | __skb_pull(skb, th->doff * 4); | 4379 | __skb_pull(skb, tcp_hdr(skb)->doff * 4); |
4361 | 4380 | ||
4362 | TCP_ECN_accept_cwr(tp, skb); | 4381 | tcp_ecn_accept_cwr(tp, skb); |
4363 | 4382 | ||
4364 | tp->rx_opt.dsack = 0; | 4383 | tp->rx_opt.dsack = 0; |
4365 | 4384 | ||
@@ -4401,7 +4420,7 @@ queue_and_out: | |||
4401 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | 4420 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; |
4402 | if (skb->len) | 4421 | if (skb->len) |
4403 | tcp_event_data_recv(sk, skb); | 4422 | tcp_event_data_recv(sk, skb); |
4404 | if (th->fin) | 4423 | if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) |
4405 | tcp_fin(sk); | 4424 | tcp_fin(sk); |
4406 | 4425 | ||
4407 | if (!skb_queue_empty(&tp->out_of_order_queue)) { | 4426 | if (!skb_queue_empty(&tp->out_of_order_queue)) { |
@@ -4516,7 +4535,7 @@ restart: | |||
4516 | * - bloated or contains data before "start" or | 4535 | * - bloated or contains data before "start" or |
4517 | * overlaps to the next one. | 4536 | * overlaps to the next one. |
4518 | */ | 4537 | */ |
4519 | if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin && | 4538 | if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) && |
4520 | (tcp_win_from_space(skb->truesize) > skb->len || | 4539 | (tcp_win_from_space(skb->truesize) > skb->len || |
4521 | before(TCP_SKB_CB(skb)->seq, start))) { | 4540 | before(TCP_SKB_CB(skb)->seq, start))) { |
4522 | end_of_skbs = false; | 4541 | end_of_skbs = false; |
@@ -4535,30 +4554,18 @@ restart: | |||
4535 | /* Decided to skip this, advance start seq. */ | 4554 | /* Decided to skip this, advance start seq. */ |
4536 | start = TCP_SKB_CB(skb)->end_seq; | 4555 | start = TCP_SKB_CB(skb)->end_seq; |
4537 | } | 4556 | } |
4538 | if (end_of_skbs || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin) | 4557 | if (end_of_skbs || |
4558 | (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN))) | ||
4539 | return; | 4559 | return; |
4540 | 4560 | ||
4541 | while (before(start, end)) { | 4561 | while (before(start, end)) { |
4562 | int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start); | ||
4542 | struct sk_buff *nskb; | 4563 | struct sk_buff *nskb; |
4543 | unsigned int header = skb_headroom(skb); | ||
4544 | int copy = SKB_MAX_ORDER(header, 0); | ||
4545 | 4564 | ||
4546 | /* Too big header? This can happen with IPv6. */ | 4565 | nskb = alloc_skb(copy, GFP_ATOMIC); |
4547 | if (copy < 0) | ||
4548 | return; | ||
4549 | if (end - start < copy) | ||
4550 | copy = end - start; | ||
4551 | nskb = alloc_skb(copy + header, GFP_ATOMIC); | ||
4552 | if (!nskb) | 4566 | if (!nskb) |
4553 | return; | 4567 | return; |
4554 | 4568 | ||
4555 | skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head); | ||
4556 | skb_set_network_header(nskb, (skb_network_header(skb) - | ||
4557 | skb->head)); | ||
4558 | skb_set_transport_header(nskb, (skb_transport_header(skb) - | ||
4559 | skb->head)); | ||
4560 | skb_reserve(nskb, header); | ||
4561 | memcpy(nskb->head, skb->head, header); | ||
4562 | memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); | 4569 | memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); |
4563 | TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; | 4570 | TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; |
4564 | __skb_queue_before(list, skb, nskb); | 4571 | __skb_queue_before(list, skb, nskb); |
@@ -4582,8 +4589,7 @@ restart: | |||
4582 | skb = tcp_collapse_one(sk, skb, list); | 4589 | skb = tcp_collapse_one(sk, skb, list); |
4583 | if (!skb || | 4590 | if (!skb || |
4584 | skb == tail || | 4591 | skb == tail || |
4585 | tcp_hdr(skb)->syn || | 4592 | (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN))) |
4586 | tcp_hdr(skb)->fin) | ||
4587 | return; | 4593 | return; |
4588 | } | 4594 | } |
4589 | } | 4595 | } |
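Note: tcp_collapse() is simplified along the same lines. Since collapsed segments no longer need to carry a valid TCP header (the relevant SYN/FIN state is cached in TCP_SKB_CB()->tcp_flags), the replacement skb is a plain payload buffer: no headroom is reserved and the mac/network/transport headers are no longer copied. From the new column above:

	int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start);
	struct sk_buff *nskb;

	nskb = alloc_skb(copy, GFP_ATOMIC);
	if (!nskb)
		return;

	memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
	TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
	__skb_queue_before(list, skb, nskb);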
@@ -4951,53 +4957,6 @@ static inline bool tcp_checksum_complete_user(struct sock *sk, | |||
4951 | __tcp_checksum_complete_user(sk, skb); | 4957 | __tcp_checksum_complete_user(sk, skb); |
4952 | } | 4958 | } |
4953 | 4959 | ||
4954 | #ifdef CONFIG_NET_DMA | ||
4955 | static bool tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, | ||
4956 | int hlen) | ||
4957 | { | ||
4958 | struct tcp_sock *tp = tcp_sk(sk); | ||
4959 | int chunk = skb->len - hlen; | ||
4960 | int dma_cookie; | ||
4961 | bool copied_early = false; | ||
4962 | |||
4963 | if (tp->ucopy.wakeup) | ||
4964 | return false; | ||
4965 | |||
4966 | if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) | ||
4967 | tp->ucopy.dma_chan = net_dma_find_channel(); | ||
4968 | |||
4969 | if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) { | ||
4970 | |||
4971 | dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan, | ||
4972 | skb, hlen, | ||
4973 | tp->ucopy.iov, chunk, | ||
4974 | tp->ucopy.pinned_list); | ||
4975 | |||
4976 | if (dma_cookie < 0) | ||
4977 | goto out; | ||
4978 | |||
4979 | tp->ucopy.dma_cookie = dma_cookie; | ||
4980 | copied_early = true; | ||
4981 | |||
4982 | tp->ucopy.len -= chunk; | ||
4983 | tp->copied_seq += chunk; | ||
4984 | tcp_rcv_space_adjust(sk); | ||
4985 | |||
4986 | if ((tp->ucopy.len == 0) || | ||
4987 | (tcp_flag_word(tcp_hdr(skb)) & TCP_FLAG_PSH) || | ||
4988 | (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) { | ||
4989 | tp->ucopy.wakeup = 1; | ||
4990 | sk->sk_data_ready(sk); | ||
4991 | } | ||
4992 | } else if (chunk > 0) { | ||
4993 | tp->ucopy.wakeup = 1; | ||
4994 | sk->sk_data_ready(sk); | ||
4995 | } | ||
4996 | out: | ||
4997 | return copied_early; | ||
4998 | } | ||
4999 | #endif /* CONFIG_NET_DMA */ | ||
5000 | |||
5001 | /* Does PAWS and seqno based validation of an incoming segment, flags will | 4960 | /* Does PAWS and seqno based validation of an incoming segment, flags will |
5002 | * play significant role here. | 4961 | * play significant role here. |
5003 | */ | 4962 | */ |
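Note: the CONFIG_NET_DMA early-receive path (tcp_dma_try_early_copy() and the copied_early bookkeeping removed in the hunks above and below) goes away together with the net_dma offload support, which is why the <net/netdma.h> include is dropped at the top of the file. The header-prediction fast path is left with a single direct-to-user branch: if the task that owns the socket is the ucopy consumer and the segment fits, it is copied straight to the iovec, otherwise it is queued. After the change the branch reduces to (from the tcp_rcv_established() hunk below):

	if (tp->ucopy.task == current &&
	    tp->copied_seq == tp->rcv_nxt &&
	    len - tcp_header_len <= tp->ucopy.len &&
	    sock_owned_by_user(sk)) {
		__set_current_state(TASK_RUNNING);

		if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) {
			/* ... header-prediction bookkeeping ... */
			eaten = 1;
		}
	}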
@@ -5177,27 +5136,15 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
5177 | } | 5136 | } |
5178 | } else { | 5137 | } else { |
5179 | int eaten = 0; | 5138 | int eaten = 0; |
5180 | int copied_early = 0; | ||
5181 | bool fragstolen = false; | 5139 | bool fragstolen = false; |
5182 | 5140 | ||
5183 | if (tp->copied_seq == tp->rcv_nxt && | 5141 | if (tp->ucopy.task == current && |
5184 | len - tcp_header_len <= tp->ucopy.len) { | 5142 | tp->copied_seq == tp->rcv_nxt && |
5185 | #ifdef CONFIG_NET_DMA | 5143 | len - tcp_header_len <= tp->ucopy.len && |
5186 | if (tp->ucopy.task == current && | 5144 | sock_owned_by_user(sk)) { |
5187 | sock_owned_by_user(sk) && | 5145 | __set_current_state(TASK_RUNNING); |
5188 | tcp_dma_try_early_copy(sk, skb, tcp_header_len)) { | ||
5189 | copied_early = 1; | ||
5190 | eaten = 1; | ||
5191 | } | ||
5192 | #endif | ||
5193 | if (tp->ucopy.task == current && | ||
5194 | sock_owned_by_user(sk) && !copied_early) { | ||
5195 | __set_current_state(TASK_RUNNING); | ||
5196 | 5146 | ||
5197 | if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) | 5147 | if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) { |
5198 | eaten = 1; | ||
5199 | } | ||
5200 | if (eaten) { | ||
5201 | /* Predicted packet is in window by definition. | 5148 | /* Predicted packet is in window by definition. |
5202 | * seq == rcv_nxt and rcv_wup <= rcv_nxt. | 5149 | * seq == rcv_nxt and rcv_wup <= rcv_nxt. |
5203 | * Hence, check seq<=rcv_wup reduces to: | 5150 | * Hence, check seq<=rcv_wup reduces to: |
@@ -5213,9 +5160,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
5213 | __skb_pull(skb, tcp_header_len); | 5160 | __skb_pull(skb, tcp_header_len); |
5214 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | 5161 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; |
5215 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER); | 5162 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER); |
5163 | eaten = 1; | ||
5216 | } | 5164 | } |
5217 | if (copied_early) | ||
5218 | tcp_cleanup_rbuf(sk, skb->len); | ||
5219 | } | 5165 | } |
5220 | if (!eaten) { | 5166 | if (!eaten) { |
5221 | if (tcp_checksum_complete_user(sk, skb)) | 5167 | if (tcp_checksum_complete_user(sk, skb)) |
@@ -5252,14 +5198,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
5252 | goto no_ack; | 5198 | goto no_ack; |
5253 | } | 5199 | } |
5254 | 5200 | ||
5255 | if (!copied_early || tp->rcv_nxt != tp->rcv_wup) | 5201 | __tcp_ack_snd_check(sk, 0); |
5256 | __tcp_ack_snd_check(sk, 0); | ||
5257 | no_ack: | 5202 | no_ack: |
5258 | #ifdef CONFIG_NET_DMA | ||
5259 | if (copied_early) | ||
5260 | __skb_queue_tail(&sk->sk_async_wait_queue, skb); | ||
5261 | else | ||
5262 | #endif | ||
5263 | if (eaten) | 5203 | if (eaten) |
5264 | kfree_skb_partial(skb, fragstolen); | 5204 | kfree_skb_partial(skb, fragstolen); |
5265 | sk->sk_data_ready(sk); | 5205 | sk->sk_data_ready(sk); |
@@ -5453,7 +5393,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
5453 | * state to ESTABLISHED..." | 5393 | * state to ESTABLISHED..." |
5454 | */ | 5394 | */ |
5455 | 5395 | ||
5456 | TCP_ECN_rcv_synack(tp, th); | 5396 | tcp_ecn_rcv_synack(tp, th); |
5457 | 5397 | ||
5458 | tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); | 5398 | tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); |
5459 | tcp_ack(sk, skb, FLAG_SLOWPATH); | 5399 | tcp_ack(sk, skb, FLAG_SLOWPATH); |
@@ -5572,7 +5512,7 @@ discard: | |||
5572 | tp->snd_wl1 = TCP_SKB_CB(skb)->seq; | 5512 | tp->snd_wl1 = TCP_SKB_CB(skb)->seq; |
5573 | tp->max_window = tp->snd_wnd; | 5513 | tp->max_window = tp->snd_wnd; |
5574 | 5514 | ||
5575 | TCP_ECN_rcv_syn(tp, th); | 5515 | tcp_ecn_rcv_syn(tp, th); |
5576 | 5516 | ||
5577 | tcp_mtup_init(sk); | 5517 | tcp_mtup_init(sk); |
5578 | tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); | 5518 | tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); |
@@ -5902,6 +5842,40 @@ static inline void pr_drop_req(struct request_sock *req, __u16 port, int family) | |||
5902 | #endif | 5842 | #endif |
5903 | } | 5843 | } |
5904 | 5844 | ||
5845 | /* RFC3168 : 6.1.1 SYN packets must not have ECT/ECN bits set | ||
5846 | * | ||
5847 | * If we receive a SYN packet with these bits set, it means a | ||
5848 | * network is playing bad games with TOS bits. In order to | ||
5849 | * avoid possible false congestion notifications, we disable | ||
5850 | * TCP ECN negociation. | ||
5851 | * | ||
5852 | * Exception: tcp_ca wants ECN. This is required for DCTCP | ||
5853 | * congestion control; it requires setting ECT on all packets, | ||
5854 | * including SYN. We inverse the test in this case: If our | ||
5855 | * local socket wants ECN, but peer only set ece/cwr (but not | ||
5856 | * ECT in IP header) its probably a non-DCTCP aware sender. | ||
5857 | */ | ||
5858 | static void tcp_ecn_create_request(struct request_sock *req, | ||
5859 | const struct sk_buff *skb, | ||
5860 | const struct sock *listen_sk) | ||
5861 | { | ||
5862 | const struct tcphdr *th = tcp_hdr(skb); | ||
5863 | const struct net *net = sock_net(listen_sk); | ||
5864 | bool th_ecn = th->ece && th->cwr; | ||
5865 | bool ect, need_ecn; | ||
5866 | |||
5867 | if (!th_ecn) | ||
5868 | return; | ||
5869 | |||
5870 | ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield); | ||
5871 | need_ecn = tcp_ca_needs_ecn(listen_sk); | ||
5872 | |||
5873 | if (!ect && !need_ecn && net->ipv4.sysctl_tcp_ecn) | ||
5874 | inet_rsk(req)->ecn_ok = 1; | ||
5875 | else if (ect && need_ecn) | ||
5876 | inet_rsk(req)->ecn_ok = 1; | ||
5877 | } | ||
5878 | |||
5905 | int tcp_conn_request(struct request_sock_ops *rsk_ops, | 5879 | int tcp_conn_request(struct request_sock_ops *rsk_ops, |
5906 | const struct tcp_request_sock_ops *af_ops, | 5880 | const struct tcp_request_sock_ops *af_ops, |
5907 | struct sock *sk, struct sk_buff *skb) | 5881 | struct sock *sk, struct sk_buff *skb) |
@@ -5910,7 +5884,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, | |||
5910 | struct request_sock *req; | 5884 | struct request_sock *req; |
5911 | struct tcp_sock *tp = tcp_sk(sk); | 5885 | struct tcp_sock *tp = tcp_sk(sk); |
5912 | struct dst_entry *dst = NULL; | 5886 | struct dst_entry *dst = NULL; |
5913 | __u32 isn = TCP_SKB_CB(skb)->when; | 5887 | __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn; |
5914 | bool want_cookie = false, fastopen; | 5888 | bool want_cookie = false, fastopen; |
5915 | struct flowi fl; | 5889 | struct flowi fl; |
5916 | struct tcp_fastopen_cookie foc = { .len = -1 }; | 5890 | struct tcp_fastopen_cookie foc = { .len = -1 }; |
@@ -5962,7 +5936,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, | |||
5962 | goto drop_and_free; | 5936 | goto drop_and_free; |
5963 | 5937 | ||
5964 | if (!want_cookie || tmp_opt.tstamp_ok) | 5938 | if (!want_cookie || tmp_opt.tstamp_ok) |
5965 | TCP_ECN_create_request(req, skb, sock_net(sk)); | 5939 | tcp_ecn_create_request(req, skb, sk); |
5966 | 5940 | ||
5967 | if (want_cookie) { | 5941 | if (want_cookie) { |
5968 | isn = cookie_init_sequence(af_ops, sk, skb, &req->mss); | 5942 | isn = cookie_init_sequence(af_ops, sk, skb, &req->mss); |
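Note: TCP_ECN_create_request() moves into this file as tcp_ecn_create_request() and now takes the listening socket rather than just the netns, so it can consult tcp_ca_needs_ecn(). Combining the comment and the code in its hunk above, the decision for an incoming SYN that carries both ECE and CWR comes down to (all rows assume ECE|CWR set; otherwise ecn_ok stays 0):

	ECT on SYN   CA needs ECN   sysctl_tcp_ecn      ecn_ok
	----------   ------------   --------------      ------
	    no            no              1               1    classic RFC 3168 peer
	    no            no              0               0
	    no            yes             -               0    peer not DCTCP-style
	    yes           yes             -               1    DCTCP-style negotiation
	    yes           no              -               0    ECT on SYN: likely mangled TOS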