diff options
Diffstat (limited to 'net/ipv4/tcp_input.c')
| -rw-r--r-- | net/ipv4/tcp_input.c | 89 |
1 files changed, 46 insertions, 43 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 94fe5b1f9dcb..3f884cea14ff 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
| @@ -63,7 +63,6 @@ | |||
| 63 | * Pasi Sarolahti: F-RTO for dealing with spurious RTOs | 63 | * Pasi Sarolahti: F-RTO for dealing with spurious RTOs |
| 64 | */ | 64 | */ |
| 65 | 65 | ||
| 66 | #include <linux/config.h> | ||
| 67 | #include <linux/mm.h> | 66 | #include <linux/mm.h> |
| 68 | #include <linux/module.h> | 67 | #include <linux/module.h> |
| 69 | #include <linux/sysctl.h> | 68 | #include <linux/sysctl.h> |
| @@ -73,24 +72,24 @@ | |||
| 73 | #include <asm/unaligned.h> | 72 | #include <asm/unaligned.h> |
| 74 | #include <net/netdma.h> | 73 | #include <net/netdma.h> |
| 75 | 74 | ||
| 76 | int sysctl_tcp_timestamps = 1; | 75 | int sysctl_tcp_timestamps __read_mostly = 1; |
| 77 | int sysctl_tcp_window_scaling = 1; | 76 | int sysctl_tcp_window_scaling __read_mostly = 1; |
| 78 | int sysctl_tcp_sack = 1; | 77 | int sysctl_tcp_sack __read_mostly = 1; |
| 79 | int sysctl_tcp_fack = 1; | 78 | int sysctl_tcp_fack __read_mostly = 1; |
| 80 | int sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH; | 79 | int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; |
| 81 | int sysctl_tcp_ecn; | 80 | int sysctl_tcp_ecn __read_mostly; |
| 82 | int sysctl_tcp_dsack = 1; | 81 | int sysctl_tcp_dsack __read_mostly = 1; |
| 83 | int sysctl_tcp_app_win = 31; | 82 | int sysctl_tcp_app_win __read_mostly = 31; |
| 84 | int sysctl_tcp_adv_win_scale = 2; | 83 | int sysctl_tcp_adv_win_scale __read_mostly = 2; |
| 85 | 84 | ||
| 86 | int sysctl_tcp_stdurg; | 85 | int sysctl_tcp_stdurg __read_mostly; |
| 87 | int sysctl_tcp_rfc1337; | 86 | int sysctl_tcp_rfc1337 __read_mostly; |
| 88 | int sysctl_tcp_max_orphans = NR_FILE; | 87 | int sysctl_tcp_max_orphans __read_mostly = NR_FILE; |
| 89 | int sysctl_tcp_frto; | 88 | int sysctl_tcp_frto __read_mostly; |
| 90 | int sysctl_tcp_nometrics_save; | 89 | int sysctl_tcp_nometrics_save __read_mostly; |
| 91 | 90 | ||
| 92 | int sysctl_tcp_moderate_rcvbuf = 1; | 91 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; |
| 93 | int sysctl_tcp_abc = 1; | 92 | int sysctl_tcp_abc __read_mostly; |
| 94 | 93 | ||
| 95 | #define FLAG_DATA 0x01 /* Incoming frame contained data. */ | 94 | #define FLAG_DATA 0x01 /* Incoming frame contained data. */ |
| 96 | #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ | 95 | #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ |
| @@ -128,7 +127,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, | |||
| 128 | /* skb->len may jitter because of SACKs, even if peer | 127 | /* skb->len may jitter because of SACKs, even if peer |
| 129 | * sends good full-sized frames. | 128 | * sends good full-sized frames. |
| 130 | */ | 129 | */ |
| 131 | len = skb->len; | 130 | len = skb_shinfo(skb)->gso_size ?: skb->len; |
| 132 | if (len >= icsk->icsk_ack.rcv_mss) { | 131 | if (len >= icsk->icsk_ack.rcv_mss) { |
| 133 | icsk->icsk_ack.rcv_mss = len; | 132 | icsk->icsk_ack.rcv_mss = len; |
| 134 | } else { | 133 | } else { |
| @@ -157,6 +156,8 @@ static void tcp_measure_rcv_mss(struct sock *sk, | |||
| 157 | return; | 156 | return; |
| 158 | } | 157 | } |
| 159 | } | 158 | } |
| 159 | if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) | ||
| 160 | icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2; | ||
| 160 | icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; | 161 | icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; |
| 161 | } | 162 | } |
| 162 | } | 163 | } |
| @@ -934,7 +935,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
| 934 | const struct inet_connection_sock *icsk = inet_csk(sk); | 935 | const struct inet_connection_sock *icsk = inet_csk(sk); |
| 935 | struct tcp_sock *tp = tcp_sk(sk); | 936 | struct tcp_sock *tp = tcp_sk(sk); |
| 936 | unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked; | 937 | unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked; |
| 937 | struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2); | 938 | struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2); |
| 938 | int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; | 939 | int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; |
| 939 | int reord = tp->packets_out; | 940 | int reord = tp->packets_out; |
| 940 | int prior_fackets; | 941 | int prior_fackets; |
| @@ -2238,13 +2239,12 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb, | |||
| 2238 | return acked; | 2239 | return acked; |
| 2239 | } | 2240 | } |
| 2240 | 2241 | ||
| 2241 | static u32 tcp_usrtt(const struct sk_buff *skb) | 2242 | static u32 tcp_usrtt(struct timeval *tv) |
| 2242 | { | 2243 | { |
| 2243 | struct timeval tv, now; | 2244 | struct timeval now; |
| 2244 | 2245 | ||
| 2245 | do_gettimeofday(&now); | 2246 | do_gettimeofday(&now); |
| 2246 | skb_get_timestamp(skb, &tv); | 2247 | return (now.tv_sec - tv->tv_sec) * 1000000 + (now.tv_usec - tv->tv_usec); |
| 2247 | return (now.tv_sec - tv.tv_sec) * 1000000 + (now.tv_usec - tv.tv_usec); | ||
| 2248 | } | 2248 | } |
| 2249 | 2249 | ||
| 2250 | /* Remove acknowledged frames from the retransmission queue. */ | 2250 | /* Remove acknowledged frames from the retransmission queue. */ |
| @@ -2259,6 +2259,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) | |||
| 2259 | u32 pkts_acked = 0; | 2259 | u32 pkts_acked = 0; |
| 2260 | void (*rtt_sample)(struct sock *sk, u32 usrtt) | 2260 | void (*rtt_sample)(struct sock *sk, u32 usrtt) |
| 2261 | = icsk->icsk_ca_ops->rtt_sample; | 2261 | = icsk->icsk_ca_ops->rtt_sample; |
| 2262 | struct timeval tv; | ||
| 2262 | 2263 | ||
| 2263 | while ((skb = skb_peek(&sk->sk_write_queue)) && | 2264 | while ((skb = skb_peek(&sk->sk_write_queue)) && |
| 2264 | skb != sk->sk_send_head) { | 2265 | skb != sk->sk_send_head) { |
| @@ -2307,8 +2308,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) | |||
| 2307 | seq_rtt = -1; | 2308 | seq_rtt = -1; |
| 2308 | } else if (seq_rtt < 0) { | 2309 | } else if (seq_rtt < 0) { |
| 2309 | seq_rtt = now - scb->when; | 2310 | seq_rtt = now - scb->when; |
| 2310 | if (rtt_sample) | 2311 | skb_get_timestamp(skb, &tv); |
| 2311 | (*rtt_sample)(sk, tcp_usrtt(skb)); | ||
| 2312 | } | 2312 | } |
| 2313 | if (sacked & TCPCB_SACKED_ACKED) | 2313 | if (sacked & TCPCB_SACKED_ACKED) |
| 2314 | tp->sacked_out -= tcp_skb_pcount(skb); | 2314 | tp->sacked_out -= tcp_skb_pcount(skb); |
| @@ -2321,8 +2321,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) | |||
| 2321 | } | 2321 | } |
| 2322 | } else if (seq_rtt < 0) { | 2322 | } else if (seq_rtt < 0) { |
| 2323 | seq_rtt = now - scb->when; | 2323 | seq_rtt = now - scb->when; |
| 2324 | if (rtt_sample) | 2324 | skb_get_timestamp(skb, &tv); |
| 2325 | (*rtt_sample)(sk, tcp_usrtt(skb)); | ||
| 2326 | } | 2325 | } |
| 2327 | tcp_dec_pcount_approx(&tp->fackets_out, skb); | 2326 | tcp_dec_pcount_approx(&tp->fackets_out, skb); |
| 2328 | tcp_packets_out_dec(tp, skb); | 2327 | tcp_packets_out_dec(tp, skb); |
| @@ -2334,6 +2333,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) | |||
| 2334 | if (acked&FLAG_ACKED) { | 2333 | if (acked&FLAG_ACKED) { |
| 2335 | tcp_ack_update_rtt(sk, acked, seq_rtt); | 2334 | tcp_ack_update_rtt(sk, acked, seq_rtt); |
| 2336 | tcp_ack_packets_out(sk, tp); | 2335 | tcp_ack_packets_out(sk, tp); |
| 2336 | if (rtt_sample && !(acked & FLAG_RETRANS_DATA_ACKED)) | ||
| 2337 | (*rtt_sample)(sk, tcp_usrtt(&tv)); | ||
| 2337 | 2338 | ||
| 2338 | if (icsk->icsk_ca_ops->pkts_acked) | 2339 | if (icsk->icsk_ca_ops->pkts_acked) |
| 2339 | icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked); | 2340 | icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked); |
| @@ -2506,8 +2507,13 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) | |||
| 2506 | if (before(ack, prior_snd_una)) | 2507 | if (before(ack, prior_snd_una)) |
| 2507 | goto old_ack; | 2508 | goto old_ack; |
| 2508 | 2509 | ||
| 2509 | if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR) | 2510 | if (sysctl_tcp_abc) { |
| 2510 | tp->bytes_acked += ack - prior_snd_una; | 2511 | if (icsk->icsk_ca_state < TCP_CA_CWR) |
| 2512 | tp->bytes_acked += ack - prior_snd_una; | ||
| 2513 | else if (icsk->icsk_ca_state == TCP_CA_Loss) | ||
| 2514 | /* we assume just one segment left network */ | ||
| 2515 | tp->bytes_acked += min(ack - prior_snd_una, tp->mss_cache); | ||
| 2516 | } | ||
| 2511 | 2517 | ||
| 2512 | if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { | 2518 | if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { |
| 2513 | /* Window is constant, pure forward advance. | 2519 | /* Window is constant, pure forward advance. |
| @@ -2623,7 +2629,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, | |||
| 2623 | switch(opcode) { | 2629 | switch(opcode) { |
| 2624 | case TCPOPT_MSS: | 2630 | case TCPOPT_MSS: |
| 2625 | if(opsize==TCPOLEN_MSS && th->syn && !estab) { | 2631 | if(opsize==TCPOLEN_MSS && th->syn && !estab) { |
| 2626 | u16 in_mss = ntohs(get_unaligned((__u16 *)ptr)); | 2632 | u16 in_mss = ntohs(get_unaligned((__be16 *)ptr)); |
| 2627 | if (in_mss) { | 2633 | if (in_mss) { |
| 2628 | if (opt_rx->user_mss && opt_rx->user_mss < in_mss) | 2634 | if (opt_rx->user_mss && opt_rx->user_mss < in_mss) |
| 2629 | in_mss = opt_rx->user_mss; | 2635 | in_mss = opt_rx->user_mss; |
| @@ -2651,8 +2657,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, | |||
| 2651 | if ((estab && opt_rx->tstamp_ok) || | 2657 | if ((estab && opt_rx->tstamp_ok) || |
| 2652 | (!estab && sysctl_tcp_timestamps)) { | 2658 | (!estab && sysctl_tcp_timestamps)) { |
| 2653 | opt_rx->saw_tstamp = 1; | 2659 | opt_rx->saw_tstamp = 1; |
| 2654 | opt_rx->rcv_tsval = ntohl(get_unaligned((__u32 *)ptr)); | 2660 | opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr)); |
| 2655 | opt_rx->rcv_tsecr = ntohl(get_unaligned((__u32 *)(ptr+4))); | 2661 | opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4))); |
| 2656 | } | 2662 | } |
| 2657 | } | 2663 | } |
| 2658 | break; | 2664 | break; |
| @@ -2689,8 +2695,8 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, | |||
| 2689 | return 0; | 2695 | return 0; |
| 2690 | } else if (tp->rx_opt.tstamp_ok && | 2696 | } else if (tp->rx_opt.tstamp_ok && |
| 2691 | th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { | 2697 | th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { |
| 2692 | __u32 *ptr = (__u32 *)(th + 1); | 2698 | __be32 *ptr = (__be32 *)(th + 1); |
| 2693 | if (*ptr == ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 2699 | if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
| 2694 | | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { | 2700 | | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { |
| 2695 | tp->rx_opt.saw_tstamp = 1; | 2701 | tp->rx_opt.saw_tstamp = 1; |
| 2696 | ++ptr; | 2702 | ++ptr; |
| @@ -3542,7 +3548,8 @@ void tcp_cwnd_application_limited(struct sock *sk) | |||
| 3542 | if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open && | 3548 | if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open && |
| 3543 | sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { | 3549 | sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { |
| 3544 | /* Limited by application or receiver window. */ | 3550 | /* Limited by application or receiver window. */ |
| 3545 | u32 win_used = max(tp->snd_cwnd_used, 2U); | 3551 | u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk)); |
| 3552 | u32 win_used = max(tp->snd_cwnd_used, init_win); | ||
| 3546 | if (win_used < tp->snd_cwnd) { | 3553 | if (win_used < tp->snd_cwnd) { |
| 3547 | tp->snd_ssthresh = tcp_current_ssthresh(sk); | 3554 | tp->snd_ssthresh = tcp_current_ssthresh(sk); |
| 3548 | tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1; | 3555 | tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1; |
| @@ -3904,10 +3911,10 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
| 3904 | 3911 | ||
| 3905 | /* Check timestamp */ | 3912 | /* Check timestamp */ |
| 3906 | if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) { | 3913 | if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) { |
| 3907 | __u32 *ptr = (__u32 *)(th + 1); | 3914 | __be32 *ptr = (__be32 *)(th + 1); |
| 3908 | 3915 | ||
| 3909 | /* No? Slow path! */ | 3916 | /* No? Slow path! */ |
| 3910 | if (*ptr != ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 3917 | if (*ptr != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
| 3911 | | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) | 3918 | | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) |
| 3912 | goto slow_path; | 3919 | goto slow_path; |
| 3913 | 3920 | ||
| @@ -4178,8 +4185,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
| 4178 | */ | 4185 | */ |
| 4179 | 4186 | ||
| 4180 | TCP_ECN_rcv_synack(tp, th); | 4187 | TCP_ECN_rcv_synack(tp, th); |
| 4181 | if (tp->ecn_flags&TCP_ECN_OK) | ||
| 4182 | sock_set_flag(sk, SOCK_NO_LARGESEND); | ||
| 4183 | 4188 | ||
| 4184 | tp->snd_wl1 = TCP_SKB_CB(skb)->seq; | 4189 | tp->snd_wl1 = TCP_SKB_CB(skb)->seq; |
| 4185 | tcp_ack(sk, skb, FLAG_SLOWPATH); | 4190 | tcp_ack(sk, skb, FLAG_SLOWPATH); |
| @@ -4322,8 +4327,6 @@ discard: | |||
| 4322 | tp->max_window = tp->snd_wnd; | 4327 | tp->max_window = tp->snd_wnd; |
| 4323 | 4328 | ||
| 4324 | TCP_ECN_rcv_syn(tp, th); | 4329 | TCP_ECN_rcv_syn(tp, th); |
| 4325 | if (tp->ecn_flags&TCP_ECN_OK) | ||
| 4326 | sock_set_flag(sk, SOCK_NO_LARGESEND); | ||
| 4327 | 4330 | ||
| 4328 | tcp_mtup_init(sk); | 4331 | tcp_mtup_init(sk); |
| 4329 | tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); | 4332 | tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); |
