aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_input.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--net/ipv4/tcp_input.c89
1 files changed, 46 insertions, 43 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 94fe5b1f9dcb..3f884cea14ff 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -63,7 +63,6 @@
63 * Pasi Sarolahti: F-RTO for dealing with spurious RTOs 63 * Pasi Sarolahti: F-RTO for dealing with spurious RTOs
64 */ 64 */
65 65
66#include <linux/config.h>
67#include <linux/mm.h> 66#include <linux/mm.h>
68#include <linux/module.h> 67#include <linux/module.h>
69#include <linux/sysctl.h> 68#include <linux/sysctl.h>
@@ -73,24 +72,24 @@
73#include <asm/unaligned.h> 72#include <asm/unaligned.h>
74#include <net/netdma.h> 73#include <net/netdma.h>
75 74
76int sysctl_tcp_timestamps = 1; 75int sysctl_tcp_timestamps __read_mostly = 1;
77int sysctl_tcp_window_scaling = 1; 76int sysctl_tcp_window_scaling __read_mostly = 1;
78int sysctl_tcp_sack = 1; 77int sysctl_tcp_sack __read_mostly = 1;
79int sysctl_tcp_fack = 1; 78int sysctl_tcp_fack __read_mostly = 1;
80int sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH; 79int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
81int sysctl_tcp_ecn; 80int sysctl_tcp_ecn __read_mostly;
82int sysctl_tcp_dsack = 1; 81int sysctl_tcp_dsack __read_mostly = 1;
83int sysctl_tcp_app_win = 31; 82int sysctl_tcp_app_win __read_mostly = 31;
84int sysctl_tcp_adv_win_scale = 2; 83int sysctl_tcp_adv_win_scale __read_mostly = 2;
85 84
86int sysctl_tcp_stdurg; 85int sysctl_tcp_stdurg __read_mostly;
87int sysctl_tcp_rfc1337; 86int sysctl_tcp_rfc1337 __read_mostly;
88int sysctl_tcp_max_orphans = NR_FILE; 87int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
89int sysctl_tcp_frto; 88int sysctl_tcp_frto __read_mostly;
90int sysctl_tcp_nometrics_save; 89int sysctl_tcp_nometrics_save __read_mostly;
91 90
92int sysctl_tcp_moderate_rcvbuf = 1; 91int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
93int sysctl_tcp_abc = 1; 92int sysctl_tcp_abc __read_mostly;
94 93
95#define FLAG_DATA 0x01 /* Incoming frame contained data. */ 94#define FLAG_DATA 0x01 /* Incoming frame contained data. */
96#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ 95#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
@@ -128,7 +127,7 @@ static void tcp_measure_rcv_mss(struct sock *sk,
128 /* skb->len may jitter because of SACKs, even if peer 127 /* skb->len may jitter because of SACKs, even if peer
129 * sends good full-sized frames. 128 * sends good full-sized frames.
130 */ 129 */
131 len = skb->len; 130 len = skb_shinfo(skb)->gso_size ?: skb->len;
132 if (len >= icsk->icsk_ack.rcv_mss) { 131 if (len >= icsk->icsk_ack.rcv_mss) {
133 icsk->icsk_ack.rcv_mss = len; 132 icsk->icsk_ack.rcv_mss = len;
134 } else { 133 } else {
@@ -157,6 +156,8 @@ static void tcp_measure_rcv_mss(struct sock *sk,
157 return; 156 return;
158 } 157 }
159 } 158 }
159 if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
160 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
160 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; 161 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
161 } 162 }
162} 163}
@@ -934,7 +935,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
934 const struct inet_connection_sock *icsk = inet_csk(sk); 935 const struct inet_connection_sock *icsk = inet_csk(sk);
935 struct tcp_sock *tp = tcp_sk(sk); 936 struct tcp_sock *tp = tcp_sk(sk);
936 unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked; 937 unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked;
937 struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2); 938 struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2);
938 int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; 939 int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
939 int reord = tp->packets_out; 940 int reord = tp->packets_out;
940 int prior_fackets; 941 int prior_fackets;
@@ -2238,13 +2239,12 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
2238 return acked; 2239 return acked;
2239} 2240}
2240 2241
2241static u32 tcp_usrtt(const struct sk_buff *skb) 2242static u32 tcp_usrtt(struct timeval *tv)
2242{ 2243{
2243 struct timeval tv, now; 2244 struct timeval now;
2244 2245
2245 do_gettimeofday(&now); 2246 do_gettimeofday(&now);
2246 skb_get_timestamp(skb, &tv); 2247 return (now.tv_sec - tv->tv_sec) * 1000000 + (now.tv_usec - tv->tv_usec);
2247 return (now.tv_sec - tv.tv_sec) * 1000000 + (now.tv_usec - tv.tv_usec);
2248} 2248}
2249 2249
2250/* Remove acknowledged frames from the retransmission queue. */ 2250/* Remove acknowledged frames from the retransmission queue. */
@@ -2259,6 +2259,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
2259 u32 pkts_acked = 0; 2259 u32 pkts_acked = 0;
2260 void (*rtt_sample)(struct sock *sk, u32 usrtt) 2260 void (*rtt_sample)(struct sock *sk, u32 usrtt)
2261 = icsk->icsk_ca_ops->rtt_sample; 2261 = icsk->icsk_ca_ops->rtt_sample;
2262 struct timeval tv;
2262 2263
2263 while ((skb = skb_peek(&sk->sk_write_queue)) && 2264 while ((skb = skb_peek(&sk->sk_write_queue)) &&
2264 skb != sk->sk_send_head) { 2265 skb != sk->sk_send_head) {
@@ -2307,8 +2308,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
2307 seq_rtt = -1; 2308 seq_rtt = -1;
2308 } else if (seq_rtt < 0) { 2309 } else if (seq_rtt < 0) {
2309 seq_rtt = now - scb->when; 2310 seq_rtt = now - scb->when;
2310 if (rtt_sample) 2311 skb_get_timestamp(skb, &tv);
2311 (*rtt_sample)(sk, tcp_usrtt(skb));
2312 } 2312 }
2313 if (sacked & TCPCB_SACKED_ACKED) 2313 if (sacked & TCPCB_SACKED_ACKED)
2314 tp->sacked_out -= tcp_skb_pcount(skb); 2314 tp->sacked_out -= tcp_skb_pcount(skb);
@@ -2321,8 +2321,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
2321 } 2321 }
2322 } else if (seq_rtt < 0) { 2322 } else if (seq_rtt < 0) {
2323 seq_rtt = now - scb->when; 2323 seq_rtt = now - scb->when;
2324 if (rtt_sample) 2324 skb_get_timestamp(skb, &tv);
2325 (*rtt_sample)(sk, tcp_usrtt(skb));
2326 } 2325 }
2327 tcp_dec_pcount_approx(&tp->fackets_out, skb); 2326 tcp_dec_pcount_approx(&tp->fackets_out, skb);
2328 tcp_packets_out_dec(tp, skb); 2327 tcp_packets_out_dec(tp, skb);
@@ -2334,6 +2333,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
2334 if (acked&FLAG_ACKED) { 2333 if (acked&FLAG_ACKED) {
2335 tcp_ack_update_rtt(sk, acked, seq_rtt); 2334 tcp_ack_update_rtt(sk, acked, seq_rtt);
2336 tcp_ack_packets_out(sk, tp); 2335 tcp_ack_packets_out(sk, tp);
2336 if (rtt_sample && !(acked & FLAG_RETRANS_DATA_ACKED))
2337 (*rtt_sample)(sk, tcp_usrtt(&tv));
2337 2338
2338 if (icsk->icsk_ca_ops->pkts_acked) 2339 if (icsk->icsk_ca_ops->pkts_acked)
2339 icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked); 2340 icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked);
@@ -2506,8 +2507,13 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
2506 if (before(ack, prior_snd_una)) 2507 if (before(ack, prior_snd_una))
2507 goto old_ack; 2508 goto old_ack;
2508 2509
2509 if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR) 2510 if (sysctl_tcp_abc) {
2510 tp->bytes_acked += ack - prior_snd_una; 2511 if (icsk->icsk_ca_state < TCP_CA_CWR)
2512 tp->bytes_acked += ack - prior_snd_una;
2513 else if (icsk->icsk_ca_state == TCP_CA_Loss)
2514 /* we assume just one segment left network */
2515 tp->bytes_acked += min(ack - prior_snd_una, tp->mss_cache);
2516 }
2511 2517
2512 if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { 2518 if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
2513 /* Window is constant, pure forward advance. 2519 /* Window is constant, pure forward advance.
@@ -2623,7 +2629,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
2623 switch(opcode) { 2629 switch(opcode) {
2624 case TCPOPT_MSS: 2630 case TCPOPT_MSS:
2625 if(opsize==TCPOLEN_MSS && th->syn && !estab) { 2631 if(opsize==TCPOLEN_MSS && th->syn && !estab) {
2626 u16 in_mss = ntohs(get_unaligned((__u16 *)ptr)); 2632 u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
2627 if (in_mss) { 2633 if (in_mss) {
2628 if (opt_rx->user_mss && opt_rx->user_mss < in_mss) 2634 if (opt_rx->user_mss && opt_rx->user_mss < in_mss)
2629 in_mss = opt_rx->user_mss; 2635 in_mss = opt_rx->user_mss;
@@ -2651,8 +2657,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
2651 if ((estab && opt_rx->tstamp_ok) || 2657 if ((estab && opt_rx->tstamp_ok) ||
2652 (!estab && sysctl_tcp_timestamps)) { 2658 (!estab && sysctl_tcp_timestamps)) {
2653 opt_rx->saw_tstamp = 1; 2659 opt_rx->saw_tstamp = 1;
2654 opt_rx->rcv_tsval = ntohl(get_unaligned((__u32 *)ptr)); 2660 opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr));
2655 opt_rx->rcv_tsecr = ntohl(get_unaligned((__u32 *)(ptr+4))); 2661 opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4)));
2656 } 2662 }
2657 } 2663 }
2658 break; 2664 break;
@@ -2689,8 +2695,8 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
2689 return 0; 2695 return 0;
2690 } else if (tp->rx_opt.tstamp_ok && 2696 } else if (tp->rx_opt.tstamp_ok &&
2691 th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { 2697 th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
2692 __u32 *ptr = (__u32 *)(th + 1); 2698 __be32 *ptr = (__be32 *)(th + 1);
2693 if (*ptr == ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) 2699 if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
2694 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { 2700 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
2695 tp->rx_opt.saw_tstamp = 1; 2701 tp->rx_opt.saw_tstamp = 1;
2696 ++ptr; 2702 ++ptr;
@@ -3542,7 +3548,8 @@ void tcp_cwnd_application_limited(struct sock *sk)
3542 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open && 3548 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
3543 sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { 3549 sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
3544 /* Limited by application or receiver window. */ 3550 /* Limited by application or receiver window. */
3545 u32 win_used = max(tp->snd_cwnd_used, 2U); 3551 u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
3552 u32 win_used = max(tp->snd_cwnd_used, init_win);
3546 if (win_used < tp->snd_cwnd) { 3553 if (win_used < tp->snd_cwnd) {
3547 tp->snd_ssthresh = tcp_current_ssthresh(sk); 3554 tp->snd_ssthresh = tcp_current_ssthresh(sk);
3548 tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1; 3555 tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
@@ -3904,10 +3911,10 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
3904 3911
3905 /* Check timestamp */ 3912 /* Check timestamp */
3906 if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) { 3913 if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
3907 __u32 *ptr = (__u32 *)(th + 1); 3914 __be32 *ptr = (__be32 *)(th + 1);
3908 3915
3909 /* No? Slow path! */ 3916 /* No? Slow path! */
3910 if (*ptr != ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) 3917 if (*ptr != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
3911 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) 3918 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP))
3912 goto slow_path; 3919 goto slow_path;
3913 3920
@@ -4178,8 +4185,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
4178 */ 4185 */
4179 4186
4180 TCP_ECN_rcv_synack(tp, th); 4187 TCP_ECN_rcv_synack(tp, th);
4181 if (tp->ecn_flags&TCP_ECN_OK)
4182 sock_set_flag(sk, SOCK_NO_LARGESEND);
4183 4188
4184 tp->snd_wl1 = TCP_SKB_CB(skb)->seq; 4189 tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
4185 tcp_ack(sk, skb, FLAG_SLOWPATH); 4190 tcp_ack(sk, skb, FLAG_SLOWPATH);
@@ -4322,8 +4327,6 @@ discard:
4322 tp->max_window = tp->snd_wnd; 4327 tp->max_window = tp->snd_wnd;
4323 4328
4324 TCP_ECN_rcv_syn(tp, th); 4329 TCP_ECN_rcv_syn(tp, th);
4325 if (tp->ecn_flags&TCP_ECN_OK)
4326 sock_set_flag(sk, SOCK_NO_LARGESEND);
4327 4330
4328 tcp_mtup_init(sk); 4331 tcp_mtup_init(sk);
4329 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); 4332 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);