commit    1d6ae775d7a948c9575658eb41184fd2e506c0df
tree      8128a28e89d82f13bb8e3a2160382240c66e2816 /net/ipv4/tcp_output.c
parent    739cdbf1d8f0739b80035b80d69d871e33749b86
parent    caf39e87cc1182f7dae84eefc43ca14d54c78ef9
author    Jeff Garzik <jgarzik@pobox.com>  2005-09-08 05:43:49 -0400
committer Jeff Garzik <jgarzik@pobox.com>  2005-09-08 05:43:49 -0400
Merge /spare/repo/linux-2.6/
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--  net/ipv4/tcp_output.c  198
1 files changed, 100 insertions, 98 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index dd30dd137b74..6094db5e11be 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -105,18 +105,19 @@ static __u16 tcp_advertise_mss(struct sock *sk)
 
 /* RFC2861. Reset CWND after idle period longer RTO to "restart window".
  * This is the first part of cwnd validation mechanism. */
-static void tcp_cwnd_restart(struct tcp_sock *tp, struct dst_entry *dst)
+static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	s32 delta = tcp_time_stamp - tp->lsndtime;
 	u32 restart_cwnd = tcp_init_cwnd(tp, dst);
 	u32 cwnd = tp->snd_cwnd;
 
-	tcp_ca_event(tp, CA_EVENT_CWND_RESTART);
+	tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
 
-	tp->snd_ssthresh = tcp_current_ssthresh(tp);
+	tp->snd_ssthresh = tcp_current_ssthresh(sk);
 	restart_cwnd = min(restart_cwnd, cwnd);
 
-	while ((delta -= tp->rto) > 0 && cwnd > restart_cwnd)
+	while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
 		cwnd >>= 1;
 	tp->snd_cwnd = max(cwnd, restart_cwnd);
 	tp->snd_cwnd_stamp = tcp_time_stamp;
@@ -126,26 +127,25 @@ static void tcp_cwnd_restart(struct tcp_sock *tp, struct dst_entry *dst)
 static inline void tcp_event_data_sent(struct tcp_sock *tp,
 				       struct sk_buff *skb, struct sock *sk)
 {
-	u32 now = tcp_time_stamp;
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	const u32 now = tcp_time_stamp;
 
-	if (!tp->packets_out && (s32)(now - tp->lsndtime) > tp->rto)
-		tcp_cwnd_restart(tp, __sk_dst_get(sk));
+	if (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto)
+		tcp_cwnd_restart(sk, __sk_dst_get(sk));
 
 	tp->lsndtime = now;
 
 	/* If it is a reply for ato after last received
 	 * packet, enter pingpong mode.
 	 */
-	if ((u32)(now - tp->ack.lrcvtime) < tp->ack.ato)
-		tp->ack.pingpong = 1;
+	if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
+		icsk->icsk_ack.pingpong = 1;
 }
 
 static __inline__ void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	tcp_dec_quickack_mode(tp, pkts);
-	tcp_clear_xmit_timer(sk, TCP_TIME_DACK);
+	tcp_dec_quickack_mode(sk, pkts);
+	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
 }
 
 /* Determine a window scaling and initial window to offer.
@@ -265,6 +265,7 @@ static __inline__ u16 tcp_select_window(struct sock *sk)
 static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 {
 	if (skb != NULL) {
+		const struct inet_connection_sock *icsk = inet_csk(sk);
 		struct inet_sock *inet = inet_sk(sk);
 		struct tcp_sock *tp = tcp_sk(sk);
 		struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
@@ -280,8 +281,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 #define SYSCTL_FLAG_SACK 0x4
 
 		/* If congestion control is doing timestamping */
-		if (tp->ca_ops->rtt_sample)
-			do_gettimeofday(&skb->stamp);
+		if (icsk->icsk_ca_ops->rtt_sample)
+			__net_timestamp(skb);
 
 		sysctl_flags = 0;
 		if (tcb->flags & TCPCB_FLAG_SYN) {
@@ -308,7 +309,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 		}
 
 		if (tcp_packets_in_flight(tp) == 0)
-			tcp_ca_event(tp, CA_EVENT_TX_START);
+			tcp_ca_event(sk, CA_EVENT_TX_START);
 
 		th = (struct tcphdr *) skb_push(skb, tcp_header_size);
 		skb->h.th = th;
@@ -366,7 +367,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 		if (err <= 0)
 			return err;
 
-		tcp_enter_cwr(tp);
+		tcp_enter_cwr(sk);
 
 		/* NET_XMIT_CN is special. It does not guarantee,
 		 * that this packet is lost. It tells that device
@@ -427,11 +428,11 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned
  * packet to the list. This won't be called frequently, I hope.
  * Remember, these are still headerless SKBs at this point.
  */
-static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now)
+int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *buff;
-	int nsize;
+	int nsize, old_factor;
 	u16 flags;
 
 	nsize = skb_headlen(skb) - len;
@@ -482,30 +483,41 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned
 	 * skbs, which it never sent before. --ANK
 	 */
 	TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
-	buff->stamp = skb->stamp;
+	buff->tstamp = skb->tstamp;
 
 	if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
 		tp->lost_out -= tcp_skb_pcount(skb);
 		tp->left_out -= tcp_skb_pcount(skb);
 	}
 
+	old_factor = tcp_skb_pcount(skb);
+
 	/* Fix up tso_factor for both original and new SKB. */
 	tcp_set_skb_tso_segs(sk, skb, mss_now);
 	tcp_set_skb_tso_segs(sk, buff, mss_now);
 
-	if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
-		tp->lost_out += tcp_skb_pcount(skb);
-		tp->left_out += tcp_skb_pcount(skb);
-	}
+	/* If this packet has been sent out already, we must
+	 * adjust the various packet counters.
+	 */
+	if (after(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) {
+		int diff = old_factor - tcp_skb_pcount(skb) -
+			tcp_skb_pcount(buff);
 
-	if (TCP_SKB_CB(buff)->sacked&TCPCB_LOST) {
-		tp->lost_out += tcp_skb_pcount(buff);
-		tp->left_out += tcp_skb_pcount(buff);
+		tp->packets_out -= diff;
+		if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
+			tp->lost_out -= diff;
+			tp->left_out -= diff;
+		}
+		if (diff > 0) {
+			tp->fackets_out -= diff;
+			if ((int)tp->fackets_out < 0)
+				tp->fackets_out = 0;
+		}
 	}
 
 	/* Link BUFF into the send queue. */
 	skb_header_release(buff);
-	__skb_append(skb, buff);
+	__skb_append(skb, buff, &sk->sk_write_queue);
 
 	return 0;
 }
@@ -696,7 +708,7 @@ static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
 		if (tp->packets_out > tp->snd_cwnd_used)
 			tp->snd_cwnd_used = tp->packets_out;
 
-		if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto)
+		if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
 			tcp_cwnd_application_limited(sk);
 	}
 }
@@ -893,7 +905,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 
 	/* Link BUFF into the send queue. */
 	skb_header_release(buff);
-	__skb_append(skb, buff);
+	__skb_append(skb, buff, &sk->sk_write_queue);
 
 	return 0;
 }
@@ -905,12 +917,13 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
  */
 static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
 {
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 send_win, cong_win, limit, in_flight;
 
 	if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
 		return 0;
 
-	if (tp->ca_state != TCP_CA_Open)
+	if (icsk->icsk_ca_state != TCP_CA_Open)
 		return 0;
 
 	in_flight = tcp_packets_in_flight(tp);
@@ -1147,6 +1160,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
  */
 u32 __tcp_select_window(struct sock *sk)
 {
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	/* MSS for the peer's data. Previous verions used mss_clamp
 	 * here. I don't know if the value based on our guesses
@@ -1154,7 +1168,7 @@ u32 __tcp_select_window(struct sock *sk)
 	 * but may be worse for the performance because of rcv_mss
 	 * fluctuations. --SAW 1998/11/1
 	 */
-	int mss = tp->ack.rcv_mss;
+	int mss = icsk->icsk_ack.rcv_mss;
 	int free_space = tcp_space(sk);
 	int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk));
 	int window;
@@ -1163,7 +1177,7 @@ u32 __tcp_select_window(struct sock *sk)
 		mss = full_space;
 
 	if (free_space < full_space/2) {
-		tp->ack.quick = 0;
+		icsk->icsk_ack.quick = 0;
 
 		if (tcp_memory_pressure)
 			tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss);
@@ -1238,7 +1252,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 	       tcp_skb_pcount(next_skb) != 1);
 
 	/* Ok. We will be able to collapse the packet. */
-	__skb_unlink(next_skb, next_skb->list);
+	__skb_unlink(next_skb, &sk->sk_write_queue);
 
 	memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
 
@@ -1286,6 +1300,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
  */
 void tcp_simple_retransmit(struct sock *sk)
 {
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 	unsigned int mss = tcp_current_mss(sk, 0);
@@ -1316,12 +1331,12 @@ void tcp_simple_retransmit(struct sock *sk)
 	 * in network, but units changed and effective
 	 * cwnd/ssthresh really reduced now.
 	 */
-	if (tp->ca_state != TCP_CA_Loss) {
+	if (icsk->icsk_ca_state != TCP_CA_Loss) {
 		tp->high_seq = tp->snd_nxt;
-		tp->snd_ssthresh = tcp_current_ssthresh(tp);
+		tp->snd_ssthresh = tcp_current_ssthresh(sk);
 		tp->prior_ssthresh = 0;
 		tp->undo_marker = 0;
-		tcp_set_ca_state(tp, TCP_CA_Loss);
+		tcp_set_ca_state(sk, TCP_CA_Loss);
 	}
 	tcp_xmit_retransmit_queue(sk);
 }
@@ -1346,12 +1361,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
 		if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
 			BUG();
-
-		if (sk->sk_route_caps & NETIF_F_TSO) {
-			sk->sk_route_caps &= ~NETIF_F_TSO;
-			sock_set_flag(sk, SOCK_NO_LARGESEND);
-		}
-
 		if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
 			return -ENOMEM;
 	}
@@ -1366,22 +1375,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 		return -EAGAIN;
 
 	if (skb->len > cur_mss) {
-		int old_factor = tcp_skb_pcount(skb);
-		int diff;
-
 		if (tcp_fragment(sk, skb, cur_mss, cur_mss))
 			return -ENOMEM; /* We'll try again later. */
-
-		/* New SKB created, account for it. */
-		diff = old_factor - tcp_skb_pcount(skb) -
-		       tcp_skb_pcount(skb->next);
-		tp->packets_out -= diff;
-
-		if (diff > 0) {
-			tp->fackets_out -= diff;
-			if ((int)tp->fackets_out < 0)
-				tp->fackets_out = 0;
-		}
 	}
 
 	/* Collapse two adjacent packets if worthwhile and we can. */
@@ -1461,6 +1456,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
  */
 void tcp_xmit_retransmit_queue(struct sock *sk)
 {
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 	int packet_cnt = tp->lost_out;
@@ -1484,14 +1480,16 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 			if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
 				if (tcp_retransmit_skb(sk, skb))
 					return;
-				if (tp->ca_state != TCP_CA_Loss)
+				if (icsk->icsk_ca_state != TCP_CA_Loss)
 					NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS);
 				else
 					NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS);
 
 				if (skb ==
 				    skb_peek(&sk->sk_write_queue))
-					tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+					inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+								  inet_csk(sk)->icsk_rto,
+								  TCP_RTO_MAX);
 			}
 
 			packet_cnt -= tcp_skb_pcount(skb);
@@ -1504,7 +1502,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	/* OK, demanded retransmission is finished. */
 
 	/* Forward retransmissions are possible only during Recovery. */
-	if (tp->ca_state != TCP_CA_Recovery)
+	if (icsk->icsk_ca_state != TCP_CA_Recovery)
 		return;
 
 	/* No forward retransmissions in Reno are possible. */
@@ -1544,7 +1542,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 			break;
 
 		if (skb == skb_peek(&sk->sk_write_queue))
-			tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+						  inet_csk(sk)->icsk_rto,
+						  TCP_RTO_MAX);
 
 		NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS);
 	}
@@ -1573,7 +1573,7 @@ void tcp_send_fin(struct sock *sk)
 	} else {
 		/* Socket is locked, keep trying until memory is available. */
 		for (;;) {
-			skb = alloc_skb(MAX_TCP_HEADER, GFP_KERNEL);
+			skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_KERNEL);
 			if (skb)
 				break;
 			yield();
@@ -1780,8 +1780,8 @@ static inline void tcp_connect_init(struct sock *sk)
 	tp->rcv_wup = 0;
 	tp->copied_seq = 0;
 
-	tp->rto = TCP_TIMEOUT_INIT;
-	tp->retransmits = 0;
+	inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
+	inet_csk(sk)->icsk_retransmits = 0;
 	tcp_clear_retrans(tp);
 }
 
@@ -1795,7 +1795,7 @@ int tcp_connect(struct sock *sk)
 
 	tcp_connect_init(sk);
 
-	buff = alloc_skb(MAX_TCP_HEADER + 15, sk->sk_allocation);
+	buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
 	if (unlikely(buff == NULL))
 		return -ENOBUFS;
 
@@ -1824,7 +1824,8 @@ int tcp_connect(struct sock *sk)
 	TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
 
 	/* Timer for repeating the SYN until an answer. */
-	tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+				  inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
 	return 0;
 }
 
@@ -1834,20 +1835,21 @@ int tcp_connect(struct sock *sk)
  */
 void tcp_send_delayed_ack(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-	int ato = tp->ack.ato;
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	int ato = icsk->icsk_ack.ato;
 	unsigned long timeout;
 
 	if (ato > TCP_DELACK_MIN) {
+		const struct tcp_sock *tp = tcp_sk(sk);
 		int max_ato = HZ/2;
 
-		if (tp->ack.pingpong || (tp->ack.pending&TCP_ACK_PUSHED))
+		if (icsk->icsk_ack.pingpong || (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
 			max_ato = TCP_DELACK_MAX;
 
 		/* Slow path, intersegment interval is "high". */
 
 		/* If some rtt estimate is known, use it to bound delayed ack.
-		 * Do not use tp->rto here, use results of rtt measurements
+		 * Do not use inet_csk(sk)->icsk_rto here, use results of rtt measurements
 		 * directly.
 		 */
 		if (tp->srtt) {
@@ -1864,21 +1866,22 @@ void tcp_send_delayed_ack(struct sock *sk)
 	timeout = jiffies + ato;
 
 	/* Use new timeout only if there wasn't a older one earlier. */
-	if (tp->ack.pending&TCP_ACK_TIMER) {
+	if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
 		/* If delack timer was blocked or is about to expire,
 		 * send ACK now.
 		 */
-		if (tp->ack.blocked || time_before_eq(tp->ack.timeout, jiffies+(ato>>2))) {
+		if (icsk->icsk_ack.blocked ||
+		    time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
 			tcp_send_ack(sk);
 			return;
 		}
 
-		if (!time_before(timeout, tp->ack.timeout))
-			timeout = tp->ack.timeout;
+		if (!time_before(timeout, icsk->icsk_ack.timeout))
+			timeout = icsk->icsk_ack.timeout;
 	}
-	tp->ack.pending |= TCP_ACK_SCHED|TCP_ACK_TIMER;
-	tp->ack.timeout = timeout;
-	sk_reset_timer(sk, &tp->delack_timer, timeout);
+	icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
+	icsk->icsk_ack.timeout = timeout;
+	sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
 }
 
 /* This routine sends an ack and also updates the window. */
@@ -1895,9 +1898,10 @@ void tcp_send_ack(struct sock *sk)
 	 */
 	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
 	if (buff == NULL) {
-		tcp_schedule_ack(tp);
-		tp->ack.ato = TCP_ATO_MIN;
-		tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX);
+		inet_csk_schedule_ack(sk);
+		inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
+		inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+					  TCP_DELACK_MAX, TCP_RTO_MAX);
 		return;
 	}
 
@@ -1980,12 +1984,6 @@ int tcp_write_wakeup(struct sock *sk)
 			TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
 			if (tcp_fragment(sk, skb, seg_size, mss))
 				return -1;
-			/* SWS override triggered forced fragmentation.
-			 * Disable TSO, the connection is too sick. */
-			if (sk->sk_route_caps & NETIF_F_TSO) {
-				sock_set_flag(sk, SOCK_NO_LARGESEND);
-				sk->sk_route_caps &= ~NETIF_F_TSO;
-			}
 		} else if (!tcp_skb_pcount(skb))
 			tcp_set_skb_tso_segs(sk, skb, mss);
 
@@ -2011,6 +2009,7 @@ int tcp_write_wakeup(struct sock *sk)
  */
 void tcp_send_probe0(struct sock *sk)
 {
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	int err;
 
@@ -2018,28 +2017,31 @@ void tcp_send_probe0(struct sock *sk)
 
 	if (tp->packets_out || !sk->sk_send_head) {
 		/* Cancel probe timer, if it is not required. */
-		tp->probes_out = 0;
-		tp->backoff = 0;
+		icsk->icsk_probes_out = 0;
+		icsk->icsk_backoff = 0;
 		return;
 	}
 
 	if (err <= 0) {
-		if (tp->backoff < sysctl_tcp_retries2)
-			tp->backoff++;
-		tp->probes_out++;
-		tcp_reset_xmit_timer (sk, TCP_TIME_PROBE0,
-				      min(tp->rto << tp->backoff, TCP_RTO_MAX));
+		if (icsk->icsk_backoff < sysctl_tcp_retries2)
+			icsk->icsk_backoff++;
+		icsk->icsk_probes_out++;
+		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
+					  min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
+					  TCP_RTO_MAX);
 	} else {
 		/* If packet was not sent due to local congestion,
-		 * do not backoff and do not remember probes_out.
+		 * do not backoff and do not remember icsk_probes_out.
 		 * Let local senders to fight for local resources.
 		 *
 		 * Use accumulated backoff yet.
 		 */
-		if (!tp->probes_out)
-			tp->probes_out=1;
-		tcp_reset_xmit_timer (sk, TCP_TIME_PROBE0,
-				      min(tp->rto << tp->backoff, TCP_RESOURCE_PROBE_INTERVAL));
+		if (!icsk->icsk_probes_out)
+			icsk->icsk_probes_out = 1;
+		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
+					  min(icsk->icsk_rto << icsk->icsk_backoff,
+					      TCP_RESOURCE_PROBE_INTERVAL),
+					  TCP_RTO_MAX);
 	}
 }
 