path: root/net/ipv4/tcp_output.c
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--  net/ipv4/tcp_output.c | 198
1 file changed, 101 insertions(+), 97 deletions(-)
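The patch below converts tcp_output.c from poking at sk->sk_send_head and sk->sk_write_queue directly to going through a set of write-queue helpers (tcp_send_head(), tcp_advance_send_head(), tcp_write_queue_next() and friends), and drops the now-redundant struct tcp_sock * argument from several functions, deriving it via tcp_sk(sk) instead. The helpers themselves are not defined in this file; judging purely from the open-coded logic removed in the hunks below, they are presumably small inlines in a shared header (likely include/net/tcp.h) along these lines:

/* Hedged sketch, not the authoritative definitions: reconstructed from
 * the open-coded queue handling this diff removes.  Assumes the usual
 * kernel definitions of struct sock and struct sk_buff are in scope.
 */
static inline struct sk_buff *tcp_send_head(struct sock *sk)
{
	return sk->sk_send_head;
}

static inline struct sk_buff *tcp_write_queue_next(struct sock *sk,
						   struct sk_buff *skb)
{
	return skb->next;	/* replaces the open-coded skb->next walks */
}

static inline void tcp_advance_send_head(struct sock *sk, struct sk_buff *skb)
{
	/* mirrors the logic removed from update_send_head() below */
	sk->sk_send_head = skb->next;
	if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue)
		sk->sk_send_head = NULL;
}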
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3c24881f2a65..e70a6840cb64 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -62,14 +62,13 @@ int sysctl_tcp_base_mss __read_mostly = 512;
62/* By default, RFC2861 behavior. */ 62/* By default, RFC2861 behavior. */
63int sysctl_tcp_slow_start_after_idle __read_mostly = 1; 63int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
64 64
65static void update_send_head(struct sock *sk, struct tcp_sock *tp, 65static void update_send_head(struct sock *sk, struct sk_buff *skb)
66 struct sk_buff *skb)
67{ 66{
68 sk->sk_send_head = skb->next; 67 struct tcp_sock *tp = tcp_sk(sk);
69 if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue) 68
70 sk->sk_send_head = NULL; 69 tcp_advance_send_head(sk, skb);
71 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; 70 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
72 tcp_packets_out_inc(sk, tp, skb); 71 tcp_packets_out_inc(sk, skb);
73} 72}
74 73
75/* SND.NXT, if window was not shrunk. 74/* SND.NXT, if window was not shrunk.
@@ -78,8 +77,10 @@ static void update_send_head(struct sock *sk, struct tcp_sock *tp,
78 * Anything in between SND.UNA...SND.UNA+SND.WND also can be already 77 * Anything in between SND.UNA...SND.UNA+SND.WND also can be already
79 * invalid. OK, let's make this for now: 78 * invalid. OK, let's make this for now:
80 */ 79 */
81static inline __u32 tcp_acceptable_seq(struct sock *sk, struct tcp_sock *tp) 80static inline __u32 tcp_acceptable_seq(struct sock *sk)
82{ 81{
82 struct tcp_sock *tp = tcp_sk(sk);
83
83 if (!before(tp->snd_una+tp->snd_wnd, tp->snd_nxt)) 84 if (!before(tp->snd_una+tp->snd_wnd, tp->snd_nxt))
84 return tp->snd_nxt; 85 return tp->snd_nxt;
85 else 86 else
@@ -238,7 +239,7 @@ static u16 tcp_select_window(struct sock *sk)
238 u32 new_win = __tcp_select_window(sk); 239 u32 new_win = __tcp_select_window(sk);
239 240
240 /* Never shrink the offered window */ 241 /* Never shrink the offered window */
241 if(new_win < cur_win) { 242 if (new_win < cur_win) {
242 /* Danger Will Robinson! 243 /* Danger Will Robinson!
243 * Don't update rcv_wup/rcv_wnd here or else 244 * Don't update rcv_wup/rcv_wnd here or else
244 * we will not be able to advertise a zero 245 * we will not be able to advertise a zero
@@ -289,10 +290,12 @@ static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
289 (TCPOPT_SACK << 8) | 290 (TCPOPT_SACK << 8) |
290 (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks * 291 (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks *
291 TCPOLEN_SACK_PERBLOCK))); 292 TCPOLEN_SACK_PERBLOCK)));
292 for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) { 293
294 for (this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
293 *ptr++ = htonl(sp[this_sack].start_seq); 295 *ptr++ = htonl(sp[this_sack].start_seq);
294 *ptr++ = htonl(sp[this_sack].end_seq); 296 *ptr++ = htonl(sp[this_sack].end_seq);
295 } 297 }
298
296 if (tp->rx_opt.dsack) { 299 if (tp->rx_opt.dsack) {
297 tp->rx_opt.dsack = 0; 300 tp->rx_opt.dsack = 0;
298 tp->rx_opt.eff_sacks--; 301 tp->rx_opt.eff_sacks--;
@@ -337,7 +340,7 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
337 */ 340 */
338 *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); 341 *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
339 if (ts) { 342 if (ts) {
340 if(sack) 343 if (sack)
341 *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | 344 *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
342 (TCPOLEN_SACK_PERM << 16) | 345 (TCPOLEN_SACK_PERM << 16) |
343 (TCPOPT_TIMESTAMP << 8) | 346 (TCPOPT_TIMESTAMP << 8) |
@@ -349,7 +352,7 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
349 TCPOLEN_TIMESTAMP); 352 TCPOLEN_TIMESTAMP);
350 *ptr++ = htonl(tstamp); /* TSVAL */ 353 *ptr++ = htonl(tstamp); /* TSVAL */
351 *ptr++ = htonl(ts_recent); /* TSECR */ 354 *ptr++ = htonl(ts_recent); /* TSECR */
352 } else if(sack) 355 } else if (sack)
353 *ptr++ = htonl((TCPOPT_NOP << 24) | 356 *ptr++ = htonl((TCPOPT_NOP << 24) |
354 (TCPOPT_NOP << 16) | 357 (TCPOPT_NOP << 16) |
355 (TCPOPT_SACK_PERM << 8) | 358 (TCPOPT_SACK_PERM << 8) |
@@ -406,7 +409,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
406 /* If congestion control is doing timestamping, we must 409 /* If congestion control is doing timestamping, we must
407 * take such a timestamp before we potentially clone/copy. 410 * take such a timestamp before we potentially clone/copy.
408 */ 411 */
409 if (icsk->icsk_ca_ops->rtt_sample) 412 if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
410 __net_timestamp(skb); 413 __net_timestamp(skb);
411 414
412 if (likely(clone_it)) { 415 if (likely(clone_it)) {
@@ -430,7 +433,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
430 sysctl_flags = 0; 433 sysctl_flags = 0;
431 if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { 434 if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
432 tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS; 435 tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
433 if(sysctl_tcp_timestamps) { 436 if (sysctl_tcp_timestamps) {
434 tcp_header_size += TCPOLEN_TSTAMP_ALIGNED; 437 tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
435 sysctl_flags |= SYSCTL_FLAG_TSTAMPS; 438 sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
436 } 439 }
@@ -465,11 +468,12 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
465 tcp_header_size += TCPOLEN_MD5SIG_ALIGNED; 468 tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
466#endif 469#endif
467 470
468 th = (struct tcphdr *) skb_push(skb, tcp_header_size); 471 skb_push(skb, tcp_header_size);
469 skb->h.th = th; 472 skb_reset_transport_header(skb);
470 skb_set_owner_w(skb, sk); 473 skb_set_owner_w(skb, sk);
471 474
472 /* Build TCP header and checksum it. */ 475 /* Build TCP header and checksum it. */
476 th = tcp_hdr(skb);
473 th->source = inet->sport; 477 th->source = inet->sport;
474 th->dest = inet->dport; 478 th->dest = inet->dport;
475 th->seq = htonl(tcb->seq); 479 th->seq = htonl(tcb->seq);
@@ -515,7 +519,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
515 md5 ? &md5_hash_location : 519 md5 ? &md5_hash_location :
516#endif 520#endif
517 NULL); 521 NULL);
518 TCP_ECN_send(sk, tp, skb, tcp_header_size); 522 TCP_ECN_send(sk, skb, tcp_header_size);
519 } 523 }
520 524
521#ifdef CONFIG_TCP_MD5SIG 525#ifdef CONFIG_TCP_MD5SIG
@@ -524,7 +528,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
524 tp->af_specific->calc_md5_hash(md5_hash_location, 528 tp->af_specific->calc_md5_hash(md5_hash_location,
525 md5, 529 md5,
526 sk, NULL, NULL, 530 sk, NULL, NULL,
527 skb->h.th, 531 tcp_hdr(skb),
528 sk->sk_protocol, 532 sk->sk_protocol,
529 skb->len); 533 skb->len);
530 } 534 }
@@ -545,7 +549,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
545 if (likely(err <= 0)) 549 if (likely(err <= 0))
546 return err; 550 return err;
547 551
548 tcp_enter_cwr(sk); 552 tcp_enter_cwr(sk, 1);
549 553
550 return net_xmit_eval(err); 554 return net_xmit_eval(err);
551 555
@@ -567,12 +571,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
567 /* Advance write_seq and place onto the write_queue. */ 571 /* Advance write_seq and place onto the write_queue. */
568 tp->write_seq = TCP_SKB_CB(skb)->end_seq; 572 tp->write_seq = TCP_SKB_CB(skb)->end_seq;
569 skb_header_release(skb); 573 skb_header_release(skb);
570 __skb_queue_tail(&sk->sk_write_queue, skb); 574 tcp_add_write_queue_tail(sk, skb);
571 sk_charge_skb(sk, skb); 575 sk_charge_skb(sk, skb);
572
573 /* Queue it, remembering where we must start sending. */
574 if (sk->sk_send_head == NULL)
575 sk->sk_send_head = skb;
576} 576}
577 577
578static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) 578static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
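The two lines deleted from tcp_queue_skb() above ("remembering where we must start sending") are presumably what tcp_add_write_queue_tail() now does on the caller's behalf, roughly:

/* Sketch only: tail insertion plus the old "remember the send head" check. */
static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
{
	__skb_queue_tail(&sk->sk_write_queue, skb);

	/* Queue it, remembering where we must start sending. */
	if (sk->sk_send_head == NULL)
		sk->sk_send_head = skb;
}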
@@ -705,7 +705,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
705 705
706 /* Link BUFF into the send queue. */ 706 /* Link BUFF into the send queue. */
707 skb_header_release(buff); 707 skb_header_release(buff);
708 __skb_append(skb, buff, &sk->sk_write_queue); 708 tcp_insert_write_queue_after(skb, buff, sk);
709 709
710 return 0; 710 return 0;
711} 711}
@@ -736,7 +736,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
736 } 736 }
737 skb_shinfo(skb)->nr_frags = k; 737 skb_shinfo(skb)->nr_frags = k;
738 738
739 skb->tail = skb->data; 739 skb_reset_tail_pointer(skb);
740 skb->data_len -= len; 740 skb->data_len -= len;
741 skb->len = skb->data_len; 741 skb->len = skb->data_len;
742} 742}
@@ -930,8 +930,9 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
930 930
931/* Congestion window validation. (RFC2861) */ 931/* Congestion window validation. (RFC2861) */
932 932
933static void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp) 933static void tcp_cwnd_validate(struct sock *sk)
934{ 934{
935 struct tcp_sock *tp = tcp_sk(sk);
935 __u32 packets_out = tp->packets_out; 936 __u32 packets_out = tp->packets_out;
936 937
937 if (packets_out >= tp->snd_cwnd) { 938 if (packets_out >= tp->snd_cwnd) {
@@ -1056,7 +1057,7 @@ static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, uns
1056 return !after(end_seq, tp->snd_una + tp->snd_wnd); 1057 return !after(end_seq, tp->snd_una + tp->snd_wnd);
1057} 1058}
1058 1059
1059/* This checks if the data bearing packet SKB (usually sk->sk_send_head) 1060/* This checks if the data bearing packet SKB (usually tcp_send_head(sk))
1060 * should be put on the wire right now. If so, it returns the number of 1061 * should be put on the wire right now. If so, it returns the number of
1061 * packets allowed by the congestion window. 1062 * packets allowed by the congestion window.
1062 */ 1063 */
@@ -1079,15 +1080,10 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
1079 return cwnd_quota; 1080 return cwnd_quota;
1080} 1081}
1081 1082
1082static inline int tcp_skb_is_last(const struct sock *sk, 1083int tcp_may_send_now(struct sock *sk)
1083 const struct sk_buff *skb)
1084{
1085 return skb->next == (struct sk_buff *)&sk->sk_write_queue;
1086}
1087
1088int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp)
1089{ 1084{
1090 struct sk_buff *skb = sk->sk_send_head; 1085 struct tcp_sock *tp = tcp_sk(sk);
1086 struct sk_buff *skb = tcp_send_head(sk);
1091 1087
1092 return (skb && 1088 return (skb &&
1093 tcp_snd_test(sk, skb, tcp_current_mss(sk, 1), 1089 tcp_snd_test(sk, skb, tcp_current_mss(sk, 1),
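tcp_skb_is_last() is deleted here as a local static inline, yet later hunks (tcp_mtu_probe(), tcp_retransmit_skb()) still call it, so its definition presumably just moved into the shared header next to the other helpers, unchanged from the body removed above:

static inline int tcp_skb_is_last(const struct sock *sk,
				  const struct sk_buff *skb)
{
	return skb->next == (struct sk_buff *)&sk->sk_write_queue;
}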
@@ -1143,7 +1139,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1143 1139
1144 /* Link BUFF into the send queue. */ 1140 /* Link BUFF into the send queue. */
1145 skb_header_release(buff); 1141 skb_header_release(buff);
1146 __skb_append(skb, buff, &sk->sk_write_queue); 1142 tcp_insert_write_queue_after(skb, buff, sk);
1147 1143
1148 return 0; 1144 return 0;
1149} 1145}
@@ -1153,8 +1149,9 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1153 * 1149 *
1154 * This algorithm is from John Heffner. 1150 * This algorithm is from John Heffner.
1155 */ 1151 */
1156static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) 1152static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1157{ 1153{
1154 struct tcp_sock *tp = tcp_sk(sk);
1158 const struct inet_connection_sock *icsk = inet_csk(sk); 1155 const struct inet_connection_sock *icsk = inet_csk(sk);
1159 u32 send_win, cong_win, limit, in_flight; 1156 u32 send_win, cong_win, limit, in_flight;
1160 1157
@@ -1249,10 +1246,10 @@ static int tcp_mtu_probe(struct sock *sk)
1249 1246
1250 /* Have enough data in the send queue to probe? */ 1247 /* Have enough data in the send queue to probe? */
1251 len = 0; 1248 len = 0;
1252 if ((skb = sk->sk_send_head) == NULL) 1249 if ((skb = tcp_send_head(sk)) == NULL)
1253 return -1; 1250 return -1;
1254 while ((len += skb->len) < probe_size && !tcp_skb_is_last(sk, skb)) 1251 while ((len += skb->len) < probe_size && !tcp_skb_is_last(sk, skb))
1255 skb = skb->next; 1252 skb = tcp_write_queue_next(sk, skb);
1256 if (len < probe_size) 1253 if (len < probe_size)
1257 return -1; 1254 return -1;
1258 1255
@@ -1279,9 +1276,9 @@ static int tcp_mtu_probe(struct sock *sk)
1279 return -1; 1276 return -1;
1280 sk_charge_skb(sk, nskb); 1277 sk_charge_skb(sk, nskb);
1281 1278
1282 skb = sk->sk_send_head; 1279 skb = tcp_send_head(sk);
1283 __skb_insert(nskb, skb->prev, skb, &sk->sk_write_queue); 1280 tcp_insert_write_queue_before(nskb, skb, sk);
1284 sk->sk_send_head = nskb; 1281 tcp_advance_send_head(sk, skb);
1285 1282
1286 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; 1283 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
1287 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; 1284 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
@@ -1292,7 +1289,7 @@ static int tcp_mtu_probe(struct sock *sk)
1292 1289
1293 len = 0; 1290 len = 0;
1294 while (len < probe_size) { 1291 while (len < probe_size) {
1295 next = skb->next; 1292 next = tcp_write_queue_next(sk, skb);
1296 1293
1297 copy = min_t(int, skb->len, probe_size - len); 1294 copy = min_t(int, skb->len, probe_size - len);
1298 if (nskb->ip_summed) 1295 if (nskb->ip_summed)
@@ -1305,7 +1302,7 @@ static int tcp_mtu_probe(struct sock *sk)
1305 /* We've eaten all the data from this skb. 1302 /* We've eaten all the data from this skb.
1306 * Throw it away. */ 1303 * Throw it away. */
1307 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags; 1304 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags;
1308 __skb_unlink(skb, &sk->sk_write_queue); 1305 tcp_unlink_write_queue(skb, sk);
1309 sk_stream_free_skb(sk, skb); 1306 sk_stream_free_skb(sk, skb);
1310 } else { 1307 } else {
1311 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags & 1308 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags &
@@ -1333,7 +1330,7 @@ static int tcp_mtu_probe(struct sock *sk)
1333 /* Decrement cwnd here because we are sending 1330 /* Decrement cwnd here because we are sending
1334 * effectively two packets. */ 1331 * effectively two packets. */
1335 tp->snd_cwnd--; 1332 tp->snd_cwnd--;
1336 update_send_head(sk, tp, nskb); 1333 update_send_head(sk, nskb);
1337 1334
1338 icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len); 1335 icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
1339 tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq; 1336 tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
@@ -1377,7 +1374,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
1377 sent_pkts = 1; 1374 sent_pkts = 1;
1378 } 1375 }
1379 1376
1380 while ((skb = sk->sk_send_head)) { 1377 while ((skb = tcp_send_head(sk))) {
1381 unsigned int limit; 1378 unsigned int limit;
1382 1379
1383 tso_segs = tcp_init_tso_segs(sk, skb, mss_now); 1380 tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
@@ -1396,7 +1393,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
1396 nonagle : TCP_NAGLE_PUSH)))) 1393 nonagle : TCP_NAGLE_PUSH))))
1397 break; 1394 break;
1398 } else { 1395 } else {
1399 if (tcp_tso_should_defer(sk, tp, skb)) 1396 if (tcp_tso_should_defer(sk, skb))
1400 break; 1397 break;
1401 } 1398 }
1402 1399
@@ -1425,31 +1422,31 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
1425 /* Advance the send_head. This one is sent out. 1422 /* Advance the send_head. This one is sent out.
1426 * This call will increment packets_out. 1423 * This call will increment packets_out.
1427 */ 1424 */
1428 update_send_head(sk, tp, skb); 1425 update_send_head(sk, skb);
1429 1426
1430 tcp_minshall_update(tp, mss_now, skb); 1427 tcp_minshall_update(tp, mss_now, skb);
1431 sent_pkts++; 1428 sent_pkts++;
1432 } 1429 }
1433 1430
1434 if (likely(sent_pkts)) { 1431 if (likely(sent_pkts)) {
1435 tcp_cwnd_validate(sk, tp); 1432 tcp_cwnd_validate(sk);
1436 return 0; 1433 return 0;
1437 } 1434 }
1438 return !tp->packets_out && sk->sk_send_head; 1435 return !tp->packets_out && tcp_send_head(sk);
1439} 1436}
1440 1437
1441/* Push out any pending frames which were held back due to 1438/* Push out any pending frames which were held back due to
1442 * TCP_CORK or attempt at coalescing tiny packets. 1439 * TCP_CORK or attempt at coalescing tiny packets.
1443 * The socket must be locked by the caller. 1440 * The socket must be locked by the caller.
1444 */ 1441 */
1445void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp, 1442void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
1446 unsigned int cur_mss, int nonagle) 1443 int nonagle)
1447{ 1444{
1448 struct sk_buff *skb = sk->sk_send_head; 1445 struct sk_buff *skb = tcp_send_head(sk);
1449 1446
1450 if (skb) { 1447 if (skb) {
1451 if (tcp_write_xmit(sk, cur_mss, nonagle)) 1448 if (tcp_write_xmit(sk, cur_mss, nonagle))
1452 tcp_check_probe_timer(sk, tp); 1449 tcp_check_probe_timer(sk);
1453 } 1450 }
1454} 1451}
1455 1452
@@ -1459,7 +1456,7 @@ void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
1459void tcp_push_one(struct sock *sk, unsigned int mss_now) 1456void tcp_push_one(struct sock *sk, unsigned int mss_now)
1460{ 1457{
1461 struct tcp_sock *tp = tcp_sk(sk); 1458 struct tcp_sock *tp = tcp_sk(sk);
1462 struct sk_buff *skb = sk->sk_send_head; 1459 struct sk_buff *skb = tcp_send_head(sk);
1463 unsigned int tso_segs, cwnd_quota; 1460 unsigned int tso_segs, cwnd_quota;
1464 1461
1465 BUG_ON(!skb || skb->len < mss_now); 1462 BUG_ON(!skb || skb->len < mss_now);
@@ -1493,8 +1490,8 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
1493 TCP_SKB_CB(skb)->when = tcp_time_stamp; 1490 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1494 1491
1495 if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) { 1492 if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) {
1496 update_send_head(sk, tp, skb); 1493 update_send_head(sk, skb);
1497 tcp_cwnd_validate(sk, tp); 1494 tcp_cwnd_validate(sk);
1498 return; 1495 return;
1499 } 1496 }
1500 } 1497 }
@@ -1620,7 +1617,7 @@ u32 __tcp_select_window(struct sock *sk)
1620static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now) 1617static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now)
1621{ 1618{
1622 struct tcp_sock *tp = tcp_sk(sk); 1619 struct tcp_sock *tp = tcp_sk(sk);
1623 struct sk_buff *next_skb = skb->next; 1620 struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
1624 1621
1625 /* The first test we must make is that neither of these two 1622 /* The first test we must make is that neither of these two
1626 * SKB's are still referenced by someone else. 1623 * SKB's are still referenced by someone else.
@@ -1630,7 +1627,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
1630 u16 flags = TCP_SKB_CB(skb)->flags; 1627 u16 flags = TCP_SKB_CB(skb)->flags;
1631 1628
1632 /* Also punt if next skb has been SACK'd. */ 1629 /* Also punt if next skb has been SACK'd. */
1633 if(TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED) 1630 if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
1634 return; 1631 return;
1635 1632
1636 /* Next skb is out of window. */ 1633 /* Next skb is out of window. */
@@ -1652,9 +1649,11 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
1652 clear_all_retrans_hints(tp); 1649 clear_all_retrans_hints(tp);
1653 1650
1654 /* Ok. We will be able to collapse the packet. */ 1651 /* Ok. We will be able to collapse the packet. */
1655 __skb_unlink(next_skb, &sk->sk_write_queue); 1652 tcp_unlink_write_queue(next_skb, sk);
1656 1653
1657 memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); 1654 skb_copy_from_linear_data(next_skb,
1655 skb_put(skb, next_skb_size),
1656 next_skb_size);
1658 1657
1659 if (next_skb->ip_summed == CHECKSUM_PARTIAL) 1658 if (next_skb->ip_summed == CHECKSUM_PARTIAL)
1660 skb->ip_summed = CHECKSUM_PARTIAL; 1659 skb->ip_summed = CHECKSUM_PARTIAL;
@@ -1706,7 +1705,9 @@ void tcp_simple_retransmit(struct sock *sk)
1706 unsigned int mss = tcp_current_mss(sk, 0); 1705 unsigned int mss = tcp_current_mss(sk, 0);
1707 int lost = 0; 1706 int lost = 0;
1708 1707
1709 sk_stream_for_retrans_queue(skb, sk) { 1708 tcp_for_write_queue(skb, sk) {
1709 if (skb == tcp_send_head(sk))
1710 break;
1710 if (skb->len > mss && 1711 if (skb->len > mss &&
1711 !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) { 1712 !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
1712 if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) { 1713 if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
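The old sk_stream_for_retrans_queue() iterator stopped at sk_send_head implicitly; tcp_for_write_queue() appears to walk the whole write queue, which is why every converted loop in this patch adds an explicit "if (skb == tcp_send_head(sk)) break;". A plausible shape for the macros, assuming they follow the list walk the old code did by hand:

#define tcp_for_write_queue(skb, sk)					\
	for (skb = (sk)->sk_write_queue.next;				\
	     skb != (struct sk_buff *)&(sk)->sk_write_queue;		\
	     skb = skb->next)

#define tcp_for_write_queue_from(skb, sk)				\
	for (; skb != (struct sk_buff *)&(sk)->sk_write_queue;		\
	     skb = skb->next)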
@@ -1788,13 +1789,13 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1788 } 1789 }
1789 1790
1790 /* Collapse two adjacent packets if worthwhile and we can. */ 1791 /* Collapse two adjacent packets if worthwhile and we can. */
1791 if(!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) && 1792 if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
1792 (skb->len < (cur_mss >> 1)) && 1793 (skb->len < (cur_mss >> 1)) &&
1793 (skb->next != sk->sk_send_head) && 1794 (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) &&
1794 (skb->next != (struct sk_buff *)&sk->sk_write_queue) && 1795 (!tcp_skb_is_last(sk, skb)) &&
1795 (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(skb->next)->nr_frags == 0) && 1796 (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) &&
1796 (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(skb->next) == 1) && 1797 (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) &&
1797 (sysctl_tcp_retrans_collapse != 0)) 1798 (sysctl_tcp_retrans_collapse != 0))
1798 tcp_retrans_try_collapse(sk, skb, cur_mss); 1799 tcp_retrans_try_collapse(sk, skb, cur_mss);
1799 1800
1800 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) 1801 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
@@ -1804,9 +1805,9 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1804 * retransmit when old data is attached. So strip it off 1805 * retransmit when old data is attached. So strip it off
1805 * since it is cheap to do so and saves bytes on the network. 1806 * since it is cheap to do so and saves bytes on the network.
1806 */ 1807 */
1807 if(skb->len > 0 && 1808 if (skb->len > 0 &&
1808 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && 1809 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
1809 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { 1810 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
1810 if (!pskb_trim(skb, 0)) { 1811 if (!pskb_trim(skb, 0)) {
1811 TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1; 1812 TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
1812 skb_shinfo(skb)->gso_segs = 1; 1813 skb_shinfo(skb)->gso_segs = 1;
@@ -1872,15 +1873,17 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1872 skb = tp->retransmit_skb_hint; 1873 skb = tp->retransmit_skb_hint;
1873 packet_cnt = tp->retransmit_cnt_hint; 1874 packet_cnt = tp->retransmit_cnt_hint;
1874 }else{ 1875 }else{
1875 skb = sk->sk_write_queue.next; 1876 skb = tcp_write_queue_head(sk);
1876 packet_cnt = 0; 1877 packet_cnt = 0;
1877 } 1878 }
1878 1879
1879 /* First pass: retransmit lost packets. */ 1880 /* First pass: retransmit lost packets. */
1880 if (tp->lost_out) { 1881 if (tp->lost_out) {
1881 sk_stream_for_retrans_queue_from(skb, sk) { 1882 tcp_for_write_queue_from(skb, sk) {
1882 __u8 sacked = TCP_SKB_CB(skb)->sacked; 1883 __u8 sacked = TCP_SKB_CB(skb)->sacked;
1883 1884
1885 if (skb == tcp_send_head(sk))
1886 break;
1884 /* we could do better than to assign each time */ 1887 /* we could do better than to assign each time */
1885 tp->retransmit_skb_hint = skb; 1888 tp->retransmit_skb_hint = skb;
1886 tp->retransmit_cnt_hint = packet_cnt; 1889 tp->retransmit_cnt_hint = packet_cnt;
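tcp_write_queue_head() and tcp_write_queue_tail() replace the raw skb_peek()/skb_peek_tail() and sk->sk_write_queue.next accesses seen in this and the following hunks; they are presumably trivial wrappers of the form:

static inline struct sk_buff *tcp_write_queue_head(struct sock *sk)
{
	return skb_peek(&sk->sk_write_queue);
}

static inline struct sk_buff *tcp_write_queue_tail(struct sock *sk)
{
	return skb_peek_tail(&sk->sk_write_queue);
}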
@@ -1906,8 +1909,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1906 else 1909 else
1907 NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS); 1910 NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS);
1908 1911
1909 if (skb == 1912 if (skb == tcp_write_queue_head(sk))
1910 skb_peek(&sk->sk_write_queue))
1911 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 1913 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
1912 inet_csk(sk)->icsk_rto, 1914 inet_csk(sk)->icsk_rto,
1913 TCP_RTO_MAX); 1915 TCP_RTO_MAX);
@@ -1937,18 +1939,20 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1937 * segments to send. 1939 * segments to send.
1938 */ 1940 */
1939 1941
1940 if (tcp_may_send_now(sk, tp)) 1942 if (tcp_may_send_now(sk))
1941 return; 1943 return;
1942 1944
1943 if (tp->forward_skb_hint) { 1945 if (tp->forward_skb_hint) {
1944 skb = tp->forward_skb_hint; 1946 skb = tp->forward_skb_hint;
1945 packet_cnt = tp->forward_cnt_hint; 1947 packet_cnt = tp->forward_cnt_hint;
1946 } else{ 1948 } else{
1947 skb = sk->sk_write_queue.next; 1949 skb = tcp_write_queue_head(sk);
1948 packet_cnt = 0; 1950 packet_cnt = 0;
1949 } 1951 }
1950 1952
1951 sk_stream_for_retrans_queue_from(skb, sk) { 1953 tcp_for_write_queue_from(skb, sk) {
1954 if (skb == tcp_send_head(sk))
1955 break;
1952 tp->forward_cnt_hint = packet_cnt; 1956 tp->forward_cnt_hint = packet_cnt;
1953 tp->forward_skb_hint = skb; 1957 tp->forward_skb_hint = skb;
1954 1958
@@ -1973,7 +1977,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1973 break; 1977 break;
1974 } 1978 }
1975 1979
1976 if (skb == skb_peek(&sk->sk_write_queue)) 1980 if (skb == tcp_write_queue_head(sk))
1977 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 1981 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
1978 inet_csk(sk)->icsk_rto, 1982 inet_csk(sk)->icsk_rto,
1979 TCP_RTO_MAX); 1983 TCP_RTO_MAX);
@@ -1989,7 +1993,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1989void tcp_send_fin(struct sock *sk) 1993void tcp_send_fin(struct sock *sk)
1990{ 1994{
1991 struct tcp_sock *tp = tcp_sk(sk); 1995 struct tcp_sock *tp = tcp_sk(sk);
1992 struct sk_buff *skb = skb_peek_tail(&sk->sk_write_queue); 1996 struct sk_buff *skb = tcp_write_queue_tail(sk);
1993 int mss_now; 1997 int mss_now;
1994 1998
1995 /* Optimization, tack on the FIN if we have a queue of 1999 /* Optimization, tack on the FIN if we have a queue of
@@ -1998,7 +2002,7 @@ void tcp_send_fin(struct sock *sk)
1998 */ 2002 */
1999 mss_now = tcp_current_mss(sk, 1); 2003 mss_now = tcp_current_mss(sk, 1);
2000 2004
2001 if (sk->sk_send_head != NULL) { 2005 if (tcp_send_head(sk) != NULL) {
2002 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; 2006 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
2003 TCP_SKB_CB(skb)->end_seq++; 2007 TCP_SKB_CB(skb)->end_seq++;
2004 tp->write_seq++; 2008 tp->write_seq++;
@@ -2025,7 +2029,7 @@ void tcp_send_fin(struct sock *sk)
2025 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; 2029 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
2026 tcp_queue_skb(sk, skb); 2030 tcp_queue_skb(sk, skb);
2027 } 2031 }
2028 __tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_OFF); 2032 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
2029} 2033}
2030 2034
2031/* We get here when a process closes a file descriptor (either due to 2035/* We get here when a process closes a file descriptor (either due to
@@ -2035,7 +2039,6 @@ void tcp_send_fin(struct sock *sk)
2035 */ 2039 */
2036void tcp_send_active_reset(struct sock *sk, gfp_t priority) 2040void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2037{ 2041{
2038 struct tcp_sock *tp = tcp_sk(sk);
2039 struct sk_buff *skb; 2042 struct sk_buff *skb;
2040 2043
2041 /* NOTE: No TCP options attached and we never retransmit this. */ 2044 /* NOTE: No TCP options attached and we never retransmit this. */
@@ -2055,7 +2058,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2055 skb_shinfo(skb)->gso_type = 0; 2058 skb_shinfo(skb)->gso_type = 0;
2056 2059
2057 /* Send it off. */ 2060 /* Send it off. */
2058 TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp); 2061 TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk);
2059 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq; 2062 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
2060 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2063 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2061 if (tcp_transmit_skb(sk, skb, 0, priority)) 2064 if (tcp_transmit_skb(sk, skb, 0, priority))
@@ -2071,7 +2074,7 @@ int tcp_send_synack(struct sock *sk)
2071{ 2074{
2072 struct sk_buff* skb; 2075 struct sk_buff* skb;
2073 2076
2074 skb = skb_peek(&sk->sk_write_queue); 2077 skb = tcp_write_queue_head(sk);
2075 if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) { 2078 if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) {
2076 printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); 2079 printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
2077 return -EFAULT; 2080 return -EFAULT;
@@ -2081,9 +2084,9 @@ int tcp_send_synack(struct sock *sk)
2081 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); 2084 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
2082 if (nskb == NULL) 2085 if (nskb == NULL)
2083 return -ENOMEM; 2086 return -ENOMEM;
2084 __skb_unlink(skb, &sk->sk_write_queue); 2087 tcp_unlink_write_queue(skb, sk);
2085 skb_header_release(nskb); 2088 skb_header_release(nskb);
2086 __skb_queue_head(&sk->sk_write_queue, nskb); 2089 __tcp_add_write_queue_head(sk, nskb);
2087 sk_stream_free_skb(sk, skb); 2090 sk_stream_free_skb(sk, skb);
2088 sk_charge_skb(sk, nskb); 2091 sk_charge_skb(sk, nskb);
2089 skb = nskb; 2092 skb = nskb;
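Likewise, tcp_unlink_write_queue() and __tcp_add_write_queue_head() in the tcp_send_synack() hunk above stand in for the raw __skb_unlink()/__skb_queue_head() calls on sk_write_queue, presumably as thin wrappers:

static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk)
{
	__skb_unlink(skb, &sk->sk_write_queue);
}

static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff *skb)
{
	__skb_queue_head(&sk->sk_write_queue, skb);
}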
@@ -2133,8 +2136,10 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2133 if (md5) 2136 if (md5)
2134 tcp_header_size += TCPOLEN_MD5SIG_ALIGNED; 2137 tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
2135#endif 2138#endif
2136 skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size); 2139 skb_push(skb, tcp_header_size);
2140 skb_reset_transport_header(skb);
2137 2141
2142 th = tcp_hdr(skb);
2138 memset(th, 0, sizeof(struct tcphdr)); 2143 memset(th, 0, sizeof(struct tcphdr));
2139 th->syn = 1; 2144 th->syn = 1;
2140 th->ack = 1; 2145 th->ack = 1;
@@ -2188,7 +2193,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2188 tp->af_specific->calc_md5_hash(md5_hash_location, 2193 tp->af_specific->calc_md5_hash(md5_hash_location,
2189 md5, 2194 md5,
2190 NULL, dst, req, 2195 NULL, dst, req,
2191 skb->h.th, sk->sk_protocol, 2196 tcp_hdr(skb), sk->sk_protocol,
2192 skb->len); 2197 skb->len);
2193 } 2198 }
2194#endif 2199#endif
@@ -2271,7 +2276,7 @@ int tcp_connect(struct sock *sk)
2271 skb_reserve(buff, MAX_TCP_HEADER); 2276 skb_reserve(buff, MAX_TCP_HEADER);
2272 2277
2273 TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN; 2278 TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
2274 TCP_ECN_send_syn(sk, tp, buff); 2279 TCP_ECN_send_syn(sk, buff);
2275 TCP_SKB_CB(buff)->sacked = 0; 2280 TCP_SKB_CB(buff)->sacked = 0;
2276 skb_shinfo(buff)->gso_segs = 1; 2281 skb_shinfo(buff)->gso_segs = 1;
2277 skb_shinfo(buff)->gso_size = 0; 2282 skb_shinfo(buff)->gso_size = 0;
@@ -2285,7 +2290,7 @@ int tcp_connect(struct sock *sk)
2285 TCP_SKB_CB(buff)->when = tcp_time_stamp; 2290 TCP_SKB_CB(buff)->when = tcp_time_stamp;
2286 tp->retrans_stamp = TCP_SKB_CB(buff)->when; 2291 tp->retrans_stamp = TCP_SKB_CB(buff)->when;
2287 skb_header_release(buff); 2292 skb_header_release(buff);
2288 __skb_queue_tail(&sk->sk_write_queue, buff); 2293 __tcp_add_write_queue_tail(sk, buff);
2289 sk_charge_skb(sk, buff); 2294 sk_charge_skb(sk, buff);
2290 tp->packets_out += tcp_skb_pcount(buff); 2295 tp->packets_out += tcp_skb_pcount(buff);
2291 tcp_transmit_skb(sk, buff, 1, GFP_KERNEL); 2296 tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
@@ -2363,7 +2368,6 @@ void tcp_send_ack(struct sock *sk)
2363{ 2368{
2364 /* If we have been reset, we may not send again. */ 2369 /* If we have been reset, we may not send again. */
2365 if (sk->sk_state != TCP_CLOSE) { 2370 if (sk->sk_state != TCP_CLOSE) {
2366 struct tcp_sock *tp = tcp_sk(sk);
2367 struct sk_buff *buff; 2371 struct sk_buff *buff;
2368 2372
2369 /* We are not putting this on the write queue, so 2373 /* We are not putting this on the write queue, so
@@ -2389,7 +2393,7 @@ void tcp_send_ack(struct sock *sk)
2389 skb_shinfo(buff)->gso_type = 0; 2393 skb_shinfo(buff)->gso_type = 0;
2390 2394
2391 /* Send it off, this clears delayed acks for us. */ 2395 /* Send it off, this clears delayed acks for us. */
2392 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp); 2396 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk);
2393 TCP_SKB_CB(buff)->when = tcp_time_stamp; 2397 TCP_SKB_CB(buff)->when = tcp_time_stamp;
2394 tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC); 2398 tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
2395 } 2399 }
@@ -2441,7 +2445,7 @@ int tcp_write_wakeup(struct sock *sk)
2441 struct tcp_sock *tp = tcp_sk(sk); 2445 struct tcp_sock *tp = tcp_sk(sk);
2442 struct sk_buff *skb; 2446 struct sk_buff *skb;
2443 2447
2444 if ((skb = sk->sk_send_head) != NULL && 2448 if ((skb = tcp_send_head(sk)) != NULL &&
2445 before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) { 2449 before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) {
2446 int err; 2450 int err;
2447 unsigned int mss = tcp_current_mss(sk, 0); 2451 unsigned int mss = tcp_current_mss(sk, 0);
@@ -2467,7 +2471,7 @@ int tcp_write_wakeup(struct sock *sk)
2467 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2471 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2468 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); 2472 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2469 if (!err) { 2473 if (!err) {
2470 update_send_head(sk, tp, skb); 2474 update_send_head(sk, skb);
2471 } 2475 }
2472 return err; 2476 return err;
2473 } else { 2477 } else {
@@ -2491,7 +2495,7 @@ void tcp_send_probe0(struct sock *sk)
2491 2495
2492 err = tcp_write_wakeup(sk); 2496 err = tcp_write_wakeup(sk);
2493 2497
2494 if (tp->packets_out || !sk->sk_send_head) { 2498 if (tp->packets_out || !tcp_send_head(sk)) {
2495 /* Cancel probe timer, if it is not required. */ 2499 /* Cancel probe timer, if it is not required. */
2496 icsk->icsk_probes_out = 0; 2500 icsk->icsk_probes_out = 0;
2497 icsk->icsk_backoff = 0; 2501 icsk->icsk_backoff = 0;