path: root/net/ipv4/tcp_output.c
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--  net/ipv4/tcp_output.c  61
1 file changed, 47 insertions(+), 14 deletions(-)
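
This is the retransmit-hint change to the TCP output path: tcp_xmit_retransmit_queue() used to rescan the write queue from the head on every invocation, and the diff below lets it resume from a remembered position instead. Two (skb pointer, packet count) hint pairs cache where the lost-packet pass and the forward pass left off, and every path that reshuffles the transmit queue drops the hints via clear_all_retrans_hints(). The hint fields themselves are declared outside this file; the following is a minimal sketch of the state the diff relies on, with names inferred from their uses here (the struct name and layout are illustrative, not the real declaration in struct tcp_sock):

/* Illustrative only: the real fields presumably live in struct tcp_sock. */
struct retrans_hints_sketch {
        struct sk_buff  *retransmit_skb_hint;   /* resume point of the lost-packet pass */
        int             retransmit_cnt_hint;    /* pcount total scanned up to that skb */
        struct sk_buff  *forward_skb_hint;      /* resume point of the forward pass */
        int             forward_cnt_hint;
};
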
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b907456a79f4..029c70dfb585 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -436,6 +436,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
         u16 flags;
 
         BUG_ON(len > skb->len);
+
+        clear_all_retrans_hints(tp);
         nsize = skb_headlen(skb) - len;
         if (nsize < 0)
                 nsize = 0;
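
tcp_fragment() splits one skb into two, so a cached queue position could point at an skb whose length and pcount no longer match the counts saved with it; the hints are therefore dropped before the queue is touched. clear_all_retrans_hints() is not defined in this file; presumably it just forgets every cached position, along these lines (a sketch, not the real helper):

/* Sketch: reset at least the two hint pairs used in this file. The
 * count hints need no reset; they are only read when the matching
 * skb hint is non-NULL.
 */
static inline void clear_all_retrans_hints(struct tcp_sock *tp)
{
        tp->retransmit_skb_hint = NULL;
        tp->forward_skb_hint = NULL;
}
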
@@ -599,7 +601,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
    for TCP options, but includes only bare TCP header.
 
    tp->rx_opt.mss_clamp is mss negotiated at connection setup.
-   It is minumum of user_mss and mss received with SYN.
+   It is minimum of user_mss and mss received with SYN.
    It also does not include TCP options.
 
    tp->pmtu_cookie is last pmtu, seen by this function.
@@ -1171,7 +1173,7 @@ u32 __tcp_select_window(struct sock *sk)
 {
         struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
-        /* MSS for the peer's data. Previous verions used mss_clamp
+        /* MSS for the peer's data. Previous versions used mss_clamp
          * here. I don't know if the value based on our guesses
          * of peer's MSS is better for the performance. It's more correct
          * but may be worse for the performance because of rcv_mss
@@ -1260,7 +1262,10 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
         BUG_ON(tcp_skb_pcount(skb) != 1 ||
                tcp_skb_pcount(next_skb) != 1);
 
+        /* changing transmit queue under us so clear hints */
+        clear_all_retrans_hints(tp);
+
         /* Ok. We will be able to collapse the packet. */
         __skb_unlink(next_skb, &sk->sk_write_queue);
 
         memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
@@ -1330,6 +1335,8 @@ void tcp_simple_retransmit(struct sock *sk)
                 }
         }
 
+        clear_all_retrans_hints(tp);
+
         if (!lost)
                 return;
 
@@ -1361,7 +1368,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
         int err;
 
         /* Do not sent more than we queued. 1/4 is reserved for possible
-         * copying overhead: frgagmentation, tunneling, mangling etc.
+         * copying overhead: fragmentation, tunneling, mangling etc.
          */
         if (atomic_read(&sk->sk_wmem_alloc) >
             min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
@@ -1468,13 +1475,25 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
         const struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
         struct sk_buff *skb;
-        int packet_cnt = tp->lost_out;
+        int packet_cnt;
+
+        if (tp->retransmit_skb_hint) {
+                skb = tp->retransmit_skb_hint;
+                packet_cnt = tp->retransmit_cnt_hint;
+        }else{
+                skb = sk->sk_write_queue.next;
+                packet_cnt = 0;
+        }
 
         /* First pass: retransmit lost packets. */
-        if (packet_cnt) {
-                sk_stream_for_retrans_queue(skb, sk) {
+        if (tp->lost_out) {
+                sk_stream_for_retrans_queue_from(skb, sk) {
                         __u8 sacked = TCP_SKB_CB(skb)->sacked;
 
+                        /* we could do better than to assign each time */
+                        tp->retransmit_skb_hint = skb;
+                        tp->retransmit_cnt_hint = packet_cnt;
+
                         /* Assume this retransmit will generate
                          * only one packet for congestion window
                          * calculation purposes. This works because
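
Both passes now iterate with sk_stream_for_retrans_queue_from(), starting from the cached skb rather than from the queue head. That macro is not visible in this diff; it is presumably a variant of the existing sk_stream_for_retrans_queue() whose for-loop simply omits the initializer, walking from whatever skb the caller supplies until it hits the send head or the end of the write queue. A sketch modeled on the original macro:

/* Presumed companion to sk_stream_for_retrans_queue(): same bounds,
 * but iteration begins at the caller-supplied skb instead of
 * sk->sk_write_queue.next.
 */
#define sk_stream_for_retrans_queue_from(skb, sk)                       \
        for (; (skb != (sk)->sk_send_head) &&                           \
               (skb != (struct sk_buff *)&(sk)->sk_write_queue);        \
             skb = skb->next)

Storing the hint on every iteration (as the "we could do better" comment concedes) costs one pointer and one integer write per skb, in exchange for skipping an O(queue length) rescan on the next call.
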
@@ -1485,10 +1504,12 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
                         if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
                                 return;
 
-                        if (sacked&TCPCB_LOST) {
+                        if (sacked & TCPCB_LOST) {
                                 if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
-                                        if (tcp_retransmit_skb(sk, skb))
+                                        if (tcp_retransmit_skb(sk, skb)) {
+                                                tp->retransmit_skb_hint = NULL;
                                                 return;
+                                        }
                                         if (icsk->icsk_ca_state != TCP_CA_Loss)
                                                 NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS);
                                         else
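
When tcp_retransmit_skb() fails, the skb was not actually put on the wire, so the cached position is no longer a trustworthy resume point; clearing retransmit_skb_hint makes the next invocation fall back to a full walk from the queue head. (Presumably a conservative choice: the hint was just set to this very skb above, so keeping it would also retry here, but a failure usually means resource pressure and a clean rescan is the safe default.)
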
@@ -1501,8 +1522,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
                                                           TCP_RTO_MAX);
                                 }
 
-                                packet_cnt -= tcp_skb_pcount(skb);
-                                if (packet_cnt <= 0)
+                                packet_cnt += tcp_skb_pcount(skb);
+                                if (packet_cnt >= tp->lost_out)
                                         break;
                         }
                 }
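
Note that the accounting flips direction with the hints: packet_cnt used to start at tp->lost_out and count down to zero, which only works when the walk begins at the queue head. It now carries the running pcount total up from the resumed position, and the pass stops once it reaches tp->lost_out; for example, with tp->lost_out == 10 and retransmit_cnt_hint == 6, the resumed loop keeps adding each visited skb's pcount to the total and breaks at 10, instead of re-walking the first six packets' worth from the head.
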
@@ -1528,9 +1549,18 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
         if (tcp_may_send_now(sk, tp))
                 return;
 
-        packet_cnt = 0;
+        if (tp->forward_skb_hint) {
+                skb = tp->forward_skb_hint;
+                packet_cnt = tp->forward_cnt_hint;
+        } else{
+                skb = sk->sk_write_queue.next;
+                packet_cnt = 0;
+        }
+
+        sk_stream_for_retrans_queue_from(skb, sk) {
+                tp->forward_cnt_hint = packet_cnt;
+                tp->forward_skb_hint = skb;
 
-        sk_stream_for_retrans_queue(skb, sk) {
                 /* Similar to the retransmit loop above we
                  * can pretend that the retransmitted SKB
                  * we send out here will be composed of one
@@ -1547,8 +1577,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
                         continue;
 
                 /* Ok, retransmit it. */
-                if (tcp_retransmit_skb(sk, skb))
+                if (tcp_retransmit_skb(sk, skb)) {
+                        tp->forward_skb_hint = NULL;
                         break;
+                }
 
                 if (skb == skb_peek(&sk->sk_write_queue))
                         inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
@@ -2058,3 +2090,4 @@ EXPORT_SYMBOL(tcp_connect);
 EXPORT_SYMBOL(tcp_make_synack);
 EXPORT_SYMBOL(tcp_simple_retransmit);
 EXPORT_SYMBOL(tcp_sync_mss);
+EXPORT_SYMBOL(sysctl_tcp_tso_win_divisor);