about | summary | refs | log | tree | commit | diff | stats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/tcp_input.c | 144
-rw-r--r-- net/ipv4/tcp_output.c | 54
2 files changed, 172 insertions, 26 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 34cfa58eab76..40a26b7157b4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -897,18 +897,32 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
897 int prior_fackets; 897 int prior_fackets;
898 u32 lost_retrans = 0; 898 u32 lost_retrans = 0;
899 int flag = 0; 899 int flag = 0;
900 int dup_sack = 0;
900 int i; 901 int i;
901 902
902 if (!tp->sacked_out) 903 if (!tp->sacked_out)
903 tp->fackets_out = 0; 904 tp->fackets_out = 0;
904 prior_fackets = tp->fackets_out; 905 prior_fackets = tp->fackets_out;
905 906
906 for (i=0; i<num_sacks; i++, sp++) { 907 /* SACK fastpath:
907 struct sk_buff *skb; 908 * if the only SACK change is the increase of the end_seq of
908 __u32 start_seq = ntohl(sp->start_seq); 909 * the first block then only apply that SACK block
909 __u32 end_seq = ntohl(sp->end_seq); 910 * and use retrans queue hinting otherwise slowpath */
910 int fack_count = 0; 911 flag = 1;
911 int dup_sack = 0; 912 for (i = 0; i< num_sacks; i++) {
913 __u32 start_seq = ntohl(sp[i].start_seq);
914 __u32 end_seq = ntohl(sp[i].end_seq);
915
916 if (i == 0){
917 if (tp->recv_sack_cache[i].start_seq != start_seq)
918 flag = 0;
919 } else {
920 if ((tp->recv_sack_cache[i].start_seq != start_seq) ||
921 (tp->recv_sack_cache[i].end_seq != end_seq))
922 flag = 0;
923 }
924 tp->recv_sack_cache[i].start_seq = start_seq;
925 tp->recv_sack_cache[i].end_seq = end_seq;
912 926
913 /* Check for D-SACK. */ 927 /* Check for D-SACK. */
914 if (i == 0) { 928 if (i == 0) {
@@ -940,15 +954,58 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
940 if (before(ack, prior_snd_una - tp->max_window)) 954 if (before(ack, prior_snd_una - tp->max_window))
941 return 0; 955 return 0;
942 } 956 }
957 }
958
959 if (flag)
960 num_sacks = 1;
961 else {
962 int j;
963 tp->fastpath_skb_hint = NULL;
964
965 /* order SACK blocks to allow in order walk of the retrans queue */
966 for (i = num_sacks-1; i > 0; i--) {
967 for (j = 0; j < i; j++){
968 if (after(ntohl(sp[j].start_seq),
969 ntohl(sp[j+1].start_seq))){
970 sp[j].start_seq = htonl(tp->recv_sack_cache[j+1].start_seq);
971 sp[j].end_seq = htonl(tp->recv_sack_cache[j+1].end_seq);
972 sp[j+1].start_seq = htonl(tp->recv_sack_cache[j].start_seq);
973 sp[j+1].end_seq = htonl(tp->recv_sack_cache[j].end_seq);
974 }
975
976 }
977 }
978 }
979
980 /* clear flag as used for different purpose in following code */
981 flag = 0;
982
983 for (i=0; i<num_sacks; i++, sp++) {
984 struct sk_buff *skb;
985 __u32 start_seq = ntohl(sp->start_seq);
986 __u32 end_seq = ntohl(sp->end_seq);
987 int fack_count;
988
989 /* Use SACK fastpath hint if valid */
990 if (tp->fastpath_skb_hint) {
991 skb = tp->fastpath_skb_hint;
992 fack_count = tp->fastpath_cnt_hint;
993 } else {
994 skb = sk->sk_write_queue.next;
995 fack_count = 0;
996 }
943 997
944 /* Event "B" in the comment above. */ 998 /* Event "B" in the comment above. */
945 if (after(end_seq, tp->high_seq)) 999 if (after(end_seq, tp->high_seq))
946 flag |= FLAG_DATA_LOST; 1000 flag |= FLAG_DATA_LOST;
947 1001
948 sk_stream_for_retrans_queue(skb, sk) { 1002 sk_stream_for_retrans_queue_from(skb, sk) {
949 int in_sack, pcount; 1003 int in_sack, pcount;
950 u8 sacked; 1004 u8 sacked;
951 1005
1006 tp->fastpath_skb_hint = skb;
1007 tp->fastpath_cnt_hint = fack_count;
1008
952 /* The retransmission queue is always in order, so 1009 /* The retransmission queue is always in order, so
953 * we can short-circuit the walk early. 1010 * we can short-circuit the walk early.
954 */ 1011 */
@@ -1023,6 +1080,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1023 TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); 1080 TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
1024 tp->lost_out -= tcp_skb_pcount(skb); 1081 tp->lost_out -= tcp_skb_pcount(skb);
1025 tp->retrans_out -= tcp_skb_pcount(skb); 1082 tp->retrans_out -= tcp_skb_pcount(skb);
1083
1084 /* clear lost hint */
1085 tp->retransmit_skb_hint = NULL;
1026 } 1086 }
1027 } else { 1087 } else {
1028 /* New sack for not retransmitted frame, 1088 /* New sack for not retransmitted frame,
@@ -1035,6 +1095,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1035 if (sacked & TCPCB_LOST) { 1095 if (sacked & TCPCB_LOST) {
1036 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; 1096 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
1037 tp->lost_out -= tcp_skb_pcount(skb); 1097 tp->lost_out -= tcp_skb_pcount(skb);
1098
1099 /* clear lost hint */
1100 tp->retransmit_skb_hint = NULL;
1038 } 1101 }
1039 } 1102 }
1040 1103
@@ -1058,6 +1121,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1058 (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) { 1121 (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
1059 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1122 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1060 tp->retrans_out -= tcp_skb_pcount(skb); 1123 tp->retrans_out -= tcp_skb_pcount(skb);
1124 tp->retransmit_skb_hint = NULL;
1061 } 1125 }
1062 } 1126 }
1063 } 1127 }
@@ -1085,6 +1149,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1085 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1149 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1086 tp->retrans_out -= tcp_skb_pcount(skb); 1150 tp->retrans_out -= tcp_skb_pcount(skb);
1087 1151
1152 /* clear lost hint */
1153 tp->retransmit_skb_hint = NULL;
1154
1088 if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) { 1155 if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) {
1089 tp->lost_out += tcp_skb_pcount(skb); 1156 tp->lost_out += tcp_skb_pcount(skb);
1090 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1157 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
@@ -1192,6 +1259,8 @@ static void tcp_enter_frto_loss(struct sock *sk)
1192 tcp_set_ca_state(sk, TCP_CA_Loss); 1259 tcp_set_ca_state(sk, TCP_CA_Loss);
1193 tp->high_seq = tp->frto_highmark; 1260 tp->high_seq = tp->frto_highmark;
1194 TCP_ECN_queue_cwr(tp); 1261 TCP_ECN_queue_cwr(tp);
1262
1263 clear_all_retrans_hints(tp);
1195} 1264}
1196 1265
1197void tcp_clear_retrans(struct tcp_sock *tp) 1266void tcp_clear_retrans(struct tcp_sock *tp)
@@ -1258,6 +1327,8 @@ void tcp_enter_loss(struct sock *sk, int how)
1258 tcp_set_ca_state(sk, TCP_CA_Loss); 1327 tcp_set_ca_state(sk, TCP_CA_Loss);
1259 tp->high_seq = tp->snd_nxt; 1328 tp->high_seq = tp->snd_nxt;
1260 TCP_ECN_queue_cwr(tp); 1329 TCP_ECN_queue_cwr(tp);
1330
1331 clear_all_retrans_hints(tp);
1261} 1332}
1262 1333
1263static int tcp_check_sack_reneging(struct sock *sk) 1334static int tcp_check_sack_reneging(struct sock *sk)
@@ -1482,17 +1553,37 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
1482 int packets, u32 high_seq) 1553 int packets, u32 high_seq)
1483{ 1554{
1484 struct sk_buff *skb; 1555 struct sk_buff *skb;
1485 int cnt = packets; 1556 int cnt;
1486 1557
1487 BUG_TRAP(cnt <= tp->packets_out); 1558 BUG_TRAP(packets <= tp->packets_out);
1559 if (tp->lost_skb_hint) {
1560 skb = tp->lost_skb_hint;
1561 cnt = tp->lost_cnt_hint;
1562 } else {
1563 skb = sk->sk_write_queue.next;
1564 cnt = 0;
1565 }
1488 1566
1489 sk_stream_for_retrans_queue(skb, sk) { 1567 sk_stream_for_retrans_queue_from(skb, sk) {
1490 cnt -= tcp_skb_pcount(skb); 1568 /* TODO: do this better */
1491 if (cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq)) 1569 /* this is not the most efficient way to do this... */
1570 tp->lost_skb_hint = skb;
1571 tp->lost_cnt_hint = cnt;
1572 cnt += tcp_skb_pcount(skb);
1573 if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq))
1492 break; 1574 break;
1493 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { 1575 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
1494 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1576 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1495 tp->lost_out += tcp_skb_pcount(skb); 1577 tp->lost_out += tcp_skb_pcount(skb);
1578
1579 /* clear xmit_retransmit_queue hints
1580 * if this is beyond hint */
1581 if(tp->retransmit_skb_hint != NULL &&
1582 before(TCP_SKB_CB(skb)->seq,
1583 TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) {
1584
1585 tp->retransmit_skb_hint = NULL;
1586 }
1496 } 1587 }
1497 } 1588 }
1498 tcp_sync_left_out(tp); 1589 tcp_sync_left_out(tp);
@@ -1519,13 +1610,28 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
1519 if (tcp_head_timedout(sk, tp)) { 1610 if (tcp_head_timedout(sk, tp)) {
1520 struct sk_buff *skb; 1611 struct sk_buff *skb;
1521 1612
1522 sk_stream_for_retrans_queue(skb, sk) { 1613 skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
1523 if (tcp_skb_timedout(sk, skb) && 1614 : sk->sk_write_queue.next;
1524 !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { 1615
1616 sk_stream_for_retrans_queue_from(skb, sk) {
1617 if (!tcp_skb_timedout(sk, skb))
1618 break;
1619
1620 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
1525 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1621 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1526 tp->lost_out += tcp_skb_pcount(skb); 1622 tp->lost_out += tcp_skb_pcount(skb);
1623
1624 /* clear xmit_retrans hint */
1625 if (tp->retransmit_skb_hint &&
1626 before(TCP_SKB_CB(skb)->seq,
1627 TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
1628
1629 tp->retransmit_skb_hint = NULL;
1527 } 1630 }
1528 } 1631 }
1632
1633 tp->scoreboard_skb_hint = skb;
1634
1529 tcp_sync_left_out(tp); 1635 tcp_sync_left_out(tp);
1530 } 1636 }
1531} 1637}
@@ -1605,6 +1711,10 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
1605 } 1711 }
1606 tcp_moderate_cwnd(tp); 1712 tcp_moderate_cwnd(tp);
1607 tp->snd_cwnd_stamp = tcp_time_stamp; 1713 tp->snd_cwnd_stamp = tcp_time_stamp;
1714
1715 /* There is something screwy going on with the retrans hints after
1716 an undo */
1717 clear_all_retrans_hints(tp);
1608} 1718}
1609 1719
1610static inline int tcp_may_undo(struct tcp_sock *tp) 1720static inline int tcp_may_undo(struct tcp_sock *tp)
@@ -1688,6 +1798,9 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp)
1688 sk_stream_for_retrans_queue(skb, sk) { 1798 sk_stream_for_retrans_queue(skb, sk) {
1689 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; 1799 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
1690 } 1800 }
1801
1802 clear_all_retrans_hints(tp);
1803
1691 DBGUNDO(sk, tp, "partial loss"); 1804 DBGUNDO(sk, tp, "partial loss");
1692 tp->lost_out = 0; 1805 tp->lost_out = 0;
1693 tp->left_out = tp->sacked_out; 1806 tp->left_out = tp->sacked_out;
@@ -2117,6 +2230,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
2117 tcp_packets_out_dec(tp, skb); 2230 tcp_packets_out_dec(tp, skb);
2118 __skb_unlink(skb, &sk->sk_write_queue); 2231 __skb_unlink(skb, &sk->sk_write_queue);
2119 sk_stream_free_skb(sk, skb); 2232 sk_stream_free_skb(sk, skb);
2233 clear_all_retrans_hints(tp);
2120 } 2234 }
2121 2235
2122 if (acked&FLAG_ACKED) { 2236 if (acked&FLAG_ACKED) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 602e7057e438..029c70dfb585 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -436,6 +436,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
436 u16 flags; 436 u16 flags;
437 437
438 BUG_ON(len > skb->len); 438 BUG_ON(len > skb->len);
439
440 clear_all_retrans_hints(tp);
439 nsize = skb_headlen(skb) - len; 441 nsize = skb_headlen(skb) - len;
440 if (nsize < 0) 442 if (nsize < 0)
441 nsize = 0; 443 nsize = 0;
@@ -1260,7 +1262,10 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
1260 BUG_ON(tcp_skb_pcount(skb) != 1 || 1262 BUG_ON(tcp_skb_pcount(skb) != 1 ||
1261 tcp_skb_pcount(next_skb) != 1); 1263 tcp_skb_pcount(next_skb) != 1);
1262 1264
1263 /* Ok. We will be able to collapse the packet. */ 1265 /* changing transmit queue under us so clear hints */
1266 clear_all_retrans_hints(tp);
1267
1268 /* Ok. We will be able to collapse the packet. */
1264 __skb_unlink(next_skb, &sk->sk_write_queue); 1269 __skb_unlink(next_skb, &sk->sk_write_queue);
1265 1270
1266 memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); 1271 memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
@@ -1330,6 +1335,8 @@ void tcp_simple_retransmit(struct sock *sk)
1330 } 1335 }
1331 } 1336 }
1332 1337
1338 clear_all_retrans_hints(tp);
1339
1333 if (!lost) 1340 if (!lost)
1334 return; 1341 return;
1335 1342
@@ -1468,13 +1475,25 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1468 const struct inet_connection_sock *icsk = inet_csk(sk); 1475 const struct inet_connection_sock *icsk = inet_csk(sk);
1469 struct tcp_sock *tp = tcp_sk(sk); 1476 struct tcp_sock *tp = tcp_sk(sk);
1470 struct sk_buff *skb; 1477 struct sk_buff *skb;
1471 int packet_cnt = tp->lost_out; 1478 int packet_cnt;
1479
1480 if (tp->retransmit_skb_hint) {
1481 skb = tp->retransmit_skb_hint;
1482 packet_cnt = tp->retransmit_cnt_hint;
1483 }else{
1484 skb = sk->sk_write_queue.next;
1485 packet_cnt = 0;
1486 }
1472 1487
1473 /* First pass: retransmit lost packets. */ 1488 /* First pass: retransmit lost packets. */
1474 if (packet_cnt) { 1489 if (tp->lost_out) {
1475 sk_stream_for_retrans_queue(skb, sk) { 1490 sk_stream_for_retrans_queue_from(skb, sk) {
1476 __u8 sacked = TCP_SKB_CB(skb)->sacked; 1491 __u8 sacked = TCP_SKB_CB(skb)->sacked;
1477 1492
1493 /* we could do better than to assign each time */
1494 tp->retransmit_skb_hint = skb;
1495 tp->retransmit_cnt_hint = packet_cnt;
1496
1478 /* Assume this retransmit will generate 1497 /* Assume this retransmit will generate
1479 * only one packet for congestion window 1498 * only one packet for congestion window
1480 * calculation purposes. This works because 1499 * calculation purposes. This works because
@@ -1485,10 +1504,12 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1485 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) 1504 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
1486 return; 1505 return;
1487 1506
1488 if (sacked&TCPCB_LOST) { 1507 if (sacked & TCPCB_LOST) {
1489 if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { 1508 if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
1490 if (tcp_retransmit_skb(sk, skb)) 1509 if (tcp_retransmit_skb(sk, skb)) {
1510 tp->retransmit_skb_hint = NULL;
1491 return; 1511 return;
1512 }
1492 if (icsk->icsk_ca_state != TCP_CA_Loss) 1513 if (icsk->icsk_ca_state != TCP_CA_Loss)
1493 NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS); 1514 NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS);
1494 else 1515 else
@@ -1501,8 +1522,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1501 TCP_RTO_MAX); 1522 TCP_RTO_MAX);
1502 } 1523 }
1503 1524
1504 packet_cnt -= tcp_skb_pcount(skb); 1525 packet_cnt += tcp_skb_pcount(skb);
1505 if (packet_cnt <= 0) 1526 if (packet_cnt >= tp->lost_out)
1506 break; 1527 break;
1507 } 1528 }
1508 } 1529 }
@@ -1528,9 +1549,18 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1528 if (tcp_may_send_now(sk, tp)) 1549 if (tcp_may_send_now(sk, tp))
1529 return; 1550 return;
1530 1551
1531 packet_cnt = 0; 1552 if (tp->forward_skb_hint) {
1553 skb = tp->forward_skb_hint;
1554 packet_cnt = tp->forward_cnt_hint;
1555 } else{
1556 skb = sk->sk_write_queue.next;
1557 packet_cnt = 0;
1558 }
1559
1560 sk_stream_for_retrans_queue_from(skb, sk) {
1561 tp->forward_cnt_hint = packet_cnt;
1562 tp->forward_skb_hint = skb;
1532 1563
1533 sk_stream_for_retrans_queue(skb, sk) {
1534 /* Similar to the retransmit loop above we 1564 /* Similar to the retransmit loop above we
1535 * can pretend that the retransmitted SKB 1565 * can pretend that the retransmitted SKB
1536 * we send out here will be composed of one 1566 * we send out here will be composed of one
@@ -1547,8 +1577,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1547 continue; 1577 continue;
1548 1578
1549 /* Ok, retransmit it. */ 1579 /* Ok, retransmit it. */
1550 if (tcp_retransmit_skb(sk, skb)) 1580 if (tcp_retransmit_skb(sk, skb)) {
1581 tp->forward_skb_hint = NULL;
1551 break; 1582 break;
1583 }
1552 1584
1553 if (skb == skb_peek(&sk->sk_write_queue)) 1585 if (skb == skb_peek(&sk->sk_write_queue))
1554 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 1586 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,