 include/linux/skbuff.h |  33
 include/net/tcp.h      |   5
 net/core/skbuff.c      | 140
 net/ipv4/tcp_input.c   | 256
 4 files changed, 427 insertions(+), 7 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a01b6f84e3bc..acf17af45af9 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -493,6 +493,19 @@ static inline bool skb_queue_is_last(const struct sk_buff_head *list,
 }
 
 /**
+ * skb_queue_is_first - check if skb is the first entry in the queue
+ * @list: queue head
+ * @skb: buffer
+ *
+ * Returns true if @skb is the first buffer on the list.
+ */
+static inline bool skb_queue_is_first(const struct sk_buff_head *list,
+				      const struct sk_buff *skb)
+{
+	return (skb->prev == (struct sk_buff *) list);
+}
+
+/**
  * skb_queue_next - return the next packet in the queue
  * @list: queue head
  * @skb: current buffer
@@ -511,6 +524,24 @@ static inline struct sk_buff *skb_queue_next(const struct sk_buff_head *list,
 }
 
 /**
+ * skb_queue_prev - return the prev packet in the queue
+ * @list: queue head
+ * @skb: current buffer
+ *
+ * Return the prev packet in @list before @skb.  It is only valid to
+ * call this if skb_queue_is_first() evaluates to false.
+ */
+static inline struct sk_buff *skb_queue_prev(const struct sk_buff_head *list,
+					     const struct sk_buff *skb)
+{
+	/* This BUG_ON may seem severe, but if we just return then we
+	 * are going to dereference garbage.
+	 */
+	BUG_ON(skb_queue_is_first(list, skb));
+	return skb->prev;
+}
+
+/**
  * skb_get - reference buffer
  * @skb: buffer to reference
  *
@@ -1652,6 +1683,8 @@ extern int skb_splice_bits(struct sk_buff *skb,
 extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
 extern void skb_split(struct sk_buff *skb,
		       struct sk_buff *skb1, const u32 len);
+extern int skb_shift(struct sk_buff *tgt, struct sk_buff *skb,
+		     int shiftlen);
 
 extern struct sk_buff *skb_segment(struct sk_buff *skb, int features);
 
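
For reference, a minimal sketch (not part of the patch) of how the two new helpers are meant to compose: skb_queue_prev() may only be called once skb_queue_is_first() has returned false, mirroring the existing skb_queue_is_last()/skb_queue_next() pair, and this is exactly the contract tcp_write_queue_prev() in the tcp.h change below relies on. The walk_queue_backwards() function here is hypothetical and assumes the caller already holds the queue lock.

	/* Hypothetical example: visit every buffer in @list from tail to head. */
	static void walk_queue_backwards(struct sk_buff_head *list)
	{
		struct sk_buff *skb = skb_peek_tail(list);	/* NULL if the queue is empty */

		while (skb) {
			/* ... inspect skb ... */
			if (skb_queue_is_first(list, skb))
				break;				/* reached the head of the queue */
			skb = skb_queue_prev(list, skb);	/* valid: skb is not the first entry */
		}
	}
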
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 90b4c3b4c336..265392470b26 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1192,6 +1192,11 @@ static inline struct sk_buff *tcp_write_queue_next(struct sock *sk, struct sk_bu
 	return skb_queue_next(&sk->sk_write_queue, skb);
 }
 
+static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_buff *skb)
+{
+	return skb_queue_prev(&sk->sk_write_queue, skb);
+}
+
 #define tcp_for_write_queue(skb, sk) \
 	skb_queue_walk(&(sk)->sk_write_queue, skb)
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 267185a848f6..844b8abeb18c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2018,6 +2018,146 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
 		skb_split_no_header(skb, skb1, len, pos);
 }
 
+/* Shifting from/to a cloned skb is a no-go.
+ *
+ * TODO: handle cloned skbs by using pskb_expand_head()
+ */
+static int skb_prepare_for_shift(struct sk_buff *skb)
+{
+	return skb_cloned(skb);
+}
+
+/**
+ * skb_shift - Shifts paged data partially from skb to another
+ * @tgt: buffer into which tail data gets added
+ * @skb: buffer from which the paged data comes from
+ * @shiftlen: shift up to this many bytes
+ *
+ * Attempts to shift up to shiftlen worth of bytes, which may be less than
+ * the length of the skb, from skb to tgt. Returns the number of bytes shifted.
+ * It's up to the caller to free skb if everything was shifted.
+ *
+ * If @tgt runs out of frags, the whole operation is aborted.
+ *
+ * Skb cannot include anything else but paged data while tgt is allowed
+ * to have non-paged data as well.
+ *
+ * TODO: full sized shift could be optimized but that would need
+ * specialized skb free'er to handle frags without up-to-date nr_frags.
+ */
+int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
+{
+	int from, to, merge, todo;
+	struct skb_frag_struct *fragfrom, *fragto;
+
+	BUG_ON(shiftlen > skb->len);
+	BUG_ON(skb_headlen(skb));	/* Would corrupt stream */
+
+	todo = shiftlen;
+	from = 0;
+	to = skb_shinfo(tgt)->nr_frags;
+	fragfrom = &skb_shinfo(skb)->frags[from];
+
+	/* Actual merge is delayed until the point when we know we can
+	 * commit all, so that we don't have to undo partial changes
+	 */
+	if (!to ||
+	    !skb_can_coalesce(tgt, to, fragfrom->page, fragfrom->page_offset)) {
+		merge = -1;
+	} else {
+		merge = to - 1;
+
+		todo -= fragfrom->size;
+		if (todo < 0) {
+			if (skb_prepare_for_shift(skb) ||
+			    skb_prepare_for_shift(tgt))
+				return 0;
+
+			fragto = &skb_shinfo(tgt)->frags[merge];
+
+			fragto->size += shiftlen;
+			fragfrom->size -= shiftlen;
+			fragfrom->page_offset += shiftlen;
+
+			goto onlymerged;
+		}
+
+		from++;
+	}
+
+	/* Skip full, not-fitting skb to avoid expensive operations */
+	if ((shiftlen == skb->len) &&
+	    (skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to))
+		return 0;
+
+	if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt))
+		return 0;
+
+	while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) {
+		if (to == MAX_SKB_FRAGS)
+			return 0;
+
+		fragfrom = &skb_shinfo(skb)->frags[from];
+		fragto = &skb_shinfo(tgt)->frags[to];
+
+		if (todo >= fragfrom->size) {
+			*fragto = *fragfrom;
+			todo -= fragfrom->size;
+			from++;
+			to++;
+
+		} else {
+			get_page(fragfrom->page);
+			fragto->page = fragfrom->page;
+			fragto->page_offset = fragfrom->page_offset;
+			fragto->size = todo;
+
+			fragfrom->page_offset += todo;
+			fragfrom->size -= todo;
+			todo = 0;
+
+			to++;
+			break;
+		}
+	}
+
+	/* Ready to "commit" this state change to tgt */
+	skb_shinfo(tgt)->nr_frags = to;
+
+	if (merge >= 0) {
+		fragfrom = &skb_shinfo(skb)->frags[0];
+		fragto = &skb_shinfo(tgt)->frags[merge];
+
+		fragto->size += fragfrom->size;
+		put_page(fragfrom->page);
+	}
+
+	/* Reposition in the original skb */
+	to = 0;
+	while (from < skb_shinfo(skb)->nr_frags)
+		skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++];
+	skb_shinfo(skb)->nr_frags = to;
+
+	BUG_ON(todo > 0 && !skb_shinfo(skb)->nr_frags);
+
+onlymerged:
+	/* Most likely the tgt won't ever need its checksum anymore, skb on
+	 * the other hand might need it if it needs to be resent
+	 */
+	tgt->ip_summed = CHECKSUM_PARTIAL;
+	skb->ip_summed = CHECKSUM_PARTIAL;
+
+	/* Yak, is it really working this way? Some helper please? */
+	skb->len -= shiftlen;
+	skb->data_len -= shiftlen;
+	skb->truesize -= shiftlen;
+	tgt->len += shiftlen;
+	tgt->data_len += shiftlen;
+	tgt->truesize += shiftlen;
+
+	return shiftlen;
+}
+
 /**
  * skb_prepare_seq_read - Prepare a sequential read of skb data
  * @skb: the buffer to read
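
A minimal sketch (not part of the patch) of the calling convention the skb_shift() kernel-doc describes: the function never unlinks or frees @skb, so a caller that shifted everything must dispose of the emptied skb itself, much like tcp_shifted_skb() in the tcp_input.c changes below does with tcp_unlink_write_queue() and sk_wmem_free_skb(). The try_coalesce() helper here is hypothetical.

	/* Hypothetical caller: try to fold all of skb's paged data into prev. */
	static bool try_coalesce(struct sk_buff *prev, struct sk_buff *skb)
	{
		if (skb_headlen(skb))
			return false;	/* skb_shift() only handles purely paged skbs */

		if (!skb_shift(prev, skb, skb->len))
			return false;	/* cloned skb or prev out of frags: nothing moved */

		/* Everything was shifted; skb_shift() never frees, so the
		 * now-empty skb is the caller's responsibility.
		 */
		kfree_skb(skb);
		return true;
	}
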
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3c8e297e2c39..97d57676b8ee 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1242,6 +1242,8 @@ static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
  * aligned portion of it that matches. Therefore we might need to fragment
  * which may fail and creates some hassle (caller must handle error case
  * returns).
+ *
+ * FIXME: this could be merged to shift decision code
  */
 static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 				 u32 start_seq, u32 end_seq)
@@ -1353,9 +1355,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
 
 	if (fack_count > tp->fackets_out)
 		tp->fackets_out = fack_count;
-
-	if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
-		tcp_advance_highest_sack(sk, skb);
 }
 
 /* D-SACK. We can detect redundant retransmission in S|R and plain R
@@ -1370,12 +1369,231 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
 	return flag;
 }
 
+static int tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
+			   struct sk_buff *skb, unsigned int pcount,
+			   int shifted, int fack_count, int *reord,
+			   int *flag, int mss)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	u8 dummy_sacked = TCP_SKB_CB(skb)->sacked;	/* We discard results */
+
+	BUG_ON(!pcount);
+
+	TCP_SKB_CB(prev)->end_seq += shifted;
+	TCP_SKB_CB(skb)->seq += shifted;
+
+	skb_shinfo(prev)->gso_segs += pcount;
+	BUG_ON(skb_shinfo(skb)->gso_segs < pcount);
+	skb_shinfo(skb)->gso_segs -= pcount;
+
+	/* When we're adding to gso_segs == 1, gso_size will be zero,
+	 * in theory this shouldn't be necessary but as long as DSACK
+	 * code can come after this skb later on it's better to keep
+	 * setting gso_size to something.
+	 */
+	if (!skb_shinfo(prev)->gso_size) {
+		skb_shinfo(prev)->gso_size = mss;
+		skb_shinfo(prev)->gso_type = sk->sk_gso_type;
+	}
+
+	/* CHECKME: To clear or not to clear? Mimics normal skb currently */
+	if (skb_shinfo(skb)->gso_segs <= 1) {
+		skb_shinfo(skb)->gso_size = 0;
+		skb_shinfo(skb)->gso_type = 0;
+	}
+
+	*flag |= tcp_sacktag_one(skb, sk, reord, 0, fack_count, &dummy_sacked,
+				 pcount);
+
+	/* Difference in this won't matter, both ACKed by the same cumul. ACK */
+	TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
+
+	tcp_clear_all_retrans_hints(tp);
+
+	if (skb->len > 0) {
+		BUG_ON(!tcp_skb_pcount(skb));
+		return 0;
+	}
+
+	/* Whole SKB was eaten :-) */
+
+	TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(prev)->flags;
+	if (skb == tcp_highest_sack(sk))
+		tcp_advance_highest_sack(sk, skb);
+
+	tcp_unlink_write_queue(skb, sk);
+	sk_wmem_free_skb(sk, skb);
+
+	return 1;
+}
+
+/* I wish gso_size would have a bit more sane initialization than
+ * something-or-zero which complicates things
+ */
+static int tcp_shift_mss(struct sk_buff *skb)
+{
+	int mss = tcp_skb_mss(skb);
+
+	if (!mss)
+		mss = skb->len;
+
+	return mss;
+}
+
+/* Shifting pages past head area doesn't work */
+static int skb_can_shift(struct sk_buff *skb)
+{
+	return !skb_headlen(skb) && skb_is_nonlinear(skb);
+}
+
+/* Try collapsing SACK blocks spanning across multiple skbs to a single
+ * skb.
+ */
+static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
+					  u32 start_seq, u32 end_seq,
+					  int dup_sack, int *fack_count,
+					  int *reord, int *flag)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *prev;
+	int mss;
+	int pcount = 0;
+	int len;
+	int in_sack;
+
+	if (!sk_can_gso(sk))
+		goto fallback;
+
+	/* Normally R but no L won't result in plain S */
+	if (!dup_sack &&
+	    (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) == TCPCB_SACKED_RETRANS)
+		goto fallback;
+	if (!skb_can_shift(skb))
+		goto fallback;
+	/* This frame is about to be dropped (was ACKed). */
+	if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
+		goto fallback;
+
+	/* Can only happen with delayed DSACK + discard craziness */
+	if (unlikely(skb == tcp_write_queue_head(sk)))
+		goto fallback;
+	prev = tcp_write_queue_prev(sk, skb);
+
+	if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
+		goto fallback;
+
+	in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
+		  !before(end_seq, TCP_SKB_CB(skb)->end_seq);
+
+	if (in_sack) {
+		len = skb->len;
+		pcount = tcp_skb_pcount(skb);
+		mss = tcp_shift_mss(skb);
+
+		/* TODO: Fix DSACKs to not fragment already SACKed and we can
+		 * drop this restriction as unnecessary
+		 */
+		if (mss != tcp_shift_mss(prev))
+			goto fallback;
+	} else {
+		if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
+			goto noop;
+		/* CHECKME: This is non-MSS split case only?, this will
+		 * cause skipped skbs due to advancing loop btw, original
+		 * has that feature too
+		 */
+		if (tcp_skb_pcount(skb) <= 1)
+			goto noop;
+
+		in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
+		if (!in_sack) {
+			/* TODO: head merge to next could be attempted here
+			 * if (!after(TCP_SKB_CB(skb)->end_seq, end_seq)),
+			 * though it might not be worth of the additional hassle
+			 *
+			 * ...we can probably just fallback to what was done
+			 * previously. We could try merging non-SACKed ones
+			 * as well but it probably isn't going to buy off
+			 * because later SACKs might again split them, and
+			 * it would make skb timestamp tracking considerably
+			 * harder problem.
+			 */
+			goto fallback;
+		}
+
+		len = end_seq - TCP_SKB_CB(skb)->seq;
+		BUG_ON(len < 0);
+		BUG_ON(len > skb->len);
+
+		/* MSS boundaries should be honoured or else pcount will
+		 * severely break even though it makes things bit trickier.
+		 * Optimize common case to avoid most of the divides
+		 */
+		mss = tcp_skb_mss(skb);
+
+		/* TODO: Fix DSACKs to not fragment already SACKed and we can
+		 * drop this restriction as unnecessary
+		 */
+		if (mss != tcp_shift_mss(prev))
+			goto fallback;
+
+		if (len == mss) {
+			pcount = 1;
+		} else if (len < mss) {
+			goto noop;
+		} else {
+			pcount = len / mss;
+			len = pcount * mss;
+		}
+	}
+
+	if (!skb_shift(prev, skb, len))
+		goto fallback;
+	if (!tcp_shifted_skb(sk, prev, skb, pcount, len, *fack_count, reord,
+			     flag, mss))
+		goto out;
+
+	/* Hole filled allows collapsing with the next as well, this is very
+	 * useful when hole on every nth skb pattern happens
+	 */
+	if (prev == tcp_write_queue_tail(sk))
+		goto out;
+	skb = tcp_write_queue_next(sk, prev);
+
+	if (!skb_can_shift(skb))
+		goto out;
+	if (skb == tcp_send_head(sk))
+		goto out;
+	if ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
+		goto out;
+
+	len = skb->len;
+	if (skb_shift(prev, skb, len)) {
+		pcount += tcp_skb_pcount(skb);
+		tcp_shifted_skb(sk, prev, skb, tcp_skb_pcount(skb), len,
+				*fack_count, reord, flag, mss);
+	}
+
+out:
+	*fack_count += pcount;
+	return prev;
+
+noop:
+	return skb;
+
+fallback:
+	return NULL;
+}
+
 static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 					struct tcp_sack_block *next_dup,
 					u32 start_seq, u32 end_seq,
 					int dup_sack_in, int *fack_count,
 					int *reord, int *flag)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *tmp;
+
 	tcp_for_write_queue_from(skb, sk) {
 		int in_sack = 0;
 		int dup_sack = dup_sack_in;
@@ -1396,18 +1614,42 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 			dup_sack = 1;
 		}
 
-		if (in_sack <= 0)
-			in_sack = tcp_match_skb_to_sack(sk, skb, start_seq,
-							end_seq);
+		/* skb reference here is a bit tricky to get right, since
+		 * shifting can eat and free both this skb and the next,
+		 * so not even _safe variant of the loop is enough.
+		 */
+		if (in_sack <= 0) {
+			tmp = tcp_shift_skb_data(sk, skb, start_seq,
+						 end_seq, dup_sack,
+						 fack_count, reord, flag);
+			if (tmp != NULL) {
+				if (tmp != skb) {
+					skb = tmp;
+					continue;
+				}
+
+				in_sack = 0;
+			} else {
+				in_sack = tcp_match_skb_to_sack(sk, skb,
+								start_seq,
+								end_seq);
+			}
+		}
+
 		if (unlikely(in_sack < 0))
 			break;
 
-		if (in_sack)
+		if (in_sack) {
 			*flag |= tcp_sacktag_one(skb, sk, reord, dup_sack,
 						 *fack_count,
 						 &(TCP_SKB_CB(skb)->sacked),
 						 tcp_skb_pcount(skb));
 
+			if (!before(TCP_SKB_CB(skb)->seq,
+				    tcp_highest_sack_seq(tp)))
+				tcp_advance_highest_sack(sk, skb);
+		}
+
 		*fack_count += tcp_skb_pcount(skb);
 	}
 	return skb;
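
To illustrate the arithmetic in the partially-SACKed branch of tcp_shift_skb_data() above: only whole MSS-sized chunks are shifted, so that the gso_segs (pcount) accounting on both skbs stays exact. A standalone sketch of the same rounding, with made-up values for mss and len:

	#include <stdio.h>

	int main(void)
	{
		unsigned int mss = 1460;	/* assumed MSS for the example */
		unsigned int len = 3500;	/* bytes of this skb covered by the SACK block */
		unsigned int pcount;

		if (len < mss) {
			printf("less than one segment covered: nothing is shifted\n");
			return 0;
		}

		pcount = len / mss;	/* 3500 / 1460 = 2 full segments */
		len = pcount * mss;	/* shift 2920 bytes, leave the 580-byte tail in place */
		printf("shift %u bytes as %u segments\n", len, pcount);
		return 0;
	}
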