Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--	net/ipv4/tcp_input.c	511
1 file changed, 417 insertions(+), 94 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d77c0d29e239..99b7ecbe8893 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -701,13 +701,10 @@ static inline void tcp_set_rto(struct sock *sk)
 	 * all the algo is pure shit and should be replaced
	 * with correct one. It is exactly, which we pretend to do.
	 */
-}
 
 /* NOTE: clamping at TCP_RTO_MIN is not required, current algo
  * guarantees that rto is higher.
  */
-static inline void tcp_bound_rto(struct sock *sk)
-{
 	if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
		inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
 }
@@ -928,7 +925,6 @@ static void tcp_init_metrics(struct sock *sk)
 		tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
	}
	tcp_set_rto(sk);
-	tcp_bound_rto(sk);
	if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp)
		goto reset;
	tp->snd_cwnd = tcp_init_cwnd(tp, dst);
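The two hunks above fold the old tcp_bound_rto() upper clamp into the tail of tcp_set_rto(), so tcp_init_metrics() no longer needs a separate bounding call (the same simplification reaches the RTT-measurement paths in the final hunks below). A minimal standalone sketch of the resulting behaviour; the millisecond constant and the RFC 2988-shaped formula are illustrative stand-ins for the kernel's fixed-point, jiffies-based code:

#include <stdio.h>

#define RTO_MAX_MS (120 * 1000)	/* stand-in for TCP_RTO_MAX (120 s) */

/* Clamping at a minimum is not needed here: the estimator already keeps
 * the result above the floor (see the comment retained in the hunk above).
 */
static unsigned int set_rto_sketch(unsigned int srtt_ms, unsigned int rttvar_ms)
{
	unsigned int rto = srtt_ms + 4 * rttvar_ms;

	if (rto > RTO_MAX_MS)
		rto = RTO_MAX_MS;
	return rto;
}

int main(void)
{
	printf("%u\n", set_rto_sketch(200, 50));	/* 400 */
	printf("%u\n", set_rto_sketch(200000, 50000));	/* clamped: 120000 */
	return 0;
}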
@@ -1002,7 +998,8 @@ static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
 	}
 }
 
-void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb)
+static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
+					    struct sk_buff *skb)
 {
 	tcp_verify_retransmit_hint(tp, skb);
 
@@ -1236,31 +1233,58 @@ static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
 	return dup_sack;
 }
 
+struct tcp_sacktag_state {
+	int reord;
+	int fack_count;
+	int flag;
+};
+
 /* Check if skb is fully within the SACK block. In presence of GSO skbs,
  * the incoming SACK may not exactly match but we can find smaller MSS
  * aligned portion of it that matches. Therefore we might need to fragment
  * which may fail and creates some hassle (caller must handle error case
  * returns).
+ *
+ * FIXME: this could be merged to shift decision code
  */
 static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
				 u32 start_seq, u32 end_seq)
 {
 	int in_sack, err;
	unsigned int pkt_len;
+	unsigned int mss;
 
	in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
		  !before(end_seq, TCP_SKB_CB(skb)->end_seq);
 
	if (tcp_skb_pcount(skb) > 1 && !in_sack &&
	    after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
-
+		mss = tcp_skb_mss(skb);
		in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
 
-		if (!in_sack)
+		if (!in_sack) {
			pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
-		else
+			if (pkt_len < mss)
+				pkt_len = mss;
+		} else {
			pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
-		err = tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->gso_size);
+			if (pkt_len < mss)
+				return -EINVAL;
+		}
+
+		/* Round if necessary so that SACKs cover only full MSSes
+		 * and/or the remaining small portion (if present)
+		 */
+		if (pkt_len > mss) {
+			unsigned int new_len = (pkt_len / mss) * mss;
+			if (!in_sack && new_len < pkt_len) {
+				new_len += mss;
+				if (new_len > skb->len)
+					return 0;
+			}
+			pkt_len = new_len;
+		}
+		err = tcp_fragment(sk, skb, pkt_len, mss);
		if (err < 0)
			return err;
	}
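The new rounding block guarantees that tcp_fragment() splits a GSO skb only on an MSS boundary: when the skb head lies outside the SACK block (!in_sack), the split point is rounded up so the head fragment fully covers the SACK edge, and the attempt is abandoned (return 0, i.e. "not in sack") if that would overrun the skb; when the skb is instead being trimmed to end_seq, the split point is rounded down. A worked standalone sketch of just that arithmetic, with illustrative numbers:

#include <stdio.h>

/* Mirrors the pkt_len rounding in the hunk above, outside the kernel.
 * Returns 0 where the kernel would give up on fragmenting.
 */
static unsigned int round_pkt_len(unsigned int pkt_len, unsigned int mss,
				  int in_sack, unsigned int skb_len)
{
	if (pkt_len > mss) {
		unsigned int new_len = (pkt_len / mss) * mss;

		if (!in_sack && new_len < pkt_len) {
			new_len += mss;		/* round up over the edge */
			if (new_len > skb_len)
				return 0;
		}
		pkt_len = new_len;
	}
	return pkt_len;
}

int main(void)
{
	/* mss = 1000; SACK edge 2500 bytes into a 10000-byte skb */
	printf("%u\n", round_pkt_len(2500, 1000, 0, 10000));	/* 3000 */
	printf("%u\n", round_pkt_len(2500, 1000, 1, 10000));	/* 2000 */
	return 0;
}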
@@ -1268,24 +1292,25 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 	return in_sack;
 }
 
-static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
-			   int *reord, int dup_sack, int fack_count)
+static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
+			  struct tcp_sacktag_state *state,
+			  int dup_sack, int pcount)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
	u8 sacked = TCP_SKB_CB(skb)->sacked;
-	int flag = 0;
+	int fack_count = state->fack_count;
 
	/* Account D-SACK for retransmitted packet. */
	if (dup_sack && (sacked & TCPCB_RETRANS)) {
		if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
			tp->undo_retrans--;
		if (sacked & TCPCB_SACKED_ACKED)
-			*reord = min(fack_count, *reord);
+			state->reord = min(fack_count, state->reord);
	}
 
	/* Nothing to do; acked frame is about to be dropped (was ACKed). */
	if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
-		return flag;
+		return sacked;
 
	if (!(sacked & TCPCB_SACKED_ACKED)) {
		if (sacked & TCPCB_SACKED_RETRANS) {
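Note the signature change: tcp_sacktag_one() now computes and returns the updated tag byte instead of writing TCP_SKB_CB(skb)->sacked itself, with reord/flag/fack_count accumulated in the shared tcp_sacktag_state. That separation is what lets tcp_shifted_skb() below reuse the same accounting for bytes that are migrating into the previous skb. A sketch of the two call patterns this enables:

/* Ordinary walk (see tcp_sacktag_walk below): the result is written back.
 *
 *	TCP_SKB_CB(skb)->sacked = tcp_sacktag_one(skb, sk, state, dup_sack,
 *						  tcp_skb_pcount(skb));
 *
 * Shift/merge (see tcp_shifted_skb below): the result is discarded, since
 * the shifted bytes now live in 'prev', which is already marked
 * TCPCB_SACKED_ACKED; only the tp->*_out counters and state matter.
 *
 *	tcp_sacktag_one(skb, sk, state, 0, pcount);
 */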
@@ -1294,10 +1319,9 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
 			 * that retransmission is still in flight.
			 */
			if (sacked & TCPCB_LOST) {
-				TCP_SKB_CB(skb)->sacked &=
-					~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
-				tp->lost_out -= tcp_skb_pcount(skb);
-				tp->retrans_out -= tcp_skb_pcount(skb);
+				sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
+				tp->lost_out -= pcount;
+				tp->retrans_out -= pcount;
			}
		} else {
			if (!(sacked & TCPCB_RETRANS)) {
@@ -1306,56 +1330,280 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
 				 */
				if (before(TCP_SKB_CB(skb)->seq,
					   tcp_highest_sack_seq(tp)))
-					*reord = min(fack_count, *reord);
+					state->reord = min(fack_count,
+							   state->reord);
 
				/* SACK enhanced F-RTO (RFC4138; Appendix B) */
				if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark))
-					flag |= FLAG_ONLY_ORIG_SACKED;
+					state->flag |= FLAG_ONLY_ORIG_SACKED;
			}
 
			if (sacked & TCPCB_LOST) {
-				TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
-				tp->lost_out -= tcp_skb_pcount(skb);
+				sacked &= ~TCPCB_LOST;
+				tp->lost_out -= pcount;
			}
		}
 
-		TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
-		flag |= FLAG_DATA_SACKED;
-		tp->sacked_out += tcp_skb_pcount(skb);
+		sacked |= TCPCB_SACKED_ACKED;
+		state->flag |= FLAG_DATA_SACKED;
+		tp->sacked_out += pcount;
 
-		fack_count += tcp_skb_pcount(skb);
+		fack_count += pcount;
 
		/* Lost marker hint past SACKed? Tweak RFC3517 cnt */
		if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
		    before(TCP_SKB_CB(skb)->seq,
			   TCP_SKB_CB(tp->lost_skb_hint)->seq))
-			tp->lost_cnt_hint += tcp_skb_pcount(skb);
+			tp->lost_cnt_hint += pcount;
 
		if (fack_count > tp->fackets_out)
			tp->fackets_out = fack_count;
-
-		if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
-			tcp_advance_highest_sack(sk, skb);
	}
 
	/* D-SACK. We can detect redundant retransmission in S|R and plain R
	 * frames and clear it. undo_retrans is decreased above, L|R frames
	 * are accounted above as well.
	 */
-	if (dup_sack && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) {
-		TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
-		tp->retrans_out -= tcp_skb_pcount(skb);
+	if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
+		sacked &= ~TCPCB_SACKED_RETRANS;
+		tp->retrans_out -= pcount;
	}
 
-	return flag;
+	return sacked;
+}
+
+static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
+			   struct tcp_sacktag_state *state,
+			   unsigned int pcount, int shifted, int mss)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
+
+	BUG_ON(!pcount);
+
+	/* Tweak before seqno plays */
+	if (!tcp_is_fack(tp) && tcp_is_sack(tp) && tp->lost_skb_hint &&
+	    !before(TCP_SKB_CB(tp->lost_skb_hint)->seq, TCP_SKB_CB(skb)->seq))
+		tp->lost_cnt_hint += pcount;
+
+	TCP_SKB_CB(prev)->end_seq += shifted;
+	TCP_SKB_CB(skb)->seq += shifted;
+
+	skb_shinfo(prev)->gso_segs += pcount;
+	BUG_ON(skb_shinfo(skb)->gso_segs < pcount);
+	skb_shinfo(skb)->gso_segs -= pcount;
+
+	/* When we're adding to gso_segs == 1, gso_size will be zero,
+	 * in theory this shouldn't be necessary but as long as DSACK
+	 * code can come after this skb later on it's better to keep
+	 * setting gso_size to something.
+	 */
+	if (!skb_shinfo(prev)->gso_size) {
+		skb_shinfo(prev)->gso_size = mss;
+		skb_shinfo(prev)->gso_type = sk->sk_gso_type;
+	}
+
+	/* CHECKME: To clear or not to clear? Mimics normal skb currently */
+	if (skb_shinfo(skb)->gso_segs <= 1) {
+		skb_shinfo(skb)->gso_size = 0;
+		skb_shinfo(skb)->gso_type = 0;
+	}
+
+	/* We discard results */
+	tcp_sacktag_one(skb, sk, state, 0, pcount);
+
+	/* Difference in this won't matter, both ACKed by the same cumul. ACK */
+	TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
+
+	if (skb->len > 0) {
+		BUG_ON(!tcp_skb_pcount(skb));
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED);
+		return 0;
+	}
+
+	/* Whole SKB was eaten :-) */
+
+	if (skb == tp->retransmit_skb_hint)
+		tp->retransmit_skb_hint = prev;
+	if (skb == tp->scoreboard_skb_hint)
+		tp->scoreboard_skb_hint = prev;
+	if (skb == tp->lost_skb_hint) {
+		tp->lost_skb_hint = prev;
+		tp->lost_cnt_hint -= tcp_skb_pcount(prev);
+	}
+
+	TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(prev)->flags;
+	if (skb == tcp_highest_sack(sk))
+		tcp_advance_highest_sack(sk, skb);
+
+	tcp_unlink_write_queue(skb, sk);
+	sk_wmem_free_skb(sk, skb);
+
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED);
+
+	return 1;
+}
+
+/* I wish gso_size would have a bit more sane initialization than
+ * something-or-zero which complicates things
+ */
+static int tcp_skb_seglen(struct sk_buff *skb)
+{
+	return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
+}
+
+/* Shifting pages past head area doesn't work */
+static int skb_can_shift(struct sk_buff *skb)
+{
+	return !skb_headlen(skb) && skb_is_nonlinear(skb);
+}
+
+/* Try collapsing SACK blocks spanning across multiple skbs to a single
+ * skb.
+ */
+static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
+					  struct tcp_sacktag_state *state,
+					  u32 start_seq, u32 end_seq,
+					  int dup_sack)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *prev;
+	int mss;
+	int pcount = 0;
+	int len;
+	int in_sack;
+
+	if (!sk_can_gso(sk))
+		goto fallback;
+
+	/* Normally R but no L won't result in plain S */
+	if (!dup_sack &&
+	    (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
+		goto fallback;
+	if (!skb_can_shift(skb))
+		goto fallback;
+	/* This frame is about to be dropped (was ACKed). */
+	if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
+		goto fallback;
+
+	/* Can only happen with delayed DSACK + discard craziness */
+	if (unlikely(skb == tcp_write_queue_head(sk)))
+		goto fallback;
+	prev = tcp_write_queue_prev(sk, skb);
+
+	if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
+		goto fallback;
+
+	in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
+		  !before(end_seq, TCP_SKB_CB(skb)->end_seq);
+
+	if (in_sack) {
+		len = skb->len;
+		pcount = tcp_skb_pcount(skb);
+		mss = tcp_skb_seglen(skb);
+
+		/* TODO: Fix DSACKs to not fragment already SACKed and we can
+		 * drop this restriction as unnecessary
+		 */
+		if (mss != tcp_skb_seglen(prev))
+			goto fallback;
+	} else {
+		if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
+			goto noop;
+		/* CHECKME: This is non-MSS split case only?, this will
+		 * cause skipped skbs due to advancing loop btw, original
+		 * has that feature too
+		 */
+		if (tcp_skb_pcount(skb) <= 1)
+			goto noop;
+
+		in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
+		if (!in_sack) {
+			/* TODO: head merge to next could be attempted here
+			 * if (!after(TCP_SKB_CB(skb)->end_seq, end_seq)),
+			 * though it might not be worth of the additional hassle
+			 *
+			 * ...we can probably just fallback to what was done
+			 * previously. We could try merging non-SACKed ones
+			 * as well but it probably isn't going to buy off
+			 * because later SACKs might again split them, and
+			 * it would make skb timestamp tracking considerably
+			 * harder problem.
+			 */
+			goto fallback;
+		}
+
+		len = end_seq - TCP_SKB_CB(skb)->seq;
+		BUG_ON(len < 0);
+		BUG_ON(len > skb->len);
+
+		/* MSS boundaries should be honoured or else pcount will
+		 * severely break even though it makes things bit trickier.
+		 * Optimize common case to avoid most of the divides
+		 */
+		mss = tcp_skb_mss(skb);
+
+		/* TODO: Fix DSACKs to not fragment already SACKed and we can
+		 * drop this restriction as unnecessary
+		 */
+		if (mss != tcp_skb_seglen(prev))
+			goto fallback;
+
+		if (len == mss) {
+			pcount = 1;
+		} else if (len < mss) {
+			goto noop;
+		} else {
+			pcount = len / mss;
+			len = pcount * mss;
+		}
+	}
+
+	if (!skb_shift(prev, skb, len))
+		goto fallback;
+	if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss))
+		goto out;
+
+	/* Hole filled allows collapsing with the next as well, this is very
+	 * useful when hole on every nth skb pattern happens
+	 */
+	if (prev == tcp_write_queue_tail(sk))
+		goto out;
+	skb = tcp_write_queue_next(sk, prev);
+
+	if (!skb_can_shift(skb) ||
+	    (skb == tcp_send_head(sk)) ||
+	    ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
+	    (mss != tcp_skb_seglen(skb)))
+		goto out;
+
+	len = skb->len;
+	if (skb_shift(prev, skb, len)) {
+		pcount += tcp_skb_pcount(skb);
+		tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss);
+	}
+
+out:
+	state->fack_count += pcount;
+	return prev;
+
+noop:
+	return skb;
+
+fallback:
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
+	return NULL;
 }
 
 static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
					struct tcp_sack_block *next_dup,
+					struct tcp_sacktag_state *state,
					u32 start_seq, u32 end_seq,
-					int dup_sack_in, int *fack_count,
-					int *reord, int *flag)
+					int dup_sack_in)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *tmp;
+
 	tcp_for_write_queue_from(skb, sk) {
		int in_sack = 0;
		int dup_sack = dup_sack_in;
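tcp_skb_seglen() above papers over the gso_size initialization wart its comment complains about: a one-segment skb may carry gso_size == 0, so its effective segment length is simply skb->len, while a multi-segment skb reports its MSS. A standalone sketch of that rule (the struct and names here are illustrative, not kernel API):

#include <stdio.h>

struct fake_skb {
	unsigned int len;	/* payload bytes */
	unsigned int gso_segs;	/* segment count, i.e. pcount */
	unsigned int gso_size;	/* MSS, or 0 when gso_segs <= 1 */
};

static unsigned int seglen(const struct fake_skb *skb)
{
	return skb->gso_segs == 1 ? skb->len : skb->gso_size;
}

int main(void)
{
	struct fake_skb one = { .len = 700, .gso_segs = 1, .gso_size = 0 };
	struct fake_skb gso = { .len = 4380, .gso_segs = 3, .gso_size = 1460 };

	printf("%u %u\n", seglen(&one), seglen(&gso));	/* 700 1460 */
	return 0;
}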
@@ -1376,17 +1624,42 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 			dup_sack = 1;
		}
 
-		if (in_sack <= 0)
-			in_sack = tcp_match_skb_to_sack(sk, skb, start_seq,
-							end_seq);
+		/* skb reference here is a bit tricky to get right, since
+		 * shifting can eat and free both this skb and the next,
+		 * so not even _safe variant of the loop is enough.
+		 */
+		if (in_sack <= 0) {
+			tmp = tcp_shift_skb_data(sk, skb, state,
+						 start_seq, end_seq, dup_sack);
+			if (tmp != NULL) {
+				if (tmp != skb) {
+					skb = tmp;
+					continue;
+				}
+
+				in_sack = 0;
+			} else {
+				in_sack = tcp_match_skb_to_sack(sk, skb,
+								start_seq,
+								end_seq);
+			}
+		}
+
 		if (unlikely(in_sack < 0))
			break;
 
-		if (in_sack)
-			*flag |= tcp_sacktag_one(skb, sk, reord, dup_sack,
-						 *fack_count);
+		if (in_sack) {
+			TCP_SKB_CB(skb)->sacked = tcp_sacktag_one(skb, sk,
+								  state,
+								  dup_sack,
+								  tcp_skb_pcount(skb));
+
+			if (!before(TCP_SKB_CB(skb)->seq,
+				    tcp_highest_sack_seq(tp)))
+				tcp_advance_highest_sack(sk, skb);
+		}
 
-		*fack_count += tcp_skb_pcount(skb);
+		state->fack_count += tcp_skb_pcount(skb);
	}
	return skb;
 }
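The loop above leans on a three-way return contract from tcp_shift_skb_data(), which is worth spelling out since the shifted skb may already have been freed by the time the call returns:

/* tcp_shift_skb_data() return values, as consumed by tcp_sacktag_walk():
 *
 *   NULL         shifting was not attempted (counted as SACKSHIFTFALLBACK);
 *                fall back to tcp_match_skb_to_sack()
 *   skb          nothing could be moved for this block; treat as in_sack = 0
 *   prev != skb  data was shifted into the previous skb and skb itself may
 *                have been unlinked and freed, so restart the walk from the
 *                returned pointer instead of advancing normally
 */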
@@ -1395,16 +1668,17 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
  * a normal way
  */
 static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
-					u32 skip_to_seq, int *fack_count)
+					struct tcp_sacktag_state *state,
+					u32 skip_to_seq)
 {
 	tcp_for_write_queue_from(skb, sk) {
		if (skb == tcp_send_head(sk))
			break;
 
-		if (!before(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
+		if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
			break;
 
-		*fack_count += tcp_skb_pcount(skb);
+		state->fack_count += tcp_skb_pcount(skb);
	}
	return skb;
 }
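The test change from !before() to after() is a deliberate off-by-one fix: an skb whose end_seq equals skip_to_seq is now skipped past rather than returned. The sequence-number helpers are the usual wrap-safe comparisons; a standalone sketch (same shape as the definitions in include/net/tcp.h):

#include <stdio.h>

typedef unsigned int __u32;
typedef int __s32;

static inline int before(__u32 seq1, __u32 seq2)
{
	return (__s32)(seq1 - seq2) < 0;
}
#define after(seq2, seq1)	before(seq1, seq2)

int main(void)
{
	__u32 end_seq = 1000, skip_to_seq = 1000;

	printf("%d\n", !before(end_seq, skip_to_seq));	/* 1: old test stops here */
	printf("%d\n", after(end_seq, skip_to_seq));	/* 0: new test skips past */
	return 0;
}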
@@ -1412,18 +1686,17 @@ static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
 static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
						struct sock *sk,
						struct tcp_sack_block *next_dup,
-						u32 skip_to_seq,
-						int *fack_count, int *reord,
-						int *flag)
+						struct tcp_sacktag_state *state,
+						u32 skip_to_seq)
 {
 	if (next_dup == NULL)
		return skb;
 
	if (before(next_dup->start_seq, skip_to_seq)) {
-		skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq, fack_count);
-		skb = tcp_sacktag_walk(skb, sk, NULL,
+		skb = tcp_sacktag_skip(skb, sk, state, next_dup->start_seq);
+		skb = tcp_sacktag_walk(skb, sk, NULL, state,
				       next_dup->start_seq, next_dup->end_seq,
-				       1, fack_count, reord, flag);
+				       1);
	}
 
	return skb;
@@ -1445,16 +1718,17 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
 	struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
	struct tcp_sack_block sp[TCP_NUM_SACKS];
	struct tcp_sack_block *cache;
+	struct tcp_sacktag_state state;
	struct sk_buff *skb;
	int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
	int used_sacks;
-	int reord = tp->packets_out;
-	int flag = 0;
	int found_dup_sack = 0;
-	int fack_count;
	int i, j;
	int first_sack_index;
 
+	state.flag = 0;
+	state.reord = tp->packets_out;
+
	if (!tp->sacked_out) {
		if (WARN_ON(tp->fackets_out))
			tp->fackets_out = 0;
@@ -1464,7 +1738,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
 	found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
					 num_sacks, prior_snd_una);
	if (found_dup_sack)
-		flag |= FLAG_DSACKING_ACK;
+		state.flag |= FLAG_DSACKING_ACK;
 
	/* Eliminate too old ACKs, but take into
	 * account more or less fresh ones, they can
@@ -1533,7 +1807,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
 	}
 
	skb = tcp_write_queue_head(sk);
-	fack_count = 0;
+	state.fack_count = 0;
	i = 0;
 
	if (!tp->sacked_out) {
@@ -1558,7 +1832,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
 
		/* Event "B" in the comment above. */
		if (after(end_seq, tp->high_seq))
-			flag |= FLAG_DATA_LOST;
+			state.flag |= FLAG_DATA_LOST;
 
		/* Skip too early cached blocks */
		while (tcp_sack_cache_ok(tp, cache) &&
@@ -1571,13 +1845,13 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
 
			/* Head todo? */
			if (before(start_seq, cache->start_seq)) {
-				skb = tcp_sacktag_skip(skb, sk, start_seq,
-						       &fack_count);
+				skb = tcp_sacktag_skip(skb, sk, &state,
+						       start_seq);
				skb = tcp_sacktag_walk(skb, sk, next_dup,
+						       &state,
						       start_seq,
						       cache->start_seq,
-						       dup_sack, &fack_count,
-						       &reord, &flag);
+						       dup_sack);
			}
 
			/* Rest of the block already fully processed? */
@@ -1585,9 +1859,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
 				goto advance_sp;
 
			skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
-						       cache->end_seq,
-						       &fack_count, &reord,
-						       &flag);
+						       &state,
+						       cache->end_seq);
 
			/* ...tail remains todo... */
			if (tcp_highest_sack_seq(tp) == cache->end_seq) {
@@ -1595,13 +1868,12 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
 				skb = tcp_highest_sack(sk);
				if (skb == NULL)
					break;
-				fack_count = tp->fackets_out;
+				state.fack_count = tp->fackets_out;
				cache++;
				goto walk;
			}
 
-			skb = tcp_sacktag_skip(skb, sk, cache->end_seq,
-					       &fack_count);
+			skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq);
			/* Check overlap against next cached too (past this one already) */
			cache++;
			continue;
@@ -1611,20 +1883,20 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
 			skb = tcp_highest_sack(sk);
			if (skb == NULL)
				break;
-			fack_count = tp->fackets_out;
+			state.fack_count = tp->fackets_out;
		}
-		skb = tcp_sacktag_skip(skb, sk, start_seq, &fack_count);
+		skb = tcp_sacktag_skip(skb, sk, &state, start_seq);
 
 walk:
-		skb = tcp_sacktag_walk(skb, sk, next_dup, start_seq, end_seq,
-				       dup_sack, &fack_count, &reord, &flag);
+		skb = tcp_sacktag_walk(skb, sk, next_dup, &state,
+				       start_seq, end_seq, dup_sack);
 
 advance_sp:
		/* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct
		 * due to in-order walk
		 */
		if (after(end_seq, tp->frto_highmark))
-			flag &= ~FLAG_ONLY_ORIG_SACKED;
+			state.flag &= ~FLAG_ONLY_ORIG_SACKED;
 
		i++;
	}
@@ -1641,10 +1913,10 @@ advance_sp:
 
 	tcp_verify_left_out(tp);
 
-	if ((reord < tp->fackets_out) &&
+	if ((state.reord < tp->fackets_out) &&
	    ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) &&
	    (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
-		tcp_update_reordering(sk, tp->fackets_out - reord, 0);
+		tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
 
 out:
 
@@ -1654,13 +1926,13 @@ out:
 	WARN_ON((int)tp->retrans_out < 0);
	WARN_ON((int)tcp_packets_in_flight(tp) < 0);
 #endif
-	return flag;
+	return state.flag;
 }
 
 /* Limits sacked_out so that sum with lost_out isn't ever larger than
  * packets_out. Returns zero if sacked_out adjustement wasn't necessary.
  */
-int tcp_limit_reno_sacked(struct tcp_sock *tp)
+static int tcp_limit_reno_sacked(struct tcp_sock *tp)
 {
 	u32 holes;
 
@@ -2336,9 +2608,9 @@ static void DBGUNDO(struct sock *sk, const char *msg)
 	struct inet_sock *inet = inet_sk(sk);
 
	if (sk->sk_family == AF_INET) {
-		printk(KERN_DEBUG "Undo %s " NIPQUAD_FMT "/%u c%u l%u ss%u/%u p%u\n",
+		printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
		       msg,
-		       NIPQUAD(inet->daddr), ntohs(inet->dport),
+		       &inet->daddr, ntohs(inet->dport),
		       tp->snd_cwnd, tcp_left_out(tp),
		       tp->snd_ssthresh, tp->prior_ssthresh,
		       tp->packets_out);
@@ -2346,9 +2618,9 @@ static void DBGUNDO(struct sock *sk, const char *msg)
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (sk->sk_family == AF_INET6) {
		struct ipv6_pinfo *np = inet6_sk(sk);
-		printk(KERN_DEBUG "Undo %s " NIP6_FMT "/%u c%u l%u ss%u/%u p%u\n",
+		printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
		       msg,
-		       NIP6(np->daddr), ntohs(inet->dport),
+		       &np->daddr, ntohs(inet->dport),
		       tp->snd_cwnd, tcp_left_out(tp),
		       tp->snd_ssthresh, tp->prior_ssthresh,
		       tp->packets_out);
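Both DBGUNDO hunks swap the removed NIPQUAD/NIP6 macro pairs for the %pI4/%pI6 printk extensions, which take a pointer to the raw big-endian address (__be32 or struct in6_addr) rather than expanding it into separate arguments. A kernel-style fragment showing the idiom, for illustration only:

/* %pI4 dereferences its argument, so pass the address of the field */
static void dbg_peer(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);

	printk(KERN_DEBUG "peer %pI4:%u\n",
	       &inet->daddr, ntohs(inet->dport));
}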
@@ -2559,6 +2831,56 @@ static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb)
 	tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 }
 
+/* Do a simple retransmit without using the backoff mechanisms in
+ * tcp_timer. This is used for path mtu discovery.
+ * The socket is already locked here.
+ */
+void tcp_simple_retransmit(struct sock *sk)
+{
+	const struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+	unsigned int mss = tcp_current_mss(sk, 0);
+	u32 prior_lost = tp->lost_out;
+
+	tcp_for_write_queue(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
+		if (tcp_skb_seglen(skb) > mss &&
+		    !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
+			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
+				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
+				tp->retrans_out -= tcp_skb_pcount(skb);
+			}
+			tcp_skb_mark_lost_uncond_verify(tp, skb);
+		}
+	}
+
+	tcp_clear_retrans_hints_partial(tp);
+
+	if (prior_lost == tp->lost_out)
+		return;
+
+	if (tcp_is_reno(tp))
+		tcp_limit_reno_sacked(tp);
+
+	tcp_verify_left_out(tp);
+
+	/* Don't muck with the congestion window here.
+	 * Reason is that we do not increase amount of _data_
+	 * in network, but units changed and effective
+	 * cwnd/ssthresh really reduced now.
+	 */
+	if (icsk->icsk_ca_state != TCP_CA_Loss) {
+		tp->high_seq = tp->snd_nxt;
+		tp->snd_ssthresh = tcp_current_ssthresh(sk);
+		tp->prior_ssthresh = 0;
+		tp->undo_marker = 0;
+		tcp_set_ca_state(sk, TCP_CA_Loss);
+	}
+	tcp_xmit_retransmit_queue(sk);
+}
+
 /* Process an event, which can update packets-in-flight not trivially.
  * Main goal of this function is to calculate new estimate for left_out,
  * taking into account both packets sitting in receiver's buffer and
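tcp_simple_retransmit() is invoked from the ICMP error path when path MTU discovery shrinks the MSS: segments larger than the new MSS that are not yet SACKed get marked lost and retransmitted immediately, without touching RTO backoff state. An abbreviated, illustrative sketch of such a caller (cf. do_pmtu_discovery() in tcp_ipv4.c; the real function also checks socket state, DF handling, and lock ownership):

static void pmtu_shrunk_sketch(struct sock *sk, u32 mtu)
{
	if (inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);		/* adopt the smaller MSS */
		tcp_simple_retransmit(sk);	/* resend oversized segments
						 * with no RTO backoff */
	}
}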
@@ -2730,6 +3052,13 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 		tcp_xmit_retransmit_queue(sk);
 }
 
+static void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
+{
+	tcp_rtt_estimator(sk, seq_rtt);
+	tcp_set_rto(sk);
+	inet_csk(sk)->icsk_backoff = 0;
+}
+
 /* Read draft-ietf-tcplw-high-performance before mucking
  * with this code. (Supersedes RFC1323)
  */
@@ -2751,11 +3080,8 @@ static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
 	 * in window is lost... Voila.	--ANK (010210)
	 */
	struct tcp_sock *tp = tcp_sk(sk);
-	const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
-	tcp_rtt_estimator(sk, seq_rtt);
-	tcp_set_rto(sk);
-	inet_csk(sk)->icsk_backoff = 0;
-	tcp_bound_rto(sk);
+
+	tcp_valid_rtt_meas(sk, tcp_time_stamp - tp->rx_opt.rcv_tsecr);
 }
 
 static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
@@ -2772,10 +3098,7 @@ static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
 	if (flag & FLAG_RETRANS_DATA_ACKED)
		return;
 
-	tcp_rtt_estimator(sk, seq_rtt);
-	tcp_set_rto(sk);
-	inet_csk(sk)->icsk_backoff = 0;
-	tcp_bound_rto(sk);
+	tcp_valid_rtt_meas(sk, seq_rtt);
 }
 
 static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
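The last three hunks deduplicate the tail of both RTT-measurement paths (timestamp and non-timestamp) into tcp_valid_rtt_meas(); the explicit tcp_bound_rto() calls vanish because the clamp now lives inside tcp_set_rto(), per the first hunk. An annotated restatement of the helper, where only the comments are added here:

static void tcp_valid_rtt_meas_annotated(struct sock *sk, u32 seq_rtt)
{
	tcp_rtt_estimator(sk, seq_rtt);	/* fold the sample into srtt/mdev/rttvar */
	tcp_set_rto(sk);		/* recompute icsk_rto, clamped to
					 * TCP_RTO_MAX (see the first hunk) */
	inet_csk(sk)->icsk_backoff = 0;	/* a valid measurement ends any
					 * exponential backoff */
}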