diff options
author | Ilpo Järvinen <ilpo.jarvinen@helsinki.fi> | 2007-08-25 01:54:44 -0400 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2007-10-10 19:48:29 -0400 |
commit | 5b3c98821a8753239aefc1c217409aa3e5c90787 (patch) | |
tree | 39d4d2bc47ad533cd084a900fa94542ef1b6e8bf /net | |
parent | 6728e7dc3e577241f36921c720cfb4eb8f5aed1a (diff) |
[TCP]: Discard fuzzy SACK blocks
SACK processing code has been a sort of russian roulette as no
validation of SACK blocks is previously attempted. Besides, it
is not very clear what all kinds of broken SACK blocks really
mean (e.g., one that has start and end sequence numbers
reversed). So now close the roulette once and for all.
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv4/tcp_input.c | 82 |
1 files changed, 82 insertions, 0 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0ead46f2bcd5..a2364ebf8585 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -1019,7 +1019,86 @@ static void tcp_update_reordering(struct sock *sk, const int metric, | |||
1019 | * for retransmitted and already SACKed segment -> reordering.. | 1019 | * for retransmitted and already SACKed segment -> reordering.. |
1020 | * Both of these heuristics are not used in Loss state, when we cannot | 1020 | * Both of these heuristics are not used in Loss state, when we cannot |
1021 | * account for retransmits accurately. | 1021 | * account for retransmits accurately. |
1022 | * | ||
1023 | * SACK block validation. | ||
1024 | * ---------------------- | ||
1025 | * | ||
1026 | * SACK block range validation checks that the received SACK block fits to | ||
1027 | * the expected sequence limits, i.e., it is between SND.UNA and SND.NXT. | ||
1028 | * Note that SND.UNA is not included to the range though being valid because | ||
1029 | * it means that the receiver is rather inconsistent with itself (reports | ||
1030 | * SACK reneging when it should advance SND.UNA). | ||
1031 | * | ||
1032 | * Implements also blockage to start_seq wrap-around. Problem lies in the | ||
1033 | * fact that though start_seq (s) is before end_seq (i.e., not reversed), | ||
1034 | * there's no guarantee that it will be before snd_nxt (n). The problem | ||
1035 | * happens when start_seq resides between end_seq wrap (e_w) and snd_nxt | ||
1036 | * wrap (s_w): | ||
1037 | * | ||
1038 | * <- outs wnd -> <- wrapzone -> | ||
1039 | * u e n u_w e_w s n_w | ||
1040 | * | | | | | | | | ||
1041 | * |<------------+------+----- TCP seqno space --------------+---------->| | ||
1042 | * ...-- <2^31 ->| |<--------... | ||
1043 | * ...---- >2^31 ------>| |<--------... | ||
1044 | * | ||
1045 | * Current code wouldn't be vulnerable but it's better still to discard such | ||
1046 | * crazy SACK blocks. Doing this check for start_seq alone closes somewhat | ||
1047 | * similar case (end_seq after snd_nxt wrap) as earlier reversed check in | ||
1048 | * snd_nxt wrap -> snd_una region will then become "well defined", i.e., | ||
1049 | * equal to the ideal case (infinite seqno space without wrap caused issues). | ||
1050 | * | ||
1051 | * With D-SACK the lower bound is extended to cover sequence space below | ||
1052 | * SND.UNA down to undo_marker, which is the last point of interest. Yet | ||
1053 | * again, DSACK block must not to go across snd_una (for the same reason as | ||
1054 | * for the normal SACK blocks, explained above). But there all simplicity | ||
1055 | * ends, TCP might receive valid D-SACKs below that. As long as they reside | ||
1056 | * fully below undo_marker they do not affect behavior in anyway and can | ||
1057 | * therefore be safely ignored. In rare cases (which are more or less | ||
1058 | * theoretical ones), the D-SACK will nicely cross that boundary due to skb | ||
1059 | * fragmentation and packet reordering past skb's retransmission. To consider | ||
1060 | * them correctly, the acceptable range must be extended even more though | ||
1061 | * the exact amount is rather hard to quantify. However, tp->max_window can | ||
1062 | * be used as an exaggerated estimate. | ||
1022 | */ | 1063 | */ |
1064 | static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack, | ||
1065 | u32 start_seq, u32 end_seq) | ||
1066 | { | ||
1067 | /* Too far in future, or reversed (interpretation is ambiguous) */ | ||
1068 | if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq)) | ||
1069 | return 0; | ||
1070 | |||
1071 | /* Nasty start_seq wrap-around check (see comments above) */ | ||
1072 | if (!before(start_seq, tp->snd_nxt)) | ||
1073 | return 0; | ||
1074 | |||
1075 | /* In outstanding window? ...This is valid exit for DSACKs too. | ||
1076 | * start_seq == snd_una is non-sensical (see comments above) | ||
1077 | */ | ||
1078 | if (after(start_seq, tp->snd_una)) | ||
1079 | return 1; | ||
1080 | |||
1081 | if (!is_dsack || !tp->undo_marker) | ||
1082 | return 0; | ||
1083 | |||
1084 | /* ...Then it's D-SACK, and must reside below snd_una completely */ | ||
1085 | if (!after(end_seq, tp->snd_una)) | ||
1086 | return 0; | ||
1087 | |||
1088 | if (!before(start_seq, tp->undo_marker)) | ||
1089 | return 1; | ||
1090 | |||
1091 | /* Too old */ | ||
1092 | if (!after(end_seq, tp->undo_marker)) | ||
1093 | return 0; | ||
1094 | |||
1095 | /* Undo_marker boundary crossing (overestimates a lot). Known already: | ||
1096 | * start_seq < undo_marker and end_seq >= undo_marker. | ||
1097 | */ | ||
1098 | return !before(start_seq, end_seq - tp->max_window); | ||
1099 | } | ||
1100 | |||
1101 | |||
1023 | static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb, | 1102 | static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb, |
1024 | struct tcp_sack_block_wire *sp, int num_sacks, | 1103 | struct tcp_sack_block_wire *sp, int num_sacks, |
1025 | u32 prior_snd_una) | 1104 | u32 prior_snd_una) |
@@ -1161,6 +1240,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1161 | int fack_count; | 1240 | int fack_count; |
1162 | int dup_sack = (found_dup_sack && (i == first_sack_index)); | 1241 | int dup_sack = (found_dup_sack && (i == first_sack_index)); |
1163 | 1242 | ||
1243 | if (!tcp_is_sackblock_valid(tp, dup_sack, start_seq, end_seq)) | ||
1244 | continue; | ||
1245 | |||
1164 | skb = cached_skb; | 1246 | skb = cached_skb; |
1165 | fack_count = cached_fack_count; | 1247 | fack_count = cached_fack_count; |
1166 | 1248 | ||