[TCP]: Discard fuzzy SACK blocks

SACK processing code has been a sort of Russian roulette, as no validation of SACK blocks was previously attempted. Besides, it is not very clear what all the different kinds of broken SACK blocks really mean (e.g., one that has its start and end sequence numbers reversed). So now close the roulette once and for all.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
author: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi> 2007-08-25 01:54:44 -0400
committer: David S. Miller <davem@sunset.davemloft.net> 2007-10-10 19:48:29 -0400
commit: 5b3c98821a8753239aefc1c217409aa3e5c90787 (patch)
tree: 39d4d2bc47ad533cd084a900fa94542ef1b6e8bf /net/ipv4/tcp_input.c
parent: 6728e7dc3e577241f36921c720cfb4eb8f5aed1a (diff)
1 files changed, 82 insertions, 0 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0ead46f2bcd5..a2364ebf8585 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1019,7 +1019,86 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
 *    for retransmitted and already SACKed segment -> reordering..
 * Both of these heuristics are not used in Loss state, when we cannot
 * account for retransmits accurately.
+ *
+ * SACK block validation.
+ * ----------------------
+ *
+ * SACK block range validation checks that the received SACK block fits
+ * within the expected sequence limits, i.e., it is between SND.UNA and
+ * SND.NXT. Note that SND.UNA is not included in the range, though being
+ * valid, because it means that the receiver is rather inconsistent with
+ * itself (reports SACK reneging when it should advance SND.UNA).
+ *
+ * Also implements a block against start_seq wrap-around. The problem lies
+ * in the fact that though start_seq (s) is before end_seq (i.e., not
+ * reversed), there's no guarantee that it will be before snd_nxt (n). The
+ * problem happens when start_seq resides between end_seq wrap (e_w) and
+ * snd_nxt wrap (n_w):
+ *
+ *         <- outs wnd ->                          <- wrapzone ->
+ *         u     e      n                         u_w   e_w  s n_w
+ *         |     |      |                          |     |   |  |
+ * |<------------+------+----- TCP seqno space --------------+---------->|
+ * ...-- <2^31 ->|                                           |<--------...
+ * ...---- >2^31 ------>|                                    |<--------...
+ *
+ * Current code wouldn't be vulnerable, but it's still better to discard such
+ * crazy SACK blocks. Doing this check for start_seq alone also closes a
+ * somewhat similar case (end_seq after snd_nxt wrap), since the earlier
+ * reversed check in the snd_nxt wrap -> snd_una region then becomes
+ * "well defined", i.e., equal to the ideal case (infinite seqno space
+ * without wrap-caused issues).
+ *
+ * With D-SACK the lower bound is extended to cover sequence space below
+ * SND.UNA down to undo_marker, which is the last point of interest. Yet
+ * again, a D-SACK block must not go across snd_una (for the same reason as
+ * for the normal SACK blocks, explained above). But there all simplicity
+ * ends: TCP might receive valid D-SACKs below that. As long as they reside
+ * fully below undo_marker they do not affect behavior in any way and can
+ * therefore be safely ignored. In rare cases (which are more or less
+ * theoretical ones), the D-SACK will nicely cross that boundary due to skb
+ * fragmentation and packet reordering past skb's retransmission. To consider
+ * them correctly, the acceptable range must be extended even more, though
+ * the exact amount is rather hard to quantify. However, tp->max_window can
+ * be used as an exaggerated estimate.
 */
+/*
+ * tcp_is_sackblock_valid - decide whether a received SACK or D-SACK block
+ * should be processed or discarded.
+ *
+ * @tp:        connection state; snd_una, snd_nxt, undo_marker and max_window
+ *             are read, nothing is written
+ * @is_dsack:  non-zero when the caller identified the block as a D-SACK
+ * @start_seq: start sequence number of the block
+ * @end_seq:   end sequence number of the block
+ *
+ * Returns 1 when the block is acceptable and 0 when it must be ignored.
+ */
+static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
+                                  u32 start_seq, u32 end_seq)
+{
+        /* Too far in future, or reversed (interpretation is ambiguous) */
+        if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
+                return 0;
+        /* Nasty start_seq wrap-around check (see comments above) */
+        if (!before(start_seq, tp->snd_nxt))
+                return 0;
+        /* In outstanding window? ...This is valid exit for DSACKs too.
+         * start_seq == snd_una is non-sensical (see comments above)
+         */
+        if (after(start_seq, tp->snd_una))
+                return 1;
+        /* start_seq is at or below snd_una: only a D-SACK can still be
+         * acceptable, and only while an undo_marker is set.
+         */
+        if (!is_dsack || !tp->undo_marker)
+                return 0;
+        /* ...Then it's D-SACK, and must reside below snd_una completely,
+         * so a block whose end_seq is after snd_una (i.e., one that goes
+         * across snd_una) is rejected.
+         */
+        if (after(end_seq, tp->snd_una))
+                return 0;
+        /* Entirely at or above undo_marker: acceptable */
+        if (!before(start_seq, tp->undo_marker))
+                return 1;
+        /* Too old */
+        if (!after(end_seq, tp->undo_marker))
+                return 0;
+        /* Undo_marker boundary crossing (overestimates a lot). Known already:
+         *   start_seq < undo_marker and end_seq >= undo_marker.
+         * Accept only if the block starts no more than max_window before
+         * its end.
+         */
+        return !before(start_seq, end_seq - tp->max_window);
+}
 static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb,
                           struct tcp_sack_block_wire *sp, int num_sacks,
                           u32 prior_snd_una)
@@ -1161,6 +1240,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
                int fack_count;
                int dup_sack = (found_dup_sack && (i == first_sack_index));
+                if (!tcp_is_sackblock_valid(tp, dup_sack, start_seq, end_seq))
+                        continue;
                skb = cached_skb;
                fack_count = cached_fack_count;
author	Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>	2007-08-25 01:54:44 -0400
committer	David S. Miller <davem@sunset.davemloft.net>	2007-10-10 19:48:29 -0400
commit	5b3c98821a8753239aefc1c217409aa3e5c90787 (patch)
tree	39d4d2bc47ad533cd084a900fa94542ef1b6e8bf /net/ipv4/tcp_input.c
parent	6728e7dc3e577241f36921c720cfb4eb8f5aed1a (diff)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0ead46f2bcd5..a2364ebf8585 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c
@@ -1019,7 +1019,86 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
1019	* for retransmitted and already SACKed segment -> reordering..	1019	* for retransmitted and already SACKed segment -> reordering..
1020	* Both of these heuristics are not used in Loss state, when we cannot	1020	* Both of these heuristics are not used in Loss state, when we cannot
1021	* account for retransmits accurately.	1021	* account for retransmits accurately.
		1022	*
		1023	* SACK block validation.
		1024	* ----------------------
		1025	*
		1026	* SACK block range validation checks that the received SACK block fits
		1027	* within the expected sequence limits, i.e., it is between SND.UNA and
		1028	* SND.NXT. Note that SND.UNA is not included in the range, though being
		1029	* valid, because it means that the receiver is rather inconsistent with
		1030	* itself (reports SACK reneging when it should advance SND.UNA).
		1031	*
		1032	* Also implements a block against start_seq wrap-around. The problem lies
		1033	* in the fact that though start_seq (s) is before end_seq (i.e., not
		1034	* reversed), there's no guarantee that it will be before snd_nxt (n). The
		1035	* problem happens when start_seq resides between end_seq wrap (e_w) and
		1036	* snd_nxt wrap (n_w):
		1037	*
		1038	*         <- outs wnd ->                          <- wrapzone ->
		1039	*         u     e      n                         u_w   e_w  s n_w
		1040	*         |     |      |                          |     |   |  |
		1041	* |<------------+------+----- TCP seqno space --------------+---------->|
		1042	* ...-- <2^31 ->|                                           |<--------...
		1043	* ...---- >2^31 ------>|                                    |<--------...
		1044	*
		1045	* Current code wouldn't be vulnerable, but it's still better to discard such
		1046	* crazy SACK blocks. Doing this check for start_seq alone also closes a somewhat
		1047	* similar case (end_seq after snd_nxt wrap), since the earlier reversed check in
		1048	* the snd_nxt wrap -> snd_una region then becomes "well defined", i.e.,
		1049	* equal to the ideal case (infinite seqno space without wrap-caused issues).
		1050	*
		1051	* With D-SACK the lower bound is extended to cover sequence space below
		1052	* SND.UNA down to undo_marker, which is the last point of interest. Yet
		1053	* again, a D-SACK block must not go across snd_una (for the same reason as
		1054	* for the normal SACK blocks, explained above). But there all simplicity
		1055	* ends: TCP might receive valid D-SACKs below that. As long as they reside
		1056	* fully below undo_marker they do not affect behavior in any way and can
		1057	* therefore be safely ignored. In rare cases (which are more or less
		1058	* theoretical ones), the D-SACK will nicely cross that boundary due to skb
		1059	* fragmentation and packet reordering past skb's retransmission. To consider
		1060	* them correctly, the acceptable range must be extended even more, though
		1061	* the exact amount is rather hard to quantify. However, tp->max_window can
		1062	* be used as an exaggerated estimate.
1022	*/	1063	*/
		1064	static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
		1065	u32 start_seq, u32 end_seq)
		1066	{
		1067	/* Too far in future, or reversed (interpretation is ambiguous) */
		1068	if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
		1069	return 0;
		1070
		1071	/* Nasty start_seq wrap-around check (see comments above) */
		1072	if (!before(start_seq, tp->snd_nxt))
		1073	return 0;
		1074
		1075	/* In outstanding window? ...This is valid exit for DSACKs too.
		1076	* start_seq == snd_una is non-sensical (see comments above)
		1077	*/
		1078	if (after(start_seq, tp->snd_una))
		1079	return 1;
		1080
		1081	if (!is_dsack || !tp->undo_marker)
		1082	return 0;
		1083
		1084	/* ...Then it's D-SACK, and must reside below snd_una completely: reject a block whose end_seq is after snd_una */
		1085	if (after(end_seq, tp->snd_una))
		1086	return 0;
		1087
		1088	if (!before(start_seq, tp->undo_marker))
		1089	return 1;
		1090
		1091	/* Too old */
		1092	if (!after(end_seq, tp->undo_marker))
		1093	return 0;
		1094
		1095	/* Undo_marker boundary crossing (overestimates a lot). Known already:
		1096	* start_seq < undo_marker and end_seq >= undo_marker.
		1097	*/
		1098	return !before(start_seq, end_seq - tp->max_window);
		1099	}
		1100
		1101
1023	static int tcp_check_dsack(struct tcp_sock tp, struct sk_buff ack_skb,	1102	static int tcp_check_dsack(struct tcp_sock tp, struct sk_buff ack_skb,
1024	struct tcp_sack_block_wire *sp, int num_sacks,	1103	struct tcp_sack_block_wire *sp, int num_sacks,
1025	u32 prior_snd_una)	1104	u32 prior_snd_una)
@@ -1161,6 +1240,9 @@ tcp_sacktag_write_queue(struct sock sk, struct sk_buff ack_skb, u32 prior_snd_
1161	int fack_count;	1240	int fack_count;
1162	int dup_sack = (found_dup_sack && (i == first_sack_index));	1241	int dup_sack = (found_dup_sack && (i == first_sack_index));
1163		1242
		1243	if (!tcp_is_sackblock_valid(tp, dup_sack, start_seq, end_seq))
		1244	continue;
		1245
1164	skb = cached_skb;	1246	skb = cached_skb;
1165	fack_count = cached_fack_count;	1247	fack_count = cached_fack_count;
1166		1248