aboutsummaryrefslogtreecommitdiffstats
path: root/include/net/tcp.h
diff options
context:
space:
mode:
authorNeal Cardwell <ncardwell@google.com>2015-02-06 16:04:38 -0500
committerDavid S. Miller <davem@davemloft.net>2015-02-08 04:03:12 -0500
commit032ee4236954eb214651cb9bfc1b38ffa8fd7a01 (patch)
treedf165996666757322162c263cebcec8fe3c93d1a /include/net/tcp.h
parentca539345f8767cca221b5aa77bf4329c725d0d7e (diff)
tcp: helpers to mitigate ACK loops by rate-limiting out-of-window dupacks
Helpers for mitigating ACK loops by rate-limiting dupacks sent in response to incoming out-of-window packets. This patch includes: - rate-limiting logic - sysctl to control how often we allow dupacks to out-of-window packets - SNMP counter for cases where we rate-limited our dupack sending The rate-limiting logic in this patch decides to not send dupacks in response to out-of-window segments if (a) they are SYNs or pure ACKs and (b) the remote endpoint is sending them faster than the configured rate limit. We rate-limit our responses rather than blocking them entirely or resetting the connection, because legitimate connections can rely on dupacks in response to some out-of-window segments. For example, zero window probes are typically sent with a sequence number that is below the current window, and ZWPs thus expect to elicit a dupack in response. We allow dupacks in response to TCP segments with data, because these may be spurious retransmissions for which the remote endpoint wants to receive DSACKs. This is safe because segments with data can't realistically be part of ACK loops, which by their nature consist of each side sending pure/data-less ACKs to each other. The dupack interval is controlled by a new sysctl knob, tcp_invalid_ratelimit, given in milliseconds, in case an administrator needs to dial this upward in the face of a high-rate DoS attack. The name and units are chosen to be analogous to the existing knob for ICMP, icmp_ratelimit. The default value for tcp_invalid_ratelimit is 500ms, which allows at most one such dupack per 500ms. This is chosen to be 2x faster than the 1-second minimum RTO interval allowed by RFC 6298 (section 2, rule 2.4). We allow the extra 2x factor because network delay variations can cause packets sent at 1 second intervals to be compressed and arrive much closer.
Reported-by: Avery Fay <avery@mixpanel.com> Signed-off-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/net/tcp.h')
-rw-r--r--include/net/tcp.h32
1 files changed, 32 insertions, 0 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 28e9bd3abceb..b81f45c67b2e 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -274,6 +274,7 @@ extern int sysctl_tcp_challenge_ack_limit;
274extern unsigned int sysctl_tcp_notsent_lowat; 274extern unsigned int sysctl_tcp_notsent_lowat;
275extern int sysctl_tcp_min_tso_segs; 275extern int sysctl_tcp_min_tso_segs;
276extern int sysctl_tcp_autocorking; 276extern int sysctl_tcp_autocorking;
277extern int sysctl_tcp_invalid_ratelimit;
277 278
278extern atomic_long_t tcp_memory_allocated; 279extern atomic_long_t tcp_memory_allocated;
279extern struct percpu_counter tcp_sockets_allocated; 280extern struct percpu_counter tcp_sockets_allocated;
@@ -1236,6 +1237,37 @@ static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt,
1236 return true; 1237 return true;
1237} 1238}
1238 1239
1240/* Return true if we're currently rate-limiting out-of-window ACKs and
1241 * thus shouldn't send a dupack right now. We rate-limit dupacks in
1242 * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
1243 * attacks that send repeated SYNs or ACKs for the same connection. To
1244 * do this, we do not send a duplicate SYNACK or ACK if the remote
1245 * endpoint is sending out-of-window SYNs or pure ACKs at a high rate.
 *
 * @net:               passed to NET_INC_STATS_BH() when a response is
 *                     suppressed.
 * @skb:               the incoming segment being examined; only its
 *                     TCP_SKB_CB() seq/end_seq and the TCP header SYN flag
 *                     are read here.
 * @mib_idx:           counter index passed to NET_INC_STATS_BH() when a
 *                     response is suppressed.
 * @last_oow_ack_time: in/out. Holds the tcp_time_stamp value stored the
 *                     last time this function let a SYN/pure-ACK response
 *                     through. A value of 0 is treated as "no previous
 *                     response recorded" and disables the interval check.
 *
 * Returns true when the caller should NOT send a response now, false when
 * it may. The stored timestamp is only refreshed on the path that returns
 * false after the interval check; segments that bypass the check via the
 * seq != end_seq test leave *last_oow_ack_time untouched.
1246 */
1247static inline bool tcp_oow_rate_limited(struct net *net,
1248 const struct sk_buff *skb,
1249 int mib_idx, u32 *last_oow_ack_time)
1250{
1251 /* Data packets without SYNs are not likely part of an ACK loop. */
1252 if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
1253 !tcp_hdr(skb)->syn)
1254 goto not_rate_limited;
1255
/* A zero timestamp means nothing has been recorded yet: skip the check. */
1256 if (*last_oow_ack_time) {
/* Signed difference, so that a timestamp that appears to lie in the
 * future (negative elapsed) falls through as "not rate-limited".
 */
1257 s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
1258
/* NOTE(review): this compares elapsed directly against
 * sysctl_tcp_invalid_ratelimit, so both are presumably kept in the
 * same units as tcp_time_stamp -- confirm against the sysctl handler.
 */
1259 if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
1260 NET_INC_STATS_BH(net, mib_idx);
1261 return true; /* rate-limited: don't send yet! */
1262 }
1263 }
1264
/* Record when we last allowed a response, for the next interval check. */
1265 *last_oow_ack_time = tcp_time_stamp;
1266
1267not_rate_limited:
1268 return false; /* not rate-limited: go ahead, send dupack now! */
1269}
1270
1239static inline void tcp_mib_init(struct net *net) 1271static inline void tcp_mib_init(struct net *net)
1240{ 1272{
1241 /* See RFC 2012 */ 1273 /* See RFC 2012 */