diff options
author | Eric Dumazet <edumazet@google.com> | 2012-06-26 19:14:15 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-06-27 18:34:24 -0400 |
commit | c074da2810c118b3812f32d6754bd9ead2f169e7 (patch) | |
tree | 772c7fbb9da464f9afd6d56e9e610157ed665e8f /net/ipv4/ip_input.c | |
parent | 93040ae5cc8dcc893eca4a4366dc8415af278edf (diff) |
ipv4: tcp: dont cache unconfirmed input dst
DDoS SYN-flood attacks hit the IP route cache badly.
On typical machines, this cache is allowed to hold up to 8 million dst
entries of 256 bytes each, for a total of 2 GB of memory.
rt_garbage_collect() triggers and tries to clean things up.
Eventually the route cache is disabled, but the machine is still under fire
and might run out of memory (OOM) and crash.
This patch exploits the new TCP early demux to set a 'nocache'
boolean when an incoming TCP frame is for a socket that is not yet in the
ESTABLISHED or TIMEWAIT state.
This 'nocache' boolean is then used, when the dst entry is not found in
the route cache, to create an unhashed dst entry (DST_NOCACHE).
SYN-cookie-ACK packets that are sent use a similar mechanism (ipv4: tcp: dont cache
output dst for syncookies), so after this patch a machine is able to
absorb a DDoS SYN-flood attack without polluting its IP route cache.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Hans Schillstrom <hans.schillstrom@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/ip_input.c')
-rw-r--r-- | net/ipv4/ip_input.c | 5 |
1 files changed, 3 insertions, 2 deletions
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 2a39204de5bc..7be54c8dcbe2 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c | |||
@@ -326,6 +326,7 @@ static int ip_rcv_finish(struct sk_buff *skb) | |||
326 | */ | 326 | */ |
327 | if (skb_dst(skb) == NULL) { | 327 | if (skb_dst(skb) == NULL) { |
328 | int err = -ENOENT; | 328 | int err = -ENOENT; |
329 | bool nocache = false; | ||
329 | 330 | ||
330 | if (sysctl_ip_early_demux) { | 331 | if (sysctl_ip_early_demux) { |
331 | const struct net_protocol *ipprot; | 332 | const struct net_protocol *ipprot; |
@@ -334,13 +335,13 @@ static int ip_rcv_finish(struct sk_buff *skb) | |||
334 | rcu_read_lock(); | 335 | rcu_read_lock(); |
335 | ipprot = rcu_dereference(inet_protos[protocol]); | 336 | ipprot = rcu_dereference(inet_protos[protocol]); |
336 | if (ipprot && ipprot->early_demux) | 337 | if (ipprot && ipprot->early_demux) |
337 | err = ipprot->early_demux(skb); | 338 | err = ipprot->early_demux(skb, &nocache); |
338 | rcu_read_unlock(); | 339 | rcu_read_unlock(); |
339 | } | 340 | } |
340 | 341 | ||
341 | if (err) { | 342 | if (err) { |
342 | err = ip_route_input_noref(skb, iph->daddr, iph->saddr, | 343 | err = ip_route_input_noref(skb, iph->daddr, iph->saddr, |
343 | iph->tos, skb->dev); | 344 | iph->tos, skb->dev, nocache); |
344 | if (unlikely(err)) { | 345 | if (unlikely(err)) { |
345 | if (err == -EXDEV) | 346 | if (err == -EXDEV) |
346 | NET_INC_STATS_BH(dev_net(skb->dev), | 347 | NET_INC_STATS_BH(dev_net(skb->dev), |