aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/ip_input.c
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2012-06-26 19:14:15 -0400
committerDavid S. Miller <davem@davemloft.net>2012-06-27 18:34:24 -0400
commitc074da2810c118b3812f32d6754bd9ead2f169e7 (patch)
tree772c7fbb9da464f9afd6d56e9e610157ed665e8f /net/ipv4/ip_input.c
parent93040ae5cc8dcc893eca4a4366dc8415af278edf (diff)
ipv4: tcp: don't cache unconfirmed input dst
DDoS SYN-flood attacks hit the IP route cache badly. On typical machines, this cache is allowed to hold up to 8 million dst entries, 256 bytes each, for a total of 2GB of memory. rt_garbage_collect() triggers and tries to clean things up. Eventually the route cache is disabled, but the machine is under fire and might OOM and crash. This patch exploits the new TCP early demux to set a 'nocache' boolean when an incoming TCP frame is for a socket that is not yet in the ESTABLISHED or TIMEWAIT state. This 'nocache' boolean is then used, when the dst entry is not found in the route cache, to create an unhashed dst entry (DST_NOCACHE). SYN-cookie ACKs are sent using a similar mechanism (ipv4: tcp: dont cache output dst for syncookies), so after this patch, a machine is able to absorb a DDoS SYN-flood attack without polluting its IP route cache. Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Hans Schillstrom <hans.schillstrom@ericsson.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/ip_input.c')
-rw-r--r--net/ipv4/ip_input.c5
1 files changed, 3 insertions, 2 deletions
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 2a39204de5bc..7be54c8dcbe2 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -326,6 +326,7 @@ static int ip_rcv_finish(struct sk_buff *skb)
326 */ 326 */
327 if (skb_dst(skb) == NULL) { 327 if (skb_dst(skb) == NULL) {
328 int err = -ENOENT; 328 int err = -ENOENT;
329 bool nocache = false;
329 330
330 if (sysctl_ip_early_demux) { 331 if (sysctl_ip_early_demux) {
331 const struct net_protocol *ipprot; 332 const struct net_protocol *ipprot;
@@ -334,13 +335,13 @@ static int ip_rcv_finish(struct sk_buff *skb)
334 rcu_read_lock(); 335 rcu_read_lock();
335 ipprot = rcu_dereference(inet_protos[protocol]); 336 ipprot = rcu_dereference(inet_protos[protocol]);
336 if (ipprot && ipprot->early_demux) 337 if (ipprot && ipprot->early_demux)
337 err = ipprot->early_demux(skb); 338 err = ipprot->early_demux(skb, &nocache);
338 rcu_read_unlock(); 339 rcu_read_unlock();
339 } 340 }
340 341
341 if (err) { 342 if (err) {
342 err = ip_route_input_noref(skb, iph->daddr, iph->saddr, 343 err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
343 iph->tos, skb->dev); 344 iph->tos, skb->dev, nocache);
344 if (unlikely(err)) { 345 if (unlikely(err)) {
345 if (err == -EXDEV) 346 if (err == -EXDEV)
346 NET_INC_STATS_BH(dev_net(skb->dev), 347 NET_INC_STATS_BH(dev_net(skb->dev),