aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHannes Frederic Sowa <hannes@stressinduktion.org>2015-01-23 06:01:26 -0500
committerDavid S. Miller <davem@davemloft.net>2015-01-26 20:28:27 -0500
commitdf4d92549f23e1c037e83323aff58a21b3de7fe0 (patch)
treecb82536dd48281496b6652b628266cdcacffb91d
parent412d2907c41531b60c7ea1f38cfe1116daf2a229 (diff)
ipv4: try to cache dst_entries which would cause a redirect
Not caching dst_entries which cause redirects could be exploited by hosts on the same subnet, causing a severe DoS attack. This effect aggravated since commit f88649721268999 ("ipv4: fix dst race in sk_dst_get()"). Lookups causing redirects will be allocated with DST_NOCACHE set which will force dst_release to free them via RCU. Unfortunately waiting for RCU grace period just takes too long, we can end up with >1M dst_entries waiting to be released and the system will run OOM. rcuos threads cannot catch up under high softirq load. Attaching the flag to emit a redirect later on to the specific skb allows us to cache those dst_entries thus reducing the pressure on allocation and deallocation. This issue was discovered by Marcelo Leitner. Cc: Julian Anastasov <ja@ssi.bg> Signed-off-by: Marcelo Leitner <mleitner@redhat.com> Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org> Signed-off-by: Julian Anastasov <ja@ssi.bg> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/ip.h11
-rw-r--r--net/ipv4/ip_forward.c3
-rw-r--r--net/ipv4/route.c9
3 files changed, 13 insertions, 10 deletions
diff --git a/include/net/ip.h b/include/net/ip.h
index 0bb620702929..f7cbd703d15d 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -39,11 +39,12 @@ struct inet_skb_parm {
39 struct ip_options opt; /* Compiled IP options */ 39 struct ip_options opt; /* Compiled IP options */
40 unsigned char flags; 40 unsigned char flags;
41 41
42#define IPSKB_FORWARDED 1 42#define IPSKB_FORWARDED BIT(0)
43#define IPSKB_XFRM_TUNNEL_SIZE 2 43#define IPSKB_XFRM_TUNNEL_SIZE BIT(1)
44#define IPSKB_XFRM_TRANSFORMED 4 44#define IPSKB_XFRM_TRANSFORMED BIT(2)
45#define IPSKB_FRAG_COMPLETE 8 45#define IPSKB_FRAG_COMPLETE BIT(3)
46#define IPSKB_REROUTED 16 46#define IPSKB_REROUTED BIT(4)
47#define IPSKB_DOREDIRECT BIT(5)
47 48
48 u16 frag_max_size; 49 u16 frag_max_size;
49}; 50};
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 3a83ce5efa80..787b3c294ce6 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -129,7 +129,8 @@ int ip_forward(struct sk_buff *skb)
129 * We now generate an ICMP HOST REDIRECT giving the route 129 * We now generate an ICMP HOST REDIRECT giving the route
130 * we calculated. 130 * we calculated.
131 */ 131 */
132 if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb_sec_path(skb)) 132 if (IPCB(skb)->flags & IPSKB_DOREDIRECT && !opt->srr &&
133 !skb_sec_path(skb))
133 ip_rt_send_redirect(skb); 134 ip_rt_send_redirect(skb);
134 135
135 skb->priority = rt_tos2priority(iph->tos); 136 skb->priority = rt_tos2priority(iph->tos);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6a2155b02602..d58dd0ec3e53 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1554,11 +1554,10 @@ static int __mkroute_input(struct sk_buff *skb,
1554 1554
1555 do_cache = res->fi && !itag; 1555 do_cache = res->fi && !itag;
1556 if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) && 1556 if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
1557 skb->protocol == htons(ETH_P_IP) &&
1557 (IN_DEV_SHARED_MEDIA(out_dev) || 1558 (IN_DEV_SHARED_MEDIA(out_dev) ||
1558 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) { 1559 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
1559 flags |= RTCF_DOREDIRECT; 1560 IPCB(skb)->flags |= IPSKB_DOREDIRECT;
1560 do_cache = false;
1561 }
1562 1561
1563 if (skb->protocol != htons(ETH_P_IP)) { 1562 if (skb->protocol != htons(ETH_P_IP)) {
1564 /* Not IP (i.e. ARP). Do not create route, if it is 1563 /* Not IP (i.e. ARP). Do not create route, if it is
@@ -2303,6 +2302,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
2303 r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; 2302 r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
2304 if (rt->rt_flags & RTCF_NOTIFY) 2303 if (rt->rt_flags & RTCF_NOTIFY)
2305 r->rtm_flags |= RTM_F_NOTIFY; 2304 r->rtm_flags |= RTM_F_NOTIFY;
2305 if (IPCB(skb)->flags & IPSKB_DOREDIRECT)
2306 r->rtm_flags |= RTCF_DOREDIRECT;
2306 2307
2307 if (nla_put_be32(skb, RTA_DST, dst)) 2308 if (nla_put_be32(skb, RTA_DST, dst))
2308 goto nla_put_failure; 2309 goto nla_put_failure;