[XFRM]: Allow packet drops during larval state resolution.

The current IPSEC rule resolution behavior we have does not work for a lot of people, even though technically it's an improvement from the -EAGAIN business we had before. Right now we'll block until the key manager resolves the route. That works for simple cases, but many folks would rather packets get silently dropped until the key manager resolves the IPSEC rules. We can't tell these folks to "set the socket non-blocking" because they don't have control over the non-block setting of things like the sockets used to resolve DNS deep inside of the resolver libraries in libc. With that in mind I coded up the patch below with some help from Herbert Xu which provides packet-drop behavior during larval state resolution, controllable via sysctl and off by default. This lays the framework to either: 1) Make this default at some point or... 2) Move this logic into xfrm{4,6}_policy.c and implement the ARP-like resolution queue we've all been dreaming of. The idea would be to queue packets to the policy, then once the larval state is resolved by the key manager we re-resolve the route and push the packets out. The packets would time out if the rule didn't get resolved in a certain amount of time. Signed-off-by: David S. Miller <davem@davemloft.net>
author: David S. Miller <davem@sunset.davemloft.net> 2007-05-24 21:17:54 -0400
committer: David S. Miller <davem@sunset.davemloft.net> 2007-05-24 21:17:54 -0400
commit: 14e50e57aedb2a89cf79b77782879769794cab7b (patch)
tree: 46cbdab9c8007cea0821294c9d397214b38ea4c8 /net/ipv4/route.c
parent: 04efb8787e4d8a7b21a61aeb723de33154311256 (diff)
1 files changed, 70 insertions, 1 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index df9fe4f2e8cc..8603cfb271f2 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2598,6 +2598,69 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
 EXPORT_SYMBOL_GPL(__ip_route_output_key);
+static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) /* .update_pmtu hook installed in ipv4_dst_blackhole_ops; neither argument is used */
+{ /* empty body: the reported mtu is not stored anywhere */
+}
+static struct dst_ops ipv4_dst_blackhole_ops = { /* dst_ops passed to dst_alloc() by ipv4_dst_blackhole() for its replacement route */
+        .family                 =       AF_INET,
+        .protocol               =       __constant_htons(ETH_P_IP),
+        .destroy                =       ipv4_dst_destroy, /* not defined in this hunk */
+        .check                  =       ipv4_dst_check, /* not defined in this hunk */
+        .update_pmtu            =       ipv4_rt_blackhole_update_pmtu, /* the empty handler defined just above */
+        .entry_size             =       sizeof(struct rtable), /* entries are full struct rtable objects */
+}; /* .kmem_cachep is left unset here; ip_rt_init() copies it from ipv4_dst_ops.kmem_cachep */
+static int ipv4_blackhole_output(struct sk_buff *skb) /* packet handler used as both ->input and ->output of a blackhole dst */
+{
+        kfree_skb(skb); /* discard the packet instead of forwarding it anywhere */
+        return 0; /* always returns 0, so the drop is silent for the caller */
+}
+static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock *sk) /* replace *rp with a packet-dropping copy of itself; returns 0 or -ENOMEM (flp and sk are not used in the body) */
+{
+        struct rtable *ort = *rp; /* original route whose fields are copied below */
+        struct rtable *rt = (struct rtable *)
+                dst_alloc(&ipv4_dst_blackhole_ops); /* may return NULL; checked right below */
+        if (rt) {
+                struct dst_entry *new = &rt->u.dst; /* the dst_entry embedded in the new rtable */
+                atomic_set(&new->__refcnt, 1); /* the reference handed back to the caller through *rp */
+                new->__use = 1;
+                new->input = ipv4_blackhole_output; /* every packet sent through this dst is freed */
+                new->output = ipv4_blackhole_output;
+                memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); /* carry over the original route's metrics */
+                new->dev = ort->u.dst.dev;
+                if (new->dev)
+                        dev_hold(new->dev); /* the copy takes its own device reference */
+                rt->fl = ort->fl; /* struct copy of the original flow key */
+                rt->idev = ort->idev;
+                if (rt->idev)
+                        in_dev_hold(rt->idev); /* own reference on the in_device */
+                rt->rt_flags = ort->rt_flags;
+                rt->rt_type = ort->rt_type;
+                rt->rt_dst = ort->rt_dst;
+                rt->rt_src = ort->rt_src;
+                rt->rt_iif = ort->rt_iif;
+                rt->rt_gateway = ort->rt_gateway;
+                rt->rt_spec_dst = ort->rt_spec_dst;
+                rt->peer = ort->peer;
+                if (rt->peer)
+                        atomic_inc(&rt->peer->refcnt); /* own reference on the shared peer */
+                dst_free(new); /* NOTE(review): presumably queues the entry so it is reclaimed once __refcnt drops; confirm against dst_free() */
+        }
+        dst_release(&(*rp)->u.dst); /* drop the caller's reference on the original route, on both success and failure */
+        *rp = rt; /* caller now sees the blackhole route, or NULL if allocation failed */
+        return (rt ? 0 : -ENOMEM);
+}
 int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags)
 {
        int err;
@@ -2610,7 +2673,11 @@ int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk,
                        flp->fl4_src = (*rp)->rt_src;
                if (!flp->fl4_dst)
                        flp->fl4_dst = (*rp)->rt_dst;
-                return xfrm_lookup((struct dst_entry **)rp, flp, sk, flags);
+                err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, flags);
+                if (err == -EREMOTE)
+                        err = ipv4_dst_blackhole(rp, flp, sk);
+                return err;
        }
        return 0;
@@ -3139,6 +3206,8 @@ int __init ip_rt_init(void)
                kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
                                  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+        ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
        rt_hash_table = (struct rt_hash_bucket *)
                alloc_large_system_hash("IP route cache",
                                        sizeof(struct rt_hash_bucket),
author	David S. Miller <davem@sunset.davemloft.net>	2007-05-24 21:17:54 -0400
committer	David S. Miller <davem@sunset.davemloft.net>	2007-05-24 21:17:54 -0400
commit	14e50e57aedb2a89cf79b77782879769794cab7b (patch)
tree	46cbdab9c8007cea0821294c9d397214b38ea4c8 /net/ipv4/route.c
parent	04efb8787e4d8a7b21a61aeb723de33154311256 (diff)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c index df9fe4f2e8cc..8603cfb271f2 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c
@@ -2598,6 +2598,69 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
2598		2598
2599	EXPORT_SYMBOL_GPL(__ip_route_output_key);	2599	EXPORT_SYMBOL_GPL(__ip_route_output_key);
2600		2600
		2601	static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
		2602	{
		2603	}
		2604
		2605	static struct dst_ops ipv4_dst_blackhole_ops = {
		2606	.family = AF_INET,
		2607	.protocol = __constant_htons(ETH_P_IP),
		2608	.destroy = ipv4_dst_destroy,
		2609	.check = ipv4_dst_check,
		2610	.update_pmtu = ipv4_rt_blackhole_update_pmtu,
		2611	.entry_size = sizeof(struct rtable),
		2612	};
		2613
		2614
		2615	static int ipv4_blackhole_output(struct sk_buff *skb)
		2616	{
		2617	kfree_skb(skb);
		2618	return 0;
		2619	}
		2620
		2621	static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock *sk)
		2622	{
		2623	struct rtable *ort = *rp;
		2624	struct rtable *rt = (struct rtable *)
		2625	dst_alloc(&ipv4_dst_blackhole_ops);
		2626
		2627	if (rt) {
		2628	struct dst_entry *new = &rt->u.dst;
		2629
		2630	atomic_set(&new->__refcnt, 1);
		2631	new->__use = 1;
		2632	new->input = ipv4_blackhole_output;
		2633	new->output = ipv4_blackhole_output;
		2634	memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
		2635
		2636	new->dev = ort->u.dst.dev;
		2637	if (new->dev)
		2638	dev_hold(new->dev);
		2639
		2640	rt->fl = ort->fl;
		2641
		2642	rt->idev = ort->idev;
		2643	if (rt->idev)
		2644	in_dev_hold(rt->idev);
		2645	rt->rt_flags = ort->rt_flags;
		2646	rt->rt_type = ort->rt_type;
		2647	rt->rt_dst = ort->rt_dst;
		2648	rt->rt_src = ort->rt_src;
		2649	rt->rt_iif = ort->rt_iif;
		2650	rt->rt_gateway = ort->rt_gateway;
		2651	rt->rt_spec_dst = ort->rt_spec_dst;
		2652	rt->peer = ort->peer;
		2653	if (rt->peer)
		2654	atomic_inc(&rt->peer->refcnt);
		2655
		2656	dst_free(new);
		2657	}
		2658
		2659	dst_release(&(*rp)->u.dst);
		2660	*rp = rt;
		2661	return (rt ? 0 : -ENOMEM);
		2662	}
		2663
2601	int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags)	2664	int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags)
2602	{	2665	{
2603	int err;	2666	int err;
@@ -2610,7 +2673,11 @@ int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk,
2610	flp->fl4_src = (*rp)->rt_src;	2673	flp->fl4_src = (*rp)->rt_src;
2611	if (!flp->fl4_dst)	2674	if (!flp->fl4_dst)
2612	flp->fl4_dst = (*rp)->rt_dst;	2675	flp->fl4_dst = (*rp)->rt_dst;
2613	return xfrm_lookup((struct dst_entry **)rp, flp, sk, flags);	2676	err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, flags);
		2677	if (err == -EREMOTE)
		2678	err = ipv4_dst_blackhole(rp, flp, sk);
		2679
		2680	return err;
2614	}	2681	}
2615		2682
2616	return 0;	2683	return 0;
@@ -3139,6 +3206,8 @@ int __init ip_rt_init(void)
3139	kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,	3206	kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
3140	SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);	3207	SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
3141		3208
		3209	ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
		3210
3142	rt_hash_table = (struct rt_hash_bucket *)	3211	rt_hash_table = (struct rt_hash_bucket *)
3143	alloc_large_system_hash("IP route cache",	3212	alloc_large_system_hash("IP route cache",
3144	sizeof(struct rt_hash_bucket),	3213	sizeof(struct rt_hash_bucket),