aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author David S. Miller <davem@sunset.davemloft.net> 2007-05-24 21:17:54 -0400
committer David S. Miller <davem@sunset.davemloft.net> 2007-05-24 21:17:54 -0400
commit 14e50e57aedb2a89cf79b77782879769794cab7b (patch)
tree 46cbdab9c8007cea0821294c9d397214b38ea4c8 /net
parent 04efb8787e4d8a7b21a61aeb723de33154311256 (diff)
[XFRM]: Allow packet drops during larval state resolution.
The current IPSEC rule resolution behavior we have does not work for a lot of people, even though technically it's an improvement from the -EAGAIN business we had before. Right now we'll block until the key manager resolves the route. That works for simple cases, but many folks would rather packets get silently dropped until the key manager resolves the IPSEC rules. We can't tell these folks to "set the socket non-blocking" because they don't have control over the non-block setting of things like the sockets used to resolve DNS deep inside of the resolver libraries in libc. With that in mind I coded up the patch below with some help from Herbert Xu which provides packet-drop behavior during larval state resolution, controllable via sysctl and off by default. This lays the framework to either: 1) Make this default at some point or... 2) Move this logic into xfrm{4,6}_policy.c and implement the ARP-like resolution queue we've all been dreaming of. The idea would be to queue packets to the policy, then once the larval state is resolved by the key manager we re-resolve the route and push the packets out. The packets would time out if the rule didn't get resolved in a certain amount of time. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- net/core/sysctl_net_core.c 9
-rw-r--r-- net/dccp/ipv6.c 10
-rw-r--r-- net/ipv4/route.c 71
-rw-r--r-- net/ipv6/datagram.c 8
-rw-r--r-- net/ipv6/raw.c 8
-rw-r--r-- net/ipv6/route.c 63
-rw-r--r-- net/ipv6/tcp_ipv6.c 8
-rw-r--r-- net/ipv6/udp.c 8
-rw-r--r-- net/xfrm/xfrm_policy.c 28
9 files changed, 199 insertions, 14 deletions
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index b29712033dd4..f34aca041a25 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -24,6 +24,7 @@ extern int sysctl_core_destroy_delay;
24#ifdef CONFIG_XFRM 24#ifdef CONFIG_XFRM
25extern u32 sysctl_xfrm_aevent_etime; 25extern u32 sysctl_xfrm_aevent_etime;
26extern u32 sysctl_xfrm_aevent_rseqth; 26extern u32 sysctl_xfrm_aevent_rseqth;
27extern int sysctl_xfrm_larval_drop;
27#endif 28#endif
28 29
29ctl_table core_table[] = { 30ctl_table core_table[] = {
@@ -118,6 +119,14 @@ ctl_table core_table[] = {
118 .mode = 0644, 119 .mode = 0644,
119 .proc_handler = &proc_dointvec 120 .proc_handler = &proc_dointvec
120 }, 121 },
122 {
123 .ctl_name = CTL_UNNUMBERED,
124 .procname = "xfrm_larval_drop",
125 .data = &sysctl_xfrm_larval_drop,
126 .maxlen = sizeof(int),
127 .mode = 0644,
128 .proc_handler = &proc_dointvec
129 },
121#endif /* CONFIG_XFRM */ 130#endif /* CONFIG_XFRM */
122#endif /* CONFIG_NET */ 131#endif /* CONFIG_NET */
123 { 132 {
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 64eac2515aa2..31737cdf156a 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1043,9 +1043,13 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
1043 if (final_p) 1043 if (final_p)
1044 ipv6_addr_copy(&fl.fl6_dst, final_p); 1044 ipv6_addr_copy(&fl.fl6_dst, final_p);
1045 1045
1046 err = xfrm_lookup(&dst, &fl, sk, 1); 1046 err = __xfrm_lookup(&dst, &fl, sk, 1);
1047 if (err < 0) 1047 if (err < 0) {
1048 goto failure; 1048 if (err == -EREMOTE)
1049 err = ip6_dst_blackhole(sk, &dst, &fl);
1050 if (err < 0)
1051 goto failure;
1052 }
1049 1053
1050 if (saddr == NULL) { 1054 if (saddr == NULL) {
1051 saddr = &fl.fl6_src; 1055 saddr = &fl.fl6_src;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index df9fe4f2e8cc..8603cfb271f2 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2598,6 +2598,69 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
2598 2598
2599EXPORT_SYMBOL_GPL(__ip_route_output_key); 2599EXPORT_SYMBOL_GPL(__ip_route_output_key);
2600 2600
2601static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
2602{
2603}
2604
2605static struct dst_ops ipv4_dst_blackhole_ops = {
2606 .family = AF_INET,
2607 .protocol = __constant_htons(ETH_P_IP),
2608 .destroy = ipv4_dst_destroy,
2609 .check = ipv4_dst_check,
2610 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
2611 .entry_size = sizeof(struct rtable),
2612};
2613
2614
2615static int ipv4_blackhole_output(struct sk_buff *skb)
2616{
2617 kfree_skb(skb);
2618 return 0;
2619}
2620
2621static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock *sk)
2622{
2623 struct rtable *ort = *rp;
2624 struct rtable *rt = (struct rtable *)
2625 dst_alloc(&ipv4_dst_blackhole_ops);
2626
2627 if (rt) {
2628 struct dst_entry *new = &rt->u.dst;
2629
2630 atomic_set(&new->__refcnt, 1);
2631 new->__use = 1;
2632 new->input = ipv4_blackhole_output;
2633 new->output = ipv4_blackhole_output;
2634 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
2635
2636 new->dev = ort->u.dst.dev;
2637 if (new->dev)
2638 dev_hold(new->dev);
2639
2640 rt->fl = ort->fl;
2641
2642 rt->idev = ort->idev;
2643 if (rt->idev)
2644 in_dev_hold(rt->idev);
2645 rt->rt_flags = ort->rt_flags;
2646 rt->rt_type = ort->rt_type;
2647 rt->rt_dst = ort->rt_dst;
2648 rt->rt_src = ort->rt_src;
2649 rt->rt_iif = ort->rt_iif;
2650 rt->rt_gateway = ort->rt_gateway;
2651 rt->rt_spec_dst = ort->rt_spec_dst;
2652 rt->peer = ort->peer;
2653 if (rt->peer)
2654 atomic_inc(&rt->peer->refcnt);
2655
2656 dst_free(new);
2657 }
2658
2659 dst_release(&(*rp)->u.dst);
2660 *rp = rt;
2661 return (rt ? 0 : -ENOMEM);
2662}
2663
2601int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags) 2664int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags)
2602{ 2665{
2603 int err; 2666 int err;
@@ -2610,7 +2673,11 @@ int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk,
2610 flp->fl4_src = (*rp)->rt_src; 2673 flp->fl4_src = (*rp)->rt_src;
2611 if (!flp->fl4_dst) 2674 if (!flp->fl4_dst)
2612 flp->fl4_dst = (*rp)->rt_dst; 2675 flp->fl4_dst = (*rp)->rt_dst;
2613 return xfrm_lookup((struct dst_entry **)rp, flp, sk, flags); 2676 err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, flags);
2677 if (err == -EREMOTE)
2678 err = ipv4_dst_blackhole(rp, flp, sk);
2679
2680 return err;
2614 } 2681 }
2615 2682
2616 return 0; 2683 return 0;
@@ -3139,6 +3206,8 @@ int __init ip_rt_init(void)
3139 kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0, 3206 kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
3140 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 3207 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
3141 3208
3209 ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
3210
3142 rt_hash_table = (struct rt_hash_bucket *) 3211 rt_hash_table = (struct rt_hash_bucket *)
3143 alloc_large_system_hash("IP route cache", 3212 alloc_large_system_hash("IP route cache",
3144 sizeof(struct rt_hash_bucket), 3213 sizeof(struct rt_hash_bucket),
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 403eee66b9c5..b1fe7ac5dc90 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -177,8 +177,12 @@ ipv4_connected:
177 if (final_p) 177 if (final_p)
178 ipv6_addr_copy(&fl.fl6_dst, final_p); 178 ipv6_addr_copy(&fl.fl6_dst, final_p);
179 179
180 if ((err = xfrm_lookup(&dst, &fl, sk, 1)) < 0) 180 if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) {
181 goto out; 181 if (err == -EREMOTE)
182 err = ip6_dst_blackhole(sk, &dst, &fl);
183 if (err < 0)
184 goto out;
185 }
182 186
183 /* source address lookup done in ip6_dst_lookup */ 187 /* source address lookup done in ip6_dst_lookup */
184 188
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 009a1047fc3f..a58459a76684 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -818,8 +818,12 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
818 if (final_p) 818 if (final_p)
819 ipv6_addr_copy(&fl.fl6_dst, final_p); 819 ipv6_addr_copy(&fl.fl6_dst, final_p);
820 820
821 if ((err = xfrm_lookup(&dst, &fl, sk, 1)) < 0) 821 if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) {
822 goto out; 822 if (err == -EREMOTE)
823 err = ip6_dst_blackhole(sk, &dst, &fl);
824 if (err < 0)
825 goto out;
826 }
823 827
824 if (hlimit < 0) { 828 if (hlimit < 0) {
825 if (ipv6_addr_is_multicast(&fl.fl6_dst)) 829 if (ipv6_addr_is_multicast(&fl.fl6_dst))
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b46ad53044ba..1324b06796c0 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -119,6 +119,19 @@ static struct dst_ops ip6_dst_ops = {
119 .entry_size = sizeof(struct rt6_info), 119 .entry_size = sizeof(struct rt6_info),
120}; 120};
121 121
122static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
123{
124}
125
126static struct dst_ops ip6_dst_blackhole_ops = {
127 .family = AF_INET6,
128 .protocol = __constant_htons(ETH_P_IPV6),
129 .destroy = ip6_dst_destroy,
130 .check = ip6_dst_check,
131 .update_pmtu = ip6_rt_blackhole_update_pmtu,
132 .entry_size = sizeof(struct rt6_info),
133};
134
122struct rt6_info ip6_null_entry = { 135struct rt6_info ip6_null_entry = {
123 .u = { 136 .u = {
124 .dst = { 137 .dst = {
@@ -833,6 +846,54 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
833 846
834EXPORT_SYMBOL(ip6_route_output); 847EXPORT_SYMBOL(ip6_route_output);
835 848
849static int ip6_blackhole_output(struct sk_buff *skb)
850{
851 kfree_skb(skb);
852 return 0;
853}
854
855int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
856{
857 struct rt6_info *ort = (struct rt6_info *) *dstp;
858 struct rt6_info *rt = (struct rt6_info *)
859 dst_alloc(&ip6_dst_blackhole_ops);
860 struct dst_entry *new = NULL;
861
862 if (rt) {
863 new = &rt->u.dst;
864
865 atomic_set(&new->__refcnt, 1);
866 new->__use = 1;
867 new->input = ip6_blackhole_output;
868 new->output = ip6_blackhole_output;
869
870 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
871 new->dev = ort->u.dst.dev;
872 if (new->dev)
873 dev_hold(new->dev);
874 rt->rt6i_idev = ort->rt6i_idev;
875 if (rt->rt6i_idev)
876 in6_dev_hold(rt->rt6i_idev);
877 rt->rt6i_expires = 0;
878
879 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
880 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
881 rt->rt6i_metric = 0;
882
883 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
884#ifdef CONFIG_IPV6_SUBTREES
885 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
886#endif
887
888 dst_free(new);
889 }
890
891 dst_release(*dstp);
892 *dstp = new;
893 return (new ? 0 : -ENOMEM);
894}
895EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
896
836/* 897/*
837 * Destination cache support functions 898 * Destination cache support functions
838 */ 899 */
@@ -2495,6 +2556,8 @@ void __init ip6_route_init(void)
2495 ip6_dst_ops.kmem_cachep = 2556 ip6_dst_ops.kmem_cachep =
2496 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, 2557 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2497 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 2558 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
2559 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2560
2498 fib6_init(); 2561 fib6_init();
2499#ifdef CONFIG_PROC_FS 2562#ifdef CONFIG_PROC_FS
2500 p = proc_net_create("ipv6_route", 0, rt6_proc_info); 2563 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e2f25ea43b68..4f06a51ad4fd 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -265,8 +265,12 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
265 if (final_p) 265 if (final_p)
266 ipv6_addr_copy(&fl.fl6_dst, final_p); 266 ipv6_addr_copy(&fl.fl6_dst, final_p);
267 267
268 if ((err = xfrm_lookup(&dst, &fl, sk, 1)) < 0) 268 if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) {
269 goto failure; 269 if (err == -EREMOTE)
270 err = ip6_dst_blackhole(sk, &dst, &fl);
271 if (err < 0)
272 goto failure;
273 }
270 274
271 if (saddr == NULL) { 275 if (saddr == NULL) {
272 saddr = &fl.fl6_src; 276 saddr = &fl.fl6_src;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index a7ae59c954d5..d1fbddd172e7 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -767,8 +767,12 @@ do_udp_sendmsg:
767 if (final_p) 767 if (final_p)
768 ipv6_addr_copy(&fl.fl6_dst, final_p); 768 ipv6_addr_copy(&fl.fl6_dst, final_p);
769 769
770 if ((err = xfrm_lookup(&dst, &fl, sk, 1)) < 0) 770 if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) {
771 goto out; 771 if (err == -EREMOTE)
772 err = ip6_dst_blackhole(sk, &dst, &fl);
773 if (err < 0)
774 goto out;
775 }
772 776
773 if (hlimit < 0) { 777 if (hlimit < 0) {
774 if (ipv6_addr_is_multicast(&fl.fl6_dst)) 778 if (ipv6_addr_is_multicast(&fl.fl6_dst))
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d0882e53b6fc..b8bab89616a0 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -29,6 +29,8 @@
29 29
30#include "xfrm_hash.h" 30#include "xfrm_hash.h"
31 31
32int sysctl_xfrm_larval_drop;
33
32DEFINE_MUTEX(xfrm_cfg_mutex); 34DEFINE_MUTEX(xfrm_cfg_mutex);
33EXPORT_SYMBOL(xfrm_cfg_mutex); 35EXPORT_SYMBOL(xfrm_cfg_mutex);
34 36
@@ -1390,8 +1392,8 @@ static int stale_bundle(struct dst_entry *dst);
1390 * At the moment we eat a raw IP route. Mostly to speed up lookups 1392 * At the moment we eat a raw IP route. Mostly to speed up lookups
1391 * on interfaces with disabled IPsec. 1393 * on interfaces with disabled IPsec.
1392 */ 1394 */
1393int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, 1395int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
1394 struct sock *sk, int flags) 1396 struct sock *sk, int flags)
1395{ 1397{
1396 struct xfrm_policy *policy; 1398 struct xfrm_policy *policy;
1397 struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; 1399 struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
@@ -1509,6 +1511,13 @@ restart:
1509 1511
1510 if (unlikely(nx<0)) { 1512 if (unlikely(nx<0)) {
1511 err = nx; 1513 err = nx;
1514 if (err == -EAGAIN && sysctl_xfrm_larval_drop) {
1515 /* EREMOTE tells the caller to generate
1516 * a one-shot blackhole route.
1517 */
1518 xfrm_pol_put(policy);
1519 return -EREMOTE;
1520 }
1512 if (err == -EAGAIN && flags) { 1521 if (err == -EAGAIN && flags) {
1513 DECLARE_WAITQUEUE(wait, current); 1522 DECLARE_WAITQUEUE(wait, current);
1514 1523
@@ -1598,6 +1607,21 @@ error:
1598 *dst_p = NULL; 1607 *dst_p = NULL;
1599 return err; 1608 return err;
1600} 1609}
1610EXPORT_SYMBOL(__xfrm_lookup);
1611
1612int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
1613 struct sock *sk, int flags)
1614{
1615 int err = __xfrm_lookup(dst_p, fl, sk, flags);
1616
1617 if (err == -EREMOTE) {
1618 dst_release(*dst_p);
1619 *dst_p = NULL;
1620 err = -EAGAIN;
1621 }
1622
1623 return err;
1624}
1601EXPORT_SYMBOL(xfrm_lookup); 1625EXPORT_SYMBOL(xfrm_lookup);
1602 1626
1603static inline int 1627static inline int