diff options
Diffstat (limited to 'net/ipv6/route.c')
-rw-r--r-- | net/ipv6/route.c | 80 |
1 files changed, 60 insertions, 20 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d126365ac046..25661f968f3f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c | |||
@@ -109,7 +109,6 @@ static struct dst_ops ip6_dst_ops_template = { | |||
109 | .link_failure = ip6_link_failure, | 109 | .link_failure = ip6_link_failure, |
110 | .update_pmtu = ip6_rt_update_pmtu, | 110 | .update_pmtu = ip6_rt_update_pmtu, |
111 | .local_out = __ip6_local_out, | 111 | .local_out = __ip6_local_out, |
112 | .entries = ATOMIC_INIT(0), | ||
113 | }; | 112 | }; |
114 | 113 | ||
115 | static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) | 114 | static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) |
@@ -122,7 +121,6 @@ static struct dst_ops ip6_dst_blackhole_ops = { | |||
122 | .destroy = ip6_dst_destroy, | 121 | .destroy = ip6_dst_destroy, |
123 | .check = ip6_dst_check, | 122 | .check = ip6_dst_check, |
124 | .update_pmtu = ip6_rt_blackhole_update_pmtu, | 123 | .update_pmtu = ip6_rt_blackhole_update_pmtu, |
125 | .entries = ATOMIC_INIT(0), | ||
126 | }; | 124 | }; |
127 | 125 | ||
128 | static struct rt6_info ip6_null_entry_template = { | 126 | static struct rt6_info ip6_null_entry_template = { |
@@ -217,14 +215,14 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, | |||
217 | 215 | ||
218 | static __inline__ int rt6_check_expired(const struct rt6_info *rt) | 216 | static __inline__ int rt6_check_expired(const struct rt6_info *rt) |
219 | { | 217 | { |
220 | return (rt->rt6i_flags & RTF_EXPIRES && | 218 | return (rt->rt6i_flags & RTF_EXPIRES) && |
221 | time_after(jiffies, rt->rt6i_expires)); | 219 | time_after(jiffies, rt->rt6i_expires); |
222 | } | 220 | } |
223 | 221 | ||
224 | static inline int rt6_need_strict(struct in6_addr *daddr) | 222 | static inline int rt6_need_strict(struct in6_addr *daddr) |
225 | { | 223 | { |
226 | return (ipv6_addr_type(daddr) & | 224 | return ipv6_addr_type(daddr) & |
227 | (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK)); | 225 | (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); |
228 | } | 226 | } |
229 | 227 | ||
230 | /* | 228 | /* |
@@ -440,7 +438,7 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) | |||
440 | __func__, match); | 438 | __func__, match); |
441 | 439 | ||
442 | net = dev_net(rt0->rt6i_dev); | 440 | net = dev_net(rt0->rt6i_dev); |
443 | return (match ? match : net->ipv6.ip6_null_entry); | 441 | return match ? match : net->ipv6.ip6_null_entry; |
444 | } | 442 | } |
445 | 443 | ||
446 | #ifdef CONFIG_IPV6_ROUTE_INFO | 444 | #ifdef CONFIG_IPV6_ROUTE_INFO |
@@ -670,7 +668,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad | |||
670 | 668 | ||
671 | if (net_ratelimit()) | 669 | if (net_ratelimit()) |
672 | printk(KERN_WARNING | 670 | printk(KERN_WARNING |
673 | "Neighbour table overflow.\n"); | 671 | "ipv6: Neighbour table overflow.\n"); |
674 | dst_free(&rt->dst); | 672 | dst_free(&rt->dst); |
675 | return NULL; | 673 | return NULL; |
676 | } | 674 | } |
@@ -859,7 +857,7 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl | |||
859 | 857 | ||
860 | dst_release(*dstp); | 858 | dst_release(*dstp); |
861 | *dstp = new; | 859 | *dstp = new; |
862 | return (new ? 0 : -ENOMEM); | 860 | return new ? 0 : -ENOMEM; |
863 | } | 861 | } |
864 | EXPORT_SYMBOL_GPL(ip6_dst_blackhole); | 862 | EXPORT_SYMBOL_GPL(ip6_dst_blackhole); |
865 | 863 | ||
@@ -1058,19 +1056,22 @@ static int ip6_dst_gc(struct dst_ops *ops) | |||
1058 | int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; | 1056 | int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; |
1059 | int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; | 1057 | int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; |
1060 | unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; | 1058 | unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; |
1059 | int entries; | ||
1061 | 1060 | ||
1061 | entries = dst_entries_get_fast(ops); | ||
1062 | if (time_after(rt_last_gc + rt_min_interval, now) && | 1062 | if (time_after(rt_last_gc + rt_min_interval, now) && |
1063 | atomic_read(&ops->entries) <= rt_max_size) | 1063 | entries <= rt_max_size) |
1064 | goto out; | 1064 | goto out; |
1065 | 1065 | ||
1066 | net->ipv6.ip6_rt_gc_expire++; | 1066 | net->ipv6.ip6_rt_gc_expire++; |
1067 | fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); | 1067 | fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); |
1068 | net->ipv6.ip6_rt_last_gc = now; | 1068 | net->ipv6.ip6_rt_last_gc = now; |
1069 | if (atomic_read(&ops->entries) < ops->gc_thresh) | 1069 | entries = dst_entries_get_slow(ops); |
1070 | if (entries < ops->gc_thresh) | ||
1070 | net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; | 1071 | net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; |
1071 | out: | 1072 | out: |
1072 | net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; | 1073 | net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; |
1073 | return (atomic_read(&ops->entries) > rt_max_size); | 1074 | return entries > rt_max_size; |
1074 | } | 1075 | } |
1075 | 1076 | ||
1076 | /* Clean host part of a prefix. Not necessary in radix tree, | 1077 | /* Clean host part of a prefix. Not necessary in radix tree, |
@@ -1169,6 +1170,8 @@ int ip6_route_add(struct fib6_config *cfg) | |||
1169 | 1170 | ||
1170 | if (addr_type & IPV6_ADDR_MULTICAST) | 1171 | if (addr_type & IPV6_ADDR_MULTICAST) |
1171 | rt->dst.input = ip6_mc_input; | 1172 | rt->dst.input = ip6_mc_input; |
1173 | else if (cfg->fc_flags & RTF_LOCAL) | ||
1174 | rt->dst.input = ip6_input; | ||
1172 | else | 1175 | else |
1173 | rt->dst.input = ip6_forward; | 1176 | rt->dst.input = ip6_forward; |
1174 | 1177 | ||
@@ -1190,7 +1193,8 @@ int ip6_route_add(struct fib6_config *cfg) | |||
1190 | they would result in kernel looping; promote them to reject routes | 1193 | they would result in kernel looping; promote them to reject routes |
1191 | */ | 1194 | */ |
1192 | if ((cfg->fc_flags & RTF_REJECT) || | 1195 | if ((cfg->fc_flags & RTF_REJECT) || |
1193 | (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { | 1196 | (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK) |
1197 | && !(cfg->fc_flags&RTF_LOCAL))) { | ||
1194 | /* hold loopback dev/idev if we haven't done so. */ | 1198 | /* hold loopback dev/idev if we haven't done so. */ |
1195 | if (dev != net->loopback_dev) { | 1199 | if (dev != net->loopback_dev) { |
1196 | if (dev) { | 1200 | if (dev) { |
@@ -1556,14 +1560,13 @@ out: | |||
1556 | * i.e. Path MTU discovery | 1560 | * i.e. Path MTU discovery |
1557 | */ | 1561 | */ |
1558 | 1562 | ||
1559 | void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, | 1563 | static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr, |
1560 | struct net_device *dev, u32 pmtu) | 1564 | struct net *net, u32 pmtu, int ifindex) |
1561 | { | 1565 | { |
1562 | struct rt6_info *rt, *nrt; | 1566 | struct rt6_info *rt, *nrt; |
1563 | struct net *net = dev_net(dev); | ||
1564 | int allfrag = 0; | 1567 | int allfrag = 0; |
1565 | 1568 | ||
1566 | rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0); | 1569 | rt = rt6_lookup(net, daddr, saddr, ifindex, 0); |
1567 | if (rt == NULL) | 1570 | if (rt == NULL) |
1568 | return; | 1571 | return; |
1569 | 1572 | ||
@@ -1631,6 +1634,27 @@ out: | |||
1631 | dst_release(&rt->dst); | 1634 | dst_release(&rt->dst); |
1632 | } | 1635 | } |
1633 | 1636 | ||
1637 | void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, | ||
1638 | struct net_device *dev, u32 pmtu) | ||
1639 | { | ||
1640 | struct net *net = dev_net(dev); | ||
1641 | |||
1642 | /* | ||
1643 | * RFC 1981 states that a node "MUST reduce the size of the packets it | ||
1644 | * is sending along the path" that caused the Packet Too Big message. | ||
1645 | * Since it's not possible in the general case to determine which | ||
1646 | * interface was used to send the original packet, we update the MTU | ||
1647 | * on the interface that will be used to send future packets. We also | ||
1648 | * update the MTU on the interface that received the Packet Too Big in | ||
1649 | * case the original packet was forced out that interface with | ||
1650 | * SO_BINDTODEVICE or similar. This is the next best thing to the | ||
1651 | * correct behaviour, which would be to update the MTU on all | ||
1652 | * interfaces. | ||
1653 | */ | ||
1654 | rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0); | ||
1655 | rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex); | ||
1656 | } | ||
1657 | |||
1634 | /* | 1658 | /* |
1635 | * Misc support functions | 1659 | * Misc support functions |
1636 | */ | 1660 | */ |
@@ -2082,6 +2106,9 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, | |||
2082 | if (rtm->rtm_type == RTN_UNREACHABLE) | 2106 | if (rtm->rtm_type == RTN_UNREACHABLE) |
2083 | cfg->fc_flags |= RTF_REJECT; | 2107 | cfg->fc_flags |= RTF_REJECT; |
2084 | 2108 | ||
2109 | if (rtm->rtm_type == RTN_LOCAL) | ||
2110 | cfg->fc_flags |= RTF_LOCAL; | ||
2111 | |||
2085 | cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; | 2112 | cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; |
2086 | cfg->fc_nlinfo.nlh = nlh; | 2113 | cfg->fc_nlinfo.nlh = nlh; |
2087 | cfg->fc_nlinfo.nl_net = sock_net(skb->sk); | 2114 | cfg->fc_nlinfo.nl_net = sock_net(skb->sk); |
@@ -2202,6 +2229,8 @@ static int rt6_fill_node(struct net *net, | |||
2202 | NLA_PUT_U32(skb, RTA_TABLE, table); | 2229 | NLA_PUT_U32(skb, RTA_TABLE, table); |
2203 | if (rt->rt6i_flags&RTF_REJECT) | 2230 | if (rt->rt6i_flags&RTF_REJECT) |
2204 | rtm->rtm_type = RTN_UNREACHABLE; | 2231 | rtm->rtm_type = RTN_UNREACHABLE; |
2232 | else if (rt->rt6i_flags&RTF_LOCAL) | ||
2233 | rtm->rtm_type = RTN_LOCAL; | ||
2205 | else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) | 2234 | else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) |
2206 | rtm->rtm_type = RTN_LOCAL; | 2235 | rtm->rtm_type = RTN_LOCAL; |
2207 | else | 2236 | else |
@@ -2496,7 +2525,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v) | |||
2496 | net->ipv6.rt6_stats->fib_rt_alloc, | 2525 | net->ipv6.rt6_stats->fib_rt_alloc, |
2497 | net->ipv6.rt6_stats->fib_rt_entries, | 2526 | net->ipv6.rt6_stats->fib_rt_entries, |
2498 | net->ipv6.rt6_stats->fib_rt_cache, | 2527 | net->ipv6.rt6_stats->fib_rt_cache, |
2499 | atomic_read(&net->ipv6.ip6_dst_ops.entries), | 2528 | dst_entries_get_slow(&net->ipv6.ip6_dst_ops), |
2500 | net->ipv6.rt6_stats->fib_discarded_routes); | 2529 | net->ipv6.rt6_stats->fib_discarded_routes); |
2501 | 2530 | ||
2502 | return 0; | 2531 | return 0; |
@@ -2638,11 +2667,14 @@ static int __net_init ip6_route_net_init(struct net *net) | |||
2638 | memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, | 2667 | memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, |
2639 | sizeof(net->ipv6.ip6_dst_ops)); | 2668 | sizeof(net->ipv6.ip6_dst_ops)); |
2640 | 2669 | ||
2670 | if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0) | ||
2671 | goto out_ip6_dst_ops; | ||
2672 | |||
2641 | net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, | 2673 | net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, |
2642 | sizeof(*net->ipv6.ip6_null_entry), | 2674 | sizeof(*net->ipv6.ip6_null_entry), |
2643 | GFP_KERNEL); | 2675 | GFP_KERNEL); |
2644 | if (!net->ipv6.ip6_null_entry) | 2676 | if (!net->ipv6.ip6_null_entry) |
2645 | goto out_ip6_dst_ops; | 2677 | goto out_ip6_dst_entries; |
2646 | net->ipv6.ip6_null_entry->dst.path = | 2678 | net->ipv6.ip6_null_entry->dst.path = |
2647 | (struct dst_entry *)net->ipv6.ip6_null_entry; | 2679 | (struct dst_entry *)net->ipv6.ip6_null_entry; |
2648 | net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; | 2680 | net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; |
@@ -2692,6 +2724,8 @@ out_ip6_prohibit_entry: | |||
2692 | out_ip6_null_entry: | 2724 | out_ip6_null_entry: |
2693 | kfree(net->ipv6.ip6_null_entry); | 2725 | kfree(net->ipv6.ip6_null_entry); |
2694 | #endif | 2726 | #endif |
2727 | out_ip6_dst_entries: | ||
2728 | dst_entries_destroy(&net->ipv6.ip6_dst_ops); | ||
2695 | out_ip6_dst_ops: | 2729 | out_ip6_dst_ops: |
2696 | goto out; | 2730 | goto out; |
2697 | } | 2731 | } |
@@ -2730,10 +2764,14 @@ int __init ip6_route_init(void) | |||
2730 | if (!ip6_dst_ops_template.kmem_cachep) | 2764 | if (!ip6_dst_ops_template.kmem_cachep) |
2731 | goto out; | 2765 | goto out; |
2732 | 2766 | ||
2733 | ret = register_pernet_subsys(&ip6_route_net_ops); | 2767 | ret = dst_entries_init(&ip6_dst_blackhole_ops); |
2734 | if (ret) | 2768 | if (ret) |
2735 | goto out_kmem_cache; | 2769 | goto out_kmem_cache; |
2736 | 2770 | ||
2771 | ret = register_pernet_subsys(&ip6_route_net_ops); | ||
2772 | if (ret) | ||
2773 | goto out_dst_entries; | ||
2774 | |||
2737 | ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; | 2775 | ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; |
2738 | 2776 | ||
2739 | /* Registering of the loopback is done before this portion of code, | 2777 | /* Registering of the loopback is done before this portion of code, |
@@ -2780,6 +2818,8 @@ out_fib6_init: | |||
2780 | fib6_gc_cleanup(); | 2818 | fib6_gc_cleanup(); |
2781 | out_register_subsys: | 2819 | out_register_subsys: |
2782 | unregister_pernet_subsys(&ip6_route_net_ops); | 2820 | unregister_pernet_subsys(&ip6_route_net_ops); |
2821 | out_dst_entries: | ||
2822 | dst_entries_destroy(&ip6_dst_blackhole_ops); | ||
2783 | out_kmem_cache: | 2823 | out_kmem_cache: |
2784 | kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); | 2824 | kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); |
2785 | goto out; | 2825 | goto out; |