aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2009-01-04 19:04:39 -0500
committer David S. Miller <davem@davemloft.net> 2009-01-04 19:04:39 -0500
commit 14deae41566b5cdd992c01d0069518ced5227c83 (patch)
tree d15c3dfabdc3ccf10997487c29df35fa58387e55
parent eb4dea5853046727bfbb579f0c9a8cae7369f7c6 (diff)
ipv6: Fix sporadic sendmsg -EINVAL when sending to multicast groups.
Thanks to excellent diagnosis by Eduard Guzovsky. The core problem is that on a network with lots of active multicast traffic, the neighbour cache can fill up. If we try to allocate a new route and thus a neighbour cache entry, the bog-standard GC attempt the neighbour layer does is ineffective because route entries hold a reference to the existing neighbour entries and GC can only liberate entries with no references. IPv4 already has a way to handle this, by doing a route cache GC in such situations (when neigh attach returns -ENOBUFS). So simply mimic this on the ipv6 side. Tested-by: Eduard Guzovsky <eguzovsky@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/ndisc.h 4
-rw-r--r-- net/ipv6/route.c 52
2 files changed, 49 insertions, 7 deletions
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index ce532f2222ce..1459ed3e2697 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -155,9 +155,9 @@ static inline struct neighbour * ndisc_get_neigh(struct net_device *dev, const s
155{ 155{
156 156
157 if (dev) 157 if (dev)
158 return __neigh_lookup(&nd_tbl, addr, dev, 1); 158 return __neigh_lookup_errno(&nd_tbl, addr, dev);
159 159
160 return NULL; 160 return ERR_PTR(-ENODEV);
161} 161}
162 162
163 163
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 18c486cf4987..76f06b94ab9f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -627,6 +627,9 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad
627 rt = ip6_rt_copy(ort); 627 rt = ip6_rt_copy(ort);
628 628
629 if (rt) { 629 if (rt) {
630 struct neighbour *neigh;
631 int attempts = !in_softirq();
632
630 if (!(rt->rt6i_flags&RTF_GATEWAY)) { 633 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
631 if (rt->rt6i_dst.plen != 128 && 634 if (rt->rt6i_dst.plen != 128 &&
632 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)) 635 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
@@ -646,7 +649,35 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad
646 } 649 }
647#endif 650#endif
648 651
649 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); 652 retry:
653 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
654 if (IS_ERR(neigh)) {
655 struct net *net = dev_net(rt->rt6i_dev);
656 int saved_rt_min_interval =
657 net->ipv6.sysctl.ip6_rt_gc_min_interval;
658 int saved_rt_elasticity =
659 net->ipv6.sysctl.ip6_rt_gc_elasticity;
660
661 if (attempts-- > 0) {
662 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
663 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
664
665 ip6_dst_gc(net->ipv6.ip6_dst_ops);
666
667 net->ipv6.sysctl.ip6_rt_gc_elasticity =
668 saved_rt_elasticity;
669 net->ipv6.sysctl.ip6_rt_gc_min_interval =
670 saved_rt_min_interval;
671 goto retry;
672 }
673
674 if (net_ratelimit())
675 printk(KERN_WARNING
676 "Neighbour table overflow.\n");
677 dst_free(&rt->u.dst);
678 return NULL;
679 }
680 rt->rt6i_nexthop = neigh;
650 681
651 } 682 }
652 683
@@ -945,8 +976,11 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
945 dev_hold(dev); 976 dev_hold(dev);
946 if (neigh) 977 if (neigh)
947 neigh_hold(neigh); 978 neigh_hold(neigh);
948 else 979 else {
949 neigh = ndisc_get_neigh(dev, addr); 980 neigh = ndisc_get_neigh(dev, addr);
981 if (IS_ERR(neigh))
982 neigh = NULL;
983 }
950 984
951 rt->rt6i_dev = dev; 985 rt->rt6i_dev = dev;
952 rt->rt6i_idev = idev; 986 rt->rt6i_idev = idev;
@@ -1887,6 +1921,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1887{ 1921{
1888 struct net *net = dev_net(idev->dev); 1922 struct net *net = dev_net(idev->dev);
1889 struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); 1923 struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
1924 struct neighbour *neigh;
1890 1925
1891 if (rt == NULL) 1926 if (rt == NULL)
1892 return ERR_PTR(-ENOMEM); 1927 return ERR_PTR(-ENOMEM);
@@ -1909,11 +1944,18 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1909 rt->rt6i_flags |= RTF_ANYCAST; 1944 rt->rt6i_flags |= RTF_ANYCAST;
1910 else 1945 else
1911 rt->rt6i_flags |= RTF_LOCAL; 1946 rt->rt6i_flags |= RTF_LOCAL;
1912 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); 1947 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1913 if (rt->rt6i_nexthop == NULL) { 1948 if (IS_ERR(neigh)) {
1914 dst_free(&rt->u.dst); 1949 dst_free(&rt->u.dst);
1915 return ERR_PTR(-ENOMEM); 1950
1951 /* We are casting this because that is the return
1952 * value type. But an errno encoded pointer is the
1953 * same regardless of the underlying pointer type,
1954 * and that's what we are returning. So this is OK.
1955 */
1956 return (struct rt6_info *) neigh;
1916 } 1957 }
1958 rt->rt6i_nexthop = neigh;
1917 1959
1918 ipv6_addr_copy(&rt->rt6i_dst.addr, addr); 1960 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1919 rt->rt6i_dst.plen = 128; 1961 rt->rt6i_dst.plen = 128;