diff options
author | David S. Miller <davem@davemloft.net> | 2009-01-04 19:04:39 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2009-01-04 19:04:39 -0500 |
commit | 14deae41566b5cdd992c01d0069518ced5227c83 (patch) | |
tree | d15c3dfabdc3ccf10997487c29df35fa58387e55 | |
parent | eb4dea5853046727bfbb579f0c9a8cae7369f7c6 (diff) |
ipv6: Fix sporadic sendmsg -EINVAL when sending to multicast groups.
Thanks to excellent diagnosis by Eduard Guzovsky.
The core problem is that on a network with lots of active
multicast traffic, the neighbour cache can fill up. If
we try to allocate a new route and thus neighbour cache
entry, the bog-standard GC attempt the neighbour layer does
in ineffective because route entries hold a reference
to the existing neighbour entries and GC can only liberate
entries with no references.
IPV4 already has a way to handle this, by doing a route cache
GC in such situations (when neigh attach returns -ENOBUFS).
So simply mimick this on the ipv6 side.
Tested-by: Eduard Guzovsky <eguzovsky@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/ndisc.h | 4 | ||||
-rw-r--r-- | net/ipv6/route.c | 52 |
2 files changed, 49 insertions, 7 deletions
diff --git a/include/net/ndisc.h b/include/net/ndisc.h index ce532f2222ce..1459ed3e2697 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h | |||
@@ -155,9 +155,9 @@ static inline struct neighbour * ndisc_get_neigh(struct net_device *dev, const s | |||
155 | { | 155 | { |
156 | 156 | ||
157 | if (dev) | 157 | if (dev) |
158 | return __neigh_lookup(&nd_tbl, addr, dev, 1); | 158 | return __neigh_lookup_errno(&nd_tbl, addr, dev); |
159 | 159 | ||
160 | return NULL; | 160 | return ERR_PTR(-ENODEV); |
161 | } | 161 | } |
162 | 162 | ||
163 | 163 | ||
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 18c486cf4987..76f06b94ab9f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c | |||
@@ -627,6 +627,9 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad | |||
627 | rt = ip6_rt_copy(ort); | 627 | rt = ip6_rt_copy(ort); |
628 | 628 | ||
629 | if (rt) { | 629 | if (rt) { |
630 | struct neighbour *neigh; | ||
631 | int attempts = !in_softirq(); | ||
632 | |||
630 | if (!(rt->rt6i_flags&RTF_GATEWAY)) { | 633 | if (!(rt->rt6i_flags&RTF_GATEWAY)) { |
631 | if (rt->rt6i_dst.plen != 128 && | 634 | if (rt->rt6i_dst.plen != 128 && |
632 | ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)) | 635 | ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)) |
@@ -646,7 +649,35 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad | |||
646 | } | 649 | } |
647 | #endif | 650 | #endif |
648 | 651 | ||
649 | rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); | 652 | retry: |
653 | neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); | ||
654 | if (IS_ERR(neigh)) { | ||
655 | struct net *net = dev_net(rt->rt6i_dev); | ||
656 | int saved_rt_min_interval = | ||
657 | net->ipv6.sysctl.ip6_rt_gc_min_interval; | ||
658 | int saved_rt_elasticity = | ||
659 | net->ipv6.sysctl.ip6_rt_gc_elasticity; | ||
660 | |||
661 | if (attempts-- > 0) { | ||
662 | net->ipv6.sysctl.ip6_rt_gc_elasticity = 1; | ||
663 | net->ipv6.sysctl.ip6_rt_gc_min_interval = 0; | ||
664 | |||
665 | ip6_dst_gc(net->ipv6.ip6_dst_ops); | ||
666 | |||
667 | net->ipv6.sysctl.ip6_rt_gc_elasticity = | ||
668 | saved_rt_elasticity; | ||
669 | net->ipv6.sysctl.ip6_rt_gc_min_interval = | ||
670 | saved_rt_min_interval; | ||
671 | goto retry; | ||
672 | } | ||
673 | |||
674 | if (net_ratelimit()) | ||
675 | printk(KERN_WARNING | ||
676 | "Neighbour table overflow.\n"); | ||
677 | dst_free(&rt->u.dst); | ||
678 | return NULL; | ||
679 | } | ||
680 | rt->rt6i_nexthop = neigh; | ||
650 | 681 | ||
651 | } | 682 | } |
652 | 683 | ||
@@ -945,8 +976,11 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, | |||
945 | dev_hold(dev); | 976 | dev_hold(dev); |
946 | if (neigh) | 977 | if (neigh) |
947 | neigh_hold(neigh); | 978 | neigh_hold(neigh); |
948 | else | 979 | else { |
949 | neigh = ndisc_get_neigh(dev, addr); | 980 | neigh = ndisc_get_neigh(dev, addr); |
981 | if (IS_ERR(neigh)) | ||
982 | neigh = NULL; | ||
983 | } | ||
950 | 984 | ||
951 | rt->rt6i_dev = dev; | 985 | rt->rt6i_dev = dev; |
952 | rt->rt6i_idev = idev; | 986 | rt->rt6i_idev = idev; |
@@ -1887,6 +1921,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, | |||
1887 | { | 1921 | { |
1888 | struct net *net = dev_net(idev->dev); | 1922 | struct net *net = dev_net(idev->dev); |
1889 | struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); | 1923 | struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); |
1924 | struct neighbour *neigh; | ||
1890 | 1925 | ||
1891 | if (rt == NULL) | 1926 | if (rt == NULL) |
1892 | return ERR_PTR(-ENOMEM); | 1927 | return ERR_PTR(-ENOMEM); |
@@ -1909,11 +1944,18 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, | |||
1909 | rt->rt6i_flags |= RTF_ANYCAST; | 1944 | rt->rt6i_flags |= RTF_ANYCAST; |
1910 | else | 1945 | else |
1911 | rt->rt6i_flags |= RTF_LOCAL; | 1946 | rt->rt6i_flags |= RTF_LOCAL; |
1912 | rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); | 1947 | neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); |
1913 | if (rt->rt6i_nexthop == NULL) { | 1948 | if (IS_ERR(neigh)) { |
1914 | dst_free(&rt->u.dst); | 1949 | dst_free(&rt->u.dst); |
1915 | return ERR_PTR(-ENOMEM); | 1950 | |
1951 | /* We are casting this because that is the return | ||
1952 | * value type. But an errno encoded pointer is the | ||
1953 | * same regardless of the underlying pointer type, | ||
1954 | * and that's what we are returning. So this is OK. | ||
1955 | */ | ||
1956 | return (struct rt6_info *) neigh; | ||
1916 | } | 1957 | } |
1958 | rt->rt6i_nexthop = neigh; | ||
1917 | 1959 | ||
1918 | ipv6_addr_copy(&rt->rt6i_dst.addr, addr); | 1960 | ipv6_addr_copy(&rt->rt6i_dst.addr, addr); |
1919 | rt->rt6i_dst.plen = 128; | 1961 | rt->rt6i_dst.plen = 128; |