Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r--  net/ipv4/route.c | 190
1 file changed, 80 insertions(+), 110 deletions(-)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ac6559cb54f9..d6cb2bfcd8e1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -159,7 +159,6 @@ static struct dst_ops ipv4_dst_ops = {
         .link_failure = ipv4_link_failure,
         .update_pmtu = ip_rt_update_pmtu,
         .local_out = __ip_local_out,
-        .entries = ATOMIC_INIT(0),
 };
 
 #define ECN_OR_COST(class) TC_PRIO_##class
@@ -466,7 +465,7 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v)
 
         seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
                    " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
-                   atomic_read(&ipv4_dst_ops.entries),
+                   dst_entries_get_slow(&ipv4_dst_ops),
                    st->in_hit,
                    st->in_slow_tot,
                    st->in_slow_mc,
@@ -945,6 +944,7 @@ static int rt_garbage_collect(struct dst_ops *ops)
         struct rtable *rth, **rthp;
         unsigned long now = jiffies;
         int goal;
+        int entries = dst_entries_get_fast(&ipv4_dst_ops);
 
         /*
          * Garbage collection is pretty expensive,
@@ -954,28 +954,28 @@ static int rt_garbage_collect(struct dst_ops *ops)
         RT_CACHE_STAT_INC(gc_total);
 
         if (now - last_gc < ip_rt_gc_min_interval &&
-            atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) {
+            entries < ip_rt_max_size) {
                 RT_CACHE_STAT_INC(gc_ignored);
                 goto out;
         }
 
+        entries = dst_entries_get_slow(&ipv4_dst_ops);
         /* Calculate number of entries, which we want to expire now. */
-        goal = atomic_read(&ipv4_dst_ops.entries) -
-                (ip_rt_gc_elasticity << rt_hash_log);
+        goal = entries - (ip_rt_gc_elasticity << rt_hash_log);
         if (goal <= 0) {
                 if (equilibrium < ipv4_dst_ops.gc_thresh)
                         equilibrium = ipv4_dst_ops.gc_thresh;
-                goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
+                goal = entries - equilibrium;
                 if (goal > 0) {
                         equilibrium += min_t(unsigned int, goal >> 1, rt_hash_mask + 1);
-                        goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
+                        goal = entries - equilibrium;
                 }
         } else {
                 /* We are in dangerous area. Try to reduce cache really
                  * aggressively.
                  */
                 goal = max_t(unsigned int, goal >> 1, rt_hash_mask + 1);
-                equilibrium = atomic_read(&ipv4_dst_ops.entries) - goal;
+                equilibrium = entries - goal;
         }
 
         if (now - last_gc >= ip_rt_gc_min_interval)
@@ -1032,14 +1032,16 @@ static int rt_garbage_collect(struct dst_ops *ops)
                 expire >>= 1;
 #if RT_CACHE_DEBUG >= 2
                 printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire,
-                                atomic_read(&ipv4_dst_ops.entries), goal, i);
+                                dst_entries_get_fast(&ipv4_dst_ops), goal, i);
 #endif
 
-                if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
+                if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
                         goto out;
         } while (!in_softirq() && time_before_eq(jiffies, now));
 
-        if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
+        if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
+                goto out;
+        if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size)
                 goto out;
         if (net_ratelimit())
                 printk(KERN_WARNING "dst cache overflow\n");
@@ -1049,11 +1051,12 @@ static int rt_garbage_collect(struct dst_ops *ops)
 work_done:
         expire += ip_rt_gc_min_interval;
         if (expire > ip_rt_gc_timeout ||
-            atomic_read(&ipv4_dst_ops.entries) < ipv4_dst_ops.gc_thresh)
+            dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh ||
+            dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh)
                 expire = ip_rt_gc_timeout;
 #if RT_CACHE_DEBUG >= 2
         printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire,
-                        atomic_read(&ipv4_dst_ops.entries), goal, rover);
+                        dst_entries_get_fast(&ipv4_dst_ops), goal, rover);
 #endif
 out: return 0;
 }
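
[Reviewer note, not part of the patch] The hunks above swap the single global atomic_read(&ipv4_dst_ops.entries) for dst_entries_get_fast()/dst_entries_get_slow(): a cheap, possibly stale read for the hot-path checks and an exact but more expensive read just before the goal computation and the final overflow tests. A minimal standalone sketch of that fast/slow split, assuming a hand-rolled sharded counter in plain C11 (the kernel uses its percpu_counter infrastructure; all names below are illustrative, and the periodic fold of shards into the global value is omitted):

/* sketch: approximate "fast" read vs. exact "slow" read of a sharded counter */
#include <stdatomic.h>
#include <stdio.h>

#define NSHARDS 4

struct sharded_counter {
        _Atomic long shard[NSHARDS];    /* per-CPU/per-thread slots */
        _Atomic long global;            /* batches folded in occasionally */
};

static struct sharded_counter dst_entries;      /* zero-initialized */

static void counter_add(int shard, long v)
{
        /* cheap update: touch only the local shard */
        atomic_fetch_add_explicit(&dst_entries.shard[shard], v,
                                  memory_order_relaxed);
}

/* "fast": read the folded value only; may lag behind the true total */
static long counter_read_fast(void)
{
        return atomic_load_explicit(&dst_entries.global, memory_order_relaxed);
}

/* "slow": sum every shard for an accurate total */
static long counter_read_slow(void)
{
        long sum = atomic_load_explicit(&dst_entries.global,
                                        memory_order_relaxed);
        for (int i = 0; i < NSHARDS; i++)
                sum += atomic_load_explicit(&dst_entries.shard[i],
                                            memory_order_relaxed);
        return sum;
}

int main(void)
{
        counter_add(0, 10);
        counter_add(3, 5);
        printf("fast=%ld slow=%ld\n", counter_read_fast(), counter_read_slow());
        return 0;
}

The hunks follow the same rule of thumb: use the fast read wherever an approximate value is good enough, and fall back to the slow read only when the decision (how much to expire, whether the cache really overflowed) needs the accurate count.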
@@ -1102,23 +1105,23 @@ restart:
                  * Note that we do rt_free on this new route entry, so that
                  * once its refcount hits zero, we are still able to reap it
                  * (Thanks Alexey)
-                 * Note also the rt_free uses call_rcu. We don't actually
-                 * need rcu protection here, this is just our path to get
-                 * on the route gc list.
+                 * Note: To avoid expensive rcu stuff for this uncached dst,
+                 * we set DST_NOCACHE so that dst_release() can free dst without
+                 * waiting a grace period.
                  */
 
+                rt->dst.flags |= DST_NOCACHE;
                 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
                         int err = arp_bind_neighbour(&rt->dst);
                         if (err) {
                                 if (net_ratelimit())
                                         printk(KERN_WARNING
                                             "Neighbour table failure & not caching routes.\n");
-                                rt_drop(rt);
+                                ip_rt_put(rt);
                                 return err;
                         }
                 }
 
-                rt_free(rt);
                 goto skip_hashing;
         }
 
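[Reviewer note, not part of the patch] The hunk above marks a route that skips the hash table with DST_NOCACHE, so the final put can free it on the spot instead of going through the RCU-deferred free that cached, reader-visible entries require. A rough standalone model of that decision, with hypothetical names and a plain free() standing in for the grace-period path:

/* sketch: an entry that was never published to the shared cache can be
 * freed at the last put; a published one must take a deferred-free path.
 * Names are illustrative, not kernel API. */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#define ENTRY_NOCACHE 0x1               /* never visible to other readers */

struct entry {
        atomic_int refcnt;
        unsigned int flags;
};

static void deferred_free(struct entry *e)
{
        /* stand-in for call_rcu()/grace-period based freeing */
        puts("deferred free of published entry");
        free(e);
}

static void entry_put(struct entry *e)
{
        if (atomic_fetch_sub(&e->refcnt, 1) != 1)
                return;                         /* references remain */
        if (e->flags & ENTRY_NOCACHE) {
                free(e);                        /* nobody else ever saw it */
                puts("immediate free of uncached entry");
        } else {
                deferred_free(e);               /* readers may still hold it */
        }
}

int main(void)
{
        struct entry *e = calloc(1, sizeof(*e));

        if (!e)
                return 1;
        atomic_store(&e->refcnt, 1);
        e->flags |= ENTRY_NOCACHE;              /* hash insertion was skipped */
        entry_put(e);                           /* freed right away */
        return 0;
}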
@@ -1268,18 +1271,11 @@ skip_hashing:
 
 void rt_bind_peer(struct rtable *rt, int create)
 {
-        static DEFINE_SPINLOCK(rt_peer_lock);
         struct inet_peer *peer;
 
         peer = inet_getpeer(rt->rt_dst, create);
 
-        spin_lock_bh(&rt_peer_lock);
-        if (rt->peer == NULL) {
-                rt->peer = peer;
-                peer = NULL;
-        }
-        spin_unlock_bh(&rt_peer_lock);
-        if (peer)
+        if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
                 inet_putpeer(peer);
 }
 
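[Reviewer note, not part of the patch] rt_bind_peer() now publishes the peer pointer with a single cmpxchg() instead of a spinlock; whichever caller loses the race simply drops its own reference via inet_putpeer(). The same publish-once pattern in standalone C11, with atomic_compare_exchange_strong() playing the role of cmpxchg() and a hypothetical put_peer() as the loser's cleanup:

/* sketch: lock-free "bind once" publication of a pointer */
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct peer { int id; };

static _Atomic(struct peer *) bound_peer;       /* plays the role of rt->peer */

static void put_peer(struct peer *p)            /* illustrative stand-in */
{
        printf("dropping extra reference to peer %d\n", p->id);
}

static void bind_peer(struct peer *candidate)
{
        struct peer *expected = NULL;

        if (!candidate)
                return;
        /* install candidate only if nothing is bound yet; if the exchange
         * fails another thread won the race, so give our reference back */
        if (!atomic_compare_exchange_strong(&bound_peer, &expected, candidate))
                put_peer(candidate);
}

int main(void)
{
        struct peer a = { 1 }, b = { 2 };

        bind_peer(&a);          /* wins: bound_peer == &a */
        bind_peer(&b);          /* loses: put_peer(&b) runs */
        printf("bound peer %d\n", atomic_load(&bound_peer)->id);
        return 0;
}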
@@ -1779,12 +1775,15 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
 
         if (rt->fl.iif == 0)
                 src = rt->rt_src;
-        else if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) {
-                src = FIB_RES_PREFSRC(res);
-                fib_res_put(&res);
-        } else
-                src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
+        else {
+                rcu_read_lock();
+                if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0)
+                        src = FIB_RES_PREFSRC(res);
+                else
+                        src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
                                         RT_SCOPE_UNIVERSE);
+                rcu_read_unlock();
+        }
         memcpy(addr, &src, 4);
 }
 
@@ -2087,6 +2086,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
  *      Such approach solves two big problems:
  *      1. Not simplex devices are handled properly.
  *      2. IP spoofing attempts are filtered with 100% of guarantee.
+ *      called with rcu_read_lock()
  */
 
 static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
@@ -2108,7 +2108,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
         unsigned hash;
         __be32 spec_dst;
         int err = -EINVAL;
-        int free_res = 0;
         struct net * net = dev_net(dev);
 
         /* IP on this device is disabled. */
@@ -2124,7 +2123,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
             ipv4_is_loopback(saddr))
                 goto martian_source;
 
-        if (daddr == htonl(0xFFFFFFFF) || (saddr == 0 && daddr == 0))
+        if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
                 goto brd_input;
 
         /* Accept zero addresses only to limited broadcast;
@@ -2133,19 +2132,18 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
         if (ipv4_is_zeronet(saddr))
                 goto martian_source;
 
-        if (ipv4_is_lbcast(daddr) || ipv4_is_zeronet(daddr) ||
-            ipv4_is_loopback(daddr))
+        if (ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr))
                 goto martian_destination;
 
         /*
          *      Now we are ready to route packet.
          */
-        if ((err = fib_lookup(net, &fl, &res)) != 0) {
+        err = fib_lookup(net, &fl, &res);
+        if (err != 0) {
                 if (!IN_DEV_FORWARD(in_dev))
                         goto e_hostunreach;
                 goto no_route;
         }
-        free_res = 1;
 
         RT_CACHE_STAT_INC(in_slow_tot);
 
@@ -2154,8 +2152,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 
         if (res.type == RTN_LOCAL) {
                 err = fib_validate_source(saddr, daddr, tos,
-                                             net->loopback_dev->ifindex,
-                                             dev, &spec_dst, &itag, skb->mark);
+                                          net->loopback_dev->ifindex,
+                                          dev, &spec_dst, &itag, skb->mark);
                 if (err < 0)
                         goto martian_source_keep_err;
                 if (err)
@@ -2170,9 +2168,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
                 goto martian_destination;
 
         err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
-done:
-        if (free_res)
-                fib_res_put(&res);
 out: return err;
 
 brd_input:
@@ -2232,7 +2227,7 @@ local_input:
         rth->rt_type = res.type;
         hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net));
         err = rt_intern_hash(hash, rth, NULL, skb, fl.iif);
-        goto done;
+        goto out;
 
 no_route:
         RT_CACHE_STAT_INC(in_no_route);
@@ -2255,21 +2250,21 @@ martian_destination:
 
 e_hostunreach:
         err = -EHOSTUNREACH;
-        goto done;
+        goto out;
 
 e_inval:
         err = -EINVAL;
-        goto done;
+        goto out;
 
 e_nobufs:
         err = -ENOBUFS;
-        goto done;
+        goto out;
 
 martian_source:
         err = -EINVAL;
 martian_source_keep_err:
         ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
-        goto done;
+        goto out;
 }
 
 int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
@@ -2355,6 +2350,7 @@ skip_cache:
 }
 EXPORT_SYMBOL(ip_route_input_common);
 
+/* called with rcu_read_lock() */
 static int __mkroute_output(struct rtable **result,
                             struct fib_result *res,
                             const struct flowi *fl,
@@ -2365,53 +2361,47 @@ static int __mkroute_output(struct rtable **result,
         struct rtable *rth;
         struct in_device *in_dev;
         u32 tos = RT_FL_TOS(oldflp);
-        int err = 0;
 
-        if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags&IFF_LOOPBACK))
+        if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK))
                 return -EINVAL;
 
-        if (fl->fl4_dst == htonl(0xFFFFFFFF))
+        if (ipv4_is_lbcast(fl->fl4_dst))
                 res->type = RTN_BROADCAST;
         else if (ipv4_is_multicast(fl->fl4_dst))
                 res->type = RTN_MULTICAST;
-        else if (ipv4_is_lbcast(fl->fl4_dst) || ipv4_is_zeronet(fl->fl4_dst))
+        else if (ipv4_is_zeronet(fl->fl4_dst))
                 return -EINVAL;
 
         if (dev_out->flags & IFF_LOOPBACK)
                 flags |= RTCF_LOCAL;
 
-        /* get work reference to inet device */
-        in_dev = in_dev_get(dev_out);
+        in_dev = __in_dev_get_rcu(dev_out);
         if (!in_dev)
                 return -EINVAL;
 
         if (res->type == RTN_BROADCAST) {
                 flags |= RTCF_BROADCAST | RTCF_LOCAL;
-                if (res->fi) {
-                        fib_info_put(res->fi);
-                        res->fi = NULL;
-                }
+                res->fi = NULL;
         } else if (res->type == RTN_MULTICAST) {
-                flags |= RTCF_MULTICAST|RTCF_LOCAL;
+                flags |= RTCF_MULTICAST | RTCF_LOCAL;
                 if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src,
                                  oldflp->proto))
                         flags &= ~RTCF_LOCAL;
                 /* If multicast route do not exist use
-                   default one, but do not gateway in this case.
-                   Yes, it is hack.
+                 * default one, but do not gateway in this case.
+                 * Yes, it is hack.
                  */
-                if (res->fi && res->prefixlen < 4) {
-                        fib_info_put(res->fi);
+                if (res->fi && res->prefixlen < 4)
                         res->fi = NULL;
-                }
         }
 
 
         rth = dst_alloc(&ipv4_dst_ops);
-        if (!rth) {
-                err = -ENOBUFS;
-                goto cleanup;
-        }
+        if (!rth)
+                return -ENOBUFS;
+
+        in_dev_hold(in_dev);
+        rth->idev = in_dev;
 
         atomic_set(&rth->dst.__refcnt, 1);
         rth->dst.flags= DST_HOST;
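
[Reviewer note, not part of the patch] In the hunk above __mkroute_output() stops taking a temporary in_dev_get()/in_dev_put() reference around the whole function: the caller's rcu_read_lock() keeps the in_device alive for the lookup and checks, and a real reference (in_dev_hold()) is taken only at the point where the pointer is stored into the new rtable. A standalone sketch of that "reference only what you keep" rule, with hypothetical names and an atomic refcount standing in for the kernel helpers:

/* sketch: a pointer used only under the caller's read-side lock needs no
 * refcount; a pointer stored into a longer-lived object does. Names are
 * illustrative, not kernel API. */
#include <stdatomic.h>
#include <stdio.h>

struct in_dev_like {
        atomic_int refcnt;
};

struct route_like {
        struct in_dev_like *idev;       /* outlives the lookup, needs a ref */
};

static void hold(struct in_dev_like *d)
{
        atomic_fetch_add(&d->refcnt, 1);
}

static int make_route(struct route_like *rt, struct in_dev_like *d)
{
        /* transient checks: 'd' is kept alive by the caller's read-side
         * lock, so no temporary get/put pair is needed here ... */
        if (atomic_load(&d->refcnt) == 0)
                return -1;

        /* ... but the route keeps 'd' past that lock, so pin it now */
        hold(d);
        rt->idev = d;
        return 0;
}

int main(void)
{
        struct in_dev_like dev;
        struct route_like rt = { 0 };

        atomic_init(&dev.refcnt, 1);
        if (make_route(&rt, &dev) == 0)
                printf("route holds idev, refcnt=%d\n",
                       atomic_load(&dev.refcnt));
        return 0;
}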
@@ -2432,7 +2422,6 @@ static int __mkroute_output(struct rtable **result,
            cache entry */
         rth->dst.dev = dev_out;
         dev_hold(dev_out);
-        rth->idev = in_dev_get(dev_out);
         rth->rt_gateway = fl->fl4_dst;
         rth->rt_spec_dst= fl->fl4_src;
 
@@ -2467,15 +2456,11 @@ static int __mkroute_output(struct rtable **result,
         rt_set_nexthop(rth, res, 0);
 
         rth->rt_flags = flags;
-
         *result = rth;
-cleanup:
-        /* release work reference to inet device */
-        in_dev_put(in_dev);
-
-        return err;
+        return 0;
 }
 
+/* called with rcu_read_lock() */
 static int ip_mkroute_output(struct rtable **rp,
                              struct fib_result *res,
                              const struct flowi *fl,
@@ -2497,6 +2482,7 @@ static int ip_mkroute_output(struct rtable **rp,
 
 /*
  * Major route resolver routine.
+ * called with rcu_read_lock();
  */
 
 static int ip_route_output_slow(struct net *net, struct rtable **rp,
@@ -2515,9 +2501,8 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
                                         .iif = net->loopback_dev->ifindex,
                                         .oif = oldflp->oif };
         struct fib_result res;
-        unsigned flags = 0;
+        unsigned int flags = 0;
         struct net_device *dev_out = NULL;
-        int free_res = 0;
         int err;
 
 
@@ -2543,9 +2528,9 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 
         if (oldflp->oif == 0 &&
             (ipv4_is_multicast(oldflp->fl4_dst) ||
-             oldflp->fl4_dst == htonl(0xFFFFFFFF))) {
+             ipv4_is_lbcast(oldflp->fl4_dst))) {
                 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
-                dev_out = ip_dev_find(net, oldflp->fl4_src);
+                dev_out = __ip_dev_find(net, oldflp->fl4_src, false);
                 if (dev_out == NULL)
                         goto out;
 
@@ -2570,29 +2555,24 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 
                 if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) {
                         /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
-                        dev_out = ip_dev_find(net, oldflp->fl4_src);
-                        if (dev_out == NULL)
+                        if (!__ip_dev_find(net, oldflp->fl4_src, false))
                                 goto out;
-                        dev_put(dev_out);
-                        dev_out = NULL;
                 }
         }
 
 
         if (oldflp->oif) {
-                dev_out = dev_get_by_index(net, oldflp->oif);
+                dev_out = dev_get_by_index_rcu(net, oldflp->oif);
                 err = -ENODEV;
                 if (dev_out == NULL)
                         goto out;
 
                 /* RACE: Check return value of inet_select_addr instead. */
-                if (__in_dev_get_rtnl(dev_out) == NULL) {
-                        dev_put(dev_out);
+                if (rcu_dereference(dev_out->ip_ptr) == NULL)
                         goto out;       /* Wrong error code */
-                }
 
                 if (ipv4_is_local_multicast(oldflp->fl4_dst) ||
-                    oldflp->fl4_dst == htonl(0xFFFFFFFF)) {
+                    ipv4_is_lbcast(oldflp->fl4_dst)) {
                         if (!fl.fl4_src)
                                 fl.fl4_src = inet_select_addr(dev_out, 0,
                                                               RT_SCOPE_LINK);
@@ -2612,10 +2592,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
                         fl.fl4_dst = fl.fl4_src;
                 if (!fl.fl4_dst)
                         fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
-                if (dev_out)
-                        dev_put(dev_out);
                 dev_out = net->loopback_dev;
-                dev_hold(dev_out);
                 fl.oif = net->loopback_dev->ifindex;
                 res.type = RTN_LOCAL;
                 flags |= RTCF_LOCAL;
@@ -2649,23 +2626,15 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
                         res.type = RTN_UNICAST;
                         goto make_route;
                 }
-                if (dev_out)
-                        dev_put(dev_out);
                 err = -ENETUNREACH;
                 goto out;
         }
-        free_res = 1;
 
         if (res.type == RTN_LOCAL) {
                 if (!fl.fl4_src)
                         fl.fl4_src = fl.fl4_dst;
-                if (dev_out)
-                        dev_put(dev_out);
                 dev_out = net->loopback_dev;
-                dev_hold(dev_out);
                 fl.oif = dev_out->ifindex;
-                if (res.fi)
-                        fib_info_put(res.fi);
                 res.fi = NULL;
                 flags |= RTCF_LOCAL;
                 goto make_route;
@@ -2682,28 +2651,21 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
         if (!fl.fl4_src)
                 fl.fl4_src = FIB_RES_PREFSRC(res);
 
-        if (dev_out)
-                dev_put(dev_out);
         dev_out = FIB_RES_DEV(res);
-        dev_hold(dev_out);
         fl.oif = dev_out->ifindex;
 
 
 make_route:
         err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags);
 
-
-        if (free_res)
-                fib_res_put(&res);
-        if (dev_out)
-                dev_put(dev_out);
 out: return err;
 }
 
 int __ip_route_output_key(struct net *net, struct rtable **rp,
                           const struct flowi *flp)
 {
-        unsigned hash;
+        unsigned int hash;
+        int res;
         struct rtable *rth;
 
         if (!rt_caching(net))
@@ -2734,7 +2696,10 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
         rcu_read_unlock_bh();
 
 slow_output:
-        return ip_route_output_slow(net, rp, flp);
+        rcu_read_lock();
+        res = ip_route_output_slow(net, rp, flp);
+        rcu_read_unlock();
+        return res;
 }
 EXPORT_SYMBOL_GPL(__ip_route_output_key);
 
@@ -2753,7 +2718,6 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
         .destroy = ipv4_dst_destroy,
         .check = ipv4_blackhole_dst_check,
         .update_pmtu = ipv4_rt_blackhole_update_pmtu,
-        .entries = ATOMIC_INIT(0),
 };
 
 
@@ -2798,7 +2762,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
 
         dst_release(&(*rp)->dst);
         *rp = rt;
-        return (rt ? 0 : -ENOMEM);
+        return rt ? 0 : -ENOMEM;
 }
 
 int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
@@ -3323,6 +3287,12 @@ int __init ip_rt_init(void)
 
         ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
 
+        if (dst_entries_init(&ipv4_dst_ops) < 0)
+                panic("IP: failed to allocate ipv4_dst_ops counter\n");
+
+        if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
+                panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");
+
         rt_hash_table = (struct rt_hash_bucket *)
                 alloc_large_system_hash("IP route cache",
                                         sizeof(struct rt_hash_bucket),