about | summary | refs | log | tree | commit | diff | stats
path: root/net/ipv4/route.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r-- net/ipv4/route.c 190
1 file changed, 80 insertions, 110 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ac6559cb54f9..d6cb2bfcd8e1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -159,7 +159,6 @@ static struct dst_ops ipv4_dst_ops = {
159 .link_failure = ipv4_link_failure, 159 .link_failure = ipv4_link_failure,
160 .update_pmtu = ip_rt_update_pmtu, 160 .update_pmtu = ip_rt_update_pmtu,
161 .local_out = __ip_local_out, 161 .local_out = __ip_local_out,
162 .entries = ATOMIC_INIT(0),
163}; 162};
164 163
165#define ECN_OR_COST(class) TC_PRIO_##class 164#define ECN_OR_COST(class) TC_PRIO_##class
@@ -466,7 +465,7 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v)
466 465
467 seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x " 466 seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
468 " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n", 467 " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
469 atomic_read(&ipv4_dst_ops.entries), 468 dst_entries_get_slow(&ipv4_dst_ops),
470 st->in_hit, 469 st->in_hit,
471 st->in_slow_tot, 470 st->in_slow_tot,
472 st->in_slow_mc, 471 st->in_slow_mc,
@@ -945,6 +944,7 @@ static int rt_garbage_collect(struct dst_ops *ops)
945 struct rtable *rth, **rthp; 944 struct rtable *rth, **rthp;
946 unsigned long now = jiffies; 945 unsigned long now = jiffies;
947 int goal; 946 int goal;
947 int entries = dst_entries_get_fast(&ipv4_dst_ops);
948 948
949 /* 949 /*
950 * Garbage collection is pretty expensive, 950 * Garbage collection is pretty expensive,
@@ -954,28 +954,28 @@ static int rt_garbage_collect(struct dst_ops *ops)
954 RT_CACHE_STAT_INC(gc_total); 954 RT_CACHE_STAT_INC(gc_total);
955 955
956 if (now - last_gc < ip_rt_gc_min_interval && 956 if (now - last_gc < ip_rt_gc_min_interval &&
957 atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) { 957 entries < ip_rt_max_size) {
958 RT_CACHE_STAT_INC(gc_ignored); 958 RT_CACHE_STAT_INC(gc_ignored);
959 goto out; 959 goto out;
960 } 960 }
961 961
962 entries = dst_entries_get_slow(&ipv4_dst_ops);
962 /* Calculate number of entries, which we want to expire now. */ 963 /* Calculate number of entries, which we want to expire now. */
963 goal = atomic_read(&ipv4_dst_ops.entries) - 964 goal = entries - (ip_rt_gc_elasticity << rt_hash_log);
964 (ip_rt_gc_elasticity << rt_hash_log);
965 if (goal <= 0) { 965 if (goal <= 0) {
966 if (equilibrium < ipv4_dst_ops.gc_thresh) 966 if (equilibrium < ipv4_dst_ops.gc_thresh)
967 equilibrium = ipv4_dst_ops.gc_thresh; 967 equilibrium = ipv4_dst_ops.gc_thresh;
968 goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium; 968 goal = entries - equilibrium;
969 if (goal > 0) { 969 if (goal > 0) {
970 equilibrium += min_t(unsigned int, goal >> 1, rt_hash_mask + 1); 970 equilibrium += min_t(unsigned int, goal >> 1, rt_hash_mask + 1);
971 goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium; 971 goal = entries - equilibrium;
972 } 972 }
973 } else { 973 } else {
974 /* We are in dangerous area. Try to reduce cache really 974 /* We are in dangerous area. Try to reduce cache really
975 * aggressively. 975 * aggressively.
976 */ 976 */
977 goal = max_t(unsigned int, goal >> 1, rt_hash_mask + 1); 977 goal = max_t(unsigned int, goal >> 1, rt_hash_mask + 1);
978 equilibrium = atomic_read(&ipv4_dst_ops.entries) - goal; 978 equilibrium = entries - goal;
979 } 979 }
980 980
981 if (now - last_gc >= ip_rt_gc_min_interval) 981 if (now - last_gc >= ip_rt_gc_min_interval)
@@ -1032,14 +1032,16 @@ static int rt_garbage_collect(struct dst_ops *ops)
1032 expire >>= 1; 1032 expire >>= 1;
1033#if RT_CACHE_DEBUG >= 2 1033#if RT_CACHE_DEBUG >= 2
1034 printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire, 1034 printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire,
1035 atomic_read(&ipv4_dst_ops.entries), goal, i); 1035 dst_entries_get_fast(&ipv4_dst_ops), goal, i);
1036#endif 1036#endif
1037 1037
1038 if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) 1038 if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
1039 goto out; 1039 goto out;
1040 } while (!in_softirq() && time_before_eq(jiffies, now)); 1040 } while (!in_softirq() && time_before_eq(jiffies, now));
1041 1041
1042 if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) 1042 if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
1043 goto out;
1044 if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size)
1043 goto out; 1045 goto out;
1044 if (net_ratelimit()) 1046 if (net_ratelimit())
1045 printk(KERN_WARNING "dst cache overflow\n"); 1047 printk(KERN_WARNING "dst cache overflow\n");
@@ -1049,11 +1051,12 @@ static int rt_garbage_collect(struct dst_ops *ops)
1049work_done: 1051work_done:
1050 expire += ip_rt_gc_min_interval; 1052 expire += ip_rt_gc_min_interval;
1051 if (expire > ip_rt_gc_timeout || 1053 if (expire > ip_rt_gc_timeout ||
1052 atomic_read(&ipv4_dst_ops.entries) < ipv4_dst_ops.gc_thresh) 1054 dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh ||
1055 dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh)
1053 expire = ip_rt_gc_timeout; 1056 expire = ip_rt_gc_timeout;
1054#if RT_CACHE_DEBUG >= 2 1057#if RT_CACHE_DEBUG >= 2
1055 printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire, 1058 printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire,
1056 atomic_read(&ipv4_dst_ops.entries), goal, rover); 1059 dst_entries_get_fast(&ipv4_dst_ops), goal, rover);
1057#endif 1060#endif
1058out: return 0; 1061out: return 0;
1059} 1062}
@@ -1102,23 +1105,23 @@ restart:
1102 * Note that we do rt_free on this new route entry, so that 1105 * Note that we do rt_free on this new route entry, so that
1103 * once its refcount hits zero, we are still able to reap it 1106 * once its refcount hits zero, we are still able to reap it
1104 * (Thanks Alexey) 1107 * (Thanks Alexey)
1105 * Note also the rt_free uses call_rcu. We don't actually 1108 * Note: To avoid expensive rcu stuff for this uncached dst,
1106 * need rcu protection here, this is just our path to get 1109 * we set DST_NOCACHE so that dst_release() can free dst without
1107 * on the route gc list. 1110 * waiting a grace period.
1108 */ 1111 */
1109 1112
1113 rt->dst.flags |= DST_NOCACHE;
1110 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { 1114 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
1111 int err = arp_bind_neighbour(&rt->dst); 1115 int err = arp_bind_neighbour(&rt->dst);
1112 if (err) { 1116 if (err) {
1113 if (net_ratelimit()) 1117 if (net_ratelimit())
1114 printk(KERN_WARNING 1118 printk(KERN_WARNING
1115 "Neighbour table failure & not caching routes.\n"); 1119 "Neighbour table failure & not caching routes.\n");
1116 rt_drop(rt); 1120 ip_rt_put(rt);
1117 return err; 1121 return err;
1118 } 1122 }
1119 } 1123 }
1120 1124
1121 rt_free(rt);
1122 goto skip_hashing; 1125 goto skip_hashing;
1123 } 1126 }
1124 1127
@@ -1268,18 +1271,11 @@ skip_hashing:
1268 1271
1269void rt_bind_peer(struct rtable *rt, int create) 1272void rt_bind_peer(struct rtable *rt, int create)
1270{ 1273{
1271 static DEFINE_SPINLOCK(rt_peer_lock);
1272 struct inet_peer *peer; 1274 struct inet_peer *peer;
1273 1275
1274 peer = inet_getpeer(rt->rt_dst, create); 1276 peer = inet_getpeer(rt->rt_dst, create);
1275 1277
1276 spin_lock_bh(&rt_peer_lock); 1278 if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
1277 if (rt->peer == NULL) {
1278 rt->peer = peer;
1279 peer = NULL;
1280 }
1281 spin_unlock_bh(&rt_peer_lock);
1282 if (peer)
1283 inet_putpeer(peer); 1279 inet_putpeer(peer);
1284} 1280}
1285 1281
@@ -1779,12 +1775,15 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
1779 1775
1780 if (rt->fl.iif == 0) 1776 if (rt->fl.iif == 0)
1781 src = rt->rt_src; 1777 src = rt->rt_src;
1782 else if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) { 1778 else {
1783 src = FIB_RES_PREFSRC(res); 1779 rcu_read_lock();
1784 fib_res_put(&res); 1780 if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0)
1785 } else 1781 src = FIB_RES_PREFSRC(res);
1786 src = inet_select_addr(rt->dst.dev, rt->rt_gateway, 1782 else
1783 src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
1787 RT_SCOPE_UNIVERSE); 1784 RT_SCOPE_UNIVERSE);
1785 rcu_read_unlock();
1786 }
1788 memcpy(addr, &src, 4); 1787 memcpy(addr, &src, 4);
1789} 1788}
1790 1789
@@ -2087,6 +2086,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
2087 * Such approach solves two big problems: 2086 * Such approach solves two big problems:
2088 * 1. Not simplex devices are handled properly. 2087 * 1. Not simplex devices are handled properly.
2089 * 2. IP spoofing attempts are filtered with 100% of guarantee. 2088 * 2. IP spoofing attempts are filtered with 100% of guarantee.
2089 * called with rcu_read_lock()
2090 */ 2090 */
2091 2091
2092static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, 2092static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
@@ -2108,7 +2108,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2108 unsigned hash; 2108 unsigned hash;
2109 __be32 spec_dst; 2109 __be32 spec_dst;
2110 int err = -EINVAL; 2110 int err = -EINVAL;
2111 int free_res = 0;
2112 struct net * net = dev_net(dev); 2111 struct net * net = dev_net(dev);
2113 2112
2114 /* IP on this device is disabled. */ 2113 /* IP on this device is disabled. */
@@ -2124,7 +2123,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2124 ipv4_is_loopback(saddr)) 2123 ipv4_is_loopback(saddr))
2125 goto martian_source; 2124 goto martian_source;
2126 2125
2127 if (daddr == htonl(0xFFFFFFFF) || (saddr == 0 && daddr == 0)) 2126 if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
2128 goto brd_input; 2127 goto brd_input;
2129 2128
2130 /* Accept zero addresses only to limited broadcast; 2129 /* Accept zero addresses only to limited broadcast;
@@ -2133,19 +2132,18 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2133 if (ipv4_is_zeronet(saddr)) 2132 if (ipv4_is_zeronet(saddr))
2134 goto martian_source; 2133 goto martian_source;
2135 2134
2136 if (ipv4_is_lbcast(daddr) || ipv4_is_zeronet(daddr) || 2135 if (ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr))
2137 ipv4_is_loopback(daddr))
2138 goto martian_destination; 2136 goto martian_destination;
2139 2137
2140 /* 2138 /*
2141 * Now we are ready to route packet. 2139 * Now we are ready to route packet.
2142 */ 2140 */
2143 if ((err = fib_lookup(net, &fl, &res)) != 0) { 2141 err = fib_lookup(net, &fl, &res);
2142 if (err != 0) {
2144 if (!IN_DEV_FORWARD(in_dev)) 2143 if (!IN_DEV_FORWARD(in_dev))
2145 goto e_hostunreach; 2144 goto e_hostunreach;
2146 goto no_route; 2145 goto no_route;
2147 } 2146 }
2148 free_res = 1;
2149 2147
2150 RT_CACHE_STAT_INC(in_slow_tot); 2148 RT_CACHE_STAT_INC(in_slow_tot);
2151 2149
@@ -2154,8 +2152,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2154 2152
2155 if (res.type == RTN_LOCAL) { 2153 if (res.type == RTN_LOCAL) {
2156 err = fib_validate_source(saddr, daddr, tos, 2154 err = fib_validate_source(saddr, daddr, tos,
2157 net->loopback_dev->ifindex, 2155 net->loopback_dev->ifindex,
2158 dev, &spec_dst, &itag, skb->mark); 2156 dev, &spec_dst, &itag, skb->mark);
2159 if (err < 0) 2157 if (err < 0)
2160 goto martian_source_keep_err; 2158 goto martian_source_keep_err;
2161 if (err) 2159 if (err)
@@ -2170,9 +2168,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2170 goto martian_destination; 2168 goto martian_destination;
2171 2169
2172 err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); 2170 err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
2173done:
2174 if (free_res)
2175 fib_res_put(&res);
2176out: return err; 2171out: return err;
2177 2172
2178brd_input: 2173brd_input:
@@ -2232,7 +2227,7 @@ local_input:
2232 rth->rt_type = res.type; 2227 rth->rt_type = res.type;
2233 hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); 2228 hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net));
2234 err = rt_intern_hash(hash, rth, NULL, skb, fl.iif); 2229 err = rt_intern_hash(hash, rth, NULL, skb, fl.iif);
2235 goto done; 2230 goto out;
2236 2231
2237no_route: 2232no_route:
2238 RT_CACHE_STAT_INC(in_no_route); 2233 RT_CACHE_STAT_INC(in_no_route);
@@ -2255,21 +2250,21 @@ martian_destination:
2255 2250
2256e_hostunreach: 2251e_hostunreach:
2257 err = -EHOSTUNREACH; 2252 err = -EHOSTUNREACH;
2258 goto done; 2253 goto out;
2259 2254
2260e_inval: 2255e_inval:
2261 err = -EINVAL; 2256 err = -EINVAL;
2262 goto done; 2257 goto out;
2263 2258
2264e_nobufs: 2259e_nobufs:
2265 err = -ENOBUFS; 2260 err = -ENOBUFS;
2266 goto done; 2261 goto out;
2267 2262
2268martian_source: 2263martian_source:
2269 err = -EINVAL; 2264 err = -EINVAL;
2270martian_source_keep_err: 2265martian_source_keep_err:
2271 ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); 2266 ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
2272 goto done; 2267 goto out;
2273} 2268}
2274 2269
2275int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, 2270int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
@@ -2355,6 +2350,7 @@ skip_cache:
2355} 2350}
2356EXPORT_SYMBOL(ip_route_input_common); 2351EXPORT_SYMBOL(ip_route_input_common);
2357 2352
2353/* called with rcu_read_lock() */
2358static int __mkroute_output(struct rtable **result, 2354static int __mkroute_output(struct rtable **result,
2359 struct fib_result *res, 2355 struct fib_result *res,
2360 const struct flowi *fl, 2356 const struct flowi *fl,
@@ -2365,53 +2361,47 @@ static int __mkroute_output(struct rtable **result,
2365 struct rtable *rth; 2361 struct rtable *rth;
2366 struct in_device *in_dev; 2362 struct in_device *in_dev;
2367 u32 tos = RT_FL_TOS(oldflp); 2363 u32 tos = RT_FL_TOS(oldflp);
2368 int err = 0;
2369 2364
2370 if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags&IFF_LOOPBACK)) 2365 if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK))
2371 return -EINVAL; 2366 return -EINVAL;
2372 2367
2373 if (fl->fl4_dst == htonl(0xFFFFFFFF)) 2368 if (ipv4_is_lbcast(fl->fl4_dst))
2374 res->type = RTN_BROADCAST; 2369 res->type = RTN_BROADCAST;
2375 else if (ipv4_is_multicast(fl->fl4_dst)) 2370 else if (ipv4_is_multicast(fl->fl4_dst))
2376 res->type = RTN_MULTICAST; 2371 res->type = RTN_MULTICAST;
2377 else if (ipv4_is_lbcast(fl->fl4_dst) || ipv4_is_zeronet(fl->fl4_dst)) 2372 else if (ipv4_is_zeronet(fl->fl4_dst))
2378 return -EINVAL; 2373 return -EINVAL;
2379 2374
2380 if (dev_out->flags & IFF_LOOPBACK) 2375 if (dev_out->flags & IFF_LOOPBACK)
2381 flags |= RTCF_LOCAL; 2376 flags |= RTCF_LOCAL;
2382 2377
2383 /* get work reference to inet device */ 2378 in_dev = __in_dev_get_rcu(dev_out);
2384 in_dev = in_dev_get(dev_out);
2385 if (!in_dev) 2379 if (!in_dev)
2386 return -EINVAL; 2380 return -EINVAL;
2387 2381
2388 if (res->type == RTN_BROADCAST) { 2382 if (res->type == RTN_BROADCAST) {
2389 flags |= RTCF_BROADCAST | RTCF_LOCAL; 2383 flags |= RTCF_BROADCAST | RTCF_LOCAL;
2390 if (res->fi) { 2384 res->fi = NULL;
2391 fib_info_put(res->fi);
2392 res->fi = NULL;
2393 }
2394 } else if (res->type == RTN_MULTICAST) { 2385 } else if (res->type == RTN_MULTICAST) {
2395 flags |= RTCF_MULTICAST|RTCF_LOCAL; 2386 flags |= RTCF_MULTICAST | RTCF_LOCAL;
2396 if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src, 2387 if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src,
2397 oldflp->proto)) 2388 oldflp->proto))
2398 flags &= ~RTCF_LOCAL; 2389 flags &= ~RTCF_LOCAL;
2399 /* If multicast route do not exist use 2390 /* If multicast route do not exist use
2400 default one, but do not gateway in this case. 2391 * default one, but do not gateway in this case.
2401 Yes, it is hack. 2392 * Yes, it is hack.
2402 */ 2393 */
2403 if (res->fi && res->prefixlen < 4) { 2394 if (res->fi && res->prefixlen < 4)
2404 fib_info_put(res->fi);
2405 res->fi = NULL; 2395 res->fi = NULL;
2406 }
2407 } 2396 }
2408 2397
2409 2398
2410 rth = dst_alloc(&ipv4_dst_ops); 2399 rth = dst_alloc(&ipv4_dst_ops);
2411 if (!rth) { 2400 if (!rth)
2412 err = -ENOBUFS; 2401 return -ENOBUFS;
2413 goto cleanup; 2402
2414 } 2403 in_dev_hold(in_dev);
2404 rth->idev = in_dev;
2415 2405
2416 atomic_set(&rth->dst.__refcnt, 1); 2406 atomic_set(&rth->dst.__refcnt, 1);
2417 rth->dst.flags= DST_HOST; 2407 rth->dst.flags= DST_HOST;
@@ -2432,7 +2422,6 @@ static int __mkroute_output(struct rtable **result,
2432 cache entry */ 2422 cache entry */
2433 rth->dst.dev = dev_out; 2423 rth->dst.dev = dev_out;
2434 dev_hold(dev_out); 2424 dev_hold(dev_out);
2435 rth->idev = in_dev_get(dev_out);
2436 rth->rt_gateway = fl->fl4_dst; 2425 rth->rt_gateway = fl->fl4_dst;
2437 rth->rt_spec_dst= fl->fl4_src; 2426 rth->rt_spec_dst= fl->fl4_src;
2438 2427
@@ -2467,15 +2456,11 @@ static int __mkroute_output(struct rtable **result,
2467 rt_set_nexthop(rth, res, 0); 2456 rt_set_nexthop(rth, res, 0);
2468 2457
2469 rth->rt_flags = flags; 2458 rth->rt_flags = flags;
2470
2471 *result = rth; 2459 *result = rth;
2472 cleanup: 2460 return 0;
2473 /* release work reference to inet device */
2474 in_dev_put(in_dev);
2475
2476 return err;
2477} 2461}
2478 2462
2463/* called with rcu_read_lock() */
2479static int ip_mkroute_output(struct rtable **rp, 2464static int ip_mkroute_output(struct rtable **rp,
2480 struct fib_result *res, 2465 struct fib_result *res,
2481 const struct flowi *fl, 2466 const struct flowi *fl,
@@ -2497,6 +2482,7 @@ static int ip_mkroute_output(struct rtable **rp,
2497 2482
2498/* 2483/*
2499 * Major route resolver routine. 2484 * Major route resolver routine.
2485 * called with rcu_read_lock();
2500 */ 2486 */
2501 2487
2502static int ip_route_output_slow(struct net *net, struct rtable **rp, 2488static int ip_route_output_slow(struct net *net, struct rtable **rp,
@@ -2515,9 +2501,8 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2515 .iif = net->loopback_dev->ifindex, 2501 .iif = net->loopback_dev->ifindex,
2516 .oif = oldflp->oif }; 2502 .oif = oldflp->oif };
2517 struct fib_result res; 2503 struct fib_result res;
2518 unsigned flags = 0; 2504 unsigned int flags = 0;
2519 struct net_device *dev_out = NULL; 2505 struct net_device *dev_out = NULL;
2520 int free_res = 0;
2521 int err; 2506 int err;
2522 2507
2523 2508
@@ -2543,9 +2528,9 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2543 2528
2544 if (oldflp->oif == 0 && 2529 if (oldflp->oif == 0 &&
2545 (ipv4_is_multicast(oldflp->fl4_dst) || 2530 (ipv4_is_multicast(oldflp->fl4_dst) ||
2546 oldflp->fl4_dst == htonl(0xFFFFFFFF))) { 2531 ipv4_is_lbcast(oldflp->fl4_dst))) {
2547 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ 2532 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2548 dev_out = ip_dev_find(net, oldflp->fl4_src); 2533 dev_out = __ip_dev_find(net, oldflp->fl4_src, false);
2549 if (dev_out == NULL) 2534 if (dev_out == NULL)
2550 goto out; 2535 goto out;
2551 2536
@@ -2570,29 +2555,24 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2570 2555
2571 if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) { 2556 if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) {
2572 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ 2557 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2573 dev_out = ip_dev_find(net, oldflp->fl4_src); 2558 if (!__ip_dev_find(net, oldflp->fl4_src, false))
2574 if (dev_out == NULL)
2575 goto out; 2559 goto out;
2576 dev_put(dev_out);
2577 dev_out = NULL;
2578 } 2560 }
2579 } 2561 }
2580 2562
2581 2563
2582 if (oldflp->oif) { 2564 if (oldflp->oif) {
2583 dev_out = dev_get_by_index(net, oldflp->oif); 2565 dev_out = dev_get_by_index_rcu(net, oldflp->oif);
2584 err = -ENODEV; 2566 err = -ENODEV;
2585 if (dev_out == NULL) 2567 if (dev_out == NULL)
2586 goto out; 2568 goto out;
2587 2569
2588 /* RACE: Check return value of inet_select_addr instead. */ 2570 /* RACE: Check return value of inet_select_addr instead. */
2589 if (__in_dev_get_rtnl(dev_out) == NULL) { 2571 if (rcu_dereference(dev_out->ip_ptr) == NULL)
2590 dev_put(dev_out);
2591 goto out; /* Wrong error code */ 2572 goto out; /* Wrong error code */
2592 }
2593 2573
2594 if (ipv4_is_local_multicast(oldflp->fl4_dst) || 2574 if (ipv4_is_local_multicast(oldflp->fl4_dst) ||
2595 oldflp->fl4_dst == htonl(0xFFFFFFFF)) { 2575 ipv4_is_lbcast(oldflp->fl4_dst)) {
2596 if (!fl.fl4_src) 2576 if (!fl.fl4_src)
2597 fl.fl4_src = inet_select_addr(dev_out, 0, 2577 fl.fl4_src = inet_select_addr(dev_out, 0,
2598 RT_SCOPE_LINK); 2578 RT_SCOPE_LINK);
@@ -2612,10 +2592,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2612 fl.fl4_dst = fl.fl4_src; 2592 fl.fl4_dst = fl.fl4_src;
2613 if (!fl.fl4_dst) 2593 if (!fl.fl4_dst)
2614 fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); 2594 fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
2615 if (dev_out)
2616 dev_put(dev_out);
2617 dev_out = net->loopback_dev; 2595 dev_out = net->loopback_dev;
2618 dev_hold(dev_out);
2619 fl.oif = net->loopback_dev->ifindex; 2596 fl.oif = net->loopback_dev->ifindex;
2620 res.type = RTN_LOCAL; 2597 res.type = RTN_LOCAL;
2621 flags |= RTCF_LOCAL; 2598 flags |= RTCF_LOCAL;
@@ -2649,23 +2626,15 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2649 res.type = RTN_UNICAST; 2626 res.type = RTN_UNICAST;
2650 goto make_route; 2627 goto make_route;
2651 } 2628 }
2652 if (dev_out)
2653 dev_put(dev_out);
2654 err = -ENETUNREACH; 2629 err = -ENETUNREACH;
2655 goto out; 2630 goto out;
2656 } 2631 }
2657 free_res = 1;
2658 2632
2659 if (res.type == RTN_LOCAL) { 2633 if (res.type == RTN_LOCAL) {
2660 if (!fl.fl4_src) 2634 if (!fl.fl4_src)
2661 fl.fl4_src = fl.fl4_dst; 2635 fl.fl4_src = fl.fl4_dst;
2662 if (dev_out)
2663 dev_put(dev_out);
2664 dev_out = net->loopback_dev; 2636 dev_out = net->loopback_dev;
2665 dev_hold(dev_out);
2666 fl.oif = dev_out->ifindex; 2637 fl.oif = dev_out->ifindex;
2667 if (res.fi)
2668 fib_info_put(res.fi);
2669 res.fi = NULL; 2638 res.fi = NULL;
2670 flags |= RTCF_LOCAL; 2639 flags |= RTCF_LOCAL;
2671 goto make_route; 2640 goto make_route;
@@ -2682,28 +2651,21 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2682 if (!fl.fl4_src) 2651 if (!fl.fl4_src)
2683 fl.fl4_src = FIB_RES_PREFSRC(res); 2652 fl.fl4_src = FIB_RES_PREFSRC(res);
2684 2653
2685 if (dev_out)
2686 dev_put(dev_out);
2687 dev_out = FIB_RES_DEV(res); 2654 dev_out = FIB_RES_DEV(res);
2688 dev_hold(dev_out);
2689 fl.oif = dev_out->ifindex; 2655 fl.oif = dev_out->ifindex;
2690 2656
2691 2657
2692make_route: 2658make_route:
2693 err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags); 2659 err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags);
2694 2660
2695
2696 if (free_res)
2697 fib_res_put(&res);
2698 if (dev_out)
2699 dev_put(dev_out);
2700out: return err; 2661out: return err;
2701} 2662}
2702 2663
2703int __ip_route_output_key(struct net *net, struct rtable **rp, 2664int __ip_route_output_key(struct net *net, struct rtable **rp,
2704 const struct flowi *flp) 2665 const struct flowi *flp)
2705{ 2666{
2706 unsigned hash; 2667 unsigned int hash;
2668 int res;
2707 struct rtable *rth; 2669 struct rtable *rth;
2708 2670
2709 if (!rt_caching(net)) 2671 if (!rt_caching(net))
@@ -2734,7 +2696,10 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
2734 rcu_read_unlock_bh(); 2696 rcu_read_unlock_bh();
2735 2697
2736slow_output: 2698slow_output:
2737 return ip_route_output_slow(net, rp, flp); 2699 rcu_read_lock();
2700 res = ip_route_output_slow(net, rp, flp);
2701 rcu_read_unlock();
2702 return res;
2738} 2703}
2739EXPORT_SYMBOL_GPL(__ip_route_output_key); 2704EXPORT_SYMBOL_GPL(__ip_route_output_key);
2740 2705
@@ -2753,7 +2718,6 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
2753 .destroy = ipv4_dst_destroy, 2718 .destroy = ipv4_dst_destroy,
2754 .check = ipv4_blackhole_dst_check, 2719 .check = ipv4_blackhole_dst_check,
2755 .update_pmtu = ipv4_rt_blackhole_update_pmtu, 2720 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
2756 .entries = ATOMIC_INIT(0),
2757}; 2721};
2758 2722
2759 2723
@@ -2798,7 +2762,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
2798 2762
2799 dst_release(&(*rp)->dst); 2763 dst_release(&(*rp)->dst);
2800 *rp = rt; 2764 *rp = rt;
2801 return (rt ? 0 : -ENOMEM); 2765 return rt ? 0 : -ENOMEM;
2802} 2766}
2803 2767
2804int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, 2768int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
@@ -3323,6 +3287,12 @@ int __init ip_rt_init(void)
3323 3287
3324 ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep; 3288 ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
3325 3289
3290 if (dst_entries_init(&ipv4_dst_ops) < 0)
3291 panic("IP: failed to allocate ipv4_dst_ops counter\n");
3292
3293 if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
3294 panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");
3295
3326 rt_hash_table = (struct rt_hash_bucket *) 3296 rt_hash_table = (struct rt_hash_bucket *)
3327 alloc_large_system_hash("IP route cache", 3297 alloc_large_system_hash("IP route cache",
3328 sizeof(struct rt_hash_bucket), 3298 sizeof(struct rt_hash_bucket),