Diffstat (limited to 'net/ipv4/route.c')
 net/ipv4/route.c | 90 +++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 66 insertions(+), 24 deletions(-)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 28205e5bfa9b..278f46f5011b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -131,8 +131,8 @@ static int ip_rt_min_advmss __read_mostly = 256;
 static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ;
 static int rt_chain_length_max __read_mostly = 20;
 
-static void rt_worker_func(struct work_struct *work);
-static DECLARE_DELAYED_WORK(expires_work, rt_worker_func);
+static struct delayed_work expires_work;
+static unsigned long expires_ljiffies;
 
 /*
  * Interface to generic destination cache.
@@ -787,9 +787,12 @@ static void rt_check_expire(void)
 	struct rtable *rth, *aux, **rthp;
 	unsigned long samples = 0;
 	unsigned long sum = 0, sum2 = 0;
+	unsigned long delta;
 	u64 mult;
 
-	mult = ((u64)ip_rt_gc_interval) << rt_hash_log;
+	delta = jiffies - expires_ljiffies;
+	expires_ljiffies = jiffies;
+	mult = ((u64)delta) << rt_hash_log;
 	if (ip_rt_gc_timeout > 1)
 		do_div(mult, ip_rt_gc_timeout);
 	goal = (unsigned int)mult;
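
The scan goal above now scales with the time that actually elapsed since the
previous rt_check_expire() run (delta) rather than with the nominal
ip_rt_gc_interval: since the work item becomes deferrable (see the last hunk),
a run can fire late on an idle system and must then cover the buckets it
missed. A minimal standalone sketch of the arithmetic, with HZ, the table
size and the elapsed time picked purely for illustration:

	/* Userspace model of the goal computation in rt_check_expire(). */
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		const unsigned long hz = 100;			/* assumed tick rate */
		const unsigned long ip_rt_gc_timeout = 300 * hz;	/* cache entry timeout */
		const unsigned int rt_hash_log = 17;		/* 2^17 buckets, assumed */
		unsigned long delta = 90 * hz;			/* run fired 90s after the last one */

		/* goal = buckets * (elapsed / gc_timeout): scan a slice of
		 * the table proportional to how much time really passed. */
		uint64_t mult = ((uint64_t)delta) << rt_hash_log;
		unsigned int goal = (unsigned int)(mult / ip_rt_gc_timeout);

		printf("scan goal: %u of %u buckets\n", goal, 1u << rt_hash_log);
		return 0;
	}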
@@ -1064,7 +1067,8 @@ work_done:
 out:	return 0;
 }
 
-static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)
+static int rt_intern_hash(unsigned hash, struct rtable *rt,
+			  struct rtable **rp, struct sk_buff *skb)
 {
 	struct rtable *rth, **rthp;
 	unsigned long now;
@@ -1081,8 +1085,35 @@ restart:
 	now = jiffies;
 
 	if (!rt_caching(dev_net(rt->u.dst.dev))) {
-		rt_drop(rt);
-		return 0;
+		/*
+		 * If we're not caching, just tell the caller we were
+		 * successful and don't touch the route. The caller holds
+		 * the sole reference to the cache entry, and it will be
+		 * released when the caller is done with it.
+		 * If we dropped it here, callers would have no way to resolve
+		 * routes when we're not caching. Instead, just point *rp at
+		 * rt, so the caller gets a single use out of the route.
+		 * Note that we do rt_free on this new route entry, so that
+		 * once its refcount hits zero, we are still able to reap it
+		 * (Thanks Alexey).
+		 * Note also that rt_free uses call_rcu. We don't actually
+		 * need rcu protection here; this is just our path to get
+		 * on the route gc list.
+		 */
+
+		if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
+			int err = arp_bind_neighbour(&rt->u.dst);
+			if (err) {
+				if (net_ratelimit())
+					printk(KERN_WARNING
+					       "Neighbour table failure & not caching routes.\n");
+				rt_drop(rt);
+				return err;
+			}
+		}
+
+		rt_free(rt);
+		goto skip_hashing;
 	}
 
 	rthp = &rt_hash_table[hash].chain;
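
For context, the branch above ends in rt_free() rather than rt_drop() because
the two helpers (defined earlier in route.c, unchanged by this patch) differ
only in whether the caller's reference is released. Paraphrased, not part of
this diff:

	static inline void rt_free(struct rtable *rt)
	{
		/* free once the refcount drops to zero, via the RCU path */
		call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
	}

	static inline void rt_drop(struct rtable *rt)
	{
		/* as above, but give up our own reference first */
		ip_rt_put(rt);
		call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
	}

rt_free() thus leaves the caller's single reference intact, which is exactly
what the new comment relies on.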
@@ -1114,7 +1145,10 @@ restart:
 			spin_unlock_bh(rt_hash_lock_addr(hash));
 
 			rt_drop(rt);
-			*rp = rth;
+			if (rp)
+				*rp = rth;
+			else
+				skb_dst_set(skb, &rth->u.dst);
 			return 0;
 		}
 
@@ -1196,7 +1230,8 @@ restart:
 #if RT_CACHE_DEBUG >= 2
 	if (rt->u.dst.rt_next) {
 		struct rtable *trt;
-		printk(KERN_DEBUG "rt_cache @%02x: %pI4", hash, &rt->rt_dst);
+		printk(KERN_DEBUG "rt_cache @%02x: %pI4",
+		       hash, &rt->rt_dst);
 		for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next)
 			printk(" . %pI4", &trt->rt_dst);
 		printk("\n");
@@ -1210,7 +1245,12 @@ restart:
 	rcu_assign_pointer(rt_hash_table[hash].chain, rt);
 
 	spin_unlock_bh(rt_hash_lock_addr(hash));
-	*rp = rt;
+
+skip_hashing:
+	if (rp)
+		*rp = rt;
+	else
+		skb_dst_set(skb, &rt->u.dst);
 	return 0;
 }
 
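
The skip_hashing tail establishes the convention used at every call site
changed below: a caller passes either a struct rtable ** to receive the
result (output path) or an skb to carry it as the packet's dst (input path),
never both. The updated call sites reduce to these two shapes (names as used
in the respective hunks):

	/* output route lookup: result comes back through *rp */
	err = rt_intern_hash(hash, rth, rp, NULL);

	/* input route lookup: result is attached to the packet */
	err = rt_intern_hash(hash, rth, NULL, skb);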
@@ -1407,7 +1447,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 							&netevent);
 
 				rt_del(hash, rth);
-				if (!rt_intern_hash(hash, rt, &rt))
+				if (!rt_intern_hash(hash, rt, &rt, NULL))
 					ip_rt_put(rt);
 				goto do_next;
 			}
@@ -1473,7 +1513,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
 
 void ip_rt_send_redirect(struct sk_buff *skb)
 {
-	struct rtable *rt = skb->rtable;
+	struct rtable *rt = skb_rtable(skb);
 	struct in_device *in_dev = in_dev_get(rt->u.dst.dev);
 
 	if (!in_dev)
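
skb_rtable() here and in the hunks below replaces direct access to the
skb->rtable member, which this series removes in favour of the skb's generic
dst slot. The accessor is essentially the following skbuff.h helper (a
sketch, not part of this file):

	static inline struct rtable *skb_rtable(const struct sk_buff *skb)
	{
		/* the route is stored in the generic dst_entry slot */
		return (struct rtable *)skb_dst(skb);
	}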
@@ -1521,7 +1561,7 @@ out:
 
 static int ip_error(struct sk_buff *skb)
 {
-	struct rtable *rt = skb->rtable;
+	struct rtable *rt = skb_rtable(skb);
 	unsigned long now;
 	int code;
 
@@ -1698,7 +1738,7 @@ static void ipv4_link_failure(struct sk_buff *skb)
 
 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
 
-	rt = skb->rtable;
+	rt = skb_rtable(skb);
 	if (rt)
 		dst_set_expires(&rt->u.dst, 0);
 }
@@ -1858,7 +1898,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 
 	in_dev_put(in_dev);
 	hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
-	return rt_intern_hash(hash, rth, &skb->rtable);
+	return rt_intern_hash(hash, rth, NULL, skb);
 
 e_nobufs:
 	in_dev_put(in_dev);
@@ -2019,7 +2059,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
 	/* put it into the cache */
 	hash = rt_hash(daddr, saddr, fl->iif,
 		       rt_genid(dev_net(rth->u.dst.dev)));
-	return rt_intern_hash(hash, rth, &skb->rtable);
+	return rt_intern_hash(hash, rth, NULL, skb);
 }
 
 /*
@@ -2175,7 +2215,7 @@ local_input:
 	}
 	rth->rt_type = res.type;
 	hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net));
-	err = rt_intern_hash(hash, rth, &skb->rtable);
+	err = rt_intern_hash(hash, rth, NULL, skb);
 	goto done;
 
 no_route:
@@ -2244,7 +2284,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 			dst_use(&rth->u.dst, jiffies);
 			RT_CACHE_STAT_INC(in_hit);
 			rcu_read_unlock();
-			skb->rtable = rth;
+			skb_dst_set(skb, &rth->u.dst);
 			return 0;
 		}
 		RT_CACHE_STAT_INC(in_hlist_search);
@@ -2420,7 +2460,7 @@ static int ip_mkroute_output(struct rtable **rp,
 	if (err == 0) {
 		hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif,
 			       rt_genid(dev_net(dev_out)));
-		err = rt_intern_hash(hash, rth, rp);
+		err = rt_intern_hash(hash, rth, rp, NULL);
 	}
 
 	return err;
@@ -2763,7 +2803,7 @@ static int rt_fill_info(struct net *net,
 			struct sk_buff *skb, u32 pid, u32 seq, int event,
 			int nowait, unsigned int flags)
 {
-	struct rtable *rt = skb->rtable;
+	struct rtable *rt = skb_rtable(skb);
 	struct rtmsg *r;
 	struct nlmsghdr *nlh;
 	long expires;
@@ -2907,7 +2947,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 		err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
 		local_bh_enable();
 
-		rt = skb->rtable;
+		rt = skb_rtable(skb);
 		if (err == 0 && rt->u.dst.error)
 			err = -rt->u.dst.error;
 	} else {
@@ -2927,7 +2967,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 	if (err)
 		goto errout_free;
 
-	skb->rtable = rt;
+	skb_dst_set(skb, &rt->u.dst);
 	if (rtm->rtm_flags & RTM_F_NOTIFY)
 		rt->rt_flags |= RTCF_NOTIFY;
 
@@ -2968,15 +3008,15 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
 				continue;
 			if (rt_is_expired(rt))
 				continue;
-			skb->dst = dst_clone(&rt->u.dst);
+			skb_dst_set(skb, dst_clone(&rt->u.dst));
 			if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid,
 					 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
 					 1, NLM_F_MULTI) <= 0) {
-				dst_release(xchg(&skb->dst, NULL));
+				skb_dst_drop(skb);
 				rcu_read_unlock_bh();
 				goto done;
 			}
-			dst_release(xchg(&skb->dst, NULL));
+			skb_dst_drop(skb);
 		}
 		rcu_read_unlock_bh();
 	}
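
skb_dst_drop() replaces the open-coded dst_release(xchg(&skb->dst, NULL))
idiom now that skb->dst is no longer touched directly. Conceptually (a
sketch, not the exact skbuff.h source):

	static inline void skb_dst_drop(struct sk_buff *skb)
	{
		/* release the attached dst, if any, and clear the slot */
		if (skb_dst(skb))
			dst_release(skb_dst(skb));
		skb_dst_set(skb, NULL);
	}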
@@ -3390,6 +3430,8 @@ int __init ip_rt_init(void)
 	/* All the timers, started at system startup tend
 	   to synchronize. Perturb it a bit.
 	 */
+	INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func);
+	expires_ljiffies = jiffies;
 	schedule_delayed_work(&expires_work,
 		net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
 