diff options
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r-- | net/ipv4/route.c | 90 |
1 files changed, 66 insertions, 24 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 28205e5bfa9b..278f46f5011b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -131,8 +131,8 @@ static int ip_rt_min_advmss __read_mostly = 256; | |||
131 | static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ; | 131 | static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ; |
132 | static int rt_chain_length_max __read_mostly = 20; | 132 | static int rt_chain_length_max __read_mostly = 20; |
133 | 133 | ||
134 | static void rt_worker_func(struct work_struct *work); | 134 | static struct delayed_work expires_work; |
135 | static DECLARE_DELAYED_WORK(expires_work, rt_worker_func); | 135 | static unsigned long expires_ljiffies; |
136 | 136 | ||
137 | /* | 137 | /* |
138 | * Interface to generic destination cache. | 138 | * Interface to generic destination cache. |
@@ -787,9 +787,12 @@ static void rt_check_expire(void) | |||
787 | struct rtable *rth, *aux, **rthp; | 787 | struct rtable *rth, *aux, **rthp; |
788 | unsigned long samples = 0; | 788 | unsigned long samples = 0; |
789 | unsigned long sum = 0, sum2 = 0; | 789 | unsigned long sum = 0, sum2 = 0; |
790 | unsigned long delta; | ||
790 | u64 mult; | 791 | u64 mult; |
791 | 792 | ||
792 | mult = ((u64)ip_rt_gc_interval) << rt_hash_log; | 793 | delta = jiffies - expires_ljiffies; |
794 | expires_ljiffies = jiffies; | ||
795 | mult = ((u64)delta) << rt_hash_log; | ||
793 | if (ip_rt_gc_timeout > 1) | 796 | if (ip_rt_gc_timeout > 1) |
794 | do_div(mult, ip_rt_gc_timeout); | 797 | do_div(mult, ip_rt_gc_timeout); |
795 | goal = (unsigned int)mult; | 798 | goal = (unsigned int)mult; |
@@ -1064,7 +1067,8 @@ work_done: | |||
1064 | out: return 0; | 1067 | out: return 0; |
1065 | } | 1068 | } |
1066 | 1069 | ||
1067 | static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) | 1070 | static int rt_intern_hash(unsigned hash, struct rtable *rt, |
1071 | struct rtable **rp, struct sk_buff *skb) | ||
1068 | { | 1072 | { |
1069 | struct rtable *rth, **rthp; | 1073 | struct rtable *rth, **rthp; |
1070 | unsigned long now; | 1074 | unsigned long now; |
@@ -1081,8 +1085,35 @@ restart: | |||
1081 | now = jiffies; | 1085 | now = jiffies; |
1082 | 1086 | ||
1083 | if (!rt_caching(dev_net(rt->u.dst.dev))) { | 1087 | if (!rt_caching(dev_net(rt->u.dst.dev))) { |
1084 | rt_drop(rt); | 1088 | /* |
1085 | return 0; | 1089 | * If we're not caching, just tell the caller we |
1090 | * were successful and don't touch the route. The | ||
1091 | * caller holds the sole reference to the cache entry, and | |
1092 | * it will be released when the caller is done with it. | ||
1093 | * If we drop it here, the callers have no way to resolve routes | ||
1094 | * when we're not caching. Instead, just point *rp at rt, so | ||
1095 | * the caller gets a single use out of the route. | |
1096 | * Note that we do rt_free on this new route entry, so that | ||
1097 | * once its refcount hits zero, we are still able to reap it | ||
1098 | * (Thanks Alexey) | ||
1099 | * Note also that rt_free uses call_rcu. We don't actually | |
1100 | * need rcu protection here, this is just our path to get | ||
1101 | * on the route gc list. | ||
1102 | */ | ||
1103 | |||
1104 | if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { | ||
1105 | int err = arp_bind_neighbour(&rt->u.dst); | ||
1106 | if (err) { | ||
1107 | if (net_ratelimit()) | ||
1108 | printk(KERN_WARNING | ||
1109 | "Neighbour table failure & not caching routes.\n"); | ||
1110 | rt_drop(rt); | ||
1111 | return err; | ||
1112 | } | ||
1113 | } | ||
1114 | |||
1115 | rt_free(rt); | ||
1116 | goto skip_hashing; | ||
1086 | } | 1117 | } |
1087 | 1118 | ||
1088 | rthp = &rt_hash_table[hash].chain; | 1119 | rthp = &rt_hash_table[hash].chain; |
@@ -1114,7 +1145,10 @@ restart: | |||
1114 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1145 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
1115 | 1146 | ||
1116 | rt_drop(rt); | 1147 | rt_drop(rt); |
1117 | *rp = rth; | 1148 | if (rp) |
1149 | *rp = rth; | ||
1150 | else | ||
1151 | skb_dst_set(skb, &rth->u.dst); | ||
1118 | return 0; | 1152 | return 0; |
1119 | } | 1153 | } |
1120 | 1154 | ||
@@ -1196,7 +1230,8 @@ restart: | |||
1196 | #if RT_CACHE_DEBUG >= 2 | 1230 | #if RT_CACHE_DEBUG >= 2 |
1197 | if (rt->u.dst.rt_next) { | 1231 | if (rt->u.dst.rt_next) { |
1198 | struct rtable *trt; | 1232 | struct rtable *trt; |
1199 | printk(KERN_DEBUG "rt_cache @%02x: %pI4", hash, &rt->rt_dst); | 1233 | printk(KERN_DEBUG "rt_cache @%02x: %pI4", |
1234 | hash, &rt->rt_dst); | ||
1200 | for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) | 1235 | for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) |
1201 | printk(" . %pI4", &trt->rt_dst); | 1236 | printk(" . %pI4", &trt->rt_dst); |
1202 | printk("\n"); | 1237 | printk("\n"); |
@@ -1210,7 +1245,12 @@ restart: | |||
1210 | rcu_assign_pointer(rt_hash_table[hash].chain, rt); | 1245 | rcu_assign_pointer(rt_hash_table[hash].chain, rt); |
1211 | 1246 | ||
1212 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1247 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
1213 | *rp = rt; | 1248 | |
1249 | skip_hashing: | ||
1250 | if (rp) | ||
1251 | *rp = rt; | ||
1252 | else | ||
1253 | skb_dst_set(skb, &rt->u.dst); | ||
1214 | return 0; | 1254 | return 0; |
1215 | } | 1255 | } |
1216 | 1256 | ||
@@ -1407,7 +1447,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1407 | &netevent); | 1447 | &netevent); |
1408 | 1448 | ||
1409 | rt_del(hash, rth); | 1449 | rt_del(hash, rth); |
1410 | if (!rt_intern_hash(hash, rt, &rt)) | 1450 | if (!rt_intern_hash(hash, rt, &rt, NULL)) |
1411 | ip_rt_put(rt); | 1451 | ip_rt_put(rt); |
1412 | goto do_next; | 1452 | goto do_next; |
1413 | } | 1453 | } |
@@ -1473,7 +1513,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) | |||
1473 | 1513 | ||
1474 | void ip_rt_send_redirect(struct sk_buff *skb) | 1514 | void ip_rt_send_redirect(struct sk_buff *skb) |
1475 | { | 1515 | { |
1476 | struct rtable *rt = skb->rtable; | 1516 | struct rtable *rt = skb_rtable(skb); |
1477 | struct in_device *in_dev = in_dev_get(rt->u.dst.dev); | 1517 | struct in_device *in_dev = in_dev_get(rt->u.dst.dev); |
1478 | 1518 | ||
1479 | if (!in_dev) | 1519 | if (!in_dev) |
@@ -1521,7 +1561,7 @@ out: | |||
1521 | 1561 | ||
1522 | static int ip_error(struct sk_buff *skb) | 1562 | static int ip_error(struct sk_buff *skb) |
1523 | { | 1563 | { |
1524 | struct rtable *rt = skb->rtable; | 1564 | struct rtable *rt = skb_rtable(skb); |
1525 | unsigned long now; | 1565 | unsigned long now; |
1526 | int code; | 1566 | int code; |
1527 | 1567 | ||
@@ -1698,7 +1738,7 @@ static void ipv4_link_failure(struct sk_buff *skb) | |||
1698 | 1738 | ||
1699 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); | 1739 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); |
1700 | 1740 | ||
1701 | rt = skb->rtable; | 1741 | rt = skb_rtable(skb); |
1702 | if (rt) | 1742 | if (rt) |
1703 | dst_set_expires(&rt->u.dst, 0); | 1743 | dst_set_expires(&rt->u.dst, 0); |
1704 | } | 1744 | } |
@@ -1858,7 +1898,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1858 | 1898 | ||
1859 | in_dev_put(in_dev); | 1899 | in_dev_put(in_dev); |
1860 | hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); | 1900 | hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); |
1861 | return rt_intern_hash(hash, rth, &skb->rtable); | 1901 | return rt_intern_hash(hash, rth, NULL, skb); |
1862 | 1902 | ||
1863 | e_nobufs: | 1903 | e_nobufs: |
1864 | in_dev_put(in_dev); | 1904 | in_dev_put(in_dev); |
@@ -2019,7 +2059,7 @@ static int ip_mkroute_input(struct sk_buff *skb, | |||
2019 | /* put it into the cache */ | 2059 | /* put it into the cache */ |
2020 | hash = rt_hash(daddr, saddr, fl->iif, | 2060 | hash = rt_hash(daddr, saddr, fl->iif, |
2021 | rt_genid(dev_net(rth->u.dst.dev))); | 2061 | rt_genid(dev_net(rth->u.dst.dev))); |
2022 | return rt_intern_hash(hash, rth, &skb->rtable); | 2062 | return rt_intern_hash(hash, rth, NULL, skb); |
2023 | } | 2063 | } |
2024 | 2064 | ||
2025 | /* | 2065 | /* |
@@ -2175,7 +2215,7 @@ local_input: | |||
2175 | } | 2215 | } |
2176 | rth->rt_type = res.type; | 2216 | rth->rt_type = res.type; |
2177 | hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); | 2217 | hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); |
2178 | err = rt_intern_hash(hash, rth, &skb->rtable); | 2218 | err = rt_intern_hash(hash, rth, NULL, skb); |
2179 | goto done; | 2219 | goto done; |
2180 | 2220 | ||
2181 | no_route: | 2221 | no_route: |
@@ -2244,7 +2284,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2244 | dst_use(&rth->u.dst, jiffies); | 2284 | dst_use(&rth->u.dst, jiffies); |
2245 | RT_CACHE_STAT_INC(in_hit); | 2285 | RT_CACHE_STAT_INC(in_hit); |
2246 | rcu_read_unlock(); | 2286 | rcu_read_unlock(); |
2247 | skb->rtable = rth; | 2287 | skb_dst_set(skb, &rth->u.dst); |
2248 | return 0; | 2288 | return 0; |
2249 | } | 2289 | } |
2250 | RT_CACHE_STAT_INC(in_hlist_search); | 2290 | RT_CACHE_STAT_INC(in_hlist_search); |
@@ -2420,7 +2460,7 @@ static int ip_mkroute_output(struct rtable **rp, | |||
2420 | if (err == 0) { | 2460 | if (err == 0) { |
2421 | hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, | 2461 | hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, |
2422 | rt_genid(dev_net(dev_out))); | 2462 | rt_genid(dev_net(dev_out))); |
2423 | err = rt_intern_hash(hash, rth, rp); | 2463 | err = rt_intern_hash(hash, rth, rp, NULL); |
2424 | } | 2464 | } |
2425 | 2465 | ||
2426 | return err; | 2466 | return err; |
@@ -2763,7 +2803,7 @@ static int rt_fill_info(struct net *net, | |||
2763 | struct sk_buff *skb, u32 pid, u32 seq, int event, | 2803 | struct sk_buff *skb, u32 pid, u32 seq, int event, |
2764 | int nowait, unsigned int flags) | 2804 | int nowait, unsigned int flags) |
2765 | { | 2805 | { |
2766 | struct rtable *rt = skb->rtable; | 2806 | struct rtable *rt = skb_rtable(skb); |
2767 | struct rtmsg *r; | 2807 | struct rtmsg *r; |
2768 | struct nlmsghdr *nlh; | 2808 | struct nlmsghdr *nlh; |
2769 | long expires; | 2809 | long expires; |
@@ -2907,7 +2947,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
2907 | err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); | 2947 | err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); |
2908 | local_bh_enable(); | 2948 | local_bh_enable(); |
2909 | 2949 | ||
2910 | rt = skb->rtable; | 2950 | rt = skb_rtable(skb); |
2911 | if (err == 0 && rt->u.dst.error) | 2951 | if (err == 0 && rt->u.dst.error) |
2912 | err = -rt->u.dst.error; | 2952 | err = -rt->u.dst.error; |
2913 | } else { | 2953 | } else { |
@@ -2927,7 +2967,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
2927 | if (err) | 2967 | if (err) |
2928 | goto errout_free; | 2968 | goto errout_free; |
2929 | 2969 | ||
2930 | skb->rtable = rt; | 2970 | skb_dst_set(skb, &rt->u.dst); |
2931 | if (rtm->rtm_flags & RTM_F_NOTIFY) | 2971 | if (rtm->rtm_flags & RTM_F_NOTIFY) |
2932 | rt->rt_flags |= RTCF_NOTIFY; | 2972 | rt->rt_flags |= RTCF_NOTIFY; |
2933 | 2973 | ||
@@ -2968,15 +3008,15 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
2968 | continue; | 3008 | continue; |
2969 | if (rt_is_expired(rt)) | 3009 | if (rt_is_expired(rt)) |
2970 | continue; | 3010 | continue; |
2971 | skb->dst = dst_clone(&rt->u.dst); | 3011 | skb_dst_set(skb, dst_clone(&rt->u.dst)); |
2972 | if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid, | 3012 | if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid, |
2973 | cb->nlh->nlmsg_seq, RTM_NEWROUTE, | 3013 | cb->nlh->nlmsg_seq, RTM_NEWROUTE, |
2974 | 1, NLM_F_MULTI) <= 0) { | 3014 | 1, NLM_F_MULTI) <= 0) { |
2975 | dst_release(xchg(&skb->dst, NULL)); | 3015 | skb_dst_drop(skb); |
2976 | rcu_read_unlock_bh(); | 3016 | rcu_read_unlock_bh(); |
2977 | goto done; | 3017 | goto done; |
2978 | } | 3018 | } |
2979 | dst_release(xchg(&skb->dst, NULL)); | 3019 | skb_dst_drop(skb); |
2980 | } | 3020 | } |
2981 | rcu_read_unlock_bh(); | 3021 | rcu_read_unlock_bh(); |
2982 | } | 3022 | } |
@@ -3390,6 +3430,8 @@ int __init ip_rt_init(void) | |||
3390 | /* All the timers, started at system startup tend | 3430 | /* All the timers, started at system startup tend |
3391 | to synchronize. Perturb it a bit. | 3431 | to synchronize. Perturb it a bit. |
3392 | */ | 3432 | */ |
3433 | INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func); | ||
3434 | expires_ljiffies = jiffies; | ||
3393 | schedule_delayed_work(&expires_work, | 3435 | schedule_delayed_work(&expires_work, |
3394 | net_random() % ip_rt_gc_interval + ip_rt_gc_interval); | 3436 | net_random() % ip_rt_gc_interval + ip_rt_gc_interval); |
3395 | 3437 | ||