about | summary | refs | log | tree | commit | diff | stats
path: root/net/ipv4/route.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r-- net/ipv4/route.c 305
1 files changed, 147 insertions, 158 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 5b1050a5d874..cb562fdd9b9a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -90,6 +90,7 @@
90#include <linux/jhash.h> 90#include <linux/jhash.h>
91#include <linux/rcupdate.h> 91#include <linux/rcupdate.h>
92#include <linux/times.h> 92#include <linux/times.h>
93#include <linux/slab.h>
93#include <net/dst.h> 94#include <net/dst.h>
94#include <net/net_namespace.h> 95#include <net/net_namespace.h>
95#include <net/protocol.h> 96#include <net/protocol.h>
@@ -146,7 +147,6 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
146static void ipv4_link_failure(struct sk_buff *skb); 147static void ipv4_link_failure(struct sk_buff *skb);
147static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); 148static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
148static int rt_garbage_collect(struct dst_ops *ops); 149static int rt_garbage_collect(struct dst_ops *ops);
149static void rt_emergency_hash_rebuild(struct net *net);
150 150
151 151
152static struct dst_ops ipv4_dst_ops = { 152static struct dst_ops ipv4_dst_ops = {
@@ -287,12 +287,12 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
287 if (!rt_hash_table[st->bucket].chain) 287 if (!rt_hash_table[st->bucket].chain)
288 continue; 288 continue;
289 rcu_read_lock_bh(); 289 rcu_read_lock_bh();
290 r = rcu_dereference(rt_hash_table[st->bucket].chain); 290 r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
291 while (r) { 291 while (r) {
292 if (dev_net(r->u.dst.dev) == seq_file_net(seq) && 292 if (dev_net(r->u.dst.dev) == seq_file_net(seq) &&
293 r->rt_genid == st->genid) 293 r->rt_genid == st->genid)
294 return r; 294 return r;
295 r = rcu_dereference(r->u.dst.rt_next); 295 r = rcu_dereference_bh(r->u.dst.rt_next);
296 } 296 }
297 rcu_read_unlock_bh(); 297 rcu_read_unlock_bh();
298 } 298 }
@@ -314,7 +314,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq,
314 rcu_read_lock_bh(); 314 rcu_read_lock_bh();
315 r = rt_hash_table[st->bucket].chain; 315 r = rt_hash_table[st->bucket].chain;
316 } 316 }
317 return rcu_dereference(r); 317 return rcu_dereference_bh(r);
318} 318}
319 319
320static struct rtable *rt_cache_get_next(struct seq_file *seq, 320static struct rtable *rt_cache_get_next(struct seq_file *seq,
@@ -513,43 +513,42 @@ static const struct file_operations rt_cpu_seq_fops = {
513}; 513};
514 514
515#ifdef CONFIG_NET_CLS_ROUTE 515#ifdef CONFIG_NET_CLS_ROUTE
516static int ip_rt_acct_read(char *buffer, char **start, off_t offset, 516static int rt_acct_proc_show(struct seq_file *m, void *v)
517 int length, int *eof, void *data) 517{
518{ 518 struct ip_rt_acct *dst, *src;
519 unsigned int i; 519 unsigned int i, j;
520 520
521 if ((offset & 3) || (length & 3)) 521 dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
522 return -EIO; 522 if (!dst)
523 523 return -ENOMEM;
524 if (offset >= sizeof(struct ip_rt_acct) * 256) { 524
525 *eof = 1; 525 for_each_possible_cpu(i) {
526 return 0; 526 src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
527 } 527 for (j = 0; j < 256; j++) {
528 528 dst[j].o_bytes += src[j].o_bytes;
529 if (offset + length >= sizeof(struct ip_rt_acct) * 256) { 529 dst[j].o_packets += src[j].o_packets;
530 length = sizeof(struct ip_rt_acct) * 256 - offset; 530 dst[j].i_bytes += src[j].i_bytes;
531 *eof = 1; 531 dst[j].i_packets += src[j].i_packets;
532 }
532 } 533 }
533 534
534 offset /= sizeof(u32); 535 seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
535 536 kfree(dst);
536 if (length > 0) { 537 return 0;
537 u32 *dst = (u32 *) buffer; 538}
538
539 *start = buffer;
540 memset(dst, 0, length);
541
542 for_each_possible_cpu(i) {
543 unsigned int j;
544 u32 *src;
545 539
546 src = ((u32 *) per_cpu_ptr(ip_rt_acct, i)) + offset; 540static int rt_acct_proc_open(struct inode *inode, struct file *file)
547 for (j = 0; j < length/4; j++) 541{
548 dst[j] += src[j]; 542 return single_open(file, rt_acct_proc_show, NULL);
549 }
550 }
551 return length;
552} 543}
544
545static const struct file_operations rt_acct_proc_fops = {
546 .owner = THIS_MODULE,
547 .open = rt_acct_proc_open,
548 .read = seq_read,
549 .llseek = seq_lseek,
550 .release = single_release,
551};
553#endif 552#endif
554 553
555static int __net_init ip_rt_do_proc_init(struct net *net) 554static int __net_init ip_rt_do_proc_init(struct net *net)
@@ -567,8 +566,7 @@ static int __net_init ip_rt_do_proc_init(struct net *net)
567 goto err2; 566 goto err2;
568 567
569#ifdef CONFIG_NET_CLS_ROUTE 568#ifdef CONFIG_NET_CLS_ROUTE
570 pde = create_proc_read_entry("rt_acct", 0, net->proc_net, 569 pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
571 ip_rt_acct_read, NULL);
572 if (!pde) 570 if (!pde)
573 goto err3; 571 goto err3;
574#endif 572#endif
@@ -588,7 +586,9 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net)
588{ 586{
589 remove_proc_entry("rt_cache", net->proc_net_stat); 587 remove_proc_entry("rt_cache", net->proc_net_stat);
590 remove_proc_entry("rt_cache", net->proc_net); 588 remove_proc_entry("rt_cache", net->proc_net);
589#ifdef CONFIG_NET_CLS_ROUTE
591 remove_proc_entry("rt_acct", net->proc_net); 590 remove_proc_entry("rt_acct", net->proc_net);
591#endif
592} 592}
593 593
594static struct pernet_operations ip_rt_proc_ops __net_initdata = { 594static struct pernet_operations ip_rt_proc_ops __net_initdata = {
@@ -703,7 +703,7 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
703 703
704static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) 704static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
705{ 705{
706 return dev_net(rt1->u.dst.dev) == dev_net(rt2->u.dst.dev); 706 return net_eq(dev_net(rt1->u.dst.dev), dev_net(rt2->u.dst.dev));
707} 707}
708 708
709static inline int rt_is_expired(struct rtable *rth) 709static inline int rt_is_expired(struct rtable *rth)
@@ -780,11 +780,30 @@ static void rt_do_flush(int process_context)
780#define FRACT_BITS 3 780#define FRACT_BITS 3
781#define ONE (1UL << FRACT_BITS) 781#define ONE (1UL << FRACT_BITS)
782 782
783/*
784 * Given a hash chain and an item in this hash chain,
785 * find if a previous entry has the same hash_inputs
786 * (but differs on tos, mark or oif)
787 * Returns 0 if an alias is found.
788 * Returns ONE if rth has no alias before itself.
789 */
790static int has_noalias(const struct rtable *head, const struct rtable *rth)
791{
792 const struct rtable *aux = head;
793
794 while (aux != rth) {
795 if (compare_hash_inputs(&aux->fl, &rth->fl))
796 return 0;
797 aux = aux->u.dst.rt_next;
798 }
799 return ONE;
800}
801
783static void rt_check_expire(void) 802static void rt_check_expire(void)
784{ 803{
785 static unsigned int rover; 804 static unsigned int rover;
786 unsigned int i = rover, goal; 805 unsigned int i = rover, goal;
787 struct rtable *rth, *aux, **rthp; 806 struct rtable *rth, **rthp;
788 unsigned long samples = 0; 807 unsigned long samples = 0;
789 unsigned long sum = 0, sum2 = 0; 808 unsigned long sum = 0, sum2 = 0;
790 unsigned long delta; 809 unsigned long delta;
@@ -835,15 +854,7 @@ nofree:
835 * attributes don't unfairly skew 854 * attributes don't unfairly skew
836 * the length computation 855 * the length computation
837 */ 856 */
838 for (aux = rt_hash_table[i].chain;;) { 857 length += has_noalias(rt_hash_table[i].chain, rth);
839 if (aux == rth) {
840 length += ONE;
841 break;
842 }
843 if (compare_hash_inputs(&aux->fl, &rth->fl))
844 break;
845 aux = aux->u.dst.rt_next;
846 }
847 continue; 858 continue;
848 } 859 }
849 } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) 860 } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
@@ -902,6 +913,12 @@ void rt_cache_flush(struct net *net, int delay)
902 rt_do_flush(!in_softirq()); 913 rt_do_flush(!in_softirq());
903} 914}
904 915
916/* Flush previous cache invalidated entries from the cache */
917void rt_cache_flush_batch(void)
918{
919 rt_do_flush(!in_softirq());
920}
921
905/* 922/*
906 * We change rt_genid and let gc do the cleanup 923 * We change rt_genid and let gc do the cleanup
907 */ 924 */
@@ -916,10 +933,8 @@ static void rt_secret_rebuild_oneshot(struct net *net)
916{ 933{
917 del_timer_sync(&net->ipv4.rt_secret_timer); 934 del_timer_sync(&net->ipv4.rt_secret_timer);
918 rt_cache_invalidate(net); 935 rt_cache_invalidate(net);
919 if (ip_rt_secret_interval) { 936 if (ip_rt_secret_interval)
920 net->ipv4.rt_secret_timer.expires += ip_rt_secret_interval; 937 mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
921 add_timer(&net->ipv4.rt_secret_timer);
922 }
923} 938}
924 939
925static void rt_emergency_hash_rebuild(struct net *net) 940static void rt_emergency_hash_rebuild(struct net *net)
@@ -1067,8 +1082,23 @@ work_done:
1067out: return 0; 1082out: return 0;
1068} 1083}
1069 1084
1085/*
1086 * Returns number of entries in a hash chain that have different hash_inputs
1087 */
1088static int slow_chain_length(const struct rtable *head)
1089{
1090 int length = 0;
1091 const struct rtable *rth = head;
1092
1093 while (rth) {
1094 length += has_noalias(head, rth);
1095 rth = rth->u.dst.rt_next;
1096 }
1097 return length >> FRACT_BITS;
1098}
1099
1070static int rt_intern_hash(unsigned hash, struct rtable *rt, 1100static int rt_intern_hash(unsigned hash, struct rtable *rt,
1071 struct rtable **rp, struct sk_buff *skb) 1101 struct rtable **rp, struct sk_buff *skb, int ifindex)
1072{ 1102{
1073 struct rtable *rth, **rthp; 1103 struct rtable *rth, **rthp;
1074 unsigned long now; 1104 unsigned long now;
@@ -1179,14 +1209,20 @@ restart:
1179 rt_free(cand); 1209 rt_free(cand);
1180 } 1210 }
1181 } else { 1211 } else {
1182 if (chain_length > rt_chain_length_max) { 1212 if (chain_length > rt_chain_length_max &&
1213 slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) {
1183 struct net *net = dev_net(rt->u.dst.dev); 1214 struct net *net = dev_net(rt->u.dst.dev);
1184 int num = ++net->ipv4.current_rt_cache_rebuild_count; 1215 int num = ++net->ipv4.current_rt_cache_rebuild_count;
1185 if (!rt_caching(dev_net(rt->u.dst.dev))) { 1216 if (!rt_caching(net)) {
1186 printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n", 1217 printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n",
1187 rt->u.dst.dev->name, num); 1218 rt->u.dst.dev->name, num);
1188 } 1219 }
1189 rt_emergency_hash_rebuild(dev_net(rt->u.dst.dev)); 1220 rt_emergency_hash_rebuild(net);
1221 spin_unlock_bh(rt_hash_lock_addr(hash));
1222
1223 hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
1224 ifindex, rt_genid(net));
1225 goto restart;
1190 } 1226 }
1191 } 1227 }
1192 1228
@@ -1346,9 +1382,9 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1346 return; 1382 return;
1347 1383
1348 net = dev_net(dev); 1384 net = dev_net(dev);
1349 if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) 1385 if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
1350 || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) 1386 ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
1351 || ipv4_is_zeronet(new_gw)) 1387 ipv4_is_zeronet(new_gw))
1352 goto reject_redirect; 1388 goto reject_redirect;
1353 1389
1354 if (!rt_caching(net)) 1390 if (!rt_caching(net))
@@ -1411,7 +1447,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1411 dev_hold(rt->u.dst.dev); 1447 dev_hold(rt->u.dst.dev);
1412 if (rt->idev) 1448 if (rt->idev)
1413 in_dev_hold(rt->idev); 1449 in_dev_hold(rt->idev);
1414 rt->u.dst.obsolete = 0; 1450 rt->u.dst.obsolete = -1;
1415 rt->u.dst.lastuse = jiffies; 1451 rt->u.dst.lastuse = jiffies;
1416 rt->u.dst.path = &rt->u.dst; 1452 rt->u.dst.path = &rt->u.dst;
1417 rt->u.dst.neighbour = NULL; 1453 rt->u.dst.neighbour = NULL;
@@ -1447,7 +1483,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1447 &netevent); 1483 &netevent);
1448 1484
1449 rt_del(hash, rth); 1485 rt_del(hash, rth);
1450 if (!rt_intern_hash(hash, rt, &rt, NULL)) 1486 if (!rt_intern_hash(hash, rt, &rt, NULL, rt->fl.oif))
1451 ip_rt_put(rt); 1487 ip_rt_put(rt);
1452 goto do_next; 1488 goto do_next;
1453 } 1489 }
@@ -1476,11 +1512,12 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1476 struct dst_entry *ret = dst; 1512 struct dst_entry *ret = dst;
1477 1513
1478 if (rt) { 1514 if (rt) {
1479 if (dst->obsolete) { 1515 if (dst->obsolete > 0) {
1480 ip_rt_put(rt); 1516 ip_rt_put(rt);
1481 ret = NULL; 1517 ret = NULL;
1482 } else if ((rt->rt_flags & RTCF_REDIRECTED) || 1518 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
1483 rt->u.dst.expires) { 1519 (rt->u.dst.expires &&
1520 time_after_eq(jiffies, rt->u.dst.expires))) {
1484 unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, 1521 unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
1485 rt->fl.oif, 1522 rt->fl.oif,
1486 rt_genid(dev_net(dst->dev))); 1523 rt_genid(dev_net(dst->dev)));
@@ -1628,9 +1665,6 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1628 __be32 daddr = iph->daddr; 1665 __be32 daddr = iph->daddr;
1629 unsigned short est_mtu = 0; 1666 unsigned short est_mtu = 0;
1630 1667
1631 if (ipv4_config.no_pmtu_disc)
1632 return 0;
1633
1634 for (k = 0; k < 2; k++) { 1668 for (k = 0; k < 2; k++) {
1635 for (i = 0; i < 2; i++) { 1669 for (i = 0; i < 2; i++) {
1636 unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], 1670 unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
@@ -1699,7 +1733,9 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1699 1733
1700static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) 1734static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1701{ 1735{
1702 return NULL; 1736 if (rt_is_expired((struct rtable *)dst))
1737 return NULL;
1738 return dst;
1703} 1739}
1704 1740
1705static void ipv4_dst_destroy(struct dst_entry *dst) 1741static void ipv4_dst_destroy(struct dst_entry *dst)
@@ -1861,7 +1897,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1861 if (!rth) 1897 if (!rth)
1862 goto e_nobufs; 1898 goto e_nobufs;
1863 1899
1864 rth->u.dst.output= ip_rt_bug; 1900 rth->u.dst.output = ip_rt_bug;
1901 rth->u.dst.obsolete = -1;
1865 1902
1866 atomic_set(&rth->u.dst.__refcnt, 1); 1903 atomic_set(&rth->u.dst.__refcnt, 1);
1867 rth->u.dst.flags= DST_HOST; 1904 rth->u.dst.flags= DST_HOST;
@@ -1900,7 +1937,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1900 1937
1901 in_dev_put(in_dev); 1938 in_dev_put(in_dev);
1902 hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); 1939 hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
1903 return rt_intern_hash(hash, rth, NULL, skb); 1940 return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex);
1904 1941
1905e_nobufs: 1942e_nobufs:
1906 in_dev_put(in_dev); 1943 in_dev_put(in_dev);
@@ -1987,8 +2024,13 @@ static int __mkroute_input(struct sk_buff *skb,
1987 if (skb->protocol != htons(ETH_P_IP)) { 2024 if (skb->protocol != htons(ETH_P_IP)) {
1988 /* Not IP (i.e. ARP). Do not create route, if it is 2025 /* Not IP (i.e. ARP). Do not create route, if it is
1989 * invalid for proxy arp. DNAT routes are always valid. 2026 * invalid for proxy arp. DNAT routes are always valid.
2027 *
2028 * Proxy arp feature have been extended to allow, ARP
2029 * replies back to the same interface, to support
2030 * Private VLAN switch technologies. See arp.c.
1990 */ 2031 */
1991 if (out_dev == in_dev) { 2032 if (out_dev == in_dev &&
2033 IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
1992 err = -EINVAL; 2034 err = -EINVAL;
1993 goto cleanup; 2035 goto cleanup;
1994 } 2036 }
@@ -2022,6 +2064,7 @@ static int __mkroute_input(struct sk_buff *skb,
2022 rth->fl.oif = 0; 2064 rth->fl.oif = 0;
2023 rth->rt_spec_dst= spec_dst; 2065 rth->rt_spec_dst= spec_dst;
2024 2066
2067 rth->u.dst.obsolete = -1;
2025 rth->u.dst.input = ip_forward; 2068 rth->u.dst.input = ip_forward;
2026 rth->u.dst.output = ip_output; 2069 rth->u.dst.output = ip_output;
2027 rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev)); 2070 rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev));
@@ -2061,7 +2104,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
2061 /* put it into the cache */ 2104 /* put it into the cache */
2062 hash = rt_hash(daddr, saddr, fl->iif, 2105 hash = rt_hash(daddr, saddr, fl->iif,
2063 rt_genid(dev_net(rth->u.dst.dev))); 2106 rt_genid(dev_net(rth->u.dst.dev)));
2064 return rt_intern_hash(hash, rth, NULL, skb); 2107 return rt_intern_hash(hash, rth, NULL, skb, fl->iif);
2065} 2108}
2066 2109
2067/* 2110/*
@@ -2186,6 +2229,7 @@ local_input:
2186 goto e_nobufs; 2229 goto e_nobufs;
2187 2230
2188 rth->u.dst.output= ip_rt_bug; 2231 rth->u.dst.output= ip_rt_bug;
2232 rth->u.dst.obsolete = -1;
2189 rth->rt_genid = rt_genid(net); 2233 rth->rt_genid = rt_genid(net);
2190 2234
2191 atomic_set(&rth->u.dst.__refcnt, 1); 2235 atomic_set(&rth->u.dst.__refcnt, 1);
@@ -2217,7 +2261,7 @@ local_input:
2217 } 2261 }
2218 rth->rt_type = res.type; 2262 rth->rt_type = res.type;
2219 hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); 2263 hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net));
2220 err = rt_intern_hash(hash, rth, NULL, skb); 2264 err = rt_intern_hash(hash, rth, NULL, skb, fl.iif);
2221 goto done; 2265 goto done;
2222 2266
2223no_route: 2267no_route:
@@ -2314,10 +2358,11 @@ skip_cache:
2314 ip_hdr(skb)->protocol); 2358 ip_hdr(skb)->protocol);
2315 if (our 2359 if (our
2316#ifdef CONFIG_IP_MROUTE 2360#ifdef CONFIG_IP_MROUTE
2317 || (!ipv4_is_local_multicast(daddr) && 2361 ||
2318 IN_DEV_MFORWARD(in_dev)) 2362 (!ipv4_is_local_multicast(daddr) &&
2363 IN_DEV_MFORWARD(in_dev))
2319#endif 2364#endif
2320 ) { 2365 ) {
2321 rcu_read_unlock(); 2366 rcu_read_unlock();
2322 return ip_route_input_mc(skb, daddr, saddr, 2367 return ip_route_input_mc(skb, daddr, saddr,
2323 tos, dev, our); 2368 tos, dev, our);
@@ -2411,6 +2456,7 @@ static int __mkroute_output(struct rtable **result,
2411 rth->rt_spec_dst= fl->fl4_src; 2456 rth->rt_spec_dst= fl->fl4_src;
2412 2457
2413 rth->u.dst.output=ip_output; 2458 rth->u.dst.output=ip_output;
2459 rth->u.dst.obsolete = -1;
2414 rth->rt_genid = rt_genid(dev_net(dev_out)); 2460 rth->rt_genid = rt_genid(dev_net(dev_out));
2415 2461
2416 RT_CACHE_STAT_INC(out_slow_tot); 2462 RT_CACHE_STAT_INC(out_slow_tot);
@@ -2462,7 +2508,7 @@ static int ip_mkroute_output(struct rtable **rp,
2462 if (err == 0) { 2508 if (err == 0) {
2463 hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, 2509 hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif,
2464 rt_genid(dev_net(dev_out))); 2510 rt_genid(dev_net(dev_out)));
2465 err = rt_intern_hash(hash, rth, rp, NULL); 2511 err = rt_intern_hash(hash, rth, rp, NULL, oldflp->oif);
2466 } 2512 }
2467 2513
2468 return err; 2514 return err;
@@ -2514,9 +2560,9 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2514 of another iface. --ANK 2560 of another iface. --ANK
2515 */ 2561 */
2516 2562
2517 if (oldflp->oif == 0 2563 if (oldflp->oif == 0 &&
2518 && (ipv4_is_multicast(oldflp->fl4_dst) || 2564 (ipv4_is_multicast(oldflp->fl4_dst) ||
2519 oldflp->fl4_dst == htonl(0xFFFFFFFF))) { 2565 oldflp->fl4_dst == htonl(0xFFFFFFFF))) {
2520 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ 2566 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2521 dev_out = ip_dev_find(net, oldflp->fl4_src); 2567 dev_out = ip_dev_find(net, oldflp->fl4_src);
2522 if (dev_out == NULL) 2568 if (dev_out == NULL)
@@ -2685,8 +2731,8 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
2685 hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); 2731 hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net));
2686 2732
2687 rcu_read_lock_bh(); 2733 rcu_read_lock_bh();
2688 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; 2734 for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth;
2689 rth = rcu_dereference(rth->u.dst.rt_next)) { 2735 rth = rcu_dereference_bh(rth->u.dst.rt_next)) {
2690 if (rth->fl.fl4_dst == flp->fl4_dst && 2736 if (rth->fl.fl4_dst == flp->fl4_dst &&
2691 rth->fl.fl4_src == flp->fl4_src && 2737 rth->fl.fl4_src == flp->fl4_src &&
2692 rth->fl.iif == 0 && 2738 rth->fl.iif == 0 &&
@@ -2855,7 +2901,7 @@ static int rt_fill_info(struct net *net,
2855 error = rt->u.dst.error; 2901 error = rt->u.dst.error;
2856 expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0; 2902 expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0;
2857 if (rt->peer) { 2903 if (rt->peer) {
2858 id = rt->peer->ip_id_count; 2904 id = atomic_read(&rt->peer->ip_id_count) & 0xffff;
2859 if (rt->peer->tcp_ts_stamp) { 2905 if (rt->peer->tcp_ts_stamp) {
2860 ts = rt->peer->tcp_ts; 2906 ts = rt->peer->tcp_ts;
2861 tsage = get_seconds() - rt->peer->tcp_ts_stamp; 2907 tsage = get_seconds() - rt->peer->tcp_ts_stamp;
@@ -3004,8 +3050,8 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
3004 if (!rt_hash_table[h].chain) 3050 if (!rt_hash_table[h].chain)
3005 continue; 3051 continue;
3006 rcu_read_lock_bh(); 3052 rcu_read_lock_bh();
3007 for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; 3053 for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt;
3008 rt = rcu_dereference(rt->u.dst.rt_next), idx++) { 3054 rt = rcu_dereference_bh(rt->u.dst.rt_next), idx++) {
3009 if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx) 3055 if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx)
3010 continue; 3056 continue;
3011 if (rt_is_expired(rt)) 3057 if (rt_is_expired(rt))
@@ -3056,23 +3102,6 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
3056 return -EINVAL; 3102 return -EINVAL;
3057} 3103}
3058 3104
3059static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
3060 void __user *oldval,
3061 size_t __user *oldlenp,
3062 void __user *newval,
3063 size_t newlen)
3064{
3065 int delay;
3066 struct net *net;
3067 if (newlen != sizeof(int))
3068 return -EINVAL;
3069 if (get_user(delay, (int __user *)newval))
3070 return -EFAULT;
3071 net = (struct net *)table->extra1;
3072 rt_cache_flush(net, delay);
3073 return 0;
3074}
3075
3076static void rt_secret_reschedule(int old) 3105static void rt_secret_reschedule(int old)
3077{ 3106{
3078 struct net *net; 3107 struct net *net;
@@ -3085,22 +3114,20 @@ static void rt_secret_reschedule(int old)
3085 rtnl_lock(); 3114 rtnl_lock();
3086 for_each_net(net) { 3115 for_each_net(net) {
3087 int deleted = del_timer_sync(&net->ipv4.rt_secret_timer); 3116 int deleted = del_timer_sync(&net->ipv4.rt_secret_timer);
3117 long time;
3088 3118
3089 if (!new) 3119 if (!new)
3090 continue; 3120 continue;
3091 3121
3092 if (deleted) { 3122 if (deleted) {
3093 long time = net->ipv4.rt_secret_timer.expires - jiffies; 3123 time = net->ipv4.rt_secret_timer.expires - jiffies;
3094 3124
3095 if (time <= 0 || (time += diff) <= 0) 3125 if (time <= 0 || (time += diff) <= 0)
3096 time = 0; 3126 time = 0;
3097
3098 net->ipv4.rt_secret_timer.expires = time;
3099 } else 3127 } else
3100 net->ipv4.rt_secret_timer.expires = new; 3128 time = new;
3101 3129
3102 net->ipv4.rt_secret_timer.expires += jiffies; 3130 mod_timer(&net->ipv4.rt_secret_timer, jiffies + time);
3103 add_timer(&net->ipv4.rt_secret_timer);
3104 } 3131 }
3105 rtnl_unlock(); 3132 rtnl_unlock();
3106} 3133}
@@ -3117,23 +3144,8 @@ static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write,
3117 return ret; 3144 return ret;
3118} 3145}
3119 3146
3120static int ipv4_sysctl_rt_secret_interval_strategy(ctl_table *table,
3121 void __user *oldval,
3122 size_t __user *oldlenp,
3123 void __user *newval,
3124 size_t newlen)
3125{
3126 int old = ip_rt_secret_interval;
3127 int ret = sysctl_jiffies(table, oldval, oldlenp, newval, newlen);
3128
3129 rt_secret_reschedule(old);
3130
3131 return ret;
3132}
3133
3134static ctl_table ipv4_route_table[] = { 3147static ctl_table ipv4_route_table[] = {
3135 { 3148 {
3136 .ctl_name = NET_IPV4_ROUTE_GC_THRESH,
3137 .procname = "gc_thresh", 3149 .procname = "gc_thresh",
3138 .data = &ipv4_dst_ops.gc_thresh, 3150 .data = &ipv4_dst_ops.gc_thresh,
3139 .maxlen = sizeof(int), 3151 .maxlen = sizeof(int),
@@ -3141,7 +3153,6 @@ static ctl_table ipv4_route_table[] = {
3141 .proc_handler = proc_dointvec, 3153 .proc_handler = proc_dointvec,
3142 }, 3154 },
3143 { 3155 {
3144 .ctl_name = NET_IPV4_ROUTE_MAX_SIZE,
3145 .procname = "max_size", 3156 .procname = "max_size",
3146 .data = &ip_rt_max_size, 3157 .data = &ip_rt_max_size,
3147 .maxlen = sizeof(int), 3158 .maxlen = sizeof(int),
@@ -3151,43 +3162,34 @@ static ctl_table ipv4_route_table[] = {
3151 { 3162 {
3152 /* Deprecated. Use gc_min_interval_ms */ 3163 /* Deprecated. Use gc_min_interval_ms */
3153 3164
3154 .ctl_name = NET_IPV4_ROUTE_GC_MIN_INTERVAL,
3155 .procname = "gc_min_interval", 3165 .procname = "gc_min_interval",
3156 .data = &ip_rt_gc_min_interval, 3166 .data = &ip_rt_gc_min_interval,
3157 .maxlen = sizeof(int), 3167 .maxlen = sizeof(int),
3158 .mode = 0644, 3168 .mode = 0644,
3159 .proc_handler = proc_dointvec_jiffies, 3169 .proc_handler = proc_dointvec_jiffies,
3160 .strategy = sysctl_jiffies,
3161 }, 3170 },
3162 { 3171 {
3163 .ctl_name = NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS,
3164 .procname = "gc_min_interval_ms", 3172 .procname = "gc_min_interval_ms",
3165 .data = &ip_rt_gc_min_interval, 3173 .data = &ip_rt_gc_min_interval,
3166 .maxlen = sizeof(int), 3174 .maxlen = sizeof(int),
3167 .mode = 0644, 3175 .mode = 0644,
3168 .proc_handler = proc_dointvec_ms_jiffies, 3176 .proc_handler = proc_dointvec_ms_jiffies,
3169 .strategy = sysctl_ms_jiffies,
3170 }, 3177 },
3171 { 3178 {
3172 .ctl_name = NET_IPV4_ROUTE_GC_TIMEOUT,
3173 .procname = "gc_timeout", 3179 .procname = "gc_timeout",
3174 .data = &ip_rt_gc_timeout, 3180 .data = &ip_rt_gc_timeout,
3175 .maxlen = sizeof(int), 3181 .maxlen = sizeof(int),
3176 .mode = 0644, 3182 .mode = 0644,
3177 .proc_handler = proc_dointvec_jiffies, 3183 .proc_handler = proc_dointvec_jiffies,
3178 .strategy = sysctl_jiffies,
3179 }, 3184 },
3180 { 3185 {
3181 .ctl_name = NET_IPV4_ROUTE_GC_INTERVAL,
3182 .procname = "gc_interval", 3186 .procname = "gc_interval",
3183 .data = &ip_rt_gc_interval, 3187 .data = &ip_rt_gc_interval,
3184 .maxlen = sizeof(int), 3188 .maxlen = sizeof(int),
3185 .mode = 0644, 3189 .mode = 0644,
3186 .proc_handler = proc_dointvec_jiffies, 3190 .proc_handler = proc_dointvec_jiffies,
3187 .strategy = sysctl_jiffies,
3188 }, 3191 },
3189 { 3192 {
3190 .ctl_name = NET_IPV4_ROUTE_REDIRECT_LOAD,
3191 .procname = "redirect_load", 3193 .procname = "redirect_load",
3192 .data = &ip_rt_redirect_load, 3194 .data = &ip_rt_redirect_load,
3193 .maxlen = sizeof(int), 3195 .maxlen = sizeof(int),
@@ -3195,7 +3197,6 @@ static ctl_table ipv4_route_table[] = {
3195 .proc_handler = proc_dointvec, 3197 .proc_handler = proc_dointvec,
3196 }, 3198 },
3197 { 3199 {
3198 .ctl_name = NET_IPV4_ROUTE_REDIRECT_NUMBER,
3199 .procname = "redirect_number", 3200 .procname = "redirect_number",
3200 .data = &ip_rt_redirect_number, 3201 .data = &ip_rt_redirect_number,
3201 .maxlen = sizeof(int), 3202 .maxlen = sizeof(int),
@@ -3203,7 +3204,6 @@ static ctl_table ipv4_route_table[] = {
3203 .proc_handler = proc_dointvec, 3204 .proc_handler = proc_dointvec,
3204 }, 3205 },
3205 { 3206 {
3206 .ctl_name = NET_IPV4_ROUTE_REDIRECT_SILENCE,
3207 .procname = "redirect_silence", 3207 .procname = "redirect_silence",
3208 .data = &ip_rt_redirect_silence, 3208 .data = &ip_rt_redirect_silence,
3209 .maxlen = sizeof(int), 3209 .maxlen = sizeof(int),
@@ -3211,7 +3211,6 @@ static ctl_table ipv4_route_table[] = {
3211 .proc_handler = proc_dointvec, 3211 .proc_handler = proc_dointvec,
3212 }, 3212 },
3213 { 3213 {
3214 .ctl_name = NET_IPV4_ROUTE_ERROR_COST,
3215 .procname = "error_cost", 3214 .procname = "error_cost",
3216 .data = &ip_rt_error_cost, 3215 .data = &ip_rt_error_cost,
3217 .maxlen = sizeof(int), 3216 .maxlen = sizeof(int),
@@ -3219,7 +3218,6 @@ static ctl_table ipv4_route_table[] = {
3219 .proc_handler = proc_dointvec, 3218 .proc_handler = proc_dointvec,
3220 }, 3219 },
3221 { 3220 {
3222 .ctl_name = NET_IPV4_ROUTE_ERROR_BURST,
3223 .procname = "error_burst", 3221 .procname = "error_burst",
3224 .data = &ip_rt_error_burst, 3222 .data = &ip_rt_error_burst,
3225 .maxlen = sizeof(int), 3223 .maxlen = sizeof(int),
@@ -3227,7 +3225,6 @@ static ctl_table ipv4_route_table[] = {
3227 .proc_handler = proc_dointvec, 3225 .proc_handler = proc_dointvec,
3228 }, 3226 },
3229 { 3227 {
3230 .ctl_name = NET_IPV4_ROUTE_GC_ELASTICITY,
3231 .procname = "gc_elasticity", 3228 .procname = "gc_elasticity",
3232 .data = &ip_rt_gc_elasticity, 3229 .data = &ip_rt_gc_elasticity,
3233 .maxlen = sizeof(int), 3230 .maxlen = sizeof(int),
@@ -3235,16 +3232,13 @@ static ctl_table ipv4_route_table[] = {
3235 .proc_handler = proc_dointvec, 3232 .proc_handler = proc_dointvec,
3236 }, 3233 },
3237 { 3234 {
3238 .ctl_name = NET_IPV4_ROUTE_MTU_EXPIRES,
3239 .procname = "mtu_expires", 3235 .procname = "mtu_expires",
3240 .data = &ip_rt_mtu_expires, 3236 .data = &ip_rt_mtu_expires,
3241 .maxlen = sizeof(int), 3237 .maxlen = sizeof(int),
3242 .mode = 0644, 3238 .mode = 0644,
3243 .proc_handler = proc_dointvec_jiffies, 3239 .proc_handler = proc_dointvec_jiffies,
3244 .strategy = sysctl_jiffies,
3245 }, 3240 },
3246 { 3241 {
3247 .ctl_name = NET_IPV4_ROUTE_MIN_PMTU,
3248 .procname = "min_pmtu", 3242 .procname = "min_pmtu",
3249 .data = &ip_rt_min_pmtu, 3243 .data = &ip_rt_min_pmtu,
3250 .maxlen = sizeof(int), 3244 .maxlen = sizeof(int),
@@ -3252,7 +3246,6 @@ static ctl_table ipv4_route_table[] = {
3252 .proc_handler = proc_dointvec, 3246 .proc_handler = proc_dointvec,
3253 }, 3247 },
3254 { 3248 {
3255 .ctl_name = NET_IPV4_ROUTE_MIN_ADVMSS,
3256 .procname = "min_adv_mss", 3249 .procname = "min_adv_mss",
3257 .data = &ip_rt_min_advmss, 3250 .data = &ip_rt_min_advmss,
3258 .maxlen = sizeof(int), 3251 .maxlen = sizeof(int),
@@ -3260,50 +3253,46 @@ static ctl_table ipv4_route_table[] = {
3260 .proc_handler = proc_dointvec, 3253 .proc_handler = proc_dointvec,
3261 }, 3254 },
3262 { 3255 {
3263 .ctl_name = NET_IPV4_ROUTE_SECRET_INTERVAL,
3264 .procname = "secret_interval", 3256 .procname = "secret_interval",
3265 .data = &ip_rt_secret_interval, 3257 .data = &ip_rt_secret_interval,
3266 .maxlen = sizeof(int), 3258 .maxlen = sizeof(int),
3267 .mode = 0644, 3259 .mode = 0644,
3268 .proc_handler = ipv4_sysctl_rt_secret_interval, 3260 .proc_handler = ipv4_sysctl_rt_secret_interval,
3269 .strategy = ipv4_sysctl_rt_secret_interval_strategy,
3270 }, 3261 },
3271 { .ctl_name = 0 } 3262 { }
3272}; 3263};
3273 3264
3274static struct ctl_table empty[1]; 3265static struct ctl_table empty[1];
3275 3266
3276static struct ctl_table ipv4_skeleton[] = 3267static struct ctl_table ipv4_skeleton[] =
3277{ 3268{
3278 { .procname = "route", .ctl_name = NET_IPV4_ROUTE, 3269 { .procname = "route",
3279 .mode = 0555, .child = ipv4_route_table}, 3270 .mode = 0555, .child = ipv4_route_table},
3280 { .procname = "neigh", .ctl_name = NET_IPV4_NEIGH, 3271 { .procname = "neigh",
3281 .mode = 0555, .child = empty}, 3272 .mode = 0555, .child = empty},
3282 { } 3273 { }
3283}; 3274};
3284 3275
3285static __net_initdata struct ctl_path ipv4_path[] = { 3276static __net_initdata struct ctl_path ipv4_path[] = {
3286 { .procname = "net", .ctl_name = CTL_NET, }, 3277 { .procname = "net", },
3287 { .procname = "ipv4", .ctl_name = NET_IPV4, }, 3278 { .procname = "ipv4", },
3288 { }, 3279 { },
3289}; 3280};
3290 3281
3291static struct ctl_table ipv4_route_flush_table[] = { 3282static struct ctl_table ipv4_route_flush_table[] = {
3292 { 3283 {
3293 .ctl_name = NET_IPV4_ROUTE_FLUSH,
3294 .procname = "flush", 3284 .procname = "flush",
3295 .maxlen = sizeof(int), 3285 .maxlen = sizeof(int),
3296 .mode = 0200, 3286 .mode = 0200,
3297 .proc_handler = ipv4_sysctl_rtcache_flush, 3287 .proc_handler = ipv4_sysctl_rtcache_flush,
3298 .strategy = ipv4_sysctl_rtcache_flush_strategy,
3299 }, 3288 },
3300 { .ctl_name = 0 }, 3289 { },
3301}; 3290};
3302 3291
3303static __net_initdata struct ctl_path ipv4_route_path[] = { 3292static __net_initdata struct ctl_path ipv4_route_path[] = {
3304 { .procname = "net", .ctl_name = CTL_NET, }, 3293 { .procname = "net", },
3305 { .procname = "ipv4", .ctl_name = NET_IPV4, }, 3294 { .procname = "ipv4", },
3306 { .procname = "route", .ctl_name = NET_IPV4_ROUTE, }, 3295 { .procname = "route", },
3307 { }, 3296 { },
3308}; 3297};
3309 3298
@@ -3312,7 +3301,7 @@ static __net_init int sysctl_route_net_init(struct net *net)
3312 struct ctl_table *tbl; 3301 struct ctl_table *tbl;
3313 3302
3314 tbl = ipv4_route_flush_table; 3303 tbl = ipv4_route_flush_table;
3315 if (net != &init_net) { 3304 if (!net_eq(net, &init_net)) {
3316 tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL); 3305 tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
3317 if (tbl == NULL) 3306 if (tbl == NULL)
3318 goto err_dup; 3307 goto err_dup;
@@ -3380,7 +3369,7 @@ static __net_initdata struct pernet_operations rt_secret_timer_ops = {
3380 3369
3381 3370
3382#ifdef CONFIG_NET_CLS_ROUTE 3371#ifdef CONFIG_NET_CLS_ROUTE
3383struct ip_rt_acct *ip_rt_acct __read_mostly; 3372struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
3384#endif /* CONFIG_NET_CLS_ROUTE */ 3373#endif /* CONFIG_NET_CLS_ROUTE */
3385 3374
3386static __initdata unsigned long rhash_entries; 3375static __initdata unsigned long rhash_entries;