aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/route.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r--net/ipv4/route.c278
1 files changed, 155 insertions, 123 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 7b5e8e1d94be..780e9484c825 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -118,21 +118,19 @@
118#define RT_GC_TIMEOUT (300*HZ) 118#define RT_GC_TIMEOUT (300*HZ)
119 119
120static int ip_rt_max_size; 120static int ip_rt_max_size;
121static int ip_rt_gc_timeout = RT_GC_TIMEOUT; 121static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
122static int ip_rt_gc_interval = 60 * HZ; 122static int ip_rt_gc_interval __read_mostly = 60 * HZ;
123static int ip_rt_gc_min_interval = HZ / 2; 123static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
124static int ip_rt_redirect_number = 9; 124static int ip_rt_redirect_number __read_mostly = 9;
125static int ip_rt_redirect_load = HZ / 50; 125static int ip_rt_redirect_load __read_mostly = HZ / 50;
126static int ip_rt_redirect_silence = ((HZ / 50) << (9 + 1)); 126static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
127static int ip_rt_error_cost = HZ; 127static int ip_rt_error_cost __read_mostly = HZ;
128static int ip_rt_error_burst = 5 * HZ; 128static int ip_rt_error_burst __read_mostly = 5 * HZ;
129static int ip_rt_gc_elasticity = 8; 129static int ip_rt_gc_elasticity __read_mostly = 8;
130static int ip_rt_mtu_expires = 10 * 60 * HZ; 130static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
131static int ip_rt_min_pmtu = 512 + 20 + 20; 131static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
132static int ip_rt_min_advmss = 256; 132static int ip_rt_min_advmss __read_mostly = 256;
133static int ip_rt_secret_interval = 10 * 60 * HZ; 133static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ;
134
135#define RTprint(a...) printk(KERN_DEBUG a)
136 134
137static void rt_worker_func(struct work_struct *work); 135static void rt_worker_func(struct work_struct *work);
138static DECLARE_DELAYED_WORK(expires_work, rt_worker_func); 136static DECLARE_DELAYED_WORK(expires_work, rt_worker_func);
@@ -252,40 +250,41 @@ static inline void rt_hash_lock_init(void)
252} 250}
253#endif 251#endif
254 252
255static struct rt_hash_bucket *rt_hash_table; 253static struct rt_hash_bucket *rt_hash_table __read_mostly;
256static unsigned rt_hash_mask; 254static unsigned rt_hash_mask __read_mostly;
257static unsigned int rt_hash_log; 255static unsigned int rt_hash_log __read_mostly;
258static atomic_t rt_genid; 256static atomic_t rt_genid __read_mostly;
259 257
260static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); 258static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
261#define RT_CACHE_STAT_INC(field) \ 259#define RT_CACHE_STAT_INC(field) \
262 (__raw_get_cpu_var(rt_cache_stat).field++) 260 (__raw_get_cpu_var(rt_cache_stat).field++)
263 261
264static unsigned int rt_hash_code(u32 daddr, u32 saddr) 262static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx)
265{ 263{
266 return jhash_2words(daddr, saddr, atomic_read(&rt_genid)) 264 return jhash_3words((__force u32)(__be32)(daddr),
265 (__force u32)(__be32)(saddr),
266 idx, atomic_read(&rt_genid))
267 & rt_hash_mask; 267 & rt_hash_mask;
268} 268}
269 269
270#define rt_hash(daddr, saddr, idx) \
271 rt_hash_code((__force u32)(__be32)(daddr),\
272 (__force u32)(__be32)(saddr) ^ ((idx) << 5))
273
274#ifdef CONFIG_PROC_FS 270#ifdef CONFIG_PROC_FS
275struct rt_cache_iter_state { 271struct rt_cache_iter_state {
272 struct seq_net_private p;
276 int bucket; 273 int bucket;
277 int genid; 274 int genid;
278}; 275};
279 276
280static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st) 277static struct rtable *rt_cache_get_first(struct seq_file *seq)
281{ 278{
279 struct rt_cache_iter_state *st = seq->private;
282 struct rtable *r = NULL; 280 struct rtable *r = NULL;
283 281
284 for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { 282 for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
285 rcu_read_lock_bh(); 283 rcu_read_lock_bh();
286 r = rcu_dereference(rt_hash_table[st->bucket].chain); 284 r = rcu_dereference(rt_hash_table[st->bucket].chain);
287 while (r) { 285 while (r) {
288 if (r->rt_genid == st->genid) 286 if (dev_net(r->u.dst.dev) == seq_file_net(seq) &&
287 r->rt_genid == st->genid)
289 return r; 288 return r;
290 r = rcu_dereference(r->u.dst.rt_next); 289 r = rcu_dereference(r->u.dst.rt_next);
291 } 290 }
@@ -294,8 +293,10 @@ static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st)
294 return r; 293 return r;
295} 294}
296 295
297static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct rtable *r) 296static struct rtable *__rt_cache_get_next(struct seq_file *seq,
297 struct rtable *r)
298{ 298{
299 struct rt_cache_iter_state *st = seq->private;
299 r = r->u.dst.rt_next; 300 r = r->u.dst.rt_next;
300 while (!r) { 301 while (!r) {
301 rcu_read_unlock_bh(); 302 rcu_read_unlock_bh();
@@ -307,25 +308,34 @@ static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct r
307 return rcu_dereference(r); 308 return rcu_dereference(r);
308} 309}
309 310
310static struct rtable *rt_cache_get_idx(struct rt_cache_iter_state *st, loff_t pos) 311static struct rtable *rt_cache_get_next(struct seq_file *seq,
312 struct rtable *r)
313{
314 struct rt_cache_iter_state *st = seq->private;
315 while ((r = __rt_cache_get_next(seq, r)) != NULL) {
316 if (dev_net(r->u.dst.dev) != seq_file_net(seq))
317 continue;
318 if (r->rt_genid == st->genid)
319 break;
320 }
321 return r;
322}
323
324static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos)
311{ 325{
312 struct rtable *r = rt_cache_get_first(st); 326 struct rtable *r = rt_cache_get_first(seq);
313 327
314 if (r) 328 if (r)
315 while (pos && (r = rt_cache_get_next(st, r))) { 329 while (pos && (r = rt_cache_get_next(seq, r)))
316 if (r->rt_genid != st->genid)
317 continue;
318 --pos; 330 --pos;
319 }
320 return pos ? NULL : r; 331 return pos ? NULL : r;
321} 332}
322 333
323static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) 334static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
324{ 335{
325 struct rt_cache_iter_state *st = seq->private; 336 struct rt_cache_iter_state *st = seq->private;
326
327 if (*pos) 337 if (*pos)
328 return rt_cache_get_idx(st, *pos - 1); 338 return rt_cache_get_idx(seq, *pos - 1);
329 st->genid = atomic_read(&rt_genid); 339 st->genid = atomic_read(&rt_genid);
330 return SEQ_START_TOKEN; 340 return SEQ_START_TOKEN;
331} 341}
@@ -333,12 +343,11 @@ static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
333static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) 343static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
334{ 344{
335 struct rtable *r; 345 struct rtable *r;
336 struct rt_cache_iter_state *st = seq->private;
337 346
338 if (v == SEQ_START_TOKEN) 347 if (v == SEQ_START_TOKEN)
339 r = rt_cache_get_first(st); 348 r = rt_cache_get_first(seq);
340 else 349 else
341 r = rt_cache_get_next(st, v); 350 r = rt_cache_get_next(seq, v);
342 ++*pos; 351 ++*pos;
343 return r; 352 return r;
344} 353}
@@ -390,7 +399,7 @@ static const struct seq_operations rt_cache_seq_ops = {
390 399
391static int rt_cache_seq_open(struct inode *inode, struct file *file) 400static int rt_cache_seq_open(struct inode *inode, struct file *file)
392{ 401{
393 return seq_open_private(file, &rt_cache_seq_ops, 402 return seq_open_net(inode, file, &rt_cache_seq_ops,
394 sizeof(struct rt_cache_iter_state)); 403 sizeof(struct rt_cache_iter_state));
395} 404}
396 405
@@ -399,7 +408,7 @@ static const struct file_operations rt_cache_seq_fops = {
399 .open = rt_cache_seq_open, 408 .open = rt_cache_seq_open,
400 .read = seq_read, 409 .read = seq_read,
401 .llseek = seq_lseek, 410 .llseek = seq_lseek,
402 .release = seq_release_private, 411 .release = seq_release_net,
403}; 412};
404 413
405 414
@@ -533,7 +542,7 @@ static int ip_rt_acct_read(char *buffer, char **start, off_t offset,
533} 542}
534#endif 543#endif
535 544
536static __init int ip_rt_proc_init(struct net *net) 545static int __net_init ip_rt_do_proc_init(struct net *net)
537{ 546{
538 struct proc_dir_entry *pde; 547 struct proc_dir_entry *pde;
539 548
@@ -564,25 +573,43 @@ err2:
564err1: 573err1:
565 return -ENOMEM; 574 return -ENOMEM;
566} 575}
576
577static void __net_exit ip_rt_do_proc_exit(struct net *net)
578{
579 remove_proc_entry("rt_cache", net->proc_net_stat);
580 remove_proc_entry("rt_cache", net->proc_net);
581 remove_proc_entry("rt_acct", net->proc_net);
582}
583
584static struct pernet_operations ip_rt_proc_ops __net_initdata = {
585 .init = ip_rt_do_proc_init,
586 .exit = ip_rt_do_proc_exit,
587};
588
589static int __init ip_rt_proc_init(void)
590{
591 return register_pernet_subsys(&ip_rt_proc_ops);
592}
593
567#else 594#else
568static inline int ip_rt_proc_init(struct net *net) 595static inline int ip_rt_proc_init(void)
569{ 596{
570 return 0; 597 return 0;
571} 598}
572#endif /* CONFIG_PROC_FS */ 599#endif /* CONFIG_PROC_FS */
573 600
574static __inline__ void rt_free(struct rtable *rt) 601static inline void rt_free(struct rtable *rt)
575{ 602{
576 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); 603 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
577} 604}
578 605
579static __inline__ void rt_drop(struct rtable *rt) 606static inline void rt_drop(struct rtable *rt)
580{ 607{
581 ip_rt_put(rt); 608 ip_rt_put(rt);
582 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); 609 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
583} 610}
584 611
585static __inline__ int rt_fast_clean(struct rtable *rth) 612static inline int rt_fast_clean(struct rtable *rth)
586{ 613{
587 /* Kill broadcast/multicast entries very aggresively, if they 614 /* Kill broadcast/multicast entries very aggresively, if they
588 collide in hash table with more useful entries */ 615 collide in hash table with more useful entries */
@@ -590,7 +617,7 @@ static __inline__ int rt_fast_clean(struct rtable *rth)
590 rth->fl.iif && rth->u.dst.rt_next; 617 rth->fl.iif && rth->u.dst.rt_next;
591} 618}
592 619
593static __inline__ int rt_valuable(struct rtable *rth) 620static inline int rt_valuable(struct rtable *rth)
594{ 621{
595 return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || 622 return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
596 rth->u.dst.expires; 623 rth->u.dst.expires;
@@ -652,7 +679,7 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
652 679
653static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) 680static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
654{ 681{
655 return rt1->u.dst.dev->nd_net == rt2->u.dst.dev->nd_net; 682 return dev_net(rt1->u.dst.dev) == dev_net(rt2->u.dst.dev);
656} 683}
657 684
658/* 685/*
@@ -1032,10 +1059,10 @@ restart:
1032#if RT_CACHE_DEBUG >= 2 1059#if RT_CACHE_DEBUG >= 2
1033 if (rt->u.dst.rt_next) { 1060 if (rt->u.dst.rt_next) {
1034 struct rtable *trt; 1061 struct rtable *trt;
1035 printk(KERN_DEBUG "rt_cache @%02x: %u.%u.%u.%u", hash, 1062 printk(KERN_DEBUG "rt_cache @%02x: " NIPQUAD_FMT, hash,
1036 NIPQUAD(rt->rt_dst)); 1063 NIPQUAD(rt->rt_dst));
1037 for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) 1064 for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next)
1038 printk(" . %u.%u.%u.%u", NIPQUAD(trt->rt_dst)); 1065 printk(" . " NIPQUAD_FMT, NIPQUAD(trt->rt_dst));
1039 printk("\n"); 1066 printk("\n");
1040 } 1067 }
1041#endif 1068#endif
@@ -1131,10 +1158,12 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1131 __be32 skeys[2] = { saddr, 0 }; 1158 __be32 skeys[2] = { saddr, 0 };
1132 int ikeys[2] = { dev->ifindex, 0 }; 1159 int ikeys[2] = { dev->ifindex, 0 };
1133 struct netevent_redirect netevent; 1160 struct netevent_redirect netevent;
1161 struct net *net;
1134 1162
1135 if (!in_dev) 1163 if (!in_dev)
1136 return; 1164 return;
1137 1165
1166 net = dev_net(dev);
1138 if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) 1167 if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev)
1139 || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) 1168 || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw)
1140 || ipv4_is_zeronet(new_gw)) 1169 || ipv4_is_zeronet(new_gw))
@@ -1146,7 +1175,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1146 if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev)) 1175 if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
1147 goto reject_redirect; 1176 goto reject_redirect;
1148 } else { 1177 } else {
1149 if (inet_addr_type(&init_net, new_gw) != RTN_UNICAST) 1178 if (inet_addr_type(net, new_gw) != RTN_UNICAST)
1150 goto reject_redirect; 1179 goto reject_redirect;
1151 } 1180 }
1152 1181
@@ -1164,7 +1193,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1164 rth->fl.fl4_src != skeys[i] || 1193 rth->fl.fl4_src != skeys[i] ||
1165 rth->fl.oif != ikeys[k] || 1194 rth->fl.oif != ikeys[k] ||
1166 rth->fl.iif != 0 || 1195 rth->fl.iif != 0 ||
1167 rth->rt_genid != atomic_read(&rt_genid)) { 1196 rth->rt_genid != atomic_read(&rt_genid) ||
1197 !net_eq(dev_net(rth->u.dst.dev), net)) {
1168 rthp = &rth->u.dst.rt_next; 1198 rthp = &rth->u.dst.rt_next;
1169 continue; 1199 continue;
1170 } 1200 }
@@ -1245,9 +1275,9 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1245reject_redirect: 1275reject_redirect:
1246#ifdef CONFIG_IP_ROUTE_VERBOSE 1276#ifdef CONFIG_IP_ROUTE_VERBOSE
1247 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) 1277 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
1248 printk(KERN_INFO "Redirect from %u.%u.%u.%u on %s about " 1278 printk(KERN_INFO "Redirect from " NIPQUAD_FMT " on %s about "
1249 "%u.%u.%u.%u ignored.\n" 1279 NIPQUAD_FMT " ignored.\n"
1250 " Advised path = %u.%u.%u.%u -> %u.%u.%u.%u\n", 1280 " Advised path = " NIPQUAD_FMT " -> " NIPQUAD_FMT "\n",
1251 NIPQUAD(old_gw), dev->name, NIPQUAD(new_gw), 1281 NIPQUAD(old_gw), dev->name, NIPQUAD(new_gw),
1252 NIPQUAD(saddr), NIPQUAD(daddr)); 1282 NIPQUAD(saddr), NIPQUAD(daddr));
1253#endif 1283#endif
@@ -1256,7 +1286,7 @@ reject_redirect:
1256 1286
1257static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) 1287static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1258{ 1288{
1259 struct rtable *rt = (struct rtable*)dst; 1289 struct rtable *rt = (struct rtable *)dst;
1260 struct dst_entry *ret = dst; 1290 struct dst_entry *ret = dst;
1261 1291
1262 if (rt) { 1292 if (rt) {
@@ -1269,7 +1299,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1269 rt->fl.oif); 1299 rt->fl.oif);
1270#if RT_CACHE_DEBUG >= 1 1300#if RT_CACHE_DEBUG >= 1
1271 printk(KERN_DEBUG "ipv4_negative_advice: redirect to " 1301 printk(KERN_DEBUG "ipv4_negative_advice: redirect to "
1272 "%u.%u.%u.%u/%02x dropped\n", 1302 NIPQUAD_FMT "/%02x dropped\n",
1273 NIPQUAD(rt->rt_dst), rt->fl.fl4_tos); 1303 NIPQUAD(rt->rt_dst), rt->fl.fl4_tos);
1274#endif 1304#endif
1275 rt_del(hash, rt); 1305 rt_del(hash, rt);
@@ -1297,7 +1327,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1297 1327
1298void ip_rt_send_redirect(struct sk_buff *skb) 1328void ip_rt_send_redirect(struct sk_buff *skb)
1299{ 1329{
1300 struct rtable *rt = (struct rtable*)skb->dst; 1330 struct rtable *rt = skb->rtable;
1301 struct in_device *in_dev = in_dev_get(rt->u.dst.dev); 1331 struct in_device *in_dev = in_dev_get(rt->u.dst.dev);
1302 1332
1303 if (!in_dev) 1333 if (!in_dev)
@@ -1334,8 +1364,8 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1334 if (IN_DEV_LOG_MARTIANS(in_dev) && 1364 if (IN_DEV_LOG_MARTIANS(in_dev) &&
1335 rt->u.dst.rate_tokens == ip_rt_redirect_number && 1365 rt->u.dst.rate_tokens == ip_rt_redirect_number &&
1336 net_ratelimit()) 1366 net_ratelimit())
1337 printk(KERN_WARNING "host %u.%u.%u.%u/if%d ignores " 1367 printk(KERN_WARNING "host " NIPQUAD_FMT "/if%d ignores "
1338 "redirects for %u.%u.%u.%u to %u.%u.%u.%u.\n", 1368 "redirects for " NIPQUAD_FMT " to " NIPQUAD_FMT ".\n",
1339 NIPQUAD(rt->rt_src), rt->rt_iif, 1369 NIPQUAD(rt->rt_src), rt->rt_iif,
1340 NIPQUAD(rt->rt_dst), NIPQUAD(rt->rt_gateway)); 1370 NIPQUAD(rt->rt_dst), NIPQUAD(rt->rt_gateway));
1341#endif 1371#endif
@@ -1346,7 +1376,7 @@ out:
1346 1376
1347static int ip_error(struct sk_buff *skb) 1377static int ip_error(struct sk_buff *skb)
1348{ 1378{
1349 struct rtable *rt = (struct rtable*)skb->dst; 1379 struct rtable *rt = skb->rtable;
1350 unsigned long now; 1380 unsigned long now;
1351 int code; 1381 int code;
1352 1382
@@ -1388,7 +1418,7 @@ out: kfree_skb(skb);
1388static const unsigned short mtu_plateau[] = 1418static const unsigned short mtu_plateau[] =
1389{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 }; 1419{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 };
1390 1420
1391static __inline__ unsigned short guess_mtu(unsigned short old_mtu) 1421static inline unsigned short guess_mtu(unsigned short old_mtu)
1392{ 1422{
1393 int i; 1423 int i;
1394 1424
@@ -1423,7 +1453,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1423 rth->rt_src == iph->saddr && 1453 rth->rt_src == iph->saddr &&
1424 rth->fl.iif == 0 && 1454 rth->fl.iif == 0 &&
1425 !(dst_metric_locked(&rth->u.dst, RTAX_MTU)) && 1455 !(dst_metric_locked(&rth->u.dst, RTAX_MTU)) &&
1426 rth->u.dst.dev->nd_net == net && 1456 net_eq(dev_net(rth->u.dst.dev), net) &&
1427 rth->rt_genid == atomic_read(&rt_genid)) { 1457 rth->rt_genid == atomic_read(&rt_genid)) {
1428 unsigned short mtu = new_mtu; 1458 unsigned short mtu = new_mtu;
1429 1459
@@ -1499,9 +1529,9 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
1499{ 1529{
1500 struct rtable *rt = (struct rtable *) dst; 1530 struct rtable *rt = (struct rtable *) dst;
1501 struct in_device *idev = rt->idev; 1531 struct in_device *idev = rt->idev;
1502 if (dev != dev->nd_net->loopback_dev && idev && idev->dev == dev) { 1532 if (dev != dev_net(dev)->loopback_dev && idev && idev->dev == dev) {
1503 struct in_device *loopback_idev = 1533 struct in_device *loopback_idev =
1504 in_dev_get(dev->nd_net->loopback_dev); 1534 in_dev_get(dev_net(dev)->loopback_dev);
1505 if (loopback_idev) { 1535 if (loopback_idev) {
1506 rt->idev = loopback_idev; 1536 rt->idev = loopback_idev;
1507 in_dev_put(idev); 1537 in_dev_put(idev);
@@ -1515,14 +1545,14 @@ static void ipv4_link_failure(struct sk_buff *skb)
1515 1545
1516 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); 1546 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
1517 1547
1518 rt = (struct rtable *) skb->dst; 1548 rt = skb->rtable;
1519 if (rt) 1549 if (rt)
1520 dst_set_expires(&rt->u.dst, 0); 1550 dst_set_expires(&rt->u.dst, 0);
1521} 1551}
1522 1552
1523static int ip_rt_bug(struct sk_buff *skb) 1553static int ip_rt_bug(struct sk_buff *skb)
1524{ 1554{
1525 printk(KERN_DEBUG "ip_rt_bug: %u.%u.%u.%u -> %u.%u.%u.%u, %s\n", 1555 printk(KERN_DEBUG "ip_rt_bug: " NIPQUAD_FMT " -> " NIPQUAD_FMT ", %s\n",
1526 NIPQUAD(ip_hdr(skb)->saddr), NIPQUAD(ip_hdr(skb)->daddr), 1556 NIPQUAD(ip_hdr(skb)->saddr), NIPQUAD(ip_hdr(skb)->daddr),
1527 skb->dev ? skb->dev->name : "?"); 1557 skb->dev ? skb->dev->name : "?");
1528 kfree_skb(skb); 1558 kfree_skb(skb);
@@ -1545,7 +1575,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
1545 1575
1546 if (rt->fl.iif == 0) 1576 if (rt->fl.iif == 0)
1547 src = rt->rt_src; 1577 src = rt->rt_src;
1548 else if (fib_lookup(rt->u.dst.dev->nd_net, &rt->fl, &res) == 0) { 1578 else if (fib_lookup(dev_net(rt->u.dst.dev), &rt->fl, &res) == 0) {
1549 src = FIB_RES_PREFSRC(res); 1579 src = FIB_RES_PREFSRC(res);
1550 fib_res_put(&res); 1580 fib_res_put(&res);
1551 } else 1581 } else
@@ -1675,7 +1705,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1675 1705
1676 in_dev_put(in_dev); 1706 in_dev_put(in_dev);
1677 hash = rt_hash(daddr, saddr, dev->ifindex); 1707 hash = rt_hash(daddr, saddr, dev->ifindex);
1678 return rt_intern_hash(hash, rth, (struct rtable**) &skb->dst); 1708 return rt_intern_hash(hash, rth, &skb->rtable);
1679 1709
1680e_nobufs: 1710e_nobufs:
1681 in_dev_put(in_dev); 1711 in_dev_put(in_dev);
@@ -1700,8 +1730,8 @@ static void ip_handle_martian_source(struct net_device *dev,
1700 * RFC1812 recommendation, if source is martian, 1730 * RFC1812 recommendation, if source is martian,
1701 * the only hint is MAC header. 1731 * the only hint is MAC header.
1702 */ 1732 */
1703 printk(KERN_WARNING "martian source %u.%u.%u.%u from " 1733 printk(KERN_WARNING "martian source " NIPQUAD_FMT " from "
1704 "%u.%u.%u.%u, on dev %s\n", 1734 NIPQUAD_FMT", on dev %s\n",
1705 NIPQUAD(daddr), NIPQUAD(saddr), dev->name); 1735 NIPQUAD(daddr), NIPQUAD(saddr), dev->name);
1706 if (dev->hard_header_len && skb_mac_header_was_set(skb)) { 1736 if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
1707 int i; 1737 int i;
@@ -1718,11 +1748,11 @@ static void ip_handle_martian_source(struct net_device *dev,
1718#endif 1748#endif
1719} 1749}
1720 1750
1721static inline int __mkroute_input(struct sk_buff *skb, 1751static int __mkroute_input(struct sk_buff *skb,
1722 struct fib_result* res, 1752 struct fib_result *res,
1723 struct in_device *in_dev, 1753 struct in_device *in_dev,
1724 __be32 daddr, __be32 saddr, u32 tos, 1754 __be32 daddr, __be32 saddr, u32 tos,
1725 struct rtable **result) 1755 struct rtable **result)
1726{ 1756{
1727 1757
1728 struct rtable *rth; 1758 struct rtable *rth;
@@ -1814,11 +1844,11 @@ static inline int __mkroute_input(struct sk_buff *skb,
1814 return err; 1844 return err;
1815} 1845}
1816 1846
1817static inline int ip_mkroute_input(struct sk_buff *skb, 1847static int ip_mkroute_input(struct sk_buff *skb,
1818 struct fib_result* res, 1848 struct fib_result *res,
1819 const struct flowi *fl, 1849 const struct flowi *fl,
1820 struct in_device *in_dev, 1850 struct in_device *in_dev,
1821 __be32 daddr, __be32 saddr, u32 tos) 1851 __be32 daddr, __be32 saddr, u32 tos)
1822{ 1852{
1823 struct rtable* rth = NULL; 1853 struct rtable* rth = NULL;
1824 int err; 1854 int err;
@@ -1836,7 +1866,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb,
1836 1866
1837 /* put it into the cache */ 1867 /* put it into the cache */
1838 hash = rt_hash(daddr, saddr, fl->iif); 1868 hash = rt_hash(daddr, saddr, fl->iif);
1839 return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); 1869 return rt_intern_hash(hash, rth, &skb->rtable);
1840} 1870}
1841 1871
1842/* 1872/*
@@ -1869,7 +1899,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1869 __be32 spec_dst; 1899 __be32 spec_dst;
1870 int err = -EINVAL; 1900 int err = -EINVAL;
1871 int free_res = 0; 1901 int free_res = 0;
1872 struct net * net = dev->nd_net; 1902 struct net * net = dev_net(dev);
1873 1903
1874 /* IP on this device is disabled. */ 1904 /* IP on this device is disabled. */
1875 1905
@@ -1992,7 +2022,7 @@ local_input:
1992 } 2022 }
1993 rth->rt_type = res.type; 2023 rth->rt_type = res.type;
1994 hash = rt_hash(daddr, saddr, fl.iif); 2024 hash = rt_hash(daddr, saddr, fl.iif);
1995 err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); 2025 err = rt_intern_hash(hash, rth, &skb->rtable);
1996 goto done; 2026 goto done;
1997 2027
1998no_route: 2028no_route:
@@ -2010,8 +2040,8 @@ martian_destination:
2010 RT_CACHE_STAT_INC(in_martian_dst); 2040 RT_CACHE_STAT_INC(in_martian_dst);
2011#ifdef CONFIG_IP_ROUTE_VERBOSE 2041#ifdef CONFIG_IP_ROUTE_VERBOSE
2012 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) 2042 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
2013 printk(KERN_WARNING "martian destination %u.%u.%u.%u from " 2043 printk(KERN_WARNING "martian destination " NIPQUAD_FMT " from "
2014 "%u.%u.%u.%u, dev %s\n", 2044 NIPQUAD_FMT ", dev %s\n",
2015 NIPQUAD(daddr), NIPQUAD(saddr), dev->name); 2045 NIPQUAD(daddr), NIPQUAD(saddr), dev->name);
2016#endif 2046#endif
2017 2047
@@ -2040,25 +2070,25 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2040 int iif = dev->ifindex; 2070 int iif = dev->ifindex;
2041 struct net *net; 2071 struct net *net;
2042 2072
2043 net = dev->nd_net; 2073 net = dev_net(dev);
2044 tos &= IPTOS_RT_MASK; 2074 tos &= IPTOS_RT_MASK;
2045 hash = rt_hash(daddr, saddr, iif); 2075 hash = rt_hash(daddr, saddr, iif);
2046 2076
2047 rcu_read_lock(); 2077 rcu_read_lock();
2048 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; 2078 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
2049 rth = rcu_dereference(rth->u.dst.rt_next)) { 2079 rth = rcu_dereference(rth->u.dst.rt_next)) {
2050 if (rth->fl.fl4_dst == daddr && 2080 if (((rth->fl.fl4_dst ^ daddr) |
2051 rth->fl.fl4_src == saddr && 2081 (rth->fl.fl4_src ^ saddr) |
2052 rth->fl.iif == iif && 2082 (rth->fl.iif ^ iif) |
2053 rth->fl.oif == 0 && 2083 rth->fl.oif |
2084 (rth->fl.fl4_tos ^ tos)) == 0 &&
2054 rth->fl.mark == skb->mark && 2085 rth->fl.mark == skb->mark &&
2055 rth->fl.fl4_tos == tos && 2086 net_eq(dev_net(rth->u.dst.dev), net) &&
2056 rth->u.dst.dev->nd_net == net &&
2057 rth->rt_genid == atomic_read(&rt_genid)) { 2087 rth->rt_genid == atomic_read(&rt_genid)) {
2058 dst_use(&rth->u.dst, jiffies); 2088 dst_use(&rth->u.dst, jiffies);
2059 RT_CACHE_STAT_INC(in_hit); 2089 RT_CACHE_STAT_INC(in_hit);
2060 rcu_read_unlock(); 2090 rcu_read_unlock();
2061 skb->dst = (struct dst_entry*)rth; 2091 skb->rtable = rth;
2062 return 0; 2092 return 0;
2063 } 2093 }
2064 RT_CACHE_STAT_INC(in_hlist_search); 2094 RT_CACHE_STAT_INC(in_hlist_search);
@@ -2100,12 +2130,12 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2100 return ip_route_input_slow(skb, daddr, saddr, tos, dev); 2130 return ip_route_input_slow(skb, daddr, saddr, tos, dev);
2101} 2131}
2102 2132
2103static inline int __mkroute_output(struct rtable **result, 2133static int __mkroute_output(struct rtable **result,
2104 struct fib_result* res, 2134 struct fib_result *res,
2105 const struct flowi *fl, 2135 const struct flowi *fl,
2106 const struct flowi *oldflp, 2136 const struct flowi *oldflp,
2107 struct net_device *dev_out, 2137 struct net_device *dev_out,
2108 unsigned flags) 2138 unsigned flags)
2109{ 2139{
2110 struct rtable *rth; 2140 struct rtable *rth;
2111 struct in_device *in_dev; 2141 struct in_device *in_dev;
@@ -2220,12 +2250,12 @@ static inline int __mkroute_output(struct rtable **result,
2220 return err; 2250 return err;
2221} 2251}
2222 2252
2223static inline int ip_mkroute_output(struct rtable **rp, 2253static int ip_mkroute_output(struct rtable **rp,
2224 struct fib_result* res, 2254 struct fib_result *res,
2225 const struct flowi *fl, 2255 const struct flowi *fl,
2226 const struct flowi *oldflp, 2256 const struct flowi *oldflp,
2227 struct net_device *dev_out, 2257 struct net_device *dev_out,
2228 unsigned flags) 2258 unsigned flags)
2229{ 2259{
2230 struct rtable *rth = NULL; 2260 struct rtable *rth = NULL;
2231 int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); 2261 int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
@@ -2455,7 +2485,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
2455 rth->fl.mark == flp->mark && 2485 rth->fl.mark == flp->mark &&
2456 !((rth->fl.fl4_tos ^ flp->fl4_tos) & 2486 !((rth->fl.fl4_tos ^ flp->fl4_tos) &
2457 (IPTOS_RT_MASK | RTO_ONLINK)) && 2487 (IPTOS_RT_MASK | RTO_ONLINK)) &&
2458 rth->u.dst.dev->nd_net == net && 2488 net_eq(dev_net(rth->u.dst.dev), net) &&
2459 rth->rt_genid == atomic_read(&rt_genid)) { 2489 rth->rt_genid == atomic_read(&rt_genid)) {
2460 dst_use(&rth->u.dst, jiffies); 2490 dst_use(&rth->u.dst, jiffies);
2461 RT_CACHE_STAT_INC(out_hit); 2491 RT_CACHE_STAT_INC(out_hit);
@@ -2487,7 +2517,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
2487}; 2517};
2488 2518
2489 2519
2490static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock *sk) 2520static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp)
2491{ 2521{
2492 struct rtable *ort = *rp; 2522 struct rtable *ort = *rp;
2493 struct rtable *rt = (struct rtable *) 2523 struct rtable *rt = (struct rtable *)
@@ -2547,7 +2577,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
2547 err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, 2577 err = __xfrm_lookup((struct dst_entry **)rp, flp, sk,
2548 flags ? XFRM_LOOKUP_WAIT : 0); 2578 flags ? XFRM_LOOKUP_WAIT : 0);
2549 if (err == -EREMOTE) 2579 if (err == -EREMOTE)
2550 err = ipv4_dst_blackhole(rp, flp, sk); 2580 err = ipv4_dst_blackhole(rp, flp);
2551 2581
2552 return err; 2582 return err;
2553 } 2583 }
@@ -2565,7 +2595,7 @@ int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp)
2565static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 2595static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
2566 int nowait, unsigned int flags) 2596 int nowait, unsigned int flags)
2567{ 2597{
2568 struct rtable *rt = (struct rtable*)skb->dst; 2598 struct rtable *rt = skb->rtable;
2569 struct rtmsg *r; 2599 struct rtmsg *r;
2570 struct nlmsghdr *nlh; 2600 struct nlmsghdr *nlh;
2571 long expires; 2601 long expires;
@@ -2658,7 +2688,7 @@ nla_put_failure:
2658 2688
2659static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) 2689static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2660{ 2690{
2661 struct net *net = in_skb->sk->sk_net; 2691 struct net *net = sock_net(in_skb->sk);
2662 struct rtmsg *rtm; 2692 struct rtmsg *rtm;
2663 struct nlattr *tb[RTA_MAX+1]; 2693 struct nlattr *tb[RTA_MAX+1];
2664 struct rtable *rt = NULL; 2694 struct rtable *rt = NULL;
@@ -2668,9 +2698,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2668 int err; 2698 int err;
2669 struct sk_buff *skb; 2699 struct sk_buff *skb;
2670 2700
2671 if (net != &init_net)
2672 return -EINVAL;
2673
2674 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); 2701 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
2675 if (err < 0) 2702 if (err < 0)
2676 goto errout; 2703 goto errout;
@@ -2700,7 +2727,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2700 if (iif) { 2727 if (iif) {
2701 struct net_device *dev; 2728 struct net_device *dev;
2702 2729
2703 dev = __dev_get_by_index(&init_net, iif); 2730 dev = __dev_get_by_index(net, iif);
2704 if (dev == NULL) { 2731 if (dev == NULL) {
2705 err = -ENODEV; 2732 err = -ENODEV;
2706 goto errout_free; 2733 goto errout_free;
@@ -2712,7 +2739,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2712 err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); 2739 err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
2713 local_bh_enable(); 2740 local_bh_enable();
2714 2741
2715 rt = (struct rtable*) skb->dst; 2742 rt = skb->rtable;
2716 if (err == 0 && rt->u.dst.error) 2743 if (err == 0 && rt->u.dst.error)
2717 err = -rt->u.dst.error; 2744 err = -rt->u.dst.error;
2718 } else { 2745 } else {
@@ -2726,22 +2753,22 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2726 }, 2753 },
2727 .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, 2754 .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
2728 }; 2755 };
2729 err = ip_route_output_key(&init_net, &rt, &fl); 2756 err = ip_route_output_key(net, &rt, &fl);
2730 } 2757 }
2731 2758
2732 if (err) 2759 if (err)
2733 goto errout_free; 2760 goto errout_free;
2734 2761
2735 skb->dst = &rt->u.dst; 2762 skb->rtable = rt;
2736 if (rtm->rtm_flags & RTM_F_NOTIFY) 2763 if (rtm->rtm_flags & RTM_F_NOTIFY)
2737 rt->rt_flags |= RTCF_NOTIFY; 2764 rt->rt_flags |= RTCF_NOTIFY;
2738 2765
2739 err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 2766 err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
2740 RTM_NEWROUTE, 0, 0); 2767 RTM_NEWROUTE, 0, 0);
2741 if (err <= 0) 2768 if (err <= 0)
2742 goto errout_free; 2769 goto errout_free;
2743 2770
2744 err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); 2771 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2745errout: 2772errout:
2746 return err; 2773 return err;
2747 2774
@@ -2755,6 +2782,9 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
2755 struct rtable *rt; 2782 struct rtable *rt;
2756 int h, s_h; 2783 int h, s_h;
2757 int idx, s_idx; 2784 int idx, s_idx;
2785 struct net *net;
2786
2787 net = sock_net(skb->sk);
2758 2788
2759 s_h = cb->args[0]; 2789 s_h = cb->args[0];
2760 if (s_h < 0) 2790 if (s_h < 0)
@@ -2764,7 +2794,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
2764 rcu_read_lock_bh(); 2794 rcu_read_lock_bh();
2765 for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; 2795 for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt;
2766 rt = rcu_dereference(rt->u.dst.rt_next), idx++) { 2796 rt = rcu_dereference(rt->u.dst.rt_next), idx++) {
2767 if (idx < s_idx) 2797 if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx)
2768 continue; 2798 continue;
2769 if (rt->rt_genid != atomic_read(&rt_genid)) 2799 if (rt->rt_genid != atomic_read(&rt_genid))
2770 continue; 2800 continue;
@@ -3028,7 +3058,9 @@ int __init ip_rt_init(void)
3028 devinet_init(); 3058 devinet_init();
3029 ip_fib_init(); 3059 ip_fib_init();
3030 3060
3031 setup_timer(&rt_secret_timer, rt_secret_rebuild, 0); 3061 rt_secret_timer.function = rt_secret_rebuild;
3062 rt_secret_timer.data = 0;
3063 init_timer_deferrable(&rt_secret_timer);
3032 3064
3033 /* All the timers, started at system startup tend 3065 /* All the timers, started at system startup tend
3034 to synchronize. Perturb it a bit. 3066 to synchronize. Perturb it a bit.
@@ -3040,7 +3072,7 @@ int __init ip_rt_init(void)
3040 ip_rt_secret_interval; 3072 ip_rt_secret_interval;
3041 add_timer(&rt_secret_timer); 3073 add_timer(&rt_secret_timer);
3042 3074
3043 if (ip_rt_proc_init(&init_net)) 3075 if (ip_rt_proc_init())
3044 printk(KERN_ERR "Unable to create route proc files\n"); 3076 printk(KERN_ERR "Unable to create route proc files\n");
3045#ifdef CONFIG_XFRM 3077#ifdef CONFIG_XFRM
3046 xfrm_init(); 3078 xfrm_init();