diff options
Diffstat (limited to 'net/ipv4/route.c')
| -rw-r--r-- | net/ipv4/route.c | 759 |
1 files changed, 359 insertions, 400 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d62b05d33384..ac6559cb54f9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
| @@ -90,6 +90,7 @@ | |||
| 90 | #include <linux/jhash.h> | 90 | #include <linux/jhash.h> |
| 91 | #include <linux/rcupdate.h> | 91 | #include <linux/rcupdate.h> |
| 92 | #include <linux/times.h> | 92 | #include <linux/times.h> |
| 93 | #include <linux/slab.h> | ||
| 93 | #include <net/dst.h> | 94 | #include <net/dst.h> |
| 94 | #include <net/net_namespace.h> | 95 | #include <net/net_namespace.h> |
| 95 | #include <net/protocol.h> | 96 | #include <net/protocol.h> |
| @@ -128,7 +129,6 @@ static int ip_rt_gc_elasticity __read_mostly = 8; | |||
| 128 | static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; | 129 | static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; |
| 129 | static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; | 130 | static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; |
| 130 | static int ip_rt_min_advmss __read_mostly = 256; | 131 | static int ip_rt_min_advmss __read_mostly = 256; |
| 131 | static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ; | ||
| 132 | static int rt_chain_length_max __read_mostly = 20; | 132 | static int rt_chain_length_max __read_mostly = 20; |
| 133 | 133 | ||
| 134 | static struct delayed_work expires_work; | 134 | static struct delayed_work expires_work; |
| @@ -146,7 +146,6 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); | |||
| 146 | static void ipv4_link_failure(struct sk_buff *skb); | 146 | static void ipv4_link_failure(struct sk_buff *skb); |
| 147 | static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); | 147 | static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); |
| 148 | static int rt_garbage_collect(struct dst_ops *ops); | 148 | static int rt_garbage_collect(struct dst_ops *ops); |
| 149 | static void rt_emergency_hash_rebuild(struct net *net); | ||
| 150 | 149 | ||
| 151 | 150 | ||
| 152 | static struct dst_ops ipv4_dst_ops = { | 151 | static struct dst_ops ipv4_dst_ops = { |
| @@ -254,14 +253,12 @@ static unsigned rt_hash_mask __read_mostly; | |||
| 254 | static unsigned int rt_hash_log __read_mostly; | 253 | static unsigned int rt_hash_log __read_mostly; |
| 255 | 254 | ||
| 256 | static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); | 255 | static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); |
| 257 | #define RT_CACHE_STAT_INC(field) \ | 256 | #define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field) |
| 258 | (__raw_get_cpu_var(rt_cache_stat).field++) | ||
| 259 | 257 | ||
| 260 | static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx, | 258 | static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx, |
| 261 | int genid) | 259 | int genid) |
| 262 | { | 260 | { |
| 263 | return jhash_3words((__force u32)(__be32)(daddr), | 261 | return jhash_3words((__force u32)daddr, (__force u32)saddr, |
| 264 | (__force u32)(__be32)(saddr), | ||
| 265 | idx, genid) | 262 | idx, genid) |
| 266 | & rt_hash_mask; | 263 | & rt_hash_mask; |
| 267 | } | 264 | } |
| @@ -287,12 +284,12 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq) | |||
| 287 | if (!rt_hash_table[st->bucket].chain) | 284 | if (!rt_hash_table[st->bucket].chain) |
| 288 | continue; | 285 | continue; |
| 289 | rcu_read_lock_bh(); | 286 | rcu_read_lock_bh(); |
| 290 | r = rcu_dereference(rt_hash_table[st->bucket].chain); | 287 | r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); |
| 291 | while (r) { | 288 | while (r) { |
| 292 | if (dev_net(r->u.dst.dev) == seq_file_net(seq) && | 289 | if (dev_net(r->dst.dev) == seq_file_net(seq) && |
| 293 | r->rt_genid == st->genid) | 290 | r->rt_genid == st->genid) |
| 294 | return r; | 291 | return r; |
| 295 | r = rcu_dereference(r->u.dst.rt_next); | 292 | r = rcu_dereference_bh(r->dst.rt_next); |
| 296 | } | 293 | } |
| 297 | rcu_read_unlock_bh(); | 294 | rcu_read_unlock_bh(); |
| 298 | } | 295 | } |
| @@ -304,7 +301,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, | |||
| 304 | { | 301 | { |
| 305 | struct rt_cache_iter_state *st = seq->private; | 302 | struct rt_cache_iter_state *st = seq->private; |
| 306 | 303 | ||
| 307 | r = r->u.dst.rt_next; | 304 | r = r->dst.rt_next; |
| 308 | while (!r) { | 305 | while (!r) { |
| 309 | rcu_read_unlock_bh(); | 306 | rcu_read_unlock_bh(); |
| 310 | do { | 307 | do { |
| @@ -314,7 +311,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, | |||
| 314 | rcu_read_lock_bh(); | 311 | rcu_read_lock_bh(); |
| 315 | r = rt_hash_table[st->bucket].chain; | 312 | r = rt_hash_table[st->bucket].chain; |
| 316 | } | 313 | } |
| 317 | return rcu_dereference(r); | 314 | return rcu_dereference_bh(r); |
| 318 | } | 315 | } |
| 319 | 316 | ||
| 320 | static struct rtable *rt_cache_get_next(struct seq_file *seq, | 317 | static struct rtable *rt_cache_get_next(struct seq_file *seq, |
| @@ -322,7 +319,7 @@ static struct rtable *rt_cache_get_next(struct seq_file *seq, | |||
| 322 | { | 319 | { |
| 323 | struct rt_cache_iter_state *st = seq->private; | 320 | struct rt_cache_iter_state *st = seq->private; |
| 324 | while ((r = __rt_cache_get_next(seq, r)) != NULL) { | 321 | while ((r = __rt_cache_get_next(seq, r)) != NULL) { |
| 325 | if (dev_net(r->u.dst.dev) != seq_file_net(seq)) | 322 | if (dev_net(r->dst.dev) != seq_file_net(seq)) |
| 326 | continue; | 323 | continue; |
| 327 | if (r->rt_genid == st->genid) | 324 | if (r->rt_genid == st->genid) |
| 328 | break; | 325 | break; |
| @@ -378,20 +375,21 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) | |||
| 378 | struct rtable *r = v; | 375 | struct rtable *r = v; |
| 379 | int len; | 376 | int len; |
| 380 | 377 | ||
| 381 | seq_printf(seq, "%s\t%08lX\t%08lX\t%8X\t%d\t%u\t%d\t" | 378 | seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" |
| 382 | "%08lX\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", | 379 | "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", |
| 383 | r->u.dst.dev ? r->u.dst.dev->name : "*", | 380 | r->dst.dev ? r->dst.dev->name : "*", |
| 384 | (unsigned long)r->rt_dst, (unsigned long)r->rt_gateway, | 381 | (__force u32)r->rt_dst, |
| 385 | r->rt_flags, atomic_read(&r->u.dst.__refcnt), | 382 | (__force u32)r->rt_gateway, |
| 386 | r->u.dst.__use, 0, (unsigned long)r->rt_src, | 383 | r->rt_flags, atomic_read(&r->dst.__refcnt), |
| 387 | (dst_metric(&r->u.dst, RTAX_ADVMSS) ? | 384 | r->dst.__use, 0, (__force u32)r->rt_src, |
| 388 | (int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0), | 385 | (dst_metric(&r->dst, RTAX_ADVMSS) ? |
| 389 | dst_metric(&r->u.dst, RTAX_WINDOW), | 386 | (int)dst_metric(&r->dst, RTAX_ADVMSS) + 40 : 0), |
| 390 | (int)((dst_metric(&r->u.dst, RTAX_RTT) >> 3) + | 387 | dst_metric(&r->dst, RTAX_WINDOW), |
| 391 | dst_metric(&r->u.dst, RTAX_RTTVAR)), | 388 | (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + |
| 389 | dst_metric(&r->dst, RTAX_RTTVAR)), | ||
| 392 | r->fl.fl4_tos, | 390 | r->fl.fl4_tos, |
| 393 | r->u.dst.hh ? atomic_read(&r->u.dst.hh->hh_refcnt) : -1, | 391 | r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, |
| 394 | r->u.dst.hh ? (r->u.dst.hh->hh_output == | 392 | r->dst.hh ? (r->dst.hh->hh_output == |
| 395 | dev_queue_xmit) : 0, | 393 | dev_queue_xmit) : 0, |
| 396 | r->rt_spec_dst, &len); | 394 | r->rt_spec_dst, &len); |
| 397 | 395 | ||
| @@ -610,13 +608,13 @@ static inline int ip_rt_proc_init(void) | |||
| 610 | 608 | ||
| 611 | static inline void rt_free(struct rtable *rt) | 609 | static inline void rt_free(struct rtable *rt) |
| 612 | { | 610 | { |
| 613 | call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); | 611 | call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); |
| 614 | } | 612 | } |
| 615 | 613 | ||
| 616 | static inline void rt_drop(struct rtable *rt) | 614 | static inline void rt_drop(struct rtable *rt) |
| 617 | { | 615 | { |
| 618 | ip_rt_put(rt); | 616 | ip_rt_put(rt); |
| 619 | call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); | 617 | call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); |
| 620 | } | 618 | } |
| 621 | 619 | ||
| 622 | static inline int rt_fast_clean(struct rtable *rth) | 620 | static inline int rt_fast_clean(struct rtable *rth) |
| @@ -624,13 +622,13 @@ static inline int rt_fast_clean(struct rtable *rth) | |||
| 624 | /* Kill broadcast/multicast entries very aggressively, if they | 622 | /* Kill broadcast/multicast entries very aggressively, if they |
| 625 | collide in hash table with more useful entries */ | 623 | collide in hash table with more useful entries */ |
| 626 | return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) && | 624 | return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) && |
| 627 | rth->fl.iif && rth->u.dst.rt_next; | 625 | rth->fl.iif && rth->dst.rt_next; |
| 628 | } | 626 | } |
| 629 | 627 | ||
| 630 | static inline int rt_valuable(struct rtable *rth) | 628 | static inline int rt_valuable(struct rtable *rth) |
| 631 | { | 629 | { |
| 632 | return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || | 630 | return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || |
| 633 | rth->u.dst.expires; | 631 | rth->dst.expires; |
| 634 | } | 632 | } |
| 635 | 633 | ||
| 636 | static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) | 634 | static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) |
| @@ -638,15 +636,15 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t | |||
| 638 | unsigned long age; | 636 | unsigned long age; |
| 639 | int ret = 0; | 637 | int ret = 0; |
| 640 | 638 | ||
| 641 | if (atomic_read(&rth->u.dst.__refcnt)) | 639 | if (atomic_read(&rth->dst.__refcnt)) |
| 642 | goto out; | 640 | goto out; |
| 643 | 641 | ||
| 644 | ret = 1; | 642 | ret = 1; |
| 645 | if (rth->u.dst.expires && | 643 | if (rth->dst.expires && |
| 646 | time_after_eq(jiffies, rth->u.dst.expires)) | 644 | time_after_eq(jiffies, rth->dst.expires)) |
| 647 | goto out; | 645 | goto out; |
| 648 | 646 | ||
| 649 | age = jiffies - rth->u.dst.lastuse; | 647 | age = jiffies - rth->dst.lastuse; |
| 650 | ret = 0; | 648 | ret = 0; |
| 651 | if ((age <= tmo1 && !rt_fast_clean(rth)) || | 649 | if ((age <= tmo1 && !rt_fast_clean(rth)) || |
| 652 | (age <= tmo2 && rt_valuable(rth))) | 650 | (age <= tmo2 && rt_valuable(rth))) |
| @@ -662,7 +660,7 @@ out: return ret; | |||
| 662 | */ | 660 | */ |
| 663 | static inline u32 rt_score(struct rtable *rt) | 661 | static inline u32 rt_score(struct rtable *rt) |
| 664 | { | 662 | { |
| 665 | u32 score = jiffies - rt->u.dst.lastuse; | 663 | u32 score = jiffies - rt->dst.lastuse; |
| 666 | 664 | ||
| 667 | score = ~score & ~(3<<30); | 665 | score = ~score & ~(3<<30); |
| 668 | 666 | ||
| @@ -685,30 +683,29 @@ static inline bool rt_caching(const struct net *net) | |||
| 685 | static inline bool compare_hash_inputs(const struct flowi *fl1, | 683 | static inline bool compare_hash_inputs(const struct flowi *fl1, |
| 686 | const struct flowi *fl2) | 684 | const struct flowi *fl2) |
| 687 | { | 685 | { |
| 688 | return (__force u32)(((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | | 686 | return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) | |
| 689 | (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr) | | 687 | ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) | |
| 690 | (fl1->iif ^ fl2->iif)) == 0); | 688 | (fl1->iif ^ fl2->iif)) == 0); |
| 691 | } | 689 | } |
| 692 | 690 | ||
| 693 | static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) | 691 | static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) |
| 694 | { | 692 | { |
| 695 | return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | | 693 | return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) | |
| 696 | (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr)) | | 694 | ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) | |
| 697 | (fl1->mark ^ fl2->mark) | | 695 | (fl1->mark ^ fl2->mark) | |
| 698 | (*(u16 *)&fl1->nl_u.ip4_u.tos ^ | 696 | (*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) | |
| 699 | *(u16 *)&fl2->nl_u.ip4_u.tos) | | ||
| 700 | (fl1->oif ^ fl2->oif) | | 697 | (fl1->oif ^ fl2->oif) | |
| 701 | (fl1->iif ^ fl2->iif)) == 0; | 698 | (fl1->iif ^ fl2->iif)) == 0; |
| 702 | } | 699 | } |
| 703 | 700 | ||
| 704 | static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) | 701 | static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) |
| 705 | { | 702 | { |
| 706 | return net_eq(dev_net(rt1->u.dst.dev), dev_net(rt2->u.dst.dev)); | 703 | return net_eq(dev_net(rt1->dst.dev), dev_net(rt2->dst.dev)); |
| 707 | } | 704 | } |
| 708 | 705 | ||
| 709 | static inline int rt_is_expired(struct rtable *rth) | 706 | static inline int rt_is_expired(struct rtable *rth) |
| 710 | { | 707 | { |
| 711 | return rth->rt_genid != rt_genid(dev_net(rth->u.dst.dev)); | 708 | return rth->rt_genid != rt_genid(dev_net(rth->dst.dev)); |
| 712 | } | 709 | } |
| 713 | 710 | ||
| 714 | /* | 711 | /* |
| @@ -737,7 +734,7 @@ static void rt_do_flush(int process_context) | |||
| 737 | rth = rt_hash_table[i].chain; | 734 | rth = rt_hash_table[i].chain; |
| 738 | 735 | ||
| 739 | /* defer releasing the head of the list after spin_unlock */ | 736 | /* defer releasing the head of the list after spin_unlock */ |
| 740 | for (tail = rth; tail; tail = tail->u.dst.rt_next) | 737 | for (tail = rth; tail; tail = tail->dst.rt_next) |
| 741 | if (!rt_is_expired(tail)) | 738 | if (!rt_is_expired(tail)) |
| 742 | break; | 739 | break; |
| 743 | if (rth != tail) | 740 | if (rth != tail) |
| @@ -746,9 +743,9 @@ static void rt_do_flush(int process_context) | |||
| 746 | /* call rt_free on entries after the tail requiring flush */ | 743 | /* call rt_free on entries after the tail requiring flush */ |
| 747 | prev = &rt_hash_table[i].chain; | 744 | prev = &rt_hash_table[i].chain; |
| 748 | for (p = *prev; p; p = next) { | 745 | for (p = *prev; p; p = next) { |
| 749 | next = p->u.dst.rt_next; | 746 | next = p->dst.rt_next; |
| 750 | if (!rt_is_expired(p)) { | 747 | if (!rt_is_expired(p)) { |
| 751 | prev = &p->u.dst.rt_next; | 748 | prev = &p->dst.rt_next; |
| 752 | } else { | 749 | } else { |
| 753 | *prev = next; | 750 | *prev = next; |
| 754 | rt_free(p); | 751 | rt_free(p); |
| @@ -763,7 +760,7 @@ static void rt_do_flush(int process_context) | |||
| 763 | spin_unlock_bh(rt_hash_lock_addr(i)); | 760 | spin_unlock_bh(rt_hash_lock_addr(i)); |
| 764 | 761 | ||
| 765 | for (; rth != tail; rth = next) { | 762 | for (; rth != tail; rth = next) { |
| 766 | next = rth->u.dst.rt_next; | 763 | next = rth->dst.rt_next; |
| 767 | rt_free(rth); | 764 | rt_free(rth); |
| 768 | } | 765 | } |
| 769 | } | 766 | } |
| @@ -780,11 +777,30 @@ static void rt_do_flush(int process_context) | |||
| 780 | #define FRACT_BITS 3 | 777 | #define FRACT_BITS 3 |
| 781 | #define ONE (1UL << FRACT_BITS) | 778 | #define ONE (1UL << FRACT_BITS) |
| 782 | 779 | ||
| 780 | /* | ||
| 781 | * Given a hash chain and an item in this hash chain, | ||
| 782 | * find if a previous entry has the same hash_inputs | ||
| 783 | * (but differs on tos, mark or oif) | ||
| 784 | * Returns 0 if an alias is found. | ||
| 785 | * Returns ONE if rth has no alias before itself. | ||
| 786 | */ | ||
| 787 | static int has_noalias(const struct rtable *head, const struct rtable *rth) | ||
| 788 | { | ||
| 789 | const struct rtable *aux = head; | ||
| 790 | |||
| 791 | while (aux != rth) { | ||
| 792 | if (compare_hash_inputs(&aux->fl, &rth->fl)) | ||
| 793 | return 0; | ||
| 794 | aux = aux->dst.rt_next; | ||
| 795 | } | ||
| 796 | return ONE; | ||
| 797 | } | ||
| 798 | |||
| 783 | static void rt_check_expire(void) | 799 | static void rt_check_expire(void) |
| 784 | { | 800 | { |
| 785 | static unsigned int rover; | 801 | static unsigned int rover; |
| 786 | unsigned int i = rover, goal; | 802 | unsigned int i = rover, goal; |
| 787 | struct rtable *rth, *aux, **rthp; | 803 | struct rtable *rth, **rthp; |
| 788 | unsigned long samples = 0; | 804 | unsigned long samples = 0; |
| 789 | unsigned long sum = 0, sum2 = 0; | 805 | unsigned long sum = 0, sum2 = 0; |
| 790 | unsigned long delta; | 806 | unsigned long delta; |
| @@ -815,18 +831,18 @@ static void rt_check_expire(void) | |||
| 815 | length = 0; | 831 | length = 0; |
| 816 | spin_lock_bh(rt_hash_lock_addr(i)); | 832 | spin_lock_bh(rt_hash_lock_addr(i)); |
| 817 | while ((rth = *rthp) != NULL) { | 833 | while ((rth = *rthp) != NULL) { |
| 818 | prefetch(rth->u.dst.rt_next); | 834 | prefetch(rth->dst.rt_next); |
| 819 | if (rt_is_expired(rth)) { | 835 | if (rt_is_expired(rth)) { |
| 820 | *rthp = rth->u.dst.rt_next; | 836 | *rthp = rth->dst.rt_next; |
| 821 | rt_free(rth); | 837 | rt_free(rth); |
| 822 | continue; | 838 | continue; |
| 823 | } | 839 | } |
| 824 | if (rth->u.dst.expires) { | 840 | if (rth->dst.expires) { |
| 825 | /* Entry is expired even if it is in use */ | 841 | /* Entry is expired even if it is in use */ |
| 826 | if (time_before_eq(jiffies, rth->u.dst.expires)) { | 842 | if (time_before_eq(jiffies, rth->dst.expires)) { |
| 827 | nofree: | 843 | nofree: |
| 828 | tmo >>= 1; | 844 | tmo >>= 1; |
| 829 | rthp = &rth->u.dst.rt_next; | 845 | rthp = &rth->dst.rt_next; |
| 830 | /* | 846 | /* |
| 831 | * We only count entries on | 847 | * We only count entries on |
| 832 | * a chain with equal hash inputs once | 848 | * a chain with equal hash inputs once |
| @@ -835,22 +851,14 @@ nofree: | |||
| 835 | * attributes don't unfairly skew | 851 | * attributes don't unfairly skew |
| 836 | * the length computation | 852 | * the length computation |
| 837 | */ | 853 | */ |
| 838 | for (aux = rt_hash_table[i].chain;;) { | 854 | length += has_noalias(rt_hash_table[i].chain, rth); |
| 839 | if (aux == rth) { | ||
| 840 | length += ONE; | ||
| 841 | break; | ||
| 842 | } | ||
| 843 | if (compare_hash_inputs(&aux->fl, &rth->fl)) | ||
| 844 | break; | ||
| 845 | aux = aux->u.dst.rt_next; | ||
| 846 | } | ||
| 847 | continue; | 855 | continue; |
| 848 | } | 856 | } |
| 849 | } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) | 857 | } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) |
| 850 | goto nofree; | 858 | goto nofree; |
| 851 | 859 | ||
| 852 | /* Cleanup aged off entries. */ | 860 | /* Cleanup aged off entries. */ |
| 853 | *rthp = rth->u.dst.rt_next; | 861 | *rthp = rth->dst.rt_next; |
| 854 | rt_free(rth); | 862 | rt_free(rth); |
| 855 | } | 863 | } |
| 856 | spin_unlock_bh(rt_hash_lock_addr(i)); | 864 | spin_unlock_bh(rt_hash_lock_addr(i)); |
| @@ -908,34 +916,11 @@ void rt_cache_flush_batch(void) | |||
| 908 | rt_do_flush(!in_softirq()); | 916 | rt_do_flush(!in_softirq()); |
| 909 | } | 917 | } |
| 910 | 918 | ||
| 911 | /* | ||
| 912 | * We change rt_genid and let gc do the cleanup | ||
| 913 | */ | ||
| 914 | static void rt_secret_rebuild(unsigned long __net) | ||
| 915 | { | ||
| 916 | struct net *net = (struct net *)__net; | ||
| 917 | rt_cache_invalidate(net); | ||
| 918 | mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval); | ||
| 919 | } | ||
| 920 | |||
| 921 | static void rt_secret_rebuild_oneshot(struct net *net) | ||
| 922 | { | ||
| 923 | del_timer_sync(&net->ipv4.rt_secret_timer); | ||
| 924 | rt_cache_invalidate(net); | ||
| 925 | if (ip_rt_secret_interval) { | ||
| 926 | net->ipv4.rt_secret_timer.expires += ip_rt_secret_interval; | ||
| 927 | add_timer(&net->ipv4.rt_secret_timer); | ||
| 928 | } | ||
| 929 | } | ||
| 930 | |||
| 931 | static void rt_emergency_hash_rebuild(struct net *net) | 919 | static void rt_emergency_hash_rebuild(struct net *net) |
| 932 | { | 920 | { |
| 933 | if (net_ratelimit()) { | 921 | if (net_ratelimit()) |
| 934 | printk(KERN_WARNING "Route hash chain too long!\n"); | 922 | printk(KERN_WARNING "Route hash chain too long!\n"); |
| 935 | printk(KERN_WARNING "Adjust your secret_interval!\n"); | 923 | rt_cache_invalidate(net); |
| 936 | } | ||
| 937 | |||
| 938 | rt_secret_rebuild_oneshot(net); | ||
| 939 | } | 924 | } |
| 940 | 925 | ||
| 941 | /* | 926 | /* |
| @@ -1014,10 +999,10 @@ static int rt_garbage_collect(struct dst_ops *ops) | |||
| 1014 | if (!rt_is_expired(rth) && | 999 | if (!rt_is_expired(rth) && |
| 1015 | !rt_may_expire(rth, tmo, expire)) { | 1000 | !rt_may_expire(rth, tmo, expire)) { |
| 1016 | tmo >>= 1; | 1001 | tmo >>= 1; |
| 1017 | rthp = &rth->u.dst.rt_next; | 1002 | rthp = &rth->dst.rt_next; |
| 1018 | continue; | 1003 | continue; |
| 1019 | } | 1004 | } |
| 1020 | *rthp = rth->u.dst.rt_next; | 1005 | *rthp = rth->dst.rt_next; |
| 1021 | rt_free(rth); | 1006 | rt_free(rth); |
| 1022 | goal--; | 1007 | goal--; |
| 1023 | } | 1008 | } |
| @@ -1073,8 +1058,23 @@ work_done: | |||
| 1073 | out: return 0; | 1058 | out: return 0; |
| 1074 | } | 1059 | } |
| 1075 | 1060 | ||
| 1061 | /* | ||
| 1062 | * Returns number of entries in a hash chain that have different hash_inputs | ||
| 1063 | */ | ||
| 1064 | static int slow_chain_length(const struct rtable *head) | ||
| 1065 | { | ||
| 1066 | int length = 0; | ||
| 1067 | const struct rtable *rth = head; | ||
| 1068 | |||
| 1069 | while (rth) { | ||
| 1070 | length += has_noalias(head, rth); | ||
| 1071 | rth = rth->dst.rt_next; | ||
| 1072 | } | ||
| 1073 | return length >> FRACT_BITS; | ||
| 1074 | } | ||
| 1075 | |||
| 1076 | static int rt_intern_hash(unsigned hash, struct rtable *rt, | 1076 | static int rt_intern_hash(unsigned hash, struct rtable *rt, |
| 1077 | struct rtable **rp, struct sk_buff *skb) | 1077 | struct rtable **rp, struct sk_buff *skb, int ifindex) |
| 1078 | { | 1078 | { |
| 1079 | struct rtable *rth, **rthp; | 1079 | struct rtable *rth, **rthp; |
| 1080 | unsigned long now; | 1080 | unsigned long now; |
| @@ -1090,7 +1090,7 @@ restart: | |||
| 1090 | candp = NULL; | 1090 | candp = NULL; |
| 1091 | now = jiffies; | 1091 | now = jiffies; |
| 1092 | 1092 | ||
| 1093 | if (!rt_caching(dev_net(rt->u.dst.dev))) { | 1093 | if (!rt_caching(dev_net(rt->dst.dev))) { |
| 1094 | /* | 1094 | /* |
| 1095 | * If we're not caching, just tell the caller we | 1095 | * If we're not caching, just tell the caller we |
| 1096 | * were successful and don't touch the route. The | 1096 | * were successful and don't touch the route. The |
| @@ -1108,7 +1108,7 @@ restart: | |||
| 1108 | */ | 1108 | */ |
| 1109 | 1109 | ||
| 1110 | if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { | 1110 | if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { |
| 1111 | int err = arp_bind_neighbour(&rt->u.dst); | 1111 | int err = arp_bind_neighbour(&rt->dst); |
| 1112 | if (err) { | 1112 | if (err) { |
| 1113 | if (net_ratelimit()) | 1113 | if (net_ratelimit()) |
| 1114 | printk(KERN_WARNING | 1114 | printk(KERN_WARNING |
| @@ -1127,19 +1127,19 @@ restart: | |||
| 1127 | spin_lock_bh(rt_hash_lock_addr(hash)); | 1127 | spin_lock_bh(rt_hash_lock_addr(hash)); |
| 1128 | while ((rth = *rthp) != NULL) { | 1128 | while ((rth = *rthp) != NULL) { |
| 1129 | if (rt_is_expired(rth)) { | 1129 | if (rt_is_expired(rth)) { |
| 1130 | *rthp = rth->u.dst.rt_next; | 1130 | *rthp = rth->dst.rt_next; |
| 1131 | rt_free(rth); | 1131 | rt_free(rth); |
| 1132 | continue; | 1132 | continue; |
| 1133 | } | 1133 | } |
| 1134 | if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) { | 1134 | if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) { |
| 1135 | /* Put it first */ | 1135 | /* Put it first */ |
| 1136 | *rthp = rth->u.dst.rt_next; | 1136 | *rthp = rth->dst.rt_next; |
| 1137 | /* | 1137 | /* |
| 1138 | * Since lookup is lockfree, the deletion | 1138 | * Since lookup is lockfree, the deletion |
| 1139 | * must be visible to another weakly ordered CPU before | 1139 | * must be visible to another weakly ordered CPU before |
| 1140 | * the insertion at the start of the hash chain. | 1140 | * the insertion at the start of the hash chain. |
| 1141 | */ | 1141 | */ |
| 1142 | rcu_assign_pointer(rth->u.dst.rt_next, | 1142 | rcu_assign_pointer(rth->dst.rt_next, |
| 1143 | rt_hash_table[hash].chain); | 1143 | rt_hash_table[hash].chain); |
| 1144 | /* | 1144 | /* |
| 1145 | * Since lookup is lockfree, the update writes | 1145 | * Since lookup is lockfree, the update writes |
| @@ -1147,18 +1147,18 @@ restart: | |||
| 1147 | */ | 1147 | */ |
| 1148 | rcu_assign_pointer(rt_hash_table[hash].chain, rth); | 1148 | rcu_assign_pointer(rt_hash_table[hash].chain, rth); |
| 1149 | 1149 | ||
| 1150 | dst_use(&rth->u.dst, now); | 1150 | dst_use(&rth->dst, now); |
| 1151 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1151 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
| 1152 | 1152 | ||
| 1153 | rt_drop(rt); | 1153 | rt_drop(rt); |
| 1154 | if (rp) | 1154 | if (rp) |
| 1155 | *rp = rth; | 1155 | *rp = rth; |
| 1156 | else | 1156 | else |
| 1157 | skb_dst_set(skb, &rth->u.dst); | 1157 | skb_dst_set(skb, &rth->dst); |
| 1158 | return 0; | 1158 | return 0; |
| 1159 | } | 1159 | } |
| 1160 | 1160 | ||
| 1161 | if (!atomic_read(&rth->u.dst.__refcnt)) { | 1161 | if (!atomic_read(&rth->dst.__refcnt)) { |
| 1162 | u32 score = rt_score(rth); | 1162 | u32 score = rt_score(rth); |
| 1163 | 1163 | ||
| 1164 | if (score <= min_score) { | 1164 | if (score <= min_score) { |
| @@ -1170,7 +1170,7 @@ restart: | |||
| 1170 | 1170 | ||
| 1171 | chain_length++; | 1171 | chain_length++; |
| 1172 | 1172 | ||
| 1173 | rthp = &rth->u.dst.rt_next; | 1173 | rthp = &rth->dst.rt_next; |
| 1174 | } | 1174 | } |
| 1175 | 1175 | ||
| 1176 | if (cand) { | 1176 | if (cand) { |
| @@ -1181,18 +1181,24 @@ restart: | |||
| 1181 | * only 2 entries per bucket. We will see. | 1181 | * only 2 entries per bucket. We will see. |
| 1182 | */ | 1182 | */ |
| 1183 | if (chain_length > ip_rt_gc_elasticity) { | 1183 | if (chain_length > ip_rt_gc_elasticity) { |
| 1184 | *candp = cand->u.dst.rt_next; | 1184 | *candp = cand->dst.rt_next; |
| 1185 | rt_free(cand); | 1185 | rt_free(cand); |
| 1186 | } | 1186 | } |
| 1187 | } else { | 1187 | } else { |
| 1188 | if (chain_length > rt_chain_length_max) { | 1188 | if (chain_length > rt_chain_length_max && |
| 1189 | struct net *net = dev_net(rt->u.dst.dev); | 1189 | slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) { |
| 1190 | struct net *net = dev_net(rt->dst.dev); | ||
| 1190 | int num = ++net->ipv4.current_rt_cache_rebuild_count; | 1191 | int num = ++net->ipv4.current_rt_cache_rebuild_count; |
| 1191 | if (!rt_caching(dev_net(rt->u.dst.dev))) { | 1192 | if (!rt_caching(net)) { |
| 1192 | printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n", | 1193 | printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n", |
| 1193 | rt->u.dst.dev->name, num); | 1194 | rt->dst.dev->name, num); |
| 1194 | } | 1195 | } |
| 1195 | rt_emergency_hash_rebuild(dev_net(rt->u.dst.dev)); | 1196 | rt_emergency_hash_rebuild(net); |
| 1197 | spin_unlock_bh(rt_hash_lock_addr(hash)); | ||
| 1198 | |||
| 1199 | hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, | ||
| 1200 | ifindex, rt_genid(net)); | ||
| 1201 | goto restart; | ||
| 1196 | } | 1202 | } |
| 1197 | } | 1203 | } |
| 1198 | 1204 | ||
| @@ -1200,7 +1206,7 @@ restart: | |||
| 1200 | route or unicast forwarding path. | 1206 | route or unicast forwarding path. |
| 1201 | */ | 1207 | */ |
| 1202 | if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { | 1208 | if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { |
| 1203 | int err = arp_bind_neighbour(&rt->u.dst); | 1209 | int err = arp_bind_neighbour(&rt->dst); |
| 1204 | if (err) { | 1210 | if (err) { |
| 1205 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1211 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
| 1206 | 1212 | ||
| @@ -1225,20 +1231,20 @@ restart: | |||
| 1225 | } | 1231 | } |
| 1226 | 1232 | ||
| 1227 | if (net_ratelimit()) | 1233 | if (net_ratelimit()) |
| 1228 | printk(KERN_WARNING "Neighbour table overflow.\n"); | 1234 | printk(KERN_WARNING "ipv4: Neighbour table overflow.\n"); |
| 1229 | rt_drop(rt); | 1235 | rt_drop(rt); |
| 1230 | return -ENOBUFS; | 1236 | return -ENOBUFS; |
| 1231 | } | 1237 | } |
| 1232 | } | 1238 | } |
| 1233 | 1239 | ||
| 1234 | rt->u.dst.rt_next = rt_hash_table[hash].chain; | 1240 | rt->dst.rt_next = rt_hash_table[hash].chain; |
| 1235 | 1241 | ||
| 1236 | #if RT_CACHE_DEBUG >= 2 | 1242 | #if RT_CACHE_DEBUG >= 2 |
| 1237 | if (rt->u.dst.rt_next) { | 1243 | if (rt->dst.rt_next) { |
| 1238 | struct rtable *trt; | 1244 | struct rtable *trt; |
| 1239 | printk(KERN_DEBUG "rt_cache @%02x: %pI4", | 1245 | printk(KERN_DEBUG "rt_cache @%02x: %pI4", |
| 1240 | hash, &rt->rt_dst); | 1246 | hash, &rt->rt_dst); |
| 1241 | for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) | 1247 | for (trt = rt->dst.rt_next; trt; trt = trt->dst.rt_next) |
| 1242 | printk(" . %pI4", &trt->rt_dst); | 1248 | printk(" . %pI4", &trt->rt_dst); |
| 1243 | printk("\n"); | 1249 | printk("\n"); |
| 1244 | } | 1250 | } |
| @@ -1256,7 +1262,7 @@ skip_hashing: | |||
| 1256 | if (rp) | 1262 | if (rp) |
| 1257 | *rp = rt; | 1263 | *rp = rt; |
| 1258 | else | 1264 | else |
| 1259 | skb_dst_set(skb, &rt->u.dst); | 1265 | skb_dst_set(skb, &rt->dst); |
| 1260 | return 0; | 1266 | return 0; |
| 1261 | } | 1267 | } |
| 1262 | 1268 | ||
| @@ -1318,6 +1324,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) | |||
| 1318 | 1324 | ||
| 1319 | ip_select_fb_ident(iph); | 1325 | ip_select_fb_ident(iph); |
| 1320 | } | 1326 | } |
| 1327 | EXPORT_SYMBOL(__ip_select_ident); | ||
| 1321 | 1328 | ||
| 1322 | static void rt_del(unsigned hash, struct rtable *rt) | 1329 | static void rt_del(unsigned hash, struct rtable *rt) |
| 1323 | { | 1330 | { |
| @@ -1328,20 +1335,21 @@ static void rt_del(unsigned hash, struct rtable *rt) | |||
| 1328 | ip_rt_put(rt); | 1335 | ip_rt_put(rt); |
| 1329 | while ((aux = *rthp) != NULL) { | 1336 | while ((aux = *rthp) != NULL) { |
| 1330 | if (aux == rt || rt_is_expired(aux)) { | 1337 | if (aux == rt || rt_is_expired(aux)) { |
| 1331 | *rthp = aux->u.dst.rt_next; | 1338 | *rthp = aux->dst.rt_next; |
| 1332 | rt_free(aux); | 1339 | rt_free(aux); |
| 1333 | continue; | 1340 | continue; |
| 1334 | } | 1341 | } |
| 1335 | rthp = &aux->u.dst.rt_next; | 1342 | rthp = &aux->dst.rt_next; |
| 1336 | } | 1343 | } |
| 1337 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1344 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
| 1338 | } | 1345 | } |
| 1339 | 1346 | ||
| 1347 | /* called in rcu_read_lock() section */ | ||
| 1340 | void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | 1348 | void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, |
| 1341 | __be32 saddr, struct net_device *dev) | 1349 | __be32 saddr, struct net_device *dev) |
| 1342 | { | 1350 | { |
| 1343 | int i, k; | 1351 | int i, k; |
| 1344 | struct in_device *in_dev = in_dev_get(dev); | 1352 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
| 1345 | struct rtable *rth, **rthp; | 1353 | struct rtable *rth, **rthp; |
| 1346 | __be32 skeys[2] = { saddr, 0 }; | 1354 | __be32 skeys[2] = { saddr, 0 }; |
| 1347 | int ikeys[2] = { dev->ifindex, 0 }; | 1355 | int ikeys[2] = { dev->ifindex, 0 }; |
| @@ -1377,7 +1385,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
| 1377 | 1385 | ||
| 1378 | rthp=&rt_hash_table[hash].chain; | 1386 | rthp=&rt_hash_table[hash].chain; |
| 1379 | 1387 | ||
| 1380 | rcu_read_lock(); | ||
| 1381 | while ((rth = rcu_dereference(*rthp)) != NULL) { | 1388 | while ((rth = rcu_dereference(*rthp)) != NULL) { |
| 1382 | struct rtable *rt; | 1389 | struct rtable *rt; |
| 1383 | 1390 | ||
| @@ -1386,44 +1393,42 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
| 1386 | rth->fl.oif != ikeys[k] || | 1393 | rth->fl.oif != ikeys[k] || |
| 1387 | rth->fl.iif != 0 || | 1394 | rth->fl.iif != 0 || |
| 1388 | rt_is_expired(rth) || | 1395 | rt_is_expired(rth) || |
| 1389 | !net_eq(dev_net(rth->u.dst.dev), net)) { | 1396 | !net_eq(dev_net(rth->dst.dev), net)) { |
| 1390 | rthp = &rth->u.dst.rt_next; | 1397 | rthp = &rth->dst.rt_next; |
| 1391 | continue; | 1398 | continue; |
| 1392 | } | 1399 | } |
| 1393 | 1400 | ||
| 1394 | if (rth->rt_dst != daddr || | 1401 | if (rth->rt_dst != daddr || |
| 1395 | rth->rt_src != saddr || | 1402 | rth->rt_src != saddr || |
| 1396 | rth->u.dst.error || | 1403 | rth->dst.error || |
| 1397 | rth->rt_gateway != old_gw || | 1404 | rth->rt_gateway != old_gw || |
| 1398 | rth->u.dst.dev != dev) | 1405 | rth->dst.dev != dev) |
| 1399 | break; | 1406 | break; |
| 1400 | 1407 | ||
| 1401 | dst_hold(&rth->u.dst); | 1408 | dst_hold(&rth->dst); |
| 1402 | rcu_read_unlock(); | ||
| 1403 | 1409 | ||
| 1404 | rt = dst_alloc(&ipv4_dst_ops); | 1410 | rt = dst_alloc(&ipv4_dst_ops); |
| 1405 | if (rt == NULL) { | 1411 | if (rt == NULL) { |
| 1406 | ip_rt_put(rth); | 1412 | ip_rt_put(rth); |
| 1407 | in_dev_put(in_dev); | ||
| 1408 | return; | 1413 | return; |
| 1409 | } | 1414 | } |
| 1410 | 1415 | ||
| 1411 | /* Copy all the information. */ | 1416 | /* Copy all the information. */ |
| 1412 | *rt = *rth; | 1417 | *rt = *rth; |
| 1413 | rt->u.dst.__use = 1; | 1418 | rt->dst.__use = 1; |
| 1414 | atomic_set(&rt->u.dst.__refcnt, 1); | 1419 | atomic_set(&rt->dst.__refcnt, 1); |
| 1415 | rt->u.dst.child = NULL; | 1420 | rt->dst.child = NULL; |
| 1416 | if (rt->u.dst.dev) | 1421 | if (rt->dst.dev) |
| 1417 | dev_hold(rt->u.dst.dev); | 1422 | dev_hold(rt->dst.dev); |
| 1418 | if (rt->idev) | 1423 | if (rt->idev) |
| 1419 | in_dev_hold(rt->idev); | 1424 | in_dev_hold(rt->idev); |
| 1420 | rt->u.dst.obsolete = 0; | 1425 | rt->dst.obsolete = -1; |
| 1421 | rt->u.dst.lastuse = jiffies; | 1426 | rt->dst.lastuse = jiffies; |
| 1422 | rt->u.dst.path = &rt->u.dst; | 1427 | rt->dst.path = &rt->dst; |
| 1423 | rt->u.dst.neighbour = NULL; | 1428 | rt->dst.neighbour = NULL; |
| 1424 | rt->u.dst.hh = NULL; | 1429 | rt->dst.hh = NULL; |
| 1425 | #ifdef CONFIG_XFRM | 1430 | #ifdef CONFIG_XFRM |
| 1426 | rt->u.dst.xfrm = NULL; | 1431 | rt->dst.xfrm = NULL; |
| 1427 | #endif | 1432 | #endif |
| 1428 | rt->rt_genid = rt_genid(net); | 1433 | rt->rt_genid = rt_genid(net); |
| 1429 | rt->rt_flags |= RTCF_REDIRECTED; | 1434 | rt->rt_flags |= RTCF_REDIRECTED; |
| @@ -1432,37 +1437,35 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
| 1432 | rt->rt_gateway = new_gw; | 1437 | rt->rt_gateway = new_gw; |
| 1433 | 1438 | ||
| 1434 | /* Redirect received -> path was valid */ | 1439 | /* Redirect received -> path was valid */ |
| 1435 | dst_confirm(&rth->u.dst); | 1440 | dst_confirm(&rth->dst); |
| 1436 | 1441 | ||
| 1437 | if (rt->peer) | 1442 | if (rt->peer) |
| 1438 | atomic_inc(&rt->peer->refcnt); | 1443 | atomic_inc(&rt->peer->refcnt); |
| 1439 | 1444 | ||
| 1440 | if (arp_bind_neighbour(&rt->u.dst) || | 1445 | if (arp_bind_neighbour(&rt->dst) || |
| 1441 | !(rt->u.dst.neighbour->nud_state & | 1446 | !(rt->dst.neighbour->nud_state & |
| 1442 | NUD_VALID)) { | 1447 | NUD_VALID)) { |
| 1443 | if (rt->u.dst.neighbour) | 1448 | if (rt->dst.neighbour) |
| 1444 | neigh_event_send(rt->u.dst.neighbour, NULL); | 1449 | neigh_event_send(rt->dst.neighbour, NULL); |
| 1445 | ip_rt_put(rth); | 1450 | ip_rt_put(rth); |
| 1446 | rt_drop(rt); | 1451 | rt_drop(rt); |
| 1447 | goto do_next; | 1452 | goto do_next; |
| 1448 | } | 1453 | } |
| 1449 | 1454 | ||
| 1450 | netevent.old = &rth->u.dst; | 1455 | netevent.old = &rth->dst; |
| 1451 | netevent.new = &rt->u.dst; | 1456 | netevent.new = &rt->dst; |
| 1452 | call_netevent_notifiers(NETEVENT_REDIRECT, | 1457 | call_netevent_notifiers(NETEVENT_REDIRECT, |
| 1453 | &netevent); | 1458 | &netevent); |
| 1454 | 1459 | ||
| 1455 | rt_del(hash, rth); | 1460 | rt_del(hash, rth); |
| 1456 | if (!rt_intern_hash(hash, rt, &rt, NULL)) | 1461 | if (!rt_intern_hash(hash, rt, &rt, NULL, rt->fl.oif)) |
| 1457 | ip_rt_put(rt); | 1462 | ip_rt_put(rt); |
| 1458 | goto do_next; | 1463 | goto do_next; |
| 1459 | } | 1464 | } |
| 1460 | rcu_read_unlock(); | ||
| 1461 | do_next: | 1465 | do_next: |
| 1462 | ; | 1466 | ; |
| 1463 | } | 1467 | } |
| 1464 | } | 1468 | } |
| 1465 | in_dev_put(in_dev); | ||
| 1466 | return; | 1469 | return; |
| 1467 | 1470 | ||
| 1468 | reject_redirect: | 1471 | reject_redirect: |
| @@ -1473,7 +1476,7 @@ reject_redirect: | |||
| 1473 | &old_gw, dev->name, &new_gw, | 1476 | &old_gw, dev->name, &new_gw, |
| 1474 | &saddr, &daddr); | 1477 | &saddr, &daddr); |
| 1475 | #endif | 1478 | #endif |
| 1476 | in_dev_put(in_dev); | 1479 | ; |
| 1477 | } | 1480 | } |
| 1478 | 1481 | ||
| 1479 | static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) | 1482 | static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) |
| @@ -1482,11 +1485,12 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) | |||
| 1482 | struct dst_entry *ret = dst; | 1485 | struct dst_entry *ret = dst; |
| 1483 | 1486 | ||
| 1484 | if (rt) { | 1487 | if (rt) { |
| 1485 | if (dst->obsolete) { | 1488 | if (dst->obsolete > 0) { |
| 1486 | ip_rt_put(rt); | 1489 | ip_rt_put(rt); |
| 1487 | ret = NULL; | 1490 | ret = NULL; |
| 1488 | } else if ((rt->rt_flags & RTCF_REDIRECTED) || | 1491 | } else if ((rt->rt_flags & RTCF_REDIRECTED) || |
| 1489 | rt->u.dst.expires) { | 1492 | (rt->dst.expires && |
| 1493 | time_after_eq(jiffies, rt->dst.expires))) { | ||
| 1490 | unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, | 1494 | unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, |
| 1491 | rt->fl.oif, | 1495 | rt->fl.oif, |
| 1492 | rt_genid(dev_net(dst->dev))); | 1496 | rt_genid(dev_net(dst->dev))); |
| @@ -1524,7 +1528,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
| 1524 | int log_martians; | 1528 | int log_martians; |
| 1525 | 1529 | ||
| 1526 | rcu_read_lock(); | 1530 | rcu_read_lock(); |
| 1527 | in_dev = __in_dev_get_rcu(rt->u.dst.dev); | 1531 | in_dev = __in_dev_get_rcu(rt->dst.dev); |
| 1528 | if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) { | 1532 | if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) { |
| 1529 | rcu_read_unlock(); | 1533 | rcu_read_unlock(); |
| 1530 | return; | 1534 | return; |
| @@ -1535,30 +1539,30 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
| 1535 | /* No redirected packets during ip_rt_redirect_silence; | 1539 | /* No redirected packets during ip_rt_redirect_silence; |
| 1536 | * reset the algorithm. | 1540 | * reset the algorithm. |
| 1537 | */ | 1541 | */ |
| 1538 | if (time_after(jiffies, rt->u.dst.rate_last + ip_rt_redirect_silence)) | 1542 | if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence)) |
| 1539 | rt->u.dst.rate_tokens = 0; | 1543 | rt->dst.rate_tokens = 0; |
| 1540 | 1544 | ||
| 1541 | /* Too many ignored redirects; do not send anything | 1545 | /* Too many ignored redirects; do not send anything |
| 1542 | * set u.dst.rate_last to the last seen redirected packet. | 1546 | * set dst.rate_last to the last seen redirected packet. |
| 1543 | */ | 1547 | */ |
| 1544 | if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) { | 1548 | if (rt->dst.rate_tokens >= ip_rt_redirect_number) { |
| 1545 | rt->u.dst.rate_last = jiffies; | 1549 | rt->dst.rate_last = jiffies; |
| 1546 | return; | 1550 | return; |
| 1547 | } | 1551 | } |
| 1548 | 1552 | ||
| 1549 | /* Check for load limit; set rate_last to the latest sent | 1553 | /* Check for load limit; set rate_last to the latest sent |
| 1550 | * redirect. | 1554 | * redirect. |
| 1551 | */ | 1555 | */ |
| 1552 | if (rt->u.dst.rate_tokens == 0 || | 1556 | if (rt->dst.rate_tokens == 0 || |
| 1553 | time_after(jiffies, | 1557 | time_after(jiffies, |
| 1554 | (rt->u.dst.rate_last + | 1558 | (rt->dst.rate_last + |
| 1555 | (ip_rt_redirect_load << rt->u.dst.rate_tokens)))) { | 1559 | (ip_rt_redirect_load << rt->dst.rate_tokens)))) { |
| 1556 | icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); | 1560 | icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); |
| 1557 | rt->u.dst.rate_last = jiffies; | 1561 | rt->dst.rate_last = jiffies; |
| 1558 | ++rt->u.dst.rate_tokens; | 1562 | ++rt->dst.rate_tokens; |
| 1559 | #ifdef CONFIG_IP_ROUTE_VERBOSE | 1563 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
| 1560 | if (log_martians && | 1564 | if (log_martians && |
| 1561 | rt->u.dst.rate_tokens == ip_rt_redirect_number && | 1565 | rt->dst.rate_tokens == ip_rt_redirect_number && |
| 1562 | net_ratelimit()) | 1566 | net_ratelimit()) |
| 1563 | printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", | 1567 | printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", |
| 1564 | &rt->rt_src, rt->rt_iif, | 1568 | &rt->rt_src, rt->rt_iif, |
| @@ -1573,7 +1577,7 @@ static int ip_error(struct sk_buff *skb) | |||
| 1573 | unsigned long now; | 1577 | unsigned long now; |
| 1574 | int code; | 1578 | int code; |
| 1575 | 1579 | ||
| 1576 | switch (rt->u.dst.error) { | 1580 | switch (rt->dst.error) { |
| 1577 | case EINVAL: | 1581 | case EINVAL: |
| 1578 | default: | 1582 | default: |
| 1579 | goto out; | 1583 | goto out; |
| @@ -1582,7 +1586,7 @@ static int ip_error(struct sk_buff *skb) | |||
| 1582 | break; | 1586 | break; |
| 1583 | case ENETUNREACH: | 1587 | case ENETUNREACH: |
| 1584 | code = ICMP_NET_UNREACH; | 1588 | code = ICMP_NET_UNREACH; |
| 1585 | IP_INC_STATS_BH(dev_net(rt->u.dst.dev), | 1589 | IP_INC_STATS_BH(dev_net(rt->dst.dev), |
| 1586 | IPSTATS_MIB_INNOROUTES); | 1590 | IPSTATS_MIB_INNOROUTES); |
| 1587 | break; | 1591 | break; |
| 1588 | case EACCES: | 1592 | case EACCES: |
| @@ -1591,12 +1595,12 @@ static int ip_error(struct sk_buff *skb) | |||
| 1591 | } | 1595 | } |
| 1592 | 1596 | ||
| 1593 | now = jiffies; | 1597 | now = jiffies; |
| 1594 | rt->u.dst.rate_tokens += now - rt->u.dst.rate_last; | 1598 | rt->dst.rate_tokens += now - rt->dst.rate_last; |
| 1595 | if (rt->u.dst.rate_tokens > ip_rt_error_burst) | 1599 | if (rt->dst.rate_tokens > ip_rt_error_burst) |
| 1596 | rt->u.dst.rate_tokens = ip_rt_error_burst; | 1600 | rt->dst.rate_tokens = ip_rt_error_burst; |
| 1597 | rt->u.dst.rate_last = now; | 1601 | rt->dst.rate_last = now; |
| 1598 | if (rt->u.dst.rate_tokens >= ip_rt_error_cost) { | 1602 | if (rt->dst.rate_tokens >= ip_rt_error_cost) { |
| 1599 | rt->u.dst.rate_tokens -= ip_rt_error_cost; | 1603 | rt->dst.rate_tokens -= ip_rt_error_cost; |
| 1600 | icmp_send(skb, ICMP_DEST_UNREACH, code, 0); | 1604 | icmp_send(skb, ICMP_DEST_UNREACH, code, 0); |
| 1601 | } | 1605 | } |
| 1602 | 1606 | ||
| @@ -1641,7 +1645,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, | |||
| 1641 | 1645 | ||
| 1642 | rcu_read_lock(); | 1646 | rcu_read_lock(); |
| 1643 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; | 1647 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; |
| 1644 | rth = rcu_dereference(rth->u.dst.rt_next)) { | 1648 | rth = rcu_dereference(rth->dst.rt_next)) { |
| 1645 | unsigned short mtu = new_mtu; | 1649 | unsigned short mtu = new_mtu; |
| 1646 | 1650 | ||
| 1647 | if (rth->fl.fl4_dst != daddr || | 1651 | if (rth->fl.fl4_dst != daddr || |
| @@ -1650,8 +1654,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, | |||
| 1650 | rth->rt_src != iph->saddr || | 1654 | rth->rt_src != iph->saddr || |
| 1651 | rth->fl.oif != ikeys[k] || | 1655 | rth->fl.oif != ikeys[k] || |
| 1652 | rth->fl.iif != 0 || | 1656 | rth->fl.iif != 0 || |
| 1653 | dst_metric_locked(&rth->u.dst, RTAX_MTU) || | 1657 | dst_metric_locked(&rth->dst, RTAX_MTU) || |
| 1654 | !net_eq(dev_net(rth->u.dst.dev), net) || | 1658 | !net_eq(dev_net(rth->dst.dev), net) || |
| 1655 | rt_is_expired(rth)) | 1659 | rt_is_expired(rth)) |
| 1656 | continue; | 1660 | continue; |
| 1657 | 1661 | ||
| @@ -1659,22 +1663,22 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, | |||
| 1659 | 1663 | ||
| 1660 | /* BSD 4.2 compatibility hack :-( */ | 1664 | /* BSD 4.2 compatibility hack :-( */ |
| 1661 | if (mtu == 0 && | 1665 | if (mtu == 0 && |
| 1662 | old_mtu >= dst_mtu(&rth->u.dst) && | 1666 | old_mtu >= dst_mtu(&rth->dst) && |
| 1663 | old_mtu >= 68 + (iph->ihl << 2)) | 1667 | old_mtu >= 68 + (iph->ihl << 2)) |
| 1664 | old_mtu -= iph->ihl << 2; | 1668 | old_mtu -= iph->ihl << 2; |
| 1665 | 1669 | ||
| 1666 | mtu = guess_mtu(old_mtu); | 1670 | mtu = guess_mtu(old_mtu); |
| 1667 | } | 1671 | } |
| 1668 | if (mtu <= dst_mtu(&rth->u.dst)) { | 1672 | if (mtu <= dst_mtu(&rth->dst)) { |
| 1669 | if (mtu < dst_mtu(&rth->u.dst)) { | 1673 | if (mtu < dst_mtu(&rth->dst)) { |
| 1670 | dst_confirm(&rth->u.dst); | 1674 | dst_confirm(&rth->dst); |
| 1671 | if (mtu < ip_rt_min_pmtu) { | 1675 | if (mtu < ip_rt_min_pmtu) { |
| 1672 | mtu = ip_rt_min_pmtu; | 1676 | mtu = ip_rt_min_pmtu; |
| 1673 | rth->u.dst.metrics[RTAX_LOCK-1] |= | 1677 | rth->dst.metrics[RTAX_LOCK-1] |= |
| 1674 | (1 << RTAX_MTU); | 1678 | (1 << RTAX_MTU); |
| 1675 | } | 1679 | } |
| 1676 | rth->u.dst.metrics[RTAX_MTU-1] = mtu; | 1680 | rth->dst.metrics[RTAX_MTU-1] = mtu; |
| 1677 | dst_set_expires(&rth->u.dst, | 1681 | dst_set_expires(&rth->dst, |
| 1678 | ip_rt_mtu_expires); | 1682 | ip_rt_mtu_expires); |
| 1679 | } | 1683 | } |
| 1680 | est_mtu = mtu; | 1684 | est_mtu = mtu; |
| @@ -1702,7 +1706,9 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) | |||
| 1702 | 1706 | ||
| 1703 | static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) | 1707 | static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) |
| 1704 | { | 1708 | { |
| 1705 | return NULL; | 1709 | if (rt_is_expired((struct rtable *)dst)) |
| 1710 | return NULL; | ||
| 1711 | return dst; | ||
| 1706 | } | 1712 | } |
| 1707 | 1713 | ||
| 1708 | static void ipv4_dst_destroy(struct dst_entry *dst) | 1714 | static void ipv4_dst_destroy(struct dst_entry *dst) |
| @@ -1745,7 +1751,7 @@ static void ipv4_link_failure(struct sk_buff *skb) | |||
| 1745 | 1751 | ||
| 1746 | rt = skb_rtable(skb); | 1752 | rt = skb_rtable(skb); |
| 1747 | if (rt) | 1753 | if (rt) |
| 1748 | dst_set_expires(&rt->u.dst, 0); | 1754 | dst_set_expires(&rt->dst, 0); |
| 1749 | } | 1755 | } |
| 1750 | 1756 | ||
| 1751 | static int ip_rt_bug(struct sk_buff *skb) | 1757 | static int ip_rt_bug(struct sk_buff *skb) |
| @@ -1773,11 +1779,11 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) | |||
| 1773 | 1779 | ||
| 1774 | if (rt->fl.iif == 0) | 1780 | if (rt->fl.iif == 0) |
| 1775 | src = rt->rt_src; | 1781 | src = rt->rt_src; |
| 1776 | else if (fib_lookup(dev_net(rt->u.dst.dev), &rt->fl, &res) == 0) { | 1782 | else if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) { |
| 1777 | src = FIB_RES_PREFSRC(res); | 1783 | src = FIB_RES_PREFSRC(res); |
| 1778 | fib_res_put(&res); | 1784 | fib_res_put(&res); |
| 1779 | } else | 1785 | } else |
| 1780 | src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway, | 1786 | src = inet_select_addr(rt->dst.dev, rt->rt_gateway, |
| 1781 | RT_SCOPE_UNIVERSE); | 1787 | RT_SCOPE_UNIVERSE); |
| 1782 | memcpy(addr, &src, 4); | 1788 | memcpy(addr, &src, 4); |
| 1783 | } | 1789 | } |
| @@ -1785,10 +1791,10 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) | |||
| 1785 | #ifdef CONFIG_NET_CLS_ROUTE | 1791 | #ifdef CONFIG_NET_CLS_ROUTE |
| 1786 | static void set_class_tag(struct rtable *rt, u32 tag) | 1792 | static void set_class_tag(struct rtable *rt, u32 tag) |
| 1787 | { | 1793 | { |
| 1788 | if (!(rt->u.dst.tclassid & 0xFFFF)) | 1794 | if (!(rt->dst.tclassid & 0xFFFF)) |
| 1789 | rt->u.dst.tclassid |= tag & 0xFFFF; | 1795 | rt->dst.tclassid |= tag & 0xFFFF; |
| 1790 | if (!(rt->u.dst.tclassid & 0xFFFF0000)) | 1796 | if (!(rt->dst.tclassid & 0xFFFF0000)) |
| 1791 | rt->u.dst.tclassid |= tag & 0xFFFF0000; | 1797 | rt->dst.tclassid |= tag & 0xFFFF0000; |
| 1792 | } | 1798 | } |
| 1793 | #endif | 1799 | #endif |
| 1794 | 1800 | ||
| @@ -1800,30 +1806,30 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) | |||
| 1800 | if (FIB_RES_GW(*res) && | 1806 | if (FIB_RES_GW(*res) && |
| 1801 | FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) | 1807 | FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) |
| 1802 | rt->rt_gateway = FIB_RES_GW(*res); | 1808 | rt->rt_gateway = FIB_RES_GW(*res); |
| 1803 | memcpy(rt->u.dst.metrics, fi->fib_metrics, | 1809 | memcpy(rt->dst.metrics, fi->fib_metrics, |
| 1804 | sizeof(rt->u.dst.metrics)); | 1810 | sizeof(rt->dst.metrics)); |
| 1805 | if (fi->fib_mtu == 0) { | 1811 | if (fi->fib_mtu == 0) { |
| 1806 | rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; | 1812 | rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu; |
| 1807 | if (dst_metric_locked(&rt->u.dst, RTAX_MTU) && | 1813 | if (dst_metric_locked(&rt->dst, RTAX_MTU) && |
| 1808 | rt->rt_gateway != rt->rt_dst && | 1814 | rt->rt_gateway != rt->rt_dst && |
| 1809 | rt->u.dst.dev->mtu > 576) | 1815 | rt->dst.dev->mtu > 576) |
| 1810 | rt->u.dst.metrics[RTAX_MTU-1] = 576; | 1816 | rt->dst.metrics[RTAX_MTU-1] = 576; |
| 1811 | } | 1817 | } |
| 1812 | #ifdef CONFIG_NET_CLS_ROUTE | 1818 | #ifdef CONFIG_NET_CLS_ROUTE |
| 1813 | rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid; | 1819 | rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid; |
| 1814 | #endif | 1820 | #endif |
| 1815 | } else | 1821 | } else |
| 1816 | rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu; | 1822 | rt->dst.metrics[RTAX_MTU-1]= rt->dst.dev->mtu; |
| 1817 | 1823 | ||
| 1818 | if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) | 1824 | if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0) |
| 1819 | rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; | 1825 | rt->dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; |
| 1820 | if (dst_mtu(&rt->u.dst) > IP_MAX_MTU) | 1826 | if (dst_mtu(&rt->dst) > IP_MAX_MTU) |
| 1821 | rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; | 1827 | rt->dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; |
| 1822 | if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0) | 1828 | if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0) |
| 1823 | rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40, | 1829 | rt->dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->dst.dev->mtu - 40, |
| 1824 | ip_rt_min_advmss); | 1830 | ip_rt_min_advmss); |
| 1825 | if (dst_metric(&rt->u.dst, RTAX_ADVMSS) > 65535 - 40) | 1831 | if (dst_metric(&rt->dst, RTAX_ADVMSS) > 65535 - 40) |
| 1826 | rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; | 1832 | rt->dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; |
| 1827 | 1833 | ||
| 1828 | #ifdef CONFIG_NET_CLS_ROUTE | 1834 | #ifdef CONFIG_NET_CLS_ROUTE |
| 1829 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 1835 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
| @@ -1834,14 +1840,16 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) | |||
| 1834 | rt->rt_type = res->type; | 1840 | rt->rt_type = res->type; |
| 1835 | } | 1841 | } |
| 1836 | 1842 | ||
| 1843 | /* called in rcu_read_lock() section */ | ||
| 1837 | static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | 1844 | static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, |
| 1838 | u8 tos, struct net_device *dev, int our) | 1845 | u8 tos, struct net_device *dev, int our) |
| 1839 | { | 1846 | { |
| 1840 | unsigned hash; | 1847 | unsigned int hash; |
| 1841 | struct rtable *rth; | 1848 | struct rtable *rth; |
| 1842 | __be32 spec_dst; | 1849 | __be32 spec_dst; |
| 1843 | struct in_device *in_dev = in_dev_get(dev); | 1850 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
| 1844 | u32 itag = 0; | 1851 | u32 itag = 0; |
| 1852 | int err; | ||
| 1845 | 1853 | ||
| 1846 | /* Primary sanity checks. */ | 1854 | /* Primary sanity checks. */ |
| 1847 | 1855 | ||
| @@ -1856,20 +1864,23 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
| 1856 | if (!ipv4_is_local_multicast(daddr)) | 1864 | if (!ipv4_is_local_multicast(daddr)) |
| 1857 | goto e_inval; | 1865 | goto e_inval; |
| 1858 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); | 1866 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); |
| 1859 | } else if (fib_validate_source(saddr, 0, tos, 0, | 1867 | } else { |
| 1860 | dev, &spec_dst, &itag, 0) < 0) | 1868 | err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, |
| 1861 | goto e_inval; | 1869 | &itag, 0); |
| 1862 | 1870 | if (err < 0) | |
| 1871 | goto e_err; | ||
| 1872 | } | ||
| 1863 | rth = dst_alloc(&ipv4_dst_ops); | 1873 | rth = dst_alloc(&ipv4_dst_ops); |
| 1864 | if (!rth) | 1874 | if (!rth) |
| 1865 | goto e_nobufs; | 1875 | goto e_nobufs; |
| 1866 | 1876 | ||
| 1867 | rth->u.dst.output= ip_rt_bug; | 1877 | rth->dst.output = ip_rt_bug; |
| 1878 | rth->dst.obsolete = -1; | ||
| 1868 | 1879 | ||
| 1869 | atomic_set(&rth->u.dst.__refcnt, 1); | 1880 | atomic_set(&rth->dst.__refcnt, 1); |
| 1870 | rth->u.dst.flags= DST_HOST; | 1881 | rth->dst.flags= DST_HOST; |
| 1871 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | 1882 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) |
| 1872 | rth->u.dst.flags |= DST_NOPOLICY; | 1883 | rth->dst.flags |= DST_NOPOLICY; |
| 1873 | rth->fl.fl4_dst = daddr; | 1884 | rth->fl.fl4_dst = daddr; |
| 1874 | rth->rt_dst = daddr; | 1885 | rth->rt_dst = daddr; |
| 1875 | rth->fl.fl4_tos = tos; | 1886 | rth->fl.fl4_tos = tos; |
| @@ -1877,13 +1888,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
| 1877 | rth->fl.fl4_src = saddr; | 1888 | rth->fl.fl4_src = saddr; |
| 1878 | rth->rt_src = saddr; | 1889 | rth->rt_src = saddr; |
| 1879 | #ifdef CONFIG_NET_CLS_ROUTE | 1890 | #ifdef CONFIG_NET_CLS_ROUTE |
| 1880 | rth->u.dst.tclassid = itag; | 1891 | rth->dst.tclassid = itag; |
| 1881 | #endif | 1892 | #endif |
| 1882 | rth->rt_iif = | 1893 | rth->rt_iif = |
| 1883 | rth->fl.iif = dev->ifindex; | 1894 | rth->fl.iif = dev->ifindex; |
| 1884 | rth->u.dst.dev = init_net.loopback_dev; | 1895 | rth->dst.dev = init_net.loopback_dev; |
| 1885 | dev_hold(rth->u.dst.dev); | 1896 | dev_hold(rth->dst.dev); |
| 1886 | rth->idev = in_dev_get(rth->u.dst.dev); | 1897 | rth->idev = in_dev_get(rth->dst.dev); |
| 1887 | rth->fl.oif = 0; | 1898 | rth->fl.oif = 0; |
| 1888 | rth->rt_gateway = daddr; | 1899 | rth->rt_gateway = daddr; |
| 1889 | rth->rt_spec_dst= spec_dst; | 1900 | rth->rt_spec_dst= spec_dst; |
| @@ -1891,27 +1902,25 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
| 1891 | rth->rt_flags = RTCF_MULTICAST; | 1902 | rth->rt_flags = RTCF_MULTICAST; |
| 1892 | rth->rt_type = RTN_MULTICAST; | 1903 | rth->rt_type = RTN_MULTICAST; |
| 1893 | if (our) { | 1904 | if (our) { |
| 1894 | rth->u.dst.input= ip_local_deliver; | 1905 | rth->dst.input= ip_local_deliver; |
| 1895 | rth->rt_flags |= RTCF_LOCAL; | 1906 | rth->rt_flags |= RTCF_LOCAL; |
| 1896 | } | 1907 | } |
| 1897 | 1908 | ||
| 1898 | #ifdef CONFIG_IP_MROUTE | 1909 | #ifdef CONFIG_IP_MROUTE |
| 1899 | if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev)) | 1910 | if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev)) |
| 1900 | rth->u.dst.input = ip_mr_input; | 1911 | rth->dst.input = ip_mr_input; |
| 1901 | #endif | 1912 | #endif |
| 1902 | RT_CACHE_STAT_INC(in_slow_mc); | 1913 | RT_CACHE_STAT_INC(in_slow_mc); |
| 1903 | 1914 | ||
| 1904 | in_dev_put(in_dev); | ||
| 1905 | hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); | 1915 | hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); |
| 1906 | return rt_intern_hash(hash, rth, NULL, skb); | 1916 | return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex); |
| 1907 | 1917 | ||
| 1908 | e_nobufs: | 1918 | e_nobufs: |
| 1909 | in_dev_put(in_dev); | ||
| 1910 | return -ENOBUFS; | 1919 | return -ENOBUFS; |
| 1911 | |||
| 1912 | e_inval: | 1920 | e_inval: |
| 1913 | in_dev_put(in_dev); | ||
| 1914 | return -EINVAL; | 1921 | return -EINVAL; |
| 1922 | e_err: | ||
| 1923 | return err; | ||
| 1915 | } | 1924 | } |
| 1916 | 1925 | ||
| 1917 | 1926 | ||
| @@ -1945,22 +1954,22 @@ static void ip_handle_martian_source(struct net_device *dev, | |||
| 1945 | #endif | 1954 | #endif |
| 1946 | } | 1955 | } |
| 1947 | 1956 | ||
| 1957 | /* called in rcu_read_lock() section */ | ||
| 1948 | static int __mkroute_input(struct sk_buff *skb, | 1958 | static int __mkroute_input(struct sk_buff *skb, |
| 1949 | struct fib_result *res, | 1959 | struct fib_result *res, |
| 1950 | struct in_device *in_dev, | 1960 | struct in_device *in_dev, |
| 1951 | __be32 daddr, __be32 saddr, u32 tos, | 1961 | __be32 daddr, __be32 saddr, u32 tos, |
| 1952 | struct rtable **result) | 1962 | struct rtable **result) |
| 1953 | { | 1963 | { |
| 1954 | |||
| 1955 | struct rtable *rth; | 1964 | struct rtable *rth; |
| 1956 | int err; | 1965 | int err; |
| 1957 | struct in_device *out_dev; | 1966 | struct in_device *out_dev; |
| 1958 | unsigned flags = 0; | 1967 | unsigned int flags = 0; |
| 1959 | __be32 spec_dst; | 1968 | __be32 spec_dst; |
| 1960 | u32 itag; | 1969 | u32 itag; |
| 1961 | 1970 | ||
| 1962 | /* get a working reference to the output device */ | 1971 | /* get a working reference to the output device */ |
| 1963 | out_dev = in_dev_get(FIB_RES_DEV(*res)); | 1972 | out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); |
| 1964 | if (out_dev == NULL) { | 1973 | if (out_dev == NULL) { |
| 1965 | if (net_ratelimit()) | 1974 | if (net_ratelimit()) |
| 1966 | printk(KERN_CRIT "Bug in ip_route_input" \ | 1975 | printk(KERN_CRIT "Bug in ip_route_input" \ |
| @@ -1975,7 +1984,6 @@ static int __mkroute_input(struct sk_buff *skb, | |||
| 1975 | ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, | 1984 | ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, |
| 1976 | saddr); | 1985 | saddr); |
| 1977 | 1986 | ||
| 1978 | err = -EINVAL; | ||
| 1979 | goto cleanup; | 1987 | goto cleanup; |
| 1980 | } | 1988 | } |
| 1981 | 1989 | ||
| @@ -1990,8 +1998,13 @@ static int __mkroute_input(struct sk_buff *skb, | |||
| 1990 | if (skb->protocol != htons(ETH_P_IP)) { | 1998 | if (skb->protocol != htons(ETH_P_IP)) { |
| 1991 | /* Not IP (i.e. ARP). Do not create route, if it is | 1999 | /* Not IP (i.e. ARP). Do not create route, if it is |
| 1992 | * invalid for proxy arp. DNAT routes are always valid. | 2000 | * invalid for proxy arp. DNAT routes are always valid. |
| 2001 | * | ||
| 2002 | * Proxy arp feature have been extended to allow, ARP | ||
| 2003 | * replies back to the same interface, to support | ||
| 2004 | * Private VLAN switch technologies. See arp.c. | ||
| 1993 | */ | 2005 | */ |
| 1994 | if (out_dev == in_dev) { | 2006 | if (out_dev == in_dev && |
| 2007 | IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) { | ||
| 1995 | err = -EINVAL; | 2008 | err = -EINVAL; |
| 1996 | goto cleanup; | 2009 | goto cleanup; |
| 1997 | } | 2010 | } |
| @@ -2004,12 +2017,12 @@ static int __mkroute_input(struct sk_buff *skb, | |||
| 2004 | goto cleanup; | 2017 | goto cleanup; |
| 2005 | } | 2018 | } |
| 2006 | 2019 | ||
| 2007 | atomic_set(&rth->u.dst.__refcnt, 1); | 2020 | atomic_set(&rth->dst.__refcnt, 1); |
| 2008 | rth->u.dst.flags= DST_HOST; | 2021 | rth->dst.flags= DST_HOST; |
| 2009 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | 2022 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) |
| 2010 | rth->u.dst.flags |= DST_NOPOLICY; | 2023 | rth->dst.flags |= DST_NOPOLICY; |
| 2011 | if (IN_DEV_CONF_GET(out_dev, NOXFRM)) | 2024 | if (IN_DEV_CONF_GET(out_dev, NOXFRM)) |
| 2012 | rth->u.dst.flags |= DST_NOXFRM; | 2025 | rth->dst.flags |= DST_NOXFRM; |
| 2013 | rth->fl.fl4_dst = daddr; | 2026 | rth->fl.fl4_dst = daddr; |
| 2014 | rth->rt_dst = daddr; | 2027 | rth->rt_dst = daddr; |
| 2015 | rth->fl.fl4_tos = tos; | 2028 | rth->fl.fl4_tos = tos; |
| @@ -2019,15 +2032,16 @@ static int __mkroute_input(struct sk_buff *skb, | |||
| 2019 | rth->rt_gateway = daddr; | 2032 | rth->rt_gateway = daddr; |
| 2020 | rth->rt_iif = | 2033 | rth->rt_iif = |
| 2021 | rth->fl.iif = in_dev->dev->ifindex; | 2034 | rth->fl.iif = in_dev->dev->ifindex; |
| 2022 | rth->u.dst.dev = (out_dev)->dev; | 2035 | rth->dst.dev = (out_dev)->dev; |
| 2023 | dev_hold(rth->u.dst.dev); | 2036 | dev_hold(rth->dst.dev); |
| 2024 | rth->idev = in_dev_get(rth->u.dst.dev); | 2037 | rth->idev = in_dev_get(rth->dst.dev); |
| 2025 | rth->fl.oif = 0; | 2038 | rth->fl.oif = 0; |
| 2026 | rth->rt_spec_dst= spec_dst; | 2039 | rth->rt_spec_dst= spec_dst; |
| 2027 | 2040 | ||
| 2028 | rth->u.dst.input = ip_forward; | 2041 | rth->dst.obsolete = -1; |
| 2029 | rth->u.dst.output = ip_output; | 2042 | rth->dst.input = ip_forward; |
| 2030 | rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev)); | 2043 | rth->dst.output = ip_output; |
| 2044 | rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); | ||
| 2031 | 2045 | ||
| 2032 | rt_set_nexthop(rth, res, itag); | 2046 | rt_set_nexthop(rth, res, itag); |
| 2033 | 2047 | ||
| @@ -2036,8 +2050,6 @@ static int __mkroute_input(struct sk_buff *skb, | |||
| 2036 | *result = rth; | 2050 | *result = rth; |
| 2037 | err = 0; | 2051 | err = 0; |
| 2038 | cleanup: | 2052 | cleanup: |
| 2039 | /* release the working reference to the output device */ | ||
| 2040 | in_dev_put(out_dev); | ||
| 2041 | return err; | 2053 | return err; |
| 2042 | } | 2054 | } |
| 2043 | 2055 | ||
| @@ -2063,8 +2075,8 @@ static int ip_mkroute_input(struct sk_buff *skb, | |||
| 2063 | 2075 | ||
| 2064 | /* put it into the cache */ | 2076 | /* put it into the cache */ |
| 2065 | hash = rt_hash(daddr, saddr, fl->iif, | 2077 | hash = rt_hash(daddr, saddr, fl->iif, |
| 2066 | rt_genid(dev_net(rth->u.dst.dev))); | 2078 | rt_genid(dev_net(rth->dst.dev))); |
| 2067 | return rt_intern_hash(hash, rth, NULL, skb); | 2079 | return rt_intern_hash(hash, rth, NULL, skb, fl->iif); |
| 2068 | } | 2080 | } |
| 2069 | 2081 | ||
| 2070 | /* | 2082 | /* |
| @@ -2081,7 +2093,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
| 2081 | u8 tos, struct net_device *dev) | 2093 | u8 tos, struct net_device *dev) |
| 2082 | { | 2094 | { |
| 2083 | struct fib_result res; | 2095 | struct fib_result res; |
| 2084 | struct in_device *in_dev = in_dev_get(dev); | 2096 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
| 2085 | struct flowi fl = { .nl_u = { .ip4_u = | 2097 | struct flowi fl = { .nl_u = { .ip4_u = |
| 2086 | { .daddr = daddr, | 2098 | { .daddr = daddr, |
| 2087 | .saddr = saddr, | 2099 | .saddr = saddr, |
| @@ -2141,13 +2153,12 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
| 2141 | goto brd_input; | 2153 | goto brd_input; |
| 2142 | 2154 | ||
| 2143 | if (res.type == RTN_LOCAL) { | 2155 | if (res.type == RTN_LOCAL) { |
| 2144 | int result; | 2156 | err = fib_validate_source(saddr, daddr, tos, |
| 2145 | result = fib_validate_source(saddr, daddr, tos, | ||
| 2146 | net->loopback_dev->ifindex, | 2157 | net->loopback_dev->ifindex, |
| 2147 | dev, &spec_dst, &itag, skb->mark); | 2158 | dev, &spec_dst, &itag, skb->mark); |
| 2148 | if (result < 0) | 2159 | if (err < 0) |
| 2149 | goto martian_source; | 2160 | goto martian_source_keep_err; |
| 2150 | if (result) | 2161 | if (err) |
| 2151 | flags |= RTCF_DIRECTSRC; | 2162 | flags |= RTCF_DIRECTSRC; |
| 2152 | spec_dst = daddr; | 2163 | spec_dst = daddr; |
| 2153 | goto local_input; | 2164 | goto local_input; |
| @@ -2160,7 +2171,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
| 2160 | 2171 | ||
| 2161 | err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); | 2172 | err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); |
| 2162 | done: | 2173 | done: |
| 2163 | in_dev_put(in_dev); | ||
| 2164 | if (free_res) | 2174 | if (free_res) |
| 2165 | fib_res_put(&res); | 2175 | fib_res_put(&res); |
| 2166 | out: return err; | 2176 | out: return err; |
| @@ -2175,7 +2185,7 @@ brd_input: | |||
| 2175 | err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, | 2185 | err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, |
| 2176 | &itag, skb->mark); | 2186 | &itag, skb->mark); |
| 2177 | if (err < 0) | 2187 | if (err < 0) |
| 2178 | goto martian_source; | 2188 | goto martian_source_keep_err; |
| 2179 | if (err) | 2189 | if (err) |
| 2180 | flags |= RTCF_DIRECTSRC; | 2190 | flags |= RTCF_DIRECTSRC; |
| 2181 | } | 2191 | } |
| @@ -2188,13 +2198,14 @@ local_input: | |||
| 2188 | if (!rth) | 2198 | if (!rth) |
| 2189 | goto e_nobufs; | 2199 | goto e_nobufs; |
| 2190 | 2200 | ||
| 2191 | rth->u.dst.output= ip_rt_bug; | 2201 | rth->dst.output= ip_rt_bug; |
| 2202 | rth->dst.obsolete = -1; | ||
| 2192 | rth->rt_genid = rt_genid(net); | 2203 | rth->rt_genid = rt_genid(net); |
| 2193 | 2204 | ||
| 2194 | atomic_set(&rth->u.dst.__refcnt, 1); | 2205 | atomic_set(&rth->dst.__refcnt, 1); |
| 2195 | rth->u.dst.flags= DST_HOST; | 2206 | rth->dst.flags= DST_HOST; |
| 2196 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | 2207 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) |
| 2197 | rth->u.dst.flags |= DST_NOPOLICY; | 2208 | rth->dst.flags |= DST_NOPOLICY; |
| 2198 | rth->fl.fl4_dst = daddr; | 2209 | rth->fl.fl4_dst = daddr; |
| 2199 | rth->rt_dst = daddr; | 2210 | rth->rt_dst = daddr; |
| 2200 | rth->fl.fl4_tos = tos; | 2211 | rth->fl.fl4_tos = tos; |
| @@ -2202,25 +2213,25 @@ local_input: | |||
| 2202 | rth->fl.fl4_src = saddr; | 2213 | rth->fl.fl4_src = saddr; |
| 2203 | rth->rt_src = saddr; | 2214 | rth->rt_src = saddr; |
| 2204 | #ifdef CONFIG_NET_CLS_ROUTE | 2215 | #ifdef CONFIG_NET_CLS_ROUTE |
| 2205 | rth->u.dst.tclassid = itag; | 2216 | rth->dst.tclassid = itag; |
| 2206 | #endif | 2217 | #endif |
| 2207 | rth->rt_iif = | 2218 | rth->rt_iif = |
| 2208 | rth->fl.iif = dev->ifindex; | 2219 | rth->fl.iif = dev->ifindex; |
| 2209 | rth->u.dst.dev = net->loopback_dev; | 2220 | rth->dst.dev = net->loopback_dev; |
| 2210 | dev_hold(rth->u.dst.dev); | 2221 | dev_hold(rth->dst.dev); |
| 2211 | rth->idev = in_dev_get(rth->u.dst.dev); | 2222 | rth->idev = in_dev_get(rth->dst.dev); |
| 2212 | rth->rt_gateway = daddr; | 2223 | rth->rt_gateway = daddr; |
| 2213 | rth->rt_spec_dst= spec_dst; | 2224 | rth->rt_spec_dst= spec_dst; |
| 2214 | rth->u.dst.input= ip_local_deliver; | 2225 | rth->dst.input= ip_local_deliver; |
| 2215 | rth->rt_flags = flags|RTCF_LOCAL; | 2226 | rth->rt_flags = flags|RTCF_LOCAL; |
| 2216 | if (res.type == RTN_UNREACHABLE) { | 2227 | if (res.type == RTN_UNREACHABLE) { |
| 2217 | rth->u.dst.input= ip_error; | 2228 | rth->dst.input= ip_error; |
| 2218 | rth->u.dst.error= -err; | 2229 | rth->dst.error= -err; |
| 2219 | rth->rt_flags &= ~RTCF_LOCAL; | 2230 | rth->rt_flags &= ~RTCF_LOCAL; |
| 2220 | } | 2231 | } |
| 2221 | rth->rt_type = res.type; | 2232 | rth->rt_type = res.type; |
| 2222 | hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); | 2233 | hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); |
| 2223 | err = rt_intern_hash(hash, rth, NULL, skb); | 2234 | err = rt_intern_hash(hash, rth, NULL, skb, fl.iif); |
| 2224 | goto done; | 2235 | goto done; |
| 2225 | 2236 | ||
| 2226 | no_route: | 2237 | no_route: |
| @@ -2255,46 +2266,54 @@ e_nobufs: | |||
| 2255 | goto done; | 2266 | goto done; |
| 2256 | 2267 | ||
| 2257 | martian_source: | 2268 | martian_source: |
| 2269 | err = -EINVAL; | ||
| 2270 | martian_source_keep_err: | ||
| 2258 | ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); | 2271 | ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); |
| 2259 | goto e_inval; | 2272 | goto done; |
| 2260 | } | 2273 | } |
| 2261 | 2274 | ||
| 2262 | int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, | 2275 | int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, |
| 2263 | u8 tos, struct net_device *dev) | 2276 | u8 tos, struct net_device *dev, bool noref) |
| 2264 | { | 2277 | { |
| 2265 | struct rtable * rth; | 2278 | struct rtable * rth; |
| 2266 | unsigned hash; | 2279 | unsigned hash; |
| 2267 | int iif = dev->ifindex; | 2280 | int iif = dev->ifindex; |
| 2268 | struct net *net; | 2281 | struct net *net; |
| 2282 | int res; | ||
| 2269 | 2283 | ||
| 2270 | net = dev_net(dev); | 2284 | net = dev_net(dev); |
| 2271 | 2285 | ||
| 2286 | rcu_read_lock(); | ||
| 2287 | |||
| 2272 | if (!rt_caching(net)) | 2288 | if (!rt_caching(net)) |
| 2273 | goto skip_cache; | 2289 | goto skip_cache; |
| 2274 | 2290 | ||
| 2275 | tos &= IPTOS_RT_MASK; | 2291 | tos &= IPTOS_RT_MASK; |
| 2276 | hash = rt_hash(daddr, saddr, iif, rt_genid(net)); | 2292 | hash = rt_hash(daddr, saddr, iif, rt_genid(net)); |
| 2277 | 2293 | ||
| 2278 | rcu_read_lock(); | ||
| 2279 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; | 2294 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; |
| 2280 | rth = rcu_dereference(rth->u.dst.rt_next)) { | 2295 | rth = rcu_dereference(rth->dst.rt_next)) { |
| 2281 | if (((rth->fl.fl4_dst ^ daddr) | | 2296 | if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | |
| 2282 | (rth->fl.fl4_src ^ saddr) | | 2297 | ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | |
| 2283 | (rth->fl.iif ^ iif) | | 2298 | (rth->fl.iif ^ iif) | |
| 2284 | rth->fl.oif | | 2299 | rth->fl.oif | |
| 2285 | (rth->fl.fl4_tos ^ tos)) == 0 && | 2300 | (rth->fl.fl4_tos ^ tos)) == 0 && |
| 2286 | rth->fl.mark == skb->mark && | 2301 | rth->fl.mark == skb->mark && |
| 2287 | net_eq(dev_net(rth->u.dst.dev), net) && | 2302 | net_eq(dev_net(rth->dst.dev), net) && |
| 2288 | !rt_is_expired(rth)) { | 2303 | !rt_is_expired(rth)) { |
| 2289 | dst_use(&rth->u.dst, jiffies); | 2304 | if (noref) { |
| 2305 | dst_use_noref(&rth->dst, jiffies); | ||
| 2306 | skb_dst_set_noref(skb, &rth->dst); | ||
| 2307 | } else { | ||
| 2308 | dst_use(&rth->dst, jiffies); | ||
| 2309 | skb_dst_set(skb, &rth->dst); | ||
| 2310 | } | ||
| 2290 | RT_CACHE_STAT_INC(in_hit); | 2311 | RT_CACHE_STAT_INC(in_hit); |
| 2291 | rcu_read_unlock(); | 2312 | rcu_read_unlock(); |
| 2292 | skb_dst_set(skb, &rth->u.dst); | ||
| 2293 | return 0; | 2313 | return 0; |
| 2294 | } | 2314 | } |
| 2295 | RT_CACHE_STAT_INC(in_hlist_search); | 2315 | RT_CACHE_STAT_INC(in_hlist_search); |
| 2296 | } | 2316 | } |
| 2297 | rcu_read_unlock(); | ||
| 2298 | 2317 | ||
| 2299 | skip_cache: | 2318 | skip_cache: |
| 2300 | /* Multicast recognition logic is moved from route cache to here. | 2319 | /* Multicast recognition logic is moved from route cache to here. |
| @@ -2309,12 +2328,11 @@ skip_cache: | |||
| 2309 | route cache entry is created eventually. | 2328 | route cache entry is created eventually. |
| 2310 | */ | 2329 | */ |
| 2311 | if (ipv4_is_multicast(daddr)) { | 2330 | if (ipv4_is_multicast(daddr)) { |
| 2312 | struct in_device *in_dev; | 2331 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
| 2313 | 2332 | ||
| 2314 | rcu_read_lock(); | 2333 | if (in_dev) { |
| 2315 | if ((in_dev = __in_dev_get_rcu(dev)) != NULL) { | ||
| 2316 | int our = ip_check_mc(in_dev, daddr, saddr, | 2334 | int our = ip_check_mc(in_dev, daddr, saddr, |
| 2317 | ip_hdr(skb)->protocol); | 2335 | ip_hdr(skb)->protocol); |
| 2318 | if (our | 2336 | if (our |
| 2319 | #ifdef CONFIG_IP_MROUTE | 2337 | #ifdef CONFIG_IP_MROUTE |
| 2320 | || | 2338 | || |
| @@ -2322,16 +2340,20 @@ skip_cache: | |||
| 2322 | IN_DEV_MFORWARD(in_dev)) | 2340 | IN_DEV_MFORWARD(in_dev)) |
| 2323 | #endif | 2341 | #endif |
| 2324 | ) { | 2342 | ) { |
| 2343 | int res = ip_route_input_mc(skb, daddr, saddr, | ||
| 2344 | tos, dev, our); | ||
| 2325 | rcu_read_unlock(); | 2345 | rcu_read_unlock(); |
| 2326 | return ip_route_input_mc(skb, daddr, saddr, | 2346 | return res; |
| 2327 | tos, dev, our); | ||
| 2328 | } | 2347 | } |
| 2329 | } | 2348 | } |
| 2330 | rcu_read_unlock(); | 2349 | rcu_read_unlock(); |
| 2331 | return -EINVAL; | 2350 | return -EINVAL; |
| 2332 | } | 2351 | } |
| 2333 | return ip_route_input_slow(skb, daddr, saddr, tos, dev); | 2352 | res = ip_route_input_slow(skb, daddr, saddr, tos, dev); |
| 2353 | rcu_read_unlock(); | ||
| 2354 | return res; | ||
| 2334 | } | 2355 | } |
| 2356 | EXPORT_SYMBOL(ip_route_input_common); | ||
| 2335 | 2357 | ||
| 2336 | static int __mkroute_output(struct rtable **result, | 2358 | static int __mkroute_output(struct rtable **result, |
| 2337 | struct fib_result *res, | 2359 | struct fib_result *res, |
| @@ -2391,12 +2413,12 @@ static int __mkroute_output(struct rtable **result, | |||
| 2391 | goto cleanup; | 2413 | goto cleanup; |
| 2392 | } | 2414 | } |
| 2393 | 2415 | ||
| 2394 | atomic_set(&rth->u.dst.__refcnt, 1); | 2416 | atomic_set(&rth->dst.__refcnt, 1); |
| 2395 | rth->u.dst.flags= DST_HOST; | 2417 | rth->dst.flags= DST_HOST; |
| 2396 | if (IN_DEV_CONF_GET(in_dev, NOXFRM)) | 2418 | if (IN_DEV_CONF_GET(in_dev, NOXFRM)) |
| 2397 | rth->u.dst.flags |= DST_NOXFRM; | 2419 | rth->dst.flags |= DST_NOXFRM; |
| 2398 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | 2420 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) |
| 2399 | rth->u.dst.flags |= DST_NOPOLICY; | 2421 | rth->dst.flags |= DST_NOPOLICY; |
| 2400 | 2422 | ||
| 2401 | rth->fl.fl4_dst = oldflp->fl4_dst; | 2423 | rth->fl.fl4_dst = oldflp->fl4_dst; |
| 2402 | rth->fl.fl4_tos = tos; | 2424 | rth->fl.fl4_tos = tos; |
| @@ -2408,34 +2430,35 @@ static int __mkroute_output(struct rtable **result, | |||
| 2408 | rth->rt_iif = oldflp->oif ? : dev_out->ifindex; | 2430 | rth->rt_iif = oldflp->oif ? : dev_out->ifindex; |
| 2409 | /* get references to the devices that are to be hold by the routing | 2431 | /* get references to the devices that are to be hold by the routing |
| 2410 | cache entry */ | 2432 | cache entry */ |
| 2411 | rth->u.dst.dev = dev_out; | 2433 | rth->dst.dev = dev_out; |
| 2412 | dev_hold(dev_out); | 2434 | dev_hold(dev_out); |
| 2413 | rth->idev = in_dev_get(dev_out); | 2435 | rth->idev = in_dev_get(dev_out); |
| 2414 | rth->rt_gateway = fl->fl4_dst; | 2436 | rth->rt_gateway = fl->fl4_dst; |
| 2415 | rth->rt_spec_dst= fl->fl4_src; | 2437 | rth->rt_spec_dst= fl->fl4_src; |
| 2416 | 2438 | ||
| 2417 | rth->u.dst.output=ip_output; | 2439 | rth->dst.output=ip_output; |
| 2440 | rth->dst.obsolete = -1; | ||
| 2418 | rth->rt_genid = rt_genid(dev_net(dev_out)); | 2441 | rth->rt_genid = rt_genid(dev_net(dev_out)); |
| 2419 | 2442 | ||
| 2420 | RT_CACHE_STAT_INC(out_slow_tot); | 2443 | RT_CACHE_STAT_INC(out_slow_tot); |
| 2421 | 2444 | ||
| 2422 | if (flags & RTCF_LOCAL) { | 2445 | if (flags & RTCF_LOCAL) { |
| 2423 | rth->u.dst.input = ip_local_deliver; | 2446 | rth->dst.input = ip_local_deliver; |
| 2424 | rth->rt_spec_dst = fl->fl4_dst; | 2447 | rth->rt_spec_dst = fl->fl4_dst; |
| 2425 | } | 2448 | } |
| 2426 | if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { | 2449 | if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { |
| 2427 | rth->rt_spec_dst = fl->fl4_src; | 2450 | rth->rt_spec_dst = fl->fl4_src; |
| 2428 | if (flags & RTCF_LOCAL && | 2451 | if (flags & RTCF_LOCAL && |
| 2429 | !(dev_out->flags & IFF_LOOPBACK)) { | 2452 | !(dev_out->flags & IFF_LOOPBACK)) { |
| 2430 | rth->u.dst.output = ip_mc_output; | 2453 | rth->dst.output = ip_mc_output; |
| 2431 | RT_CACHE_STAT_INC(out_slow_mc); | 2454 | RT_CACHE_STAT_INC(out_slow_mc); |
| 2432 | } | 2455 | } |
| 2433 | #ifdef CONFIG_IP_MROUTE | 2456 | #ifdef CONFIG_IP_MROUTE |
| 2434 | if (res->type == RTN_MULTICAST) { | 2457 | if (res->type == RTN_MULTICAST) { |
| 2435 | if (IN_DEV_MFORWARD(in_dev) && | 2458 | if (IN_DEV_MFORWARD(in_dev) && |
| 2436 | !ipv4_is_local_multicast(oldflp->fl4_dst)) { | 2459 | !ipv4_is_local_multicast(oldflp->fl4_dst)) { |
| 2437 | rth->u.dst.input = ip_mr_input; | 2460 | rth->dst.input = ip_mr_input; |
| 2438 | rth->u.dst.output = ip_mc_output; | 2461 | rth->dst.output = ip_mc_output; |
| 2439 | } | 2462 | } |
| 2440 | } | 2463 | } |
| 2441 | #endif | 2464 | #endif |
| @@ -2466,7 +2489,7 @@ static int ip_mkroute_output(struct rtable **rp, | |||
| 2466 | if (err == 0) { | 2489 | if (err == 0) { |
| 2467 | hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, | 2490 | hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, |
| 2468 | rt_genid(dev_net(dev_out))); | 2491 | rt_genid(dev_net(dev_out))); |
| 2469 | err = rt_intern_hash(hash, rth, rp, NULL); | 2492 | err = rt_intern_hash(hash, rth, rp, NULL, oldflp->oif); |
| 2470 | } | 2493 | } |
| 2471 | 2494 | ||
| 2472 | return err; | 2495 | return err; |
| @@ -2689,8 +2712,8 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, | |||
| 2689 | hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); | 2712 | hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); |
| 2690 | 2713 | ||
| 2691 | rcu_read_lock_bh(); | 2714 | rcu_read_lock_bh(); |
| 2692 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; | 2715 | for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; |
| 2693 | rth = rcu_dereference(rth->u.dst.rt_next)) { | 2716 | rth = rcu_dereference_bh(rth->dst.rt_next)) { |
| 2694 | if (rth->fl.fl4_dst == flp->fl4_dst && | 2717 | if (rth->fl.fl4_dst == flp->fl4_dst && |
| 2695 | rth->fl.fl4_src == flp->fl4_src && | 2718 | rth->fl.fl4_src == flp->fl4_src && |
| 2696 | rth->fl.iif == 0 && | 2719 | rth->fl.iif == 0 && |
| @@ -2698,9 +2721,9 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, | |||
| 2698 | rth->fl.mark == flp->mark && | 2721 | rth->fl.mark == flp->mark && |
| 2699 | !((rth->fl.fl4_tos ^ flp->fl4_tos) & | 2722 | !((rth->fl.fl4_tos ^ flp->fl4_tos) & |
| 2700 | (IPTOS_RT_MASK | RTO_ONLINK)) && | 2723 | (IPTOS_RT_MASK | RTO_ONLINK)) && |
| 2701 | net_eq(dev_net(rth->u.dst.dev), net) && | 2724 | net_eq(dev_net(rth->dst.dev), net) && |
| 2702 | !rt_is_expired(rth)) { | 2725 | !rt_is_expired(rth)) { |
| 2703 | dst_use(&rth->u.dst, jiffies); | 2726 | dst_use(&rth->dst, jiffies); |
| 2704 | RT_CACHE_STAT_INC(out_hit); | 2727 | RT_CACHE_STAT_INC(out_hit); |
| 2705 | rcu_read_unlock_bh(); | 2728 | rcu_read_unlock_bh(); |
| 2706 | *rp = rth; | 2729 | *rp = rth; |
| @@ -2713,9 +2736,13 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, | |||
| 2713 | slow_output: | 2736 | slow_output: |
| 2714 | return ip_route_output_slow(net, rp, flp); | 2737 | return ip_route_output_slow(net, rp, flp); |
| 2715 | } | 2738 | } |
| 2716 | |||
| 2717 | EXPORT_SYMBOL_GPL(__ip_route_output_key); | 2739 | EXPORT_SYMBOL_GPL(__ip_route_output_key); |
| 2718 | 2740 | ||
| 2741 | static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) | ||
| 2742 | { | ||
| 2743 | return NULL; | ||
| 2744 | } | ||
| 2745 | |||
| 2719 | static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) | 2746 | static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) |
| 2720 | { | 2747 | { |
| 2721 | } | 2748 | } |
| @@ -2724,7 +2751,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { | |||
| 2724 | .family = AF_INET, | 2751 | .family = AF_INET, |
| 2725 | .protocol = cpu_to_be16(ETH_P_IP), | 2752 | .protocol = cpu_to_be16(ETH_P_IP), |
| 2726 | .destroy = ipv4_dst_destroy, | 2753 | .destroy = ipv4_dst_destroy, |
| 2727 | .check = ipv4_dst_check, | 2754 | .check = ipv4_blackhole_dst_check, |
| 2728 | .update_pmtu = ipv4_rt_blackhole_update_pmtu, | 2755 | .update_pmtu = ipv4_rt_blackhole_update_pmtu, |
| 2729 | .entries = ATOMIC_INIT(0), | 2756 | .entries = ATOMIC_INIT(0), |
| 2730 | }; | 2757 | }; |
| @@ -2737,15 +2764,15 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi | |||
| 2737 | dst_alloc(&ipv4_dst_blackhole_ops); | 2764 | dst_alloc(&ipv4_dst_blackhole_ops); |
| 2738 | 2765 | ||
| 2739 | if (rt) { | 2766 | if (rt) { |
| 2740 | struct dst_entry *new = &rt->u.dst; | 2767 | struct dst_entry *new = &rt->dst; |
| 2741 | 2768 | ||
| 2742 | atomic_set(&new->__refcnt, 1); | 2769 | atomic_set(&new->__refcnt, 1); |
| 2743 | new->__use = 1; | 2770 | new->__use = 1; |
| 2744 | new->input = dst_discard; | 2771 | new->input = dst_discard; |
| 2745 | new->output = dst_discard; | 2772 | new->output = dst_discard; |
| 2746 | memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); | 2773 | memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); |
| 2747 | 2774 | ||
| 2748 | new->dev = ort->u.dst.dev; | 2775 | new->dev = ort->dst.dev; |
| 2749 | if (new->dev) | 2776 | if (new->dev) |
| 2750 | dev_hold(new->dev); | 2777 | dev_hold(new->dev); |
| 2751 | 2778 | ||
| @@ -2769,7 +2796,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi | |||
| 2769 | dst_free(new); | 2796 | dst_free(new); |
| 2770 | } | 2797 | } |
| 2771 | 2798 | ||
| 2772 | dst_release(&(*rp)->u.dst); | 2799 | dst_release(&(*rp)->dst); |
| 2773 | *rp = rt; | 2800 | *rp = rt; |
| 2774 | return (rt ? 0 : -ENOMEM); | 2801 | return (rt ? 0 : -ENOMEM); |
| 2775 | } | 2802 | } |
| @@ -2797,13 +2824,13 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, | |||
| 2797 | 2824 | ||
| 2798 | return 0; | 2825 | return 0; |
| 2799 | } | 2826 | } |
| 2800 | |||
| 2801 | EXPORT_SYMBOL_GPL(ip_route_output_flow); | 2827 | EXPORT_SYMBOL_GPL(ip_route_output_flow); |
| 2802 | 2828 | ||
| 2803 | int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp) | 2829 | int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp) |
| 2804 | { | 2830 | { |
| 2805 | return ip_route_output_flow(net, rp, flp, NULL, 0); | 2831 | return ip_route_output_flow(net, rp, flp, NULL, 0); |
| 2806 | } | 2832 | } |
| 2833 | EXPORT_SYMBOL(ip_route_output_key); | ||
| 2807 | 2834 | ||
| 2808 | static int rt_fill_info(struct net *net, | 2835 | static int rt_fill_info(struct net *net, |
| 2809 | struct sk_buff *skb, u32 pid, u32 seq, int event, | 2836 | struct sk_buff *skb, u32 pid, u32 seq, int event, |
| @@ -2839,11 +2866,11 @@ static int rt_fill_info(struct net *net, | |||
| 2839 | r->rtm_src_len = 32; | 2866 | r->rtm_src_len = 32; |
| 2840 | NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); | 2867 | NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); |
| 2841 | } | 2868 | } |
| 2842 | if (rt->u.dst.dev) | 2869 | if (rt->dst.dev) |
| 2843 | NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex); | 2870 | NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); |
| 2844 | #ifdef CONFIG_NET_CLS_ROUTE | 2871 | #ifdef CONFIG_NET_CLS_ROUTE |
| 2845 | if (rt->u.dst.tclassid) | 2872 | if (rt->dst.tclassid) |
| 2846 | NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid); | 2873 | NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); |
| 2847 | #endif | 2874 | #endif |
| 2848 | if (rt->fl.iif) | 2875 | if (rt->fl.iif) |
| 2849 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); | 2876 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); |
| @@ -2853,12 +2880,16 @@ static int rt_fill_info(struct net *net, | |||
| 2853 | if (rt->rt_dst != rt->rt_gateway) | 2880 | if (rt->rt_dst != rt->rt_gateway) |
| 2854 | NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); | 2881 | NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); |
| 2855 | 2882 | ||
| 2856 | if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) | 2883 | if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) |
| 2857 | goto nla_put_failure; | 2884 | goto nla_put_failure; |
| 2858 | 2885 | ||
| 2859 | error = rt->u.dst.error; | 2886 | if (rt->fl.mark) |
| 2860 | expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0; | 2887 | NLA_PUT_BE32(skb, RTA_MARK, rt->fl.mark); |
| 2888 | |||
| 2889 | error = rt->dst.error; | ||
| 2890 | expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; | ||
| 2861 | if (rt->peer) { | 2891 | if (rt->peer) { |
| 2892 | inet_peer_refcheck(rt->peer); | ||
| 2862 | id = atomic_read(&rt->peer->ip_id_count) & 0xffff; | 2893 | id = atomic_read(&rt->peer->ip_id_count) & 0xffff; |
| 2863 | if (rt->peer->tcp_ts_stamp) { | 2894 | if (rt->peer->tcp_ts_stamp) { |
| 2864 | ts = rt->peer->tcp_ts; | 2895 | ts = rt->peer->tcp_ts; |
| @@ -2889,7 +2920,7 @@ static int rt_fill_info(struct net *net, | |||
| 2889 | NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); | 2920 | NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); |
| 2890 | } | 2921 | } |
| 2891 | 2922 | ||
| 2892 | if (rtnl_put_cacheinfo(skb, &rt->u.dst, id, ts, tsage, | 2923 | if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, |
| 2893 | expires, error) < 0) | 2924 | expires, error) < 0) |
| 2894 | goto nla_put_failure; | 2925 | goto nla_put_failure; |
| 2895 | 2926 | ||
| @@ -2910,6 +2941,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
| 2910 | __be32 src = 0; | 2941 | __be32 src = 0; |
| 2911 | u32 iif; | 2942 | u32 iif; |
| 2912 | int err; | 2943 | int err; |
| 2944 | int mark; | ||
| 2913 | struct sk_buff *skb; | 2945 | struct sk_buff *skb; |
| 2914 | 2946 | ||
| 2915 | err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); | 2947 | err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); |
| @@ -2937,6 +2969,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
| 2937 | src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0; | 2969 | src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0; |
| 2938 | dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0; | 2970 | dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0; |
| 2939 | iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; | 2971 | iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; |
| 2972 | mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; | ||
| 2940 | 2973 | ||
| 2941 | if (iif) { | 2974 | if (iif) { |
| 2942 | struct net_device *dev; | 2975 | struct net_device *dev; |
| @@ -2949,13 +2982,14 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
| 2949 | 2982 | ||
| 2950 | skb->protocol = htons(ETH_P_IP); | 2983 | skb->protocol = htons(ETH_P_IP); |
| 2951 | skb->dev = dev; | 2984 | skb->dev = dev; |
| 2985 | skb->mark = mark; | ||
| 2952 | local_bh_disable(); | 2986 | local_bh_disable(); |
| 2953 | err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); | 2987 | err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); |
| 2954 | local_bh_enable(); | 2988 | local_bh_enable(); |
| 2955 | 2989 | ||
| 2956 | rt = skb_rtable(skb); | 2990 | rt = skb_rtable(skb); |
| 2957 | if (err == 0 && rt->u.dst.error) | 2991 | if (err == 0 && rt->dst.error) |
| 2958 | err = -rt->u.dst.error; | 2992 | err = -rt->dst.error; |
| 2959 | } else { | 2993 | } else { |
| 2960 | struct flowi fl = { | 2994 | struct flowi fl = { |
| 2961 | .nl_u = { | 2995 | .nl_u = { |
| @@ -2966,6 +3000,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
| 2966 | }, | 3000 | }, |
| 2967 | }, | 3001 | }, |
| 2968 | .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, | 3002 | .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, |
| 3003 | .mark = mark, | ||
| 2969 | }; | 3004 | }; |
| 2970 | err = ip_route_output_key(net, &rt, &fl); | 3005 | err = ip_route_output_key(net, &rt, &fl); |
| 2971 | } | 3006 | } |
| @@ -2973,7 +3008,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
| 2973 | if (err) | 3008 | if (err) |
| 2974 | goto errout_free; | 3009 | goto errout_free; |
| 2975 | 3010 | ||
| 2976 | skb_dst_set(skb, &rt->u.dst); | 3011 | skb_dst_set(skb, &rt->dst); |
| 2977 | if (rtm->rtm_flags & RTM_F_NOTIFY) | 3012 | if (rtm->rtm_flags & RTM_F_NOTIFY) |
| 2978 | rt->rt_flags |= RTCF_NOTIFY; | 3013 | rt->rt_flags |= RTCF_NOTIFY; |
| 2979 | 3014 | ||
| @@ -3008,13 +3043,13 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
| 3008 | if (!rt_hash_table[h].chain) | 3043 | if (!rt_hash_table[h].chain) |
| 3009 | continue; | 3044 | continue; |
| 3010 | rcu_read_lock_bh(); | 3045 | rcu_read_lock_bh(); |
| 3011 | for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; | 3046 | for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt; |
| 3012 | rt = rcu_dereference(rt->u.dst.rt_next), idx++) { | 3047 | rt = rcu_dereference_bh(rt->dst.rt_next), idx++) { |
| 3013 | if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx) | 3048 | if (!net_eq(dev_net(rt->dst.dev), net) || idx < s_idx) |
| 3014 | continue; | 3049 | continue; |
| 3015 | if (rt_is_expired(rt)) | 3050 | if (rt_is_expired(rt)) |
| 3016 | continue; | 3051 | continue; |
| 3017 | skb_dst_set(skb, dst_clone(&rt->u.dst)); | 3052 | skb_dst_set_noref(skb, &rt->dst); |
| 3018 | if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid, | 3053 | if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid, |
| 3019 | cb->nlh->nlmsg_seq, RTM_NEWROUTE, | 3054 | cb->nlh->nlmsg_seq, RTM_NEWROUTE, |
| 3020 | 1, NLM_F_MULTI) <= 0) { | 3055 | 1, NLM_F_MULTI) <= 0) { |
| @@ -3060,50 +3095,6 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, | |||
| 3060 | return -EINVAL; | 3095 | return -EINVAL; |
| 3061 | } | 3096 | } |
| 3062 | 3097 | ||
| 3063 | static void rt_secret_reschedule(int old) | ||
| 3064 | { | ||
| 3065 | struct net *net; | ||
| 3066 | int new = ip_rt_secret_interval; | ||
| 3067 | int diff = new - old; | ||
| 3068 | |||
| 3069 | if (!diff) | ||
| 3070 | return; | ||
| 3071 | |||
| 3072 | rtnl_lock(); | ||
| 3073 | for_each_net(net) { | ||
| 3074 | int deleted = del_timer_sync(&net->ipv4.rt_secret_timer); | ||
| 3075 | |||
| 3076 | if (!new) | ||
| 3077 | continue; | ||
| 3078 | |||
| 3079 | if (deleted) { | ||
| 3080 | long time = net->ipv4.rt_secret_timer.expires - jiffies; | ||
| 3081 | |||
| 3082 | if (time <= 0 || (time += diff) <= 0) | ||
| 3083 | time = 0; | ||
| 3084 | |||
| 3085 | net->ipv4.rt_secret_timer.expires = time; | ||
| 3086 | } else | ||
| 3087 | net->ipv4.rt_secret_timer.expires = new; | ||
| 3088 | |||
| 3089 | net->ipv4.rt_secret_timer.expires += jiffies; | ||
| 3090 | add_timer(&net->ipv4.rt_secret_timer); | ||
| 3091 | } | ||
| 3092 | rtnl_unlock(); | ||
| 3093 | } | ||
| 3094 | |||
| 3095 | static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write, | ||
| 3096 | void __user *buffer, size_t *lenp, | ||
| 3097 | loff_t *ppos) | ||
| 3098 | { | ||
| 3099 | int old = ip_rt_secret_interval; | ||
| 3100 | int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); | ||
| 3101 | |||
| 3102 | rt_secret_reschedule(old); | ||
| 3103 | |||
| 3104 | return ret; | ||
| 3105 | } | ||
| 3106 | |||
| 3107 | static ctl_table ipv4_route_table[] = { | 3098 | static ctl_table ipv4_route_table[] = { |
| 3108 | { | 3099 | { |
| 3109 | .procname = "gc_thresh", | 3100 | .procname = "gc_thresh", |
| @@ -3212,13 +3203,6 @@ static ctl_table ipv4_route_table[] = { | |||
| 3212 | .mode = 0644, | 3203 | .mode = 0644, |
| 3213 | .proc_handler = proc_dointvec, | 3204 | .proc_handler = proc_dointvec, |
| 3214 | }, | 3205 | }, |
| 3215 | { | ||
| 3216 | .procname = "secret_interval", | ||
| 3217 | .data = &ip_rt_secret_interval, | ||
| 3218 | .maxlen = sizeof(int), | ||
| 3219 | .mode = 0644, | ||
| 3220 | .proc_handler = ipv4_sysctl_rt_secret_interval, | ||
| 3221 | }, | ||
| 3222 | { } | 3206 | { } |
| 3223 | }; | 3207 | }; |
| 3224 | 3208 | ||
| @@ -3297,39 +3281,20 @@ static __net_initdata struct pernet_operations sysctl_route_ops = { | |||
| 3297 | }; | 3281 | }; |
| 3298 | #endif | 3282 | #endif |
| 3299 | 3283 | ||
| 3300 | 3284 | static __net_init int rt_genid_init(struct net *net) | |
| 3301 | static __net_init int rt_secret_timer_init(struct net *net) | ||
| 3302 | { | 3285 | { |
| 3303 | atomic_set(&net->ipv4.rt_genid, | 3286 | get_random_bytes(&net->ipv4.rt_genid, |
| 3304 | (int) ((num_physpages ^ (num_physpages>>8)) ^ | 3287 | sizeof(net->ipv4.rt_genid)); |
| 3305 | (jiffies ^ (jiffies >> 7)))); | ||
| 3306 | |||
| 3307 | net->ipv4.rt_secret_timer.function = rt_secret_rebuild; | ||
| 3308 | net->ipv4.rt_secret_timer.data = (unsigned long)net; | ||
| 3309 | init_timer_deferrable(&net->ipv4.rt_secret_timer); | ||
| 3310 | |||
| 3311 | if (ip_rt_secret_interval) { | ||
| 3312 | net->ipv4.rt_secret_timer.expires = | ||
| 3313 | jiffies + net_random() % ip_rt_secret_interval + | ||
| 3314 | ip_rt_secret_interval; | ||
| 3315 | add_timer(&net->ipv4.rt_secret_timer); | ||
| 3316 | } | ||
| 3317 | return 0; | 3288 | return 0; |
| 3318 | } | 3289 | } |
| 3319 | 3290 | ||
| 3320 | static __net_exit void rt_secret_timer_exit(struct net *net) | 3291 | static __net_initdata struct pernet_operations rt_genid_ops = { |
| 3321 | { | 3292 | .init = rt_genid_init, |
| 3322 | del_timer_sync(&net->ipv4.rt_secret_timer); | ||
| 3323 | } | ||
| 3324 | |||
| 3325 | static __net_initdata struct pernet_operations rt_secret_timer_ops = { | ||
| 3326 | .init = rt_secret_timer_init, | ||
| 3327 | .exit = rt_secret_timer_exit, | ||
| 3328 | }; | 3293 | }; |
| 3329 | 3294 | ||
| 3330 | 3295 | ||
| 3331 | #ifdef CONFIG_NET_CLS_ROUTE | 3296 | #ifdef CONFIG_NET_CLS_ROUTE |
| 3332 | struct ip_rt_acct *ip_rt_acct __read_mostly; | 3297 | struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; |
| 3333 | #endif /* CONFIG_NET_CLS_ROUTE */ | 3298 | #endif /* CONFIG_NET_CLS_ROUTE */ |
| 3334 | 3299 | ||
| 3335 | static __initdata unsigned long rhash_entries; | 3300 | static __initdata unsigned long rhash_entries; |
| @@ -3385,9 +3350,6 @@ int __init ip_rt_init(void) | |||
| 3385 | schedule_delayed_work(&expires_work, | 3350 | schedule_delayed_work(&expires_work, |
| 3386 | net_random() % ip_rt_gc_interval + ip_rt_gc_interval); | 3351 | net_random() % ip_rt_gc_interval + ip_rt_gc_interval); |
| 3387 | 3352 | ||
| 3388 | if (register_pernet_subsys(&rt_secret_timer_ops)) | ||
| 3389 | printk(KERN_ERR "Unable to setup rt_secret_timer\n"); | ||
| 3390 | |||
| 3391 | if (ip_rt_proc_init()) | 3353 | if (ip_rt_proc_init()) |
| 3392 | printk(KERN_ERR "Unable to create route proc files\n"); | 3354 | printk(KERN_ERR "Unable to create route proc files\n"); |
| 3393 | #ifdef CONFIG_XFRM | 3355 | #ifdef CONFIG_XFRM |
| @@ -3399,6 +3361,7 @@ int __init ip_rt_init(void) | |||
| 3399 | #ifdef CONFIG_SYSCTL | 3361 | #ifdef CONFIG_SYSCTL |
| 3400 | register_pernet_subsys(&sysctl_route_ops); | 3362 | register_pernet_subsys(&sysctl_route_ops); |
| 3401 | #endif | 3363 | #endif |
| 3364 | register_pernet_subsys(&rt_genid_ops); | ||
| 3402 | return rc; | 3365 | return rc; |
| 3403 | } | 3366 | } |
| 3404 | 3367 | ||
| @@ -3412,7 +3375,3 @@ void __init ip_static_sysctl_init(void) | |||
| 3412 | register_sysctl_paths(ipv4_path, ipv4_skeleton); | 3375 | register_sysctl_paths(ipv4_path, ipv4_skeleton); |
| 3413 | } | 3376 | } |
| 3414 | #endif | 3377 | #endif |
| 3415 | |||
| 3416 | EXPORT_SYMBOL(__ip_select_ident); | ||
| 3417 | EXPORT_SYMBOL(ip_route_input); | ||
| 3418 | EXPORT_SYMBOL(ip_route_output_key); | ||
