diff options
author		Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
committer	Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
commit		c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree		ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /net/ipv4/route.c
parent		ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent		6a00f206debf8a5c8899055726ad127dbeeed098 (diff)

Merge branch 'mpi-master' into wip-k-fmlp

Conflicts:
	litmus/sched_cedf.c

Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r--	net/ipv4/route.c	1697
1 file changed, 822 insertions(+), 875 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ac6559cb54f9..aa13ef105110 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -109,8 +109,8 @@
 #include <linux/sysctl.h>
 #endif
 
-#define RT_FL_TOS(oldflp) \
-	((u32)(oldflp->fl4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))
+#define RT_FL_TOS(oldflp4) \
+	((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))
 
 #define IP_MAX_MTU	0xFFF0
 
@@ -131,42 +131,80 @@ static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
 static int ip_rt_min_advmss __read_mostly	= 256;
 static int rt_chain_length_max __read_mostly	= 20;
 
-static struct delayed_work expires_work;
-static unsigned long expires_ljiffies;
-
 /*
  *	Interface to generic destination cache.
  */
 
 static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
+static unsigned int	 ipv4_default_advmss(const struct dst_entry *dst);
+static unsigned int	 ipv4_default_mtu(const struct dst_entry *dst);
 static void		 ipv4_dst_destroy(struct dst_entry *dst);
-static void		 ipv4_dst_ifdown(struct dst_entry *dst,
-					 struct net_device *dev, int how);
 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
 static void		 ipv4_link_failure(struct sk_buff *skb);
 static void		 ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
 static int rt_garbage_collect(struct dst_ops *ops);
 
+static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
+			    int how)
+{
+}
+
+static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
+{
+	struct rtable *rt = (struct rtable *) dst;
+	struct inet_peer *peer;
+	u32 *p = NULL;
+
+	if (!rt->peer)
+		rt_bind_peer(rt, rt->rt_dst, 1);
+
+	peer = rt->peer;
+	if (peer) {
+		u32 *old_p = __DST_METRICS_PTR(old);
+		unsigned long prev, new;
+
+		p = peer->metrics;
+		if (inet_metrics_new(peer))
+			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+
+		new = (unsigned long) p;
+		prev = cmpxchg(&dst->_metrics, old, new);
+
+		if (prev != old) {
+			p = __DST_METRICS_PTR(prev);
+			if (prev & DST_METRICS_READ_ONLY)
+				p = NULL;
+		} else {
+			if (rt->fi) {
+				fib_info_put(rt->fi);
+				rt->fi = NULL;
+			}
+		}
+	}
+	return p;
+}
 
 static struct dst_ops ipv4_dst_ops = {
	.family =		AF_INET,
	.protocol =		cpu_to_be16(ETH_P_IP),
	.gc =			rt_garbage_collect,
	.check =		ipv4_dst_check,
+	.default_advmss =	ipv4_default_advmss,
+	.default_mtu =		ipv4_default_mtu,
+	.cow_metrics =		ipv4_cow_metrics,
	.destroy =		ipv4_dst_destroy,
	.ifdown =		ipv4_dst_ifdown,
	.negative_advice =	ipv4_negative_advice,
	.link_failure =		ipv4_link_failure,
	.update_pmtu =		ip_rt_update_pmtu,
	.local_out =		__ip_local_out,
-	.entries =		ATOMIC_INIT(0),
 };
 
 #define ECN_OR_COST(class)	TC_PRIO_##class
 
 const __u8 ip_tos2prio[16] = {
	TC_PRIO_BESTEFFORT,
-	ECN_OR_COST(FILLER),
+	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BULK,
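
The new ipv4_cow_metrics() above publishes a writable per-peer metrics array with a single cmpxchg: one racer wins the publish, and losers adopt the winner's pointer instead. A minimal userspace sketch of the same publish-or-adopt pattern using C11 atomics follows; every name in it is illustrative, not kernel API.

/* Sketch of the cmpxchg publish-or-adopt pattern in ipv4_cow_metrics(). */
#include <stdatomic.h>
#include <string.h>

#define NMETRICS	16
#define READ_ONLY_FLAG	0x1UL

static _Atomic(unsigned long) metrics_word;	/* pointer | flag, like dst->_metrics */

static unsigned *cow_metrics(unsigned *writable, unsigned long old)
{
	unsigned *old_p = (unsigned *)(old & ~READ_ONLY_FLAG);
	unsigned long prev = old;

	memcpy(writable, old_p, sizeof(unsigned) * NMETRICS);

	/* One racer wins the publish; losers adopt the winner's copy,
	 * or give up if the current value is still marked read-only. */
	if (!atomic_compare_exchange_strong(&metrics_word, &prev,
					    (unsigned long)writable)) {
		if (prev & READ_ONLY_FLAG)
			return NULL;
		return (unsigned *)prev;
	}
	return writable;
}

int main(void)
{
	static unsigned ro[NMETRICS] = { 1500 };	/* shared read-only template */
	static unsigned priv[NMETRICS];
	unsigned long old = (unsigned long)ro | READ_ONLY_FLAG;

	atomic_store(&metrics_word, old);
	return cow_metrics(priv, old) == priv ? 0 : 1;
}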
@@ -199,7 +237,7 @@ const __u8 ip_tos2prio[16] = {
  */
 
 struct rt_hash_bucket {
-	struct rtable	*chain;
+	struct rtable __rcu	*chain;
 };
 
 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
@@ -281,7 +319,7 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
	struct rtable *r = NULL;
 
	for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
-		if (!rt_hash_table[st->bucket].chain)
+		if (!rcu_dereference_raw(rt_hash_table[st->bucket].chain))
			continue;
		rcu_read_lock_bh();
		r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
@@ -301,17 +339,17 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq,
 {
	struct rt_cache_iter_state *st = seq->private;
 
-	r = r->dst.rt_next;
+	r = rcu_dereference_bh(r->dst.rt_next);
	while (!r) {
		rcu_read_unlock_bh();
		do {
			if (--st->bucket < 0)
				return NULL;
-		} while (!rt_hash_table[st->bucket].chain);
+		} while (!rcu_dereference_raw(rt_hash_table[st->bucket].chain));
		rcu_read_lock_bh();
-		r = rt_hash_table[st->bucket].chain;
+		r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
	}
-	return rcu_dereference_bh(r);
+	return r;
 }
 
 static struct rtable *rt_cache_get_next(struct seq_file *seq,
@@ -382,12 +420,11 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
			(__force u32)r->rt_gateway,
			r->rt_flags, atomic_read(&r->dst.__refcnt),
			r->dst.__use, 0, (__force u32)r->rt_src,
-			(dst_metric(&r->dst, RTAX_ADVMSS) ?
-			     (int)dst_metric(&r->dst, RTAX_ADVMSS) + 40 : 0),
+			dst_metric_advmss(&r->dst) + 40,
			dst_metric(&r->dst, RTAX_WINDOW),
			(int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
			      dst_metric(&r->dst, RTAX_RTTVAR)),
-			r->fl.fl4_tos,
+			r->rt_key_tos,
			r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1,
			r->dst.hh ? (r->dst.hh->hh_output ==
				     dev_queue_xmit) : 0,
@@ -466,7 +503,7 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v)
 
	seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
		   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
-		   atomic_read(&ipv4_dst_ops.entries),
+		   dst_entries_get_slow(&ipv4_dst_ops),
		   st->in_hit,
		   st->in_slow_tot,
		   st->in_slow_mc,
@@ -510,7 +547,7 @@ static const struct file_operations rt_cpu_seq_fops = {
	.release = seq_release,
 };
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 static int rt_acct_proc_show(struct seq_file *m, void *v)
 {
	struct ip_rt_acct *dst, *src;
@@ -563,14 +600,14 @@ static int __net_init ip_rt_do_proc_init(struct net *net)
	if (!pde)
		goto err2;
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
	pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
	if (!pde)
		goto err3;
 #endif
	return 0;
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 err3:
	remove_proc_entry("rt_cache", net->proc_net_stat);
 #endif
@@ -584,7 +621,7 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net)
 {
	remove_proc_entry("rt_cache", net->proc_net_stat);
	remove_proc_entry("rt_cache", net->proc_net);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
	remove_proc_entry("rt_acct", net->proc_net);
 #endif
 }
@@ -622,13 +659,13 @@ static inline int rt_fast_clean(struct rtable *rth)
	/* Kill broadcast/multicast entries very aggresively, if they
	   collide in hash table with more useful entries */
	return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
-		rth->fl.iif && rth->dst.rt_next;
+		rt_is_input_route(rth) && rth->dst.rt_next;
 }
 
 static inline int rt_valuable(struct rtable *rth)
 {
	return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
-		rth->dst.expires;
+		(rth->peer && rth->peer->pmtu_expires);
 }
 
 static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
@@ -639,13 +676,7 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t
	if (atomic_read(&rth->dst.__refcnt))
		goto out;
 
-	ret = 1;
-	if (rth->dst.expires &&
-	    time_after_eq(jiffies, rth->dst.expires))
-		goto out;
-
	age = jiffies - rth->dst.lastuse;
-	ret = 0;
	if ((age <= tmo1 && !rt_fast_clean(rth)) ||
	    (age <= tmo2 && rt_valuable(rth)))
		goto out;
@@ -667,7 +698,7 @@ static inline u32 rt_score(struct rtable *rt)
	if (rt_valuable(rt))
		score |= (1<<31);
 
-	if (!rt->fl.iif ||
+	if (rt_is_output_route(rt) ||
	    !(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL)))
		score |= (1<<30);
 
@@ -680,22 +711,22 @@ static inline bool rt_caching(const struct net *net)
		net->ipv4.sysctl_rt_cache_rebuild_count;
 }
 
-static inline bool compare_hash_inputs(const struct flowi *fl1,
-					const struct flowi *fl2)
+static inline bool compare_hash_inputs(const struct rtable *rt1,
+				       const struct rtable *rt2)
 {
-	return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
-		((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
-		(fl1->iif ^ fl2->iif)) == 0);
+	return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
+		((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
+		(rt1->rt_iif ^ rt2->rt_iif)) == 0);
 }
 
-static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
+static inline int compare_keys(struct rtable *rt1, struct rtable *rt2)
 {
-	return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
-		((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
-		(fl1->mark ^ fl2->mark) |
-		(*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) |
-		(fl1->oif ^ fl2->oif) |
-		(fl1->iif ^ fl2->iif)) == 0;
+	return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
+		((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
+		(rt1->rt_mark ^ rt2->rt_mark) |
+		(rt1->rt_key_tos ^ rt2->rt_key_tos) |
+		(rt1->rt_oif ^ rt2->rt_oif) |
+		(rt1->rt_iif ^ rt2->rt_iif)) == 0;
 }
 
 static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
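
Both comparison helpers above fold every field comparison into one OR of XORs, so the compiler emits a single branch instead of one per field. A small self-contained sketch of the idiom, with a stand-in struct rather than the kernel's rtable:

/* Branchless key comparison: XOR each field pair, OR the results;
 * keys match iff the accumulated OR is zero. */
#include <stdint.h>
#include <stdio.h>

struct key {
	uint32_t dst, src, mark;
	int32_t	 tos, oif, iif;
};

static int keys_equal(const struct key *a, const struct key *b)
{
	return ((a->dst ^ b->dst) |
		(a->src ^ b->src) |
		(a->mark ^ b->mark) |
		(uint32_t)(a->tos ^ b->tos) |
		(uint32_t)(a->oif ^ b->oif) |
		(uint32_t)(a->iif ^ b->iif)) == 0;
}

int main(void)
{
	struct key a = { 1, 2, 3, 0, 4, 0 }, b = a;

	printf("%d\n", keys_equal(&a, &b));	/* 1: all fields match */
	b.oif = 5;
	printf("%d\n", keys_equal(&a, &b));	/* 0: one field differs */
	return 0;
}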
@@ -713,55 +744,48 @@ static inline int rt_is_expired(struct rtable *rth)
  * Can be called by a softirq or a process.
  * In the later case, we want to be reschedule if necessary
  */
-static void rt_do_flush(int process_context)
+static void rt_do_flush(struct net *net, int process_context)
 {
	unsigned int i;
	struct rtable *rth, *next;
-	struct rtable * tail;
 
	for (i = 0; i <= rt_hash_mask; i++) {
+		struct rtable __rcu **pprev;
+		struct rtable *list;
+
		if (process_context && need_resched())
			cond_resched();
-		rth = rt_hash_table[i].chain;
+		rth = rcu_dereference_raw(rt_hash_table[i].chain);
		if (!rth)
			continue;
 
		spin_lock_bh(rt_hash_lock_addr(i));
-#ifdef CONFIG_NET_NS
-	{
-	struct rtable ** prev, * p;
 
-		rth = rt_hash_table[i].chain;
+		list = NULL;
+		pprev = &rt_hash_table[i].chain;
+		rth = rcu_dereference_protected(*pprev,
			lockdep_is_held(rt_hash_lock_addr(i)));
 
-		/* defer releasing the head of the list after spin_unlock */
-		for (tail = rth; tail; tail = tail->dst.rt_next)
-			if (!rt_is_expired(tail))
-				break;
-		if (rth != tail)
-			rt_hash_table[i].chain = tail;
-
-		/* call rt_free on entries after the tail requiring flush */
-		prev = &rt_hash_table[i].chain;
-		for (p = *prev; p; p = next) {
-			next = p->dst.rt_next;
-			if (!rt_is_expired(p)) {
-				prev = &p->dst.rt_next;
+		while (rth) {
+			next = rcu_dereference_protected(rth->dst.rt_next,
				lockdep_is_held(rt_hash_lock_addr(i)));
+
+			if (!net ||
+			    net_eq(dev_net(rth->dst.dev), net)) {
+				rcu_assign_pointer(*pprev, next);
+				rcu_assign_pointer(rth->dst.rt_next, list);
+				list = rth;
			} else {
-				*prev = next;
-				rt_free(p);
+				pprev = &rth->dst.rt_next;
			}
+			rth = next;
		}
-	}
-#else
-		rth = rt_hash_table[i].chain;
-		rt_hash_table[i].chain = NULL;
-		tail = NULL;
-#endif
+
		spin_unlock_bh(rt_hash_lock_addr(i));
 
-		for (; rth != tail; rth = next) {
-			next = rth->dst.rt_next;
-			rt_free(rth);
+		for (; list; list = next) {
+			next = rcu_dereference_protected(list->dst.rt_next, 1);
+			rt_free(list);
		}
	}
 }
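
The reworked rt_do_flush() above unlinks matching entries onto a private list while holding the bucket lock, then frees them only after unlocking. A userspace sketch of that pattern with a plain mutex (the kernel version additionally uses rcu_assign_pointer so lockless readers stay safe); the struct and names are illustrative only:

/* Unlink-under-lock, free-after-unlock: keep the critical section short
 * by deferring the free work until after the mutex is released. */
#include <pthread.h>
#include <stdlib.h>

struct node { struct node *next; int netns_id; };

static struct node *chain;
static pthread_mutex_t chain_lock = PTHREAD_MUTEX_INITIALIZER;

static void flush_chain(int netns_id)	/* netns_id < 0: flush everything */
{
	struct node **pprev, *n, *next, *list = NULL;

	pthread_mutex_lock(&chain_lock);
	pprev = &chain;
	for (n = chain; n; n = next) {
		next = n->next;
		if (netns_id < 0 || n->netns_id == netns_id) {
			*pprev = next;	/* unlink from the chain */
			n->next = list;	/* collect on a private list */
			list = n;
		} else {
			pprev = &n->next;
		}
	}
	pthread_mutex_unlock(&chain_lock);

	for (; list; list = next) {	/* free outside the lock */
		next = list->next;
		free(list);
	}
}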
@@ -789,104 +813,15 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
	const struct rtable *aux = head;
 
	while (aux != rth) {
-		if (compare_hash_inputs(&aux->fl, &rth->fl))
+		if (compare_hash_inputs(aux, rth))
			return 0;
-		aux = aux->dst.rt_next;
+		aux = rcu_dereference_protected(aux->dst.rt_next, 1);
	}
	return ONE;
 }
 
-static void rt_check_expire(void)
-{
-	static unsigned int rover;
-	unsigned int i = rover, goal;
-	struct rtable *rth, **rthp;
-	unsigned long samples = 0;
-	unsigned long sum = 0, sum2 = 0;
-	unsigned long delta;
-	u64 mult;
-
-	delta = jiffies - expires_ljiffies;
-	expires_ljiffies = jiffies;
-	mult = ((u64)delta) << rt_hash_log;
-	if (ip_rt_gc_timeout > 1)
-		do_div(mult, ip_rt_gc_timeout);
-	goal = (unsigned int)mult;
-	if (goal > rt_hash_mask)
-		goal = rt_hash_mask + 1;
-	for (; goal > 0; goal--) {
-		unsigned long tmo = ip_rt_gc_timeout;
-		unsigned long length;
-
-		i = (i + 1) & rt_hash_mask;
-		rthp = &rt_hash_table[i].chain;
-
-		if (need_resched())
-			cond_resched();
-
-		samples++;
-
-		if (*rthp == NULL)
-			continue;
-		length = 0;
-		spin_lock_bh(rt_hash_lock_addr(i));
-		while ((rth = *rthp) != NULL) {
-			prefetch(rth->dst.rt_next);
-			if (rt_is_expired(rth)) {
-				*rthp = rth->dst.rt_next;
-				rt_free(rth);
-				continue;
-			}
-			if (rth->dst.expires) {
-				/* Entry is expired even if it is in use */
-				if (time_before_eq(jiffies, rth->dst.expires)) {
-nofree:
-					tmo >>= 1;
-					rthp = &rth->dst.rt_next;
-					/*
-					 * We only count entries on
-					 * a chain with equal hash inputs once
-					 * so that entries for different QOS
-					 * levels, and other non-hash input
-					 * attributes don't unfairly skew
-					 * the length computation
-					 */
-					length += has_noalias(rt_hash_table[i].chain, rth);
-					continue;
-				}
-			} else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
-				goto nofree;
-
-			/* Cleanup aged off entries. */
-			*rthp = rth->dst.rt_next;
-			rt_free(rth);
-		}
-		spin_unlock_bh(rt_hash_lock_addr(i));
-		sum += length;
-		sum2 += length*length;
-	}
-	if (samples) {
-		unsigned long avg = sum / samples;
-		unsigned long sd = int_sqrt(sum2 / samples - avg*avg);
-		rt_chain_length_max = max_t(unsigned long,
-					ip_rt_gc_elasticity,
-					(avg + 4*sd) >> FRACT_BITS);
-	}
-	rover = i;
-}
-
-/*
- * rt_worker_func() is run in process context.
- * we call rt_check_expire() to scan part of the hash table
- */
-static void rt_worker_func(struct work_struct *work)
-{
-	rt_check_expire();
-	schedule_delayed_work(&expires_work, ip_rt_gc_interval);
-}
-
 /*
- * Pertubation of rt_genid by a small quantity [1..256]
+ * Perturbation of rt_genid by a small quantity [1..256]
  * Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
  * many times (2^24) without giving recent rt_genid.
  * Jenkins hash is strong enough that litle changes of rt_genid are OK.
@@ -907,13 +842,13 @@ void rt_cache_flush(struct net *net, int delay)
 {
	rt_cache_invalidate(net);
	if (delay >= 0)
-		rt_do_flush(!in_softirq());
+		rt_do_flush(net, !in_softirq());
 }
 
 /* Flush previous cache invalidated entries from the cache */
-void rt_cache_flush_batch(void)
+void rt_cache_flush_batch(struct net *net)
 {
-	rt_do_flush(!in_softirq());
+	rt_do_flush(net, !in_softirq());
 }
 
 static void rt_emergency_hash_rebuild(struct net *net)
@@ -942,9 +877,11 @@ static int rt_garbage_collect(struct dst_ops *ops)
	static unsigned long last_gc;
	static int rover;
	static int equilibrium;
-	struct rtable *rth, **rthp;
+	struct rtable *rth;
+	struct rtable __rcu **rthp;
	unsigned long now = jiffies;
	int goal;
+	int entries = dst_entries_get_fast(&ipv4_dst_ops);
 
	/*
	 * Garbage collection is pretty expensive,
@@ -954,28 +891,28 @@ static int rt_garbage_collect(struct dst_ops *ops)
	RT_CACHE_STAT_INC(gc_total);
 
	if (now - last_gc < ip_rt_gc_min_interval &&
-	    atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) {
+	    entries < ip_rt_max_size) {
		RT_CACHE_STAT_INC(gc_ignored);
		goto out;
	}
 
+	entries = dst_entries_get_slow(&ipv4_dst_ops);
	/* Calculate number of entries, which we want to expire now. */
-	goal = atomic_read(&ipv4_dst_ops.entries) -
-		(ip_rt_gc_elasticity << rt_hash_log);
+	goal = entries - (ip_rt_gc_elasticity << rt_hash_log);
	if (goal <= 0) {
		if (equilibrium < ipv4_dst_ops.gc_thresh)
			equilibrium = ipv4_dst_ops.gc_thresh;
-		goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
+		goal = entries - equilibrium;
		if (goal > 0) {
			equilibrium += min_t(unsigned int, goal >> 1, rt_hash_mask + 1);
-			goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
+			goal = entries - equilibrium;
		}
	} else {
		/* We are in dangerous area. Try to reduce cache really
		 * aggressively.
		 */
		goal = max_t(unsigned int, goal >> 1, rt_hash_mask + 1);
-		equilibrium = atomic_read(&ipv4_dst_ops.entries) - goal;
+		equilibrium = entries - goal;
	}
 
	if (now - last_gc >= ip_rt_gc_min_interval)
@@ -995,7 +932,8 @@ static int rt_garbage_collect(struct dst_ops *ops)
			k = (k + 1) & rt_hash_mask;
			rthp = &rt_hash_table[k].chain;
			spin_lock_bh(rt_hash_lock_addr(k));
-			while ((rth = *rthp) != NULL) {
+			while ((rth = rcu_dereference_protected(*rthp,
+					lockdep_is_held(rt_hash_lock_addr(k)))) != NULL) {
				if (!rt_is_expired(rth) &&
				    !rt_may_expire(rth, tmo, expire)) {
					tmo >>= 1;
@@ -1030,16 +968,14 @@ static int rt_garbage_collect(struct dst_ops *ops)
			break;
 
		expire >>= 1;
-#if RT_CACHE_DEBUG >= 2
-		printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire,
-				atomic_read(&ipv4_dst_ops.entries), goal, i);
-#endif
 
-		if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
+		if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
			goto out;
	} while (!in_softirq() && time_before_eq(jiffies, now));
 
-	if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
+	if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
+		goto out;
+	if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size)
		goto out;
	if (net_ratelimit())
		printk(KERN_WARNING "dst cache overflow\n");
@@ -1049,12 +985,9 @@ static int rt_garbage_collect(struct dst_ops *ops)
 work_done:
	expire += ip_rt_gc_min_interval;
	if (expire > ip_rt_gc_timeout ||
-	    atomic_read(&ipv4_dst_ops.entries) < ipv4_dst_ops.gc_thresh)
+	    dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh ||
+	    dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh)
		expire = ip_rt_gc_timeout;
-#if RT_CACHE_DEBUG >= 2
-	printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire,
-			atomic_read(&ipv4_dst_ops.entries), goal, rover);
-#endif
 out:	return 0;
 }
 
@@ -1068,17 +1001,17 @@ static int slow_chain_length(const struct rtable *head)
 
	while (rth) {
		length += has_noalias(head, rth);
-		rth = rth->dst.rt_next;
+		rth = rcu_dereference_protected(rth->dst.rt_next, 1);
	}
	return length >> FRACT_BITS;
 }
 
-static int rt_intern_hash(unsigned hash, struct rtable *rt,
-			  struct rtable **rp, struct sk_buff *skb, int ifindex)
+static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt,
+				     struct sk_buff *skb, int ifindex)
 {
-	struct rtable	*rth, **rthp;
+	struct rtable	*rth, *cand;
+	struct rtable __rcu **rthp, **candp;
	unsigned long	now;
-	struct rtable *cand, **candp;
	u32 		min_score;
	int chain_length;
	int attempts = !in_softirq();
@@ -1102,36 +1035,37 @@ restart:
	 * Note that we do rt_free on this new route entry, so that
	 * once its refcount hits zero, we are still able to reap it
	 * (Thanks Alexey)
-	 * Note also the rt_free uses call_rcu.  We don't actually
-	 * need rcu protection here, this is just our path to get
-	 * on the route gc list.
+	 * Note: To avoid expensive rcu stuff for this uncached dst,
+	 * we set DST_NOCACHE so that dst_release() can free dst without
+	 * waiting a grace period.
	 */
 
-		if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
+		rt->dst.flags |= DST_NOCACHE;
+		if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) {
			int err = arp_bind_neighbour(&rt->dst);
			if (err) {
				if (net_ratelimit())
					printk(KERN_WARNING
					    "Neighbour table failure & not caching routes.\n");
-				rt_drop(rt);
-				return err;
+				ip_rt_put(rt);
+				return ERR_PTR(err);
			}
		}
 
-		rt_free(rt);
		goto skip_hashing;
	}
 
	rthp = &rt_hash_table[hash].chain;
 
	spin_lock_bh(rt_hash_lock_addr(hash));
-	while ((rth = *rthp) != NULL) {
+	while ((rth = rcu_dereference_protected(*rthp,
+			lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
		if (rt_is_expired(rth)) {
			*rthp = rth->dst.rt_next;
			rt_free(rth);
			continue;
		}
-		if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) {
+		if (compare_keys(rth, rt) && compare_netns(rth, rt)) {
			/* Put it first */
			*rthp = rth->dst.rt_next;
			/*
@@ -1151,11 +1085,9 @@ restart:
			spin_unlock_bh(rt_hash_lock_addr(hash));
 
			rt_drop(rt);
-			if (rp)
-				*rp = rth;
-			else
+			if (skb)
				skb_dst_set(skb, &rth->dst);
-			return 0;
+			return rth;
		}
 
		if (!atomic_read(&rth->dst.__refcnt)) {
@@ -1196,7 +1128,7 @@ restart:
			rt_emergency_hash_rebuild(net);
			spin_unlock_bh(rt_hash_lock_addr(hash));
 
-			hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
+			hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
					ifindex, rt_genid(net));
			goto restart;
		}
@@ -1205,14 +1137,14 @@ restart:
	/* Try to bind route to arp only if it is output
	   route or unicast forwarding path.
	 */
-	if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
+	if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) {
		int err = arp_bind_neighbour(&rt->dst);
		if (err) {
			spin_unlock_bh(rt_hash_lock_addr(hash));
 
			if (err != -ENOBUFS) {
				rt_drop(rt);
-				return err;
+				return ERR_PTR(err);
			}
 
			/* Neighbour tables are full and nothing
@@ -1233,25 +1165,15 @@ restart:
			if (net_ratelimit())
				printk(KERN_WARNING "ipv4: Neighbour table overflow.\n");
			rt_drop(rt);
-			return -ENOBUFS;
+			return ERR_PTR(-ENOBUFS);
		}
	}
 
	rt->dst.rt_next = rt_hash_table[hash].chain;
 
-#if RT_CACHE_DEBUG >= 2
-	if (rt->dst.rt_next) {
-		struct rtable *trt;
-		printk(KERN_DEBUG "rt_cache @%02x: %pI4",
-		       hash, &rt->rt_dst);
-		for (trt = rt->dst.rt_next; trt; trt = trt->dst.rt_next)
-			printk(" . %pI4", &trt->rt_dst);
-		printk("\n");
-	}
-#endif
	/*
	 * Since lookup is lockfree, we must make sure
-	 * previous writes to rt are comitted to memory
+	 * previous writes to rt are committed to memory
	 * before making rt visible to other CPUS.
	 */
	rcu_assign_pointer(rt_hash_table[hash].chain, rt);
@@ -1259,28 +1181,28 @@ restart:
	spin_unlock_bh(rt_hash_lock_addr(hash));
 
 skip_hashing:
-	if (rp)
-		*rp = rt;
-	else
+	if (skb)
		skb_dst_set(skb, &rt->dst);
-	return 0;
+	return rt;
+}
+
+static atomic_t __rt_peer_genid = ATOMIC_INIT(0);
+
+static u32 rt_peer_genid(void)
+{
+	return atomic_read(&__rt_peer_genid);
 }
 
-void rt_bind_peer(struct rtable *rt, int create)
+void rt_bind_peer(struct rtable *rt, __be32 daddr, int create)
 {
-	static DEFINE_SPINLOCK(rt_peer_lock);
	struct inet_peer *peer;
 
-	peer = inet_getpeer(rt->rt_dst, create);
+	peer = inet_getpeer_v4(daddr, create);
 
-	spin_lock_bh(&rt_peer_lock);
-	if (rt->peer == NULL) {
-		rt->peer = peer;
-		peer = NULL;
-	}
-	spin_unlock_bh(&rt_peer_lock);
-	if (peer)
+	if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
		inet_putpeer(peer);
+	else
+		rt->rt_peer_genid = rt_peer_genid();
 }
 
 /*
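
The new rt_bind_peer() above drops the spinlock entirely: every racer looks up a candidate peer, but only the cmpxchg(NULL -> peer) winner installs it, and losers release their reference. A userspace sketch of this once-only binding with C11 atomics; peer_lookup() and all other names here are stand-ins, not kernel API:

/* Lockless once-only binding: allocate a candidate, CAS it into place,
 * and release the candidate if another thread won the race. */
#include <stdatomic.h>
#include <stdlib.h>

struct peer { _Atomic int refcnt; };

static struct peer *peer_lookup(void)	/* stand-in for inet_getpeer_v4() */
{
	struct peer *p = malloc(sizeof(*p));

	if (p)
		atomic_store(&p->refcnt, 1);
	return p;
}

static void peer_put(struct peer *p)	/* stand-in for inet_putpeer() */
{
	if (atomic_fetch_sub(&p->refcnt, 1) == 1)
		free(p);
}

static _Atomic(struct peer *) bound_peer;

static void bind_peer(void)
{
	struct peer *p = peer_lookup(), *expected = NULL;

	if (p && !atomic_compare_exchange_strong(&bound_peer, &expected, p))
		peer_put(p);	/* someone else won the race; drop ours */
}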
@@ -1309,7 +1231,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
 
	if (rt) {
		if (rt->peer == NULL)
-			rt_bind_peer(rt, 1);
+			rt_bind_peer(rt, rt->rt_dst, 1);
 
		/* If peer is attached to destination, it is never detached,
		   so that we need not to grab a lock to dereference it.
@@ -1328,12 +1250,14 @@ EXPORT_SYMBOL(__ip_select_ident);
 
 static void rt_del(unsigned hash, struct rtable *rt)
 {
-	struct rtable **rthp, *aux;
+	struct rtable __rcu **rthp;
+	struct rtable *aux;
 
	rthp = &rt_hash_table[hash].chain;
	spin_lock_bh(rt_hash_lock_addr(hash));
	ip_rt_put(rt);
-	while ((aux = *rthp) != NULL) {
+	while ((aux = rcu_dereference_protected(*rthp,
+			lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
		if (aux == rt || rt_is_expired(aux)) {
			*rthp = aux->dst.rt_next;
			rt_free(aux);
@@ -1348,12 +1272,8 @@ static void rt_del(unsigned hash, struct rtable *rt)
 void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
		    __be32 saddr, struct net_device *dev)
 {
-	int i, k;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
-	struct rtable *rth, **rthp;
-	__be32  skeys[2] = { saddr, 0 };
-	int  ikeys[2] = { dev->ifindex, 0 };
-	struct netevent_redirect netevent;
+	struct inet_peer *peer;
	struct net *net;
 
	if (!in_dev)
@@ -1365,9 +1285,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
	    ipv4_is_zeronet(new_gw))
		goto reject_redirect;
 
-	if (!rt_caching(net))
-		goto reject_redirect;
-
	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
			goto reject_redirect;
@@ -1378,93 +1295,13 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
			goto reject_redirect;
	}
 
-	for (i = 0; i < 2; i++) {
-		for (k = 0; k < 2; k++) {
-			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
-						rt_genid(net));
-
-			rthp=&rt_hash_table[hash].chain;
-
-			while ((rth = rcu_dereference(*rthp)) != NULL) {
-				struct rtable *rt;
-
-				if (rth->fl.fl4_dst != daddr ||
-				    rth->fl.fl4_src != skeys[i] ||
-				    rth->fl.oif != ikeys[k] ||
-				    rth->fl.iif != 0 ||
-				    rt_is_expired(rth) ||
-				    !net_eq(dev_net(rth->dst.dev), net)) {
-					rthp = &rth->dst.rt_next;
-					continue;
-				}
-
-				if (rth->rt_dst != daddr ||
-				    rth->rt_src != saddr ||
-				    rth->dst.error ||
-				    rth->rt_gateway != old_gw ||
-				    rth->dst.dev != dev)
-					break;
-
-				dst_hold(&rth->dst);
-
-				rt = dst_alloc(&ipv4_dst_ops);
-				if (rt == NULL) {
-					ip_rt_put(rth);
-					return;
-				}
-
-				/* Copy all the information. */
-				*rt = *rth;
-				rt->dst.__use		= 1;
-				atomic_set(&rt->dst.__refcnt, 1);
-				rt->dst.child		= NULL;
-				if (rt->dst.dev)
-					dev_hold(rt->dst.dev);
-				if (rt->idev)
-					in_dev_hold(rt->idev);
-				rt->dst.obsolete	= -1;
-				rt->dst.lastuse		= jiffies;
-				rt->dst.path		= &rt->dst;
-				rt->dst.neighbour	= NULL;
-				rt->dst.hh		= NULL;
-#ifdef CONFIG_XFRM
-				rt->dst.xfrm		= NULL;
-#endif
-				rt->rt_genid		= rt_genid(net);
-				rt->rt_flags		|= RTCF_REDIRECTED;
-
-				/* Gateway is different ... */
-				rt->rt_gateway		= new_gw;
-
-				/* Redirect received -> path was valid */
-				dst_confirm(&rth->dst);
-
-				if (rt->peer)
-					atomic_inc(&rt->peer->refcnt);
-
-				if (arp_bind_neighbour(&rt->dst) ||
-				    !(rt->dst.neighbour->nud_state &
-					    NUD_VALID)) {
-					if (rt->dst.neighbour)
-						neigh_event_send(rt->dst.neighbour, NULL);
-					ip_rt_put(rth);
-					rt_drop(rt);
-					goto do_next;
-				}
-
-				netevent.old = &rth->dst;
-				netevent.new = &rt->dst;
-				call_netevent_notifiers(NETEVENT_REDIRECT,
-							&netevent);
-
-				rt_del(hash, rth);
-				if (!rt_intern_hash(hash, rt, &rt, NULL, rt->fl.oif))
-					ip_rt_put(rt);
-				goto do_next;
-			}
-		do_next:
-			;
-		}
+	peer = inet_getpeer_v4(daddr, 1);
+	if (peer) {
+		peer->redirect_learned.a4 = new_gw;
+
+		inet_putpeer(peer);
+
+		atomic_inc(&__rt_peer_genid);
	}
	return;
 
@@ -1479,6 +1316,23 @@ reject_redirect:
	;
 }
 
+static bool peer_pmtu_expired(struct inet_peer *peer)
+{
+	unsigned long orig = ACCESS_ONCE(peer->pmtu_expires);
+
+	return orig &&
+	       time_after_eq(jiffies, orig) &&
+	       cmpxchg(&peer->pmtu_expires, orig, 0) == orig;
+}
+
+static bool peer_pmtu_cleaned(struct inet_peer *peer)
+{
+	unsigned long orig = ACCESS_ONCE(peer->pmtu_expires);
+
+	return orig &&
+	       cmpxchg(&peer->pmtu_expires, orig, 0) == orig;
+}
+
 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
 {
	struct rtable *rt = (struct rtable *)dst;
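
peer_pmtu_expired() and peer_pmtu_cleaned() above rely on a claim-once idiom: many CPUs may observe the same expired timestamp, but only the one whose compare-and-swap zeroes it gets "true" and performs the cleanup. A short sketch of the same idiom in portable C11; the variable names are illustrative:

/* Claim-once expiry: exactly one of many racers sees "true". */
#include <stdatomic.h>
#include <stdbool.h>

static _Atomic(unsigned long) pmtu_expires;	/* 0 means "not set" */

static bool pmtu_expired(unsigned long now)
{
	unsigned long orig = atomic_load(&pmtu_expires);

	/* CAS succeeds (and returns true) for only one caller per expiry. */
	return orig && now >= orig &&
	       atomic_compare_exchange_strong(&pmtu_expires, &orig, 0);
}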
@@ -1488,18 +1342,14 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
		if (dst->obsolete > 0) {
			ip_rt_put(rt);
			ret = NULL;
-		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
-			   (rt->dst.expires &&
-			    time_after_eq(jiffies, rt->dst.expires))) {
-			unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
-						rt->fl.oif,
+		} else if (rt->rt_flags & RTCF_REDIRECTED) {
+			unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
+						rt->rt_oif,
						rt_genid(dev_net(dst->dev)));
-#if RT_CACHE_DEBUG >= 1
-			printk(KERN_DEBUG "ipv4_negative_advice: redirect to %pI4/%02x dropped\n",
-				&rt->rt_dst, rt->fl.fl4_tos);
-#endif
			rt_del(hash, rt);
			ret = NULL;
+		} else if (rt->peer && peer_pmtu_expired(rt->peer)) {
+			dst_metric_set(dst, RTAX_MTU, rt->peer->pmtu_orig);
		}
	}
	return ret;
@@ -1525,6 +1375,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 {
	struct rtable *rt = skb_rtable(skb);
	struct in_device *in_dev;
+	struct inet_peer *peer;
	int log_martians;
 
	rcu_read_lock();
@@ -1536,36 +1387,44 @@ void ip_rt_send_redirect(struct sk_buff *skb)
	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
	rcu_read_unlock();
 
+	if (!rt->peer)
+		rt_bind_peer(rt, rt->rt_dst, 1);
+	peer = rt->peer;
+	if (!peer) {
+		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
+		return;
+	}
+
	/* No redirected packets during ip_rt_redirect_silence;
	 * reset the algorithm.
	 */
-	if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence))
-		rt->dst.rate_tokens = 0;
+	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
+		peer->rate_tokens = 0;
 
	/* Too many ignored redirects; do not send anything
	 * set dst.rate_last to the last seen redirected packet.
	 */
-	if (rt->dst.rate_tokens >= ip_rt_redirect_number) {
-		rt->dst.rate_last = jiffies;
+	if (peer->rate_tokens >= ip_rt_redirect_number) {
+		peer->rate_last = jiffies;
		return;
	}
 
	/* Check for load limit; set rate_last to the latest sent
	 * redirect.
	 */
-	if (rt->dst.rate_tokens == 0 ||
+	if (peer->rate_tokens == 0 ||
	    time_after(jiffies,
-		       (rt->dst.rate_last +
-			(ip_rt_redirect_load << rt->dst.rate_tokens)))) {
+		       (peer->rate_last +
+			(ip_rt_redirect_load << peer->rate_tokens)))) {
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
-		rt->dst.rate_last = jiffies;
-		++rt->dst.rate_tokens;
+		peer->rate_last = jiffies;
+		++peer->rate_tokens;
 #ifdef CONFIG_IP_ROUTE_VERBOSE
		if (log_martians &&
-		    rt->dst.rate_tokens == ip_rt_redirect_number &&
+		    peer->rate_tokens == ip_rt_redirect_number &&
		    net_ratelimit())
			printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
-				&rt->rt_src, rt->rt_iif,
+			       &ip_hdr(skb)->saddr, rt->rt_iif,
				&rt->rt_dst, &rt->rt_gateway);
 #endif
	}
@@ -1574,7 +1433,9 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 static int ip_error(struct sk_buff *skb)
 {
	struct rtable *rt = skb_rtable(skb);
+	struct inet_peer *peer;
	unsigned long now;
+	bool send;
	int code;
 
	switch (rt->dst.error) {
@@ -1594,15 +1455,24 @@ static int ip_error(struct sk_buff *skb)
		break;
	}
 
-	now = jiffies;
-	rt->dst.rate_tokens += now - rt->dst.rate_last;
-	if (rt->dst.rate_tokens > ip_rt_error_burst)
-		rt->dst.rate_tokens = ip_rt_error_burst;
-	rt->dst.rate_last = now;
-	if (rt->dst.rate_tokens >= ip_rt_error_cost) {
-		rt->dst.rate_tokens -= ip_rt_error_cost;
+	if (!rt->peer)
+		rt_bind_peer(rt, rt->rt_dst, 1);
+	peer = rt->peer;
+
+	send = true;
+	if (peer) {
+		now = jiffies;
+		peer->rate_tokens += now - peer->rate_last;
+		if (peer->rate_tokens > ip_rt_error_burst)
+			peer->rate_tokens = ip_rt_error_burst;
+		peer->rate_last = now;
+		if (peer->rate_tokens >= ip_rt_error_cost)
+			peer->rate_tokens -= ip_rt_error_cost;
+		else
+			send = false;
+	}
+	if (send)
		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
-	}
 
 out:	kfree_skb(skb);
	return 0;
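
Both ip_rt_send_redirect() and ip_error() above use the same token-bucket limiter, now stored in the inet_peer: tokens accrue with elapsed time up to a burst cap, and each ICMP message costs a fixed amount. A self-contained sketch of that limiter; the constants and names mirror ip_rt_error_burst/ip_rt_error_cost in spirit only:

/* Token-bucket rate limiting as used by ip_error(): earn one token per
 * elapsed time unit (capped at "burst"), spend "cost" tokens per event. */
#include <stdbool.h>

struct bucket {
	unsigned long rate_tokens;
	unsigned long rate_last;
};

static bool may_send(struct bucket *b, unsigned long now,
		     unsigned long burst, unsigned long cost)
{
	b->rate_tokens += now - b->rate_last;	/* accrue tokens */
	if (b->rate_tokens > burst)
		b->rate_tokens = burst;		/* cap the burst */
	b->rate_last = now;
	if (b->rate_tokens >= cost) {
		b->rate_tokens -= cost;		/* pay for this event */
		return true;
	}
	return false;				/* rate limited */
}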
@@ -1626,88 +1496,148 @@ static inline unsigned short guess_mtu(unsigned short old_mtu) | |||
1626 | return 68; | 1496 | return 68; |
1627 | } | 1497 | } |
1628 | 1498 | ||
1629 | unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, | 1499 | unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph, |
1630 | unsigned short new_mtu, | 1500 | unsigned short new_mtu, |
1631 | struct net_device *dev) | 1501 | struct net_device *dev) |
1632 | { | 1502 | { |
1633 | int i, k; | ||
1634 | unsigned short old_mtu = ntohs(iph->tot_len); | 1503 | unsigned short old_mtu = ntohs(iph->tot_len); |
1635 | struct rtable *rth; | ||
1636 | int ikeys[2] = { dev->ifindex, 0 }; | ||
1637 | __be32 skeys[2] = { iph->saddr, 0, }; | ||
1638 | __be32 daddr = iph->daddr; | ||
1639 | unsigned short est_mtu = 0; | 1504 | unsigned short est_mtu = 0; |
1505 | struct inet_peer *peer; | ||
1640 | 1506 | ||
1641 | for (k = 0; k < 2; k++) { | 1507 | peer = inet_getpeer_v4(iph->daddr, 1); |
1642 | for (i = 0; i < 2; i++) { | 1508 | if (peer) { |
1643 | unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], | 1509 | unsigned short mtu = new_mtu; |
1644 | rt_genid(net)); | 1510 | |
1645 | 1511 | if (new_mtu < 68 || new_mtu >= old_mtu) { | |
1646 | rcu_read_lock(); | 1512 | /* BSD 4.2 derived systems incorrectly adjust |
1647 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; | 1513 | * tot_len by the IP header length, and report |
1648 | rth = rcu_dereference(rth->dst.rt_next)) { | 1514 | * a zero MTU in the ICMP message. |
1649 | unsigned short mtu = new_mtu; | 1515 | */ |
1650 | 1516 | if (mtu == 0 && | |
1651 | if (rth->fl.fl4_dst != daddr || | 1517 | old_mtu >= 68 + (iph->ihl << 2)) |
1652 | rth->fl.fl4_src != skeys[i] || | 1518 | old_mtu -= iph->ihl << 2; |
1653 | rth->rt_dst != daddr || | 1519 | mtu = guess_mtu(old_mtu); |
1654 | rth->rt_src != iph->saddr || | 1520 | } |
1655 | rth->fl.oif != ikeys[k] || | ||
1656 | rth->fl.iif != 0 || | ||
1657 | dst_metric_locked(&rth->dst, RTAX_MTU) || | ||
1658 | !net_eq(dev_net(rth->dst.dev), net) || | ||
1659 | rt_is_expired(rth)) | ||
1660 | continue; | ||
1661 | 1521 | ||
1662 | if (new_mtu < 68 || new_mtu >= old_mtu) { | 1522 | if (mtu < ip_rt_min_pmtu) |
1523 | mtu = ip_rt_min_pmtu; | ||
1524 | if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { | ||
1525 | unsigned long pmtu_expires; | ||
1663 | 1526 | ||
1664 | /* BSD 4.2 compatibility hack :-( */ | 1527 | pmtu_expires = jiffies + ip_rt_mtu_expires; |
1665 | if (mtu == 0 && | 1528 | if (!pmtu_expires) |
1666 | old_mtu >= dst_mtu(&rth->dst) && | 1529 | pmtu_expires = 1UL; |
1667 | old_mtu >= 68 + (iph->ihl << 2)) | ||
1668 | old_mtu -= iph->ihl << 2; | ||
1669 | 1530 | ||
1670 | mtu = guess_mtu(old_mtu); | 1531 | est_mtu = mtu; |
1671 | } | 1532 | peer->pmtu_learned = mtu; |
1672 | if (mtu <= dst_mtu(&rth->dst)) { | 1533 | peer->pmtu_expires = pmtu_expires; |
1673 | if (mtu < dst_mtu(&rth->dst)) { | ||
1674 | dst_confirm(&rth->dst); | ||
1675 | if (mtu < ip_rt_min_pmtu) { | ||
1676 | mtu = ip_rt_min_pmtu; | ||
1677 | rth->dst.metrics[RTAX_LOCK-1] |= | ||
1678 | (1 << RTAX_MTU); | ||
1679 | } | ||
1680 | rth->dst.metrics[RTAX_MTU-1] = mtu; | ||
1681 | dst_set_expires(&rth->dst, | ||
1682 | ip_rt_mtu_expires); | ||
1683 | } | ||
1684 | est_mtu = mtu; | ||
1685 | } | ||
1686 | } | ||
1687 | rcu_read_unlock(); | ||
1688 | } | 1534 | } |
1535 | |||
1536 | inet_putpeer(peer); | ||
1537 | |||
1538 | atomic_inc(&__rt_peer_genid); | ||
1689 | } | 1539 | } |
1690 | return est_mtu ? : new_mtu; | 1540 | return est_mtu ? : new_mtu; |
1691 | } | 1541 | } |
1692 | 1542 | ||
1543 | static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer) | ||
1544 | { | ||
1545 | unsigned long expires = ACCESS_ONCE(peer->pmtu_expires); | ||
1546 | |||
1547 | if (!expires) | ||
1548 | return; | ||
1549 | if (time_before(jiffies, expires)) { | ||
1550 | u32 orig_dst_mtu = dst_mtu(dst); | ||
1551 | if (peer->pmtu_learned < orig_dst_mtu) { | ||
1552 | if (!peer->pmtu_orig) | ||
1553 | peer->pmtu_orig = dst_metric_raw(dst, RTAX_MTU); | ||
1554 | dst_metric_set(dst, RTAX_MTU, peer->pmtu_learned); | ||
1555 | } | ||
1556 | } else if (cmpxchg(&peer->pmtu_expires, expires, 0) == expires) | ||
1557 | dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig); | ||
1558 | } | ||
1559 | |||
1693 | static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) | 1560 | static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) |
1694 | { | 1561 | { |
1695 | if (dst_mtu(dst) > mtu && mtu >= 68 && | 1562 | struct rtable *rt = (struct rtable *) dst; |
1696 | !(dst_metric_locked(dst, RTAX_MTU))) { | 1563 | struct inet_peer *peer; |
1697 | if (mtu < ip_rt_min_pmtu) { | 1564 | |
1565 | dst_confirm(dst); | ||
1566 | |||
1567 | if (!rt->peer) | ||
1568 | rt_bind_peer(rt, rt->rt_dst, 1); | ||
1569 | peer = rt->peer; | ||
1570 | if (peer) { | ||
1571 | unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires); | ||
1572 | |||
1573 | if (mtu < ip_rt_min_pmtu) | ||
1698 | mtu = ip_rt_min_pmtu; | 1574 | mtu = ip_rt_min_pmtu; |
1699 | dst->metrics[RTAX_LOCK-1] |= (1 << RTAX_MTU); | 1575 | if (!pmtu_expires || mtu < peer->pmtu_learned) { |
1576 | |||
1577 | pmtu_expires = jiffies + ip_rt_mtu_expires; | ||
1578 | if (!pmtu_expires) | ||
1579 | pmtu_expires = 1UL; | ||
1580 | |||
1581 | peer->pmtu_learned = mtu; | ||
1582 | peer->pmtu_expires = pmtu_expires; | ||
1583 | |||
1584 | atomic_inc(&__rt_peer_genid); | ||
1585 | rt->rt_peer_genid = rt_peer_genid(); | ||
1700 | } | 1586 | } |
1701 | dst->metrics[RTAX_MTU-1] = mtu; | 1587 | check_peer_pmtu(dst, peer); |
1702 | dst_set_expires(dst, ip_rt_mtu_expires); | 1588 | } |
1703 | call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); | 1589 | } |
1590 | |||
1591 | static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) | ||
1592 | { | ||
1593 | struct rtable *rt = (struct rtable *) dst; | ||
1594 | __be32 orig_gw = rt->rt_gateway; | ||
1595 | |||
1596 | dst_confirm(&rt->dst); | ||
1597 | |||
1598 | neigh_release(rt->dst.neighbour); | ||
1599 | rt->dst.neighbour = NULL; | ||
1600 | |||
1601 | rt->rt_gateway = peer->redirect_learned.a4; | ||
1602 | if (arp_bind_neighbour(&rt->dst) || | ||
1603 | !(rt->dst.neighbour->nud_state & NUD_VALID)) { | ||
1604 | if (rt->dst.neighbour) | ||
1605 | neigh_event_send(rt->dst.neighbour, NULL); | ||
1606 | rt->rt_gateway = orig_gw; | ||
1607 | return -EAGAIN; | ||
1608 | } else { | ||
1609 | rt->rt_flags |= RTCF_REDIRECTED; | ||
1610 | call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, | ||
1611 | rt->dst.neighbour); | ||
1704 | } | 1612 | } |
1613 | return 0; | ||
1705 | } | 1614 | } |
1706 | 1615 | ||
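check_peer_redir() above is a try-then-rollback update: the old gateway is saved, the neighbour is rebound to the learned one, and if ARP binding fails or the neighbour is not yet NUD_VALID the gateway is put back and -EAGAIN returned so a later packet can retry. The same shape in a generic sketch (bind_neighbour() is an assumed stand-in for arp_bind_neighbour(), not the kernel call):

	#include <errno.h>
	#include <stdint.h>

	struct route {
		uint32_t gateway;
	};

	/* Stand-in for arp_bind_neighbour(): 0 on success, nonzero on failure. */
	static int bind_neighbour(struct route *rt)
	{
		(void)rt;
		return 0;	/* this model always succeeds */
	}

	static int switch_gateway(struct route *rt, uint32_t learned)
	{
		uint32_t orig = rt->gateway;	/* remember for rollback */

		rt->gateway = learned;
		if (bind_neighbour(rt)) {
			rt->gateway = orig;	/* undo; retry on a later packet */
			return -EAGAIN;
		}
		return 0;
	}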
1707 | static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) | 1616 | static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) |
1708 | { | 1617 | { |
1709 | if (rt_is_expired((struct rtable *)dst)) | 1618 | struct rtable *rt = (struct rtable *) dst; |
1619 | |||
1620 | if (rt_is_expired(rt)) | ||
1710 | return NULL; | 1621 | return NULL; |
1622 | if (rt->rt_peer_genid != rt_peer_genid()) { | ||
1623 | struct inet_peer *peer; | ||
1624 | |||
1625 | if (!rt->peer) | ||
1626 | rt_bind_peer(rt, rt->rt_dst, 0); | ||
1627 | |||
1628 | peer = rt->peer; | ||
1629 | if (peer) { | ||
1630 | check_peer_pmtu(dst, peer); | ||
1631 | |||
1632 | if (peer->redirect_learned.a4 && | ||
1633 | peer->redirect_learned.a4 != rt->rt_gateway) { | ||
1634 | if (check_peer_redir(dst, peer)) | ||
1635 | return NULL; | ||
1636 | } | ||
1637 | } | ||
1638 | |||
1639 | rt->rt_peer_genid = rt_peer_genid(); | ||
1640 | } | ||
1711 | return dst; | 1641 | return dst; |
1712 | } | 1642 | } |
1713 | 1643 | ||
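ipv4_dst_check() now revalidates lazily against a global peer generation counter: every PMTU or redirect update bumps __rt_peer_genid, and a cached route whose rt_peer_genid is stale re-syncs from its peer on next use instead of forcing a cache flush. A minimal model of generation-counter invalidation (names modeled on, but not identical to, the kernel's):

	#include <stdatomic.h>
	#include <stdbool.h>

	static _Atomic unsigned int peer_genid;	/* bumped on any peer update */

	struct cached_route {
		unsigned int genid_seen;	/* generation this entry last synced at */
	};

	static void peer_updated(void)
	{
		atomic_fetch_add(&peer_genid, 1);
	}

	/* Returns true when the cached entry must re-read peer state. */
	static bool route_stale(struct cached_route *rt)
	{
		unsigned int now = atomic_load(&peer_genid);

		if (rt->genid_seen == now)
			return false;
		rt->genid_seen = now;	/* caller re-syncs pmtu/redirect here */
		return true;
	}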
@@ -1715,33 +1645,17 @@ static void ipv4_dst_destroy(struct dst_entry *dst) | |||
1715 | { | 1645 | { |
1716 | struct rtable *rt = (struct rtable *) dst; | 1646 | struct rtable *rt = (struct rtable *) dst; |
1717 | struct inet_peer *peer = rt->peer; | 1647 | struct inet_peer *peer = rt->peer; |
1718 | struct in_device *idev = rt->idev; | ||
1719 | 1648 | ||
1649 | if (rt->fi) { | ||
1650 | fib_info_put(rt->fi); | ||
1651 | rt->fi = NULL; | ||
1652 | } | ||
1720 | if (peer) { | 1653 | if (peer) { |
1721 | rt->peer = NULL; | 1654 | rt->peer = NULL; |
1722 | inet_putpeer(peer); | 1655 | inet_putpeer(peer); |
1723 | } | 1656 | } |
1724 | |||
1725 | if (idev) { | ||
1726 | rt->idev = NULL; | ||
1727 | in_dev_put(idev); | ||
1728 | } | ||
1729 | } | 1657 | } |
1730 | 1658 | ||
1731 | static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, | ||
1732 | int how) | ||
1733 | { | ||
1734 | struct rtable *rt = (struct rtable *) dst; | ||
1735 | struct in_device *idev = rt->idev; | ||
1736 | if (dev != dev_net(dev)->loopback_dev && idev && idev->dev == dev) { | ||
1737 | struct in_device *loopback_idev = | ||
1738 | in_dev_get(dev_net(dev)->loopback_dev); | ||
1739 | if (loopback_idev) { | ||
1740 | rt->idev = loopback_idev; | ||
1741 | in_dev_put(idev); | ||
1742 | } | ||
1743 | } | ||
1744 | } | ||
1745 | 1659 | ||
1746 | static void ipv4_link_failure(struct sk_buff *skb) | 1660 | static void ipv4_link_failure(struct sk_buff *skb) |
1747 | { | 1661 | { |
@@ -1750,8 +1664,8 @@ static void ipv4_link_failure(struct sk_buff *skb) | |||
1750 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); | 1664 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); |
1751 | 1665 | ||
1752 | rt = skb_rtable(skb); | 1666 | rt = skb_rtable(skb); |
1753 | if (rt) | 1667 | if (rt && rt->peer && peer_pmtu_cleaned(rt->peer)) |
1754 | dst_set_expires(&rt->dst, 0); | 1668 | dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig); |
1755 | } | 1669 | } |
1756 | 1670 | ||
1757 | static int ip_rt_bug(struct sk_buff *skb) | 1671 | static int ip_rt_bug(struct sk_buff *skb) |
@@ -1760,6 +1674,7 @@ static int ip_rt_bug(struct sk_buff *skb) | |||
1760 | &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, | 1674 | &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, |
1761 | skb->dev ? skb->dev->name : "?"); | 1675 | skb->dev ? skb->dev->name : "?"); |
1762 | kfree_skb(skb); | 1676 | kfree_skb(skb); |
1677 | WARN_ON(1); | ||
1763 | return 0; | 1678 | return 0; |
1764 | } | 1679 | } |
1765 | 1680 | ||
@@ -1772,23 +1687,39 @@ static int ip_rt_bug(struct sk_buff *skb) | |||
1772 | in IP options! | 1687 | in IP options! |
1773 | */ | 1688 | */ |
1774 | 1689 | ||
1775 | void ip_rt_get_source(u8 *addr, struct rtable *rt) | 1690 | void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) |
1776 | { | 1691 | { |
1777 | __be32 src; | 1692 | __be32 src; |
1778 | struct fib_result res; | ||
1779 | 1693 | ||
1780 | if (rt->fl.iif == 0) | 1694 | if (rt_is_output_route(rt)) |
1781 | src = rt->rt_src; | 1695 | src = ip_hdr(skb)->saddr; |
1782 | else if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) { | 1696 | else { |
1783 | src = FIB_RES_PREFSRC(res); | 1697 | struct fib_result res; |
1784 | fib_res_put(&res); | 1698 | struct flowi4 fl4; |
1785 | } else | 1699 | struct iphdr *iph; |
1786 | src = inet_select_addr(rt->dst.dev, rt->rt_gateway, | 1700 | |
1701 | iph = ip_hdr(skb); | ||
1702 | |||
1703 | memset(&fl4, 0, sizeof(fl4)); | ||
1704 | fl4.daddr = iph->daddr; | ||
1705 | fl4.saddr = iph->saddr; | ||
1706 | fl4.flowi4_tos = iph->tos; | ||
1707 | fl4.flowi4_oif = rt->dst.dev->ifindex; | ||
1708 | fl4.flowi4_iif = skb->dev->ifindex; | ||
1709 | fl4.flowi4_mark = skb->mark; | ||
1710 | |||
1711 | rcu_read_lock(); | ||
1712 | if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) | ||
1713 | src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res); | ||
1714 | else | ||
1715 | src = inet_select_addr(rt->dst.dev, rt->rt_gateway, | ||
1787 | RT_SCOPE_UNIVERSE); | 1716 | RT_SCOPE_UNIVERSE); |
1717 | rcu_read_unlock(); | ||
1718 | } | ||
1788 | memcpy(addr, &src, 4); | 1719 | memcpy(addr, &src, 4); |
1789 | } | 1720 | } |
1790 | 1721 | ||
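For input routes, ip_rt_get_source() now rebuilds a flowi4 key from the packet header by hand (memset to zero, then fill the fields that matter) before calling fib_lookup() under rcu_read_lock(). The zero-then-fill idiom keeps every key field the lookup might hash, including ones added later, in a known state. A sketch of the idiom on a cut-down key structure (this struct is illustrative, not the kernel's struct flowi4):

	#include <stdint.h>
	#include <string.h>

	/* Cut-down stand-in for the kernel's flow key. */
	struct flow_key {
		uint32_t daddr, saddr;
		uint8_t  tos;
		int      oif, iif;
		uint32_t mark;
	};

	static void build_key(struct flow_key *fl, uint32_t daddr, uint32_t saddr,
			      uint8_t tos, int oif, int iif, uint32_t mark)
	{
		memset(fl, 0, sizeof(*fl));	/* unset fields must compare as zero */
		fl->daddr = daddr;
		fl->saddr = saddr;
		fl->tos = tos;
		fl->oif = oif;
		fl->iif = iif;
		fl->mark = mark;
	}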
1791 | #ifdef CONFIG_NET_CLS_ROUTE | 1722 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1792 | static void set_class_tag(struct rtable *rt, u32 tag) | 1723 | static void set_class_tag(struct rtable *rt, u32 tag) |
1793 | { | 1724 | { |
1794 | if (!(rt->dst.tclassid & 0xFFFF)) | 1725 | if (!(rt->dst.tclassid & 0xFFFF)) |
@@ -1798,46 +1729,107 @@ static void set_class_tag(struct rtable *rt, u32 tag) | |||
1798 | } | 1729 | } |
1799 | #endif | 1730 | #endif |
1800 | 1731 | ||
1801 | static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) | 1732 | static unsigned int ipv4_default_advmss(const struct dst_entry *dst) |
1802 | { | 1733 | { |
1803 | struct fib_info *fi = res->fi; | 1734 | unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS); |
1735 | |||
1736 | if (advmss == 0) { | ||
1737 | advmss = max_t(unsigned int, dst->dev->mtu - 40, | ||
1738 | ip_rt_min_advmss); | ||
1739 | if (advmss > 65535 - 40) | ||
1740 | advmss = 65535 - 40; | ||
1741 | } | ||
1742 | return advmss; | ||
1743 | } | ||
1744 | |||
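ipv4_default_advmss() computes the advertised MSS only when no RTAX_ADVMSS metric is set: device MTU minus 40 bytes of IPv4 plus TCP headers, floored at ip_rt_min_advmss (256 by default, per the sysctl defaults near the top of the file) and capped at 65535 - 40. For a 1500-byte Ethernet MTU this yields 1460. The same computation as a standalone helper:

	/* advmss = clamp(mtu - 40, min_advmss, 65535 - 40); 40 = IPv4 + TCP headers */
	static unsigned int default_advmss(unsigned int dev_mtu,
					   unsigned int min_advmss)
	{
		unsigned int advmss = dev_mtu - 40;

		if (advmss < min_advmss)
			advmss = min_advmss;
		if (advmss > 65535 - 40)
			advmss = 65535 - 40;
		return advmss;	/* e.g. default_advmss(1500, 256) == 1460 */
	}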
1745 | static unsigned int ipv4_default_mtu(const struct dst_entry *dst) | ||
1746 | { | ||
1747 | unsigned int mtu = dst->dev->mtu; | ||
1748 | |||
1749 | if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { | ||
1750 | const struct rtable *rt = (const struct rtable *) dst; | ||
1751 | |||
1752 | if (rt->rt_gateway != rt->rt_dst && mtu > 576) | ||
1753 | mtu = 576; | ||
1754 | } | ||
1755 | |||
1756 | if (mtu > IP_MAX_MTU) | ||
1757 | mtu = IP_MAX_MTU; | ||
1758 | |||
1759 | return mtu; | ||
1760 | } | ||
1761 | |||
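ipv4_default_mtu() keeps the classic conservatism: when the MTU metric is locked and the route actually goes through a gateway (rt_gateway != rt_dst), anything above 576 bytes, the minimum datagram size RFC 791 obliges every host to accept, is trimmed to 576, and the result is always capped at IP_MAX_MTU (0xFFF0, defined near the top of the file). As plain arithmetic:

	#define IP_MAX_MTU 0xFFF0

	static unsigned int default_mtu(unsigned int dev_mtu, int mtu_locked,
					int via_gateway)
	{
		unsigned int mtu = dev_mtu;

		if (mtu_locked && via_gateway && mtu > 576)
			mtu = 576;	/* locked metric + gateway: be conservative */
		if (mtu > IP_MAX_MTU)
			mtu = IP_MAX_MTU;
		return mtu;
	}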
1762 | static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, | ||
1763 | struct fib_info *fi) | ||
1764 | { | ||
1765 | struct inet_peer *peer; | ||
1766 | int create = 0; | ||
1767 | |||
1768 | /* If a peer entry exists for this destination, we must hook | ||
1769 | * it up in order to get at cached metrics. | ||
1770 | */ | ||
1771 | if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) | ||
1772 | create = 1; | ||
1773 | |||
1774 | rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create); | ||
1775 | if (peer) { | ||
1776 | rt->rt_peer_genid = rt_peer_genid(); | ||
1777 | if (inet_metrics_new(peer)) | ||
1778 | memcpy(peer->metrics, fi->fib_metrics, | ||
1779 | sizeof(u32) * RTAX_MAX); | ||
1780 | dst_init_metrics(&rt->dst, peer->metrics, false); | ||
1781 | |||
1782 | check_peer_pmtu(&rt->dst, peer); | ||
1783 | if (peer->redirect_learned.a4 && | ||
1784 | peer->redirect_learned.a4 != rt->rt_gateway) { | ||
1785 | rt->rt_gateway = peer->redirect_learned.a4; | ||
1786 | rt->rt_flags |= RTCF_REDIRECTED; | ||
1787 | } | ||
1788 | } else { | ||
1789 | if (fi->fib_metrics != (u32 *) dst_default_metrics) { | ||
1790 | rt->fi = fi; | ||
1791 | atomic_inc(&fi->fib_clntref); | ||
1792 | } | ||
1793 | dst_init_metrics(&rt->dst, fi->fib_metrics, true); | ||
1794 | } | ||
1795 | } | ||
1796 | |||
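rt_init_metrics() above picks one of two metric sources for the new cache entry: if an inet_peer exists (created eagerly when FLOWI_FLAG_PRECOW_METRICS is set), the peer's writable metrics array backs the dst; otherwise the fib_info's array is attached read-only, with a reference taken via fib_clntref so it outlives the route. A sketch of that shared-versus-private choice (types and names here are modeled, not the kernel's):

	#include <stdbool.h>
	#include <stdint.h>

	#define RTAX_MAX 16

	struct metrics_src {
		uint32_t vals[RTAX_MAX];
		int refcnt;
	};

	struct route_sketch {
		uint32_t *metrics;
		bool metrics_writable;
	};

	/* Prefer a per-peer writable copy; fall back to the shared fib array. */
	static void init_metrics(struct route_sketch *rt, struct metrics_src *peer,
				 struct metrics_src *fib)
	{
		if (peer) {
			rt->metrics = peer->vals;	/* private; PMTU may rewrite it */
			rt->metrics_writable = true;
		} else {
			fib->refcnt++;			/* keep the shared array alive */
			rt->metrics = fib->vals;
			rt->metrics_writable = false;	/* copy-on-write to change */
		}
	}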
1797 | static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, | ||
1798 | const struct fib_result *res, | ||
1799 | struct fib_info *fi, u16 type, u32 itag) | ||
1800 | { | ||
1801 | struct dst_entry *dst = &rt->dst; | ||
1804 | 1802 | ||
1805 | if (fi) { | 1803 | if (fi) { |
1806 | if (FIB_RES_GW(*res) && | 1804 | if (FIB_RES_GW(*res) && |
1807 | FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) | 1805 | FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) |
1808 | rt->rt_gateway = FIB_RES_GW(*res); | 1806 | rt->rt_gateway = FIB_RES_GW(*res); |
1809 | memcpy(rt->dst.metrics, fi->fib_metrics, | 1807 | rt_init_metrics(rt, fl4, fi); |
1810 | sizeof(rt->dst.metrics)); | 1808 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1811 | if (fi->fib_mtu == 0) { | 1809 | dst->tclassid = FIB_RES_NH(*res).nh_tclassid; |
1812 | rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu; | ||
1813 | if (dst_metric_locked(&rt->dst, RTAX_MTU) && | ||
1814 | rt->rt_gateway != rt->rt_dst && | ||
1815 | rt->dst.dev->mtu > 576) | ||
1816 | rt->dst.metrics[RTAX_MTU-1] = 576; | ||
1817 | } | ||
1818 | #ifdef CONFIG_NET_CLS_ROUTE | ||
1819 | rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid; | ||
1820 | #endif | 1810 | #endif |
1821 | } else | 1811 | } |
1822 | rt->dst.metrics[RTAX_MTU-1]= rt->dst.dev->mtu; | 1812 | |
1823 | 1813 | if (dst_mtu(dst) > IP_MAX_MTU) | |
1824 | if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0) | 1814 | dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU); |
1825 | rt->dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; | 1815 | if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40) |
1826 | if (dst_mtu(&rt->dst) > IP_MAX_MTU) | 1816 | dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40); |
1827 | rt->dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; | 1817 | |
1828 | if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0) | 1818 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1829 | rt->dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->dst.dev->mtu - 40, | ||
1830 | ip_rt_min_advmss); | ||
1831 | if (dst_metric(&rt->dst, RTAX_ADVMSS) > 65535 - 40) | ||
1832 | rt->dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; | ||
1833 | |||
1834 | #ifdef CONFIG_NET_CLS_ROUTE | ||
1835 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 1819 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
1836 | set_class_tag(rt, fib_rules_tclass(res)); | 1820 | set_class_tag(rt, fib_rules_tclass(res)); |
1837 | #endif | 1821 | #endif |
1838 | set_class_tag(rt, itag); | 1822 | set_class_tag(rt, itag); |
1839 | #endif | 1823 | #endif |
1840 | rt->rt_type = res->type; | 1824 | } |
1825 | |||
1826 | static struct rtable *rt_dst_alloc(struct net_device *dev, | ||
1827 | bool nopolicy, bool noxfrm) | ||
1828 | { | ||
1829 | return dst_alloc(&ipv4_dst_ops, dev, 1, -1, | ||
1830 | DST_HOST | | ||
1831 | (nopolicy ? DST_NOPOLICY : 0) | | ||
1832 | (noxfrm ? DST_NOXFRM : 0)); | ||
1841 | } | 1833 | } |
1842 | 1834 | ||
1843 | /* called in rcu_read_lock() section */ | 1835 | /* called in rcu_read_lock() section */ |
@@ -1865,42 +1857,38 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1865 | goto e_inval; | 1857 | goto e_inval; |
1866 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); | 1858 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); |
1867 | } else { | 1859 | } else { |
1868 | err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, | 1860 | err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst, |
1869 | &itag, 0); | 1861 | &itag); |
1870 | if (err < 0) | 1862 | if (err < 0) |
1871 | goto e_err; | 1863 | goto e_err; |
1872 | } | 1864 | } |
1873 | rth = dst_alloc(&ipv4_dst_ops); | 1865 | rth = rt_dst_alloc(init_net.loopback_dev, |
1866 | IN_DEV_CONF_GET(in_dev, NOPOLICY), false); | ||
1874 | if (!rth) | 1867 | if (!rth) |
1875 | goto e_nobufs; | 1868 | goto e_nobufs; |
1876 | 1869 | ||
1870 | #ifdef CONFIG_IP_ROUTE_CLASSID | ||
1871 | rth->dst.tclassid = itag; | ||
1872 | #endif | ||
1877 | rth->dst.output = ip_rt_bug; | 1873 | rth->dst.output = ip_rt_bug; |
1878 | rth->dst.obsolete = -1; | ||
1879 | 1874 | ||
1880 | atomic_set(&rth->dst.__refcnt, 1); | 1875 | rth->rt_key_dst = daddr; |
1881 | rth->dst.flags= DST_HOST; | 1876 | rth->rt_key_src = saddr; |
1882 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | 1877 | rth->rt_genid = rt_genid(dev_net(dev)); |
1883 | rth->dst.flags |= DST_NOPOLICY; | 1878 | rth->rt_flags = RTCF_MULTICAST; |
1884 | rth->fl.fl4_dst = daddr; | 1879 | rth->rt_type = RTN_MULTICAST; |
1880 | rth->rt_key_tos = tos; | ||
1885 | rth->rt_dst = daddr; | 1881 | rth->rt_dst = daddr; |
1886 | rth->fl.fl4_tos = tos; | ||
1887 | rth->fl.mark = skb->mark; | ||
1888 | rth->fl.fl4_src = saddr; | ||
1889 | rth->rt_src = saddr; | 1882 | rth->rt_src = saddr; |
1890 | #ifdef CONFIG_NET_CLS_ROUTE | 1883 | rth->rt_route_iif = dev->ifindex; |
1891 | rth->dst.tclassid = itag; | 1884 | rth->rt_iif = dev->ifindex; |
1892 | #endif | 1885 | rth->rt_oif = 0; |
1893 | rth->rt_iif = | 1886 | rth->rt_mark = skb->mark; |
1894 | rth->fl.iif = dev->ifindex; | ||
1895 | rth->dst.dev = init_net.loopback_dev; | ||
1896 | dev_hold(rth->dst.dev); | ||
1897 | rth->idev = in_dev_get(rth->dst.dev); | ||
1898 | rth->fl.oif = 0; | ||
1899 | rth->rt_gateway = daddr; | 1887 | rth->rt_gateway = daddr; |
1900 | rth->rt_spec_dst= spec_dst; | 1888 | rth->rt_spec_dst= spec_dst; |
1901 | rth->rt_genid = rt_genid(dev_net(dev)); | 1889 | rth->rt_peer_genid = 0; |
1902 | rth->rt_flags = RTCF_MULTICAST; | 1890 | rth->peer = NULL; |
1903 | rth->rt_type = RTN_MULTICAST; | 1891 | rth->fi = NULL; |
1904 | if (our) { | 1892 | if (our) { |
1905 | rth->dst.input= ip_local_deliver; | 1893 | rth->dst.input= ip_local_deliver; |
1906 | rth->rt_flags |= RTCF_LOCAL; | 1894 | rth->rt_flags |= RTCF_LOCAL; |
@@ -1913,7 +1901,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1913 | RT_CACHE_STAT_INC(in_slow_mc); | 1901 | RT_CACHE_STAT_INC(in_slow_mc); |
1914 | 1902 | ||
1915 | hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); | 1903 | hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); |
1916 | return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex); | 1904 | rth = rt_intern_hash(hash, rth, skb, dev->ifindex); |
1905 | return IS_ERR(rth) ? PTR_ERR(rth) : 0; | ||
1917 | 1906 | ||
1918 | e_nobufs: | 1907 | e_nobufs: |
1919 | return -ENOBUFS; | 1908 | return -ENOBUFS; |
@@ -1956,7 +1945,7 @@ static void ip_handle_martian_source(struct net_device *dev, | |||
1956 | 1945 | ||
1957 | /* called in rcu_read_lock() section */ | 1946 | /* called in rcu_read_lock() section */ |
1958 | static int __mkroute_input(struct sk_buff *skb, | 1947 | static int __mkroute_input(struct sk_buff *skb, |
1959 | struct fib_result *res, | 1948 | const struct fib_result *res, |
1960 | struct in_device *in_dev, | 1949 | struct in_device *in_dev, |
1961 | __be32 daddr, __be32 saddr, u32 tos, | 1950 | __be32 daddr, __be32 saddr, u32 tos, |
1962 | struct rtable **result) | 1951 | struct rtable **result) |
@@ -1978,8 +1967,8 @@ static int __mkroute_input(struct sk_buff *skb, | |||
1978 | } | 1967 | } |
1979 | 1968 | ||
1980 | 1969 | ||
1981 | err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res), | 1970 | err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), |
1982 | in_dev->dev, &spec_dst, &itag, skb->mark); | 1971 | in_dev->dev, &spec_dst, &itag); |
1983 | if (err < 0) { | 1972 | if (err < 0) { |
1984 | ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, | 1973 | ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, |
1985 | saddr); | 1974 | saddr); |
@@ -2010,42 +1999,36 @@ static int __mkroute_input(struct sk_buff *skb, | |||
2010 | } | 1999 | } |
2011 | } | 2000 | } |
2012 | 2001 | ||
2013 | 2002 | rth = rt_dst_alloc(out_dev->dev, | |
2014 | rth = dst_alloc(&ipv4_dst_ops); | 2003 | IN_DEV_CONF_GET(in_dev, NOPOLICY), |
2004 | IN_DEV_CONF_GET(out_dev, NOXFRM)); | ||
2015 | if (!rth) { | 2005 | if (!rth) { |
2016 | err = -ENOBUFS; | 2006 | err = -ENOBUFS; |
2017 | goto cleanup; | 2007 | goto cleanup; |
2018 | } | 2008 | } |
2019 | 2009 | ||
2020 | atomic_set(&rth->dst.__refcnt, 1); | 2010 | rth->rt_key_dst = daddr; |
2021 | rth->dst.flags= DST_HOST; | 2011 | rth->rt_key_src = saddr; |
2022 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | 2012 | rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); |
2023 | rth->dst.flags |= DST_NOPOLICY; | 2013 | rth->rt_flags = flags; |
2024 | if (IN_DEV_CONF_GET(out_dev, NOXFRM)) | 2014 | rth->rt_type = res->type; |
2025 | rth->dst.flags |= DST_NOXFRM; | 2015 | rth->rt_key_tos = tos; |
2026 | rth->fl.fl4_dst = daddr; | ||
2027 | rth->rt_dst = daddr; | 2016 | rth->rt_dst = daddr; |
2028 | rth->fl.fl4_tos = tos; | ||
2029 | rth->fl.mark = skb->mark; | ||
2030 | rth->fl.fl4_src = saddr; | ||
2031 | rth->rt_src = saddr; | 2017 | rth->rt_src = saddr; |
2018 | rth->rt_route_iif = in_dev->dev->ifindex; | ||
2019 | rth->rt_iif = in_dev->dev->ifindex; | ||
2020 | rth->rt_oif = 0; | ||
2021 | rth->rt_mark = skb->mark; | ||
2032 | rth->rt_gateway = daddr; | 2022 | rth->rt_gateway = daddr; |
2033 | rth->rt_iif = | ||
2034 | rth->fl.iif = in_dev->dev->ifindex; | ||
2035 | rth->dst.dev = (out_dev)->dev; | ||
2036 | dev_hold(rth->dst.dev); | ||
2037 | rth->idev = in_dev_get(rth->dst.dev); | ||
2038 | rth->fl.oif = 0; | ||
2039 | rth->rt_spec_dst= spec_dst; | 2023 | rth->rt_spec_dst= spec_dst; |
2024 | rth->rt_peer_genid = 0; | ||
2025 | rth->peer = NULL; | ||
2026 | rth->fi = NULL; | ||
2040 | 2027 | ||
2041 | rth->dst.obsolete = -1; | ||
2042 | rth->dst.input = ip_forward; | 2028 | rth->dst.input = ip_forward; |
2043 | rth->dst.output = ip_output; | 2029 | rth->dst.output = ip_output; |
2044 | rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); | ||
2045 | |||
2046 | rt_set_nexthop(rth, res, itag); | ||
2047 | 2030 | ||
2048 | rth->rt_flags = flags; | 2031 | rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag); |
2049 | 2032 | ||
2050 | *result = rth; | 2033 | *result = rth; |
2051 | err = 0; | 2034 | err = 0; |
@@ -2055,7 +2038,7 @@ static int __mkroute_input(struct sk_buff *skb, | |||
2055 | 2038 | ||
2056 | static int ip_mkroute_input(struct sk_buff *skb, | 2039 | static int ip_mkroute_input(struct sk_buff *skb, |
2057 | struct fib_result *res, | 2040 | struct fib_result *res, |
2058 | const struct flowi *fl, | 2041 | const struct flowi4 *fl4, |
2059 | struct in_device *in_dev, | 2042 | struct in_device *in_dev, |
2060 | __be32 daddr, __be32 saddr, u32 tos) | 2043 | __be32 daddr, __be32 saddr, u32 tos) |
2061 | { | 2044 | { |
@@ -2064,8 +2047,8 @@ static int ip_mkroute_input(struct sk_buff *skb, | |||
2064 | unsigned hash; | 2047 | unsigned hash; |
2065 | 2048 | ||
2066 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 2049 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
2067 | if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0) | 2050 | if (res->fi && res->fi->fib_nhs > 1) |
2068 | fib_select_multipath(fl, res); | 2051 | fib_select_multipath(res); |
2069 | #endif | 2052 | #endif |
2070 | 2053 | ||
2071 | /* create a routing cache entry */ | 2054 | /* create a routing cache entry */ |
@@ -2074,9 +2057,12 @@ static int ip_mkroute_input(struct sk_buff *skb, | |||
2074 | return err; | 2057 | return err; |
2075 | 2058 | ||
2076 | /* put it into the cache */ | 2059 | /* put it into the cache */ |
2077 | hash = rt_hash(daddr, saddr, fl->iif, | 2060 | hash = rt_hash(daddr, saddr, fl4->flowi4_iif, |
2078 | rt_genid(dev_net(rth->dst.dev))); | 2061 | rt_genid(dev_net(rth->dst.dev))); |
2079 | return rt_intern_hash(hash, rth, NULL, skb, fl->iif); | 2062 | rth = rt_intern_hash(hash, rth, skb, fl4->flowi4_iif); |
2063 | if (IS_ERR(rth)) | ||
2064 | return PTR_ERR(rth); | ||
2065 | return 0; | ||
2080 | } | 2066 | } |
2081 | 2067 | ||
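rt_intern_hash() no longer reports through an int return plus an output pointer; it returns either the interned rtable or an error encoded in the pointer itself, which callers unpack with IS_ERR()/PTR_ERR() as in the hunk above. The kernel reserves the top 4095 addresses for errno values; a userspace model of the trick (names lowercased to mark them as a sketch, not the kernel macros):

	#include <stdio.h>

	#define MAX_ERRNO 4095UL

	static inline void *err_ptr(long err)	/* err is negative, e.g. -12 */
	{
		return (void *)err;
	}

	static inline int is_err(const void *ptr)	/* top 4095 addresses = errors */
	{
		return (unsigned long)ptr >= -MAX_ERRNO;
	}

	static inline long ptr_err(const void *ptr)
	{
		return (long)ptr;	/* recover the errno value */
	}

	int main(void)
	{
		void *p = err_ptr(-12);	/* an -ENOBUFS-style encoding */

		if (is_err(p))
			printf("error %ld\n", ptr_err(p));	/* error -12 */
		return 0;
	}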
2082 | /* | 2068 | /* |
@@ -2087,6 +2073,7 @@ static int ip_mkroute_input(struct sk_buff *skb, | |||
2087 | * Such approach solves two big problems: | 2073 | * Such approach solves two big problems: |
2088 | * 1. Not simplex devices are handled properly. | 2074 | * 1. Not simplex devices are handled properly. |
2089 | * 2. IP spoofing attempts are filtered with 100% of guarantee. | 2075 | * 2. IP spoofing attempts are filtered with 100% of guarantee. |
2076 | * called with rcu_read_lock() | ||
2090 | */ | 2077 | */ |
2091 | 2078 | ||
2092 | static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | 2079 | static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, |
@@ -2094,21 +2081,13 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2094 | { | 2081 | { |
2095 | struct fib_result res; | 2082 | struct fib_result res; |
2096 | struct in_device *in_dev = __in_dev_get_rcu(dev); | 2083 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
2097 | struct flowi fl = { .nl_u = { .ip4_u = | 2084 | struct flowi4 fl4; |
2098 | { .daddr = daddr, | ||
2099 | .saddr = saddr, | ||
2100 | .tos = tos, | ||
2101 | .scope = RT_SCOPE_UNIVERSE, | ||
2102 | } }, | ||
2103 | .mark = skb->mark, | ||
2104 | .iif = dev->ifindex }; | ||
2105 | unsigned flags = 0; | 2085 | unsigned flags = 0; |
2106 | u32 itag = 0; | 2086 | u32 itag = 0; |
2107 | struct rtable * rth; | 2087 | struct rtable * rth; |
2108 | unsigned hash; | 2088 | unsigned hash; |
2109 | __be32 spec_dst; | 2089 | __be32 spec_dst; |
2110 | int err = -EINVAL; | 2090 | int err = -EINVAL; |
2111 | int free_res = 0; | ||
2112 | struct net * net = dev_net(dev); | 2091 | struct net * net = dev_net(dev); |
2113 | 2092 | ||
2114 | /* IP on this device is disabled. */ | 2093 | /* IP on this device is disabled. */ |
@@ -2124,7 +2103,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2124 | ipv4_is_loopback(saddr)) | 2103 | ipv4_is_loopback(saddr)) |
2125 | goto martian_source; | 2104 | goto martian_source; |
2126 | 2105 | ||
2127 | if (daddr == htonl(0xFFFFFFFF) || (saddr == 0 && daddr == 0)) | 2106 | if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0)) |
2128 | goto brd_input; | 2107 | goto brd_input; |
2129 | 2108 | ||
2130 | /* Accept zero addresses only to limited broadcast; | 2109 | /* Accept zero addresses only to limited broadcast; |
@@ -2133,19 +2112,25 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2133 | if (ipv4_is_zeronet(saddr)) | 2112 | if (ipv4_is_zeronet(saddr)) |
2134 | goto martian_source; | 2113 | goto martian_source; |
2135 | 2114 | ||
2136 | if (ipv4_is_lbcast(daddr) || ipv4_is_zeronet(daddr) || | 2115 | if (ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr)) |
2137 | ipv4_is_loopback(daddr)) | ||
2138 | goto martian_destination; | 2116 | goto martian_destination; |
2139 | 2117 | ||
2140 | /* | 2118 | /* |
2141 | * Now we are ready to route packet. | 2119 | * Now we are ready to route packet. |
2142 | */ | 2120 | */ |
2143 | if ((err = fib_lookup(net, &fl, &res)) != 0) { | 2121 | fl4.flowi4_oif = 0; |
2122 | fl4.flowi4_iif = dev->ifindex; | ||
2123 | fl4.flowi4_mark = skb->mark; | ||
2124 | fl4.flowi4_tos = tos; | ||
2125 | fl4.flowi4_scope = RT_SCOPE_UNIVERSE; | ||
2126 | fl4.daddr = daddr; | ||
2127 | fl4.saddr = saddr; | ||
2128 | err = fib_lookup(net, &fl4, &res); | ||
2129 | if (err != 0) { | ||
2144 | if (!IN_DEV_FORWARD(in_dev)) | 2130 | if (!IN_DEV_FORWARD(in_dev)) |
2145 | goto e_hostunreach; | 2131 | goto e_hostunreach; |
2146 | goto no_route; | 2132 | goto no_route; |
2147 | } | 2133 | } |
2148 | free_res = 1; | ||
2149 | 2134 | ||
2150 | RT_CACHE_STAT_INC(in_slow_tot); | 2135 | RT_CACHE_STAT_INC(in_slow_tot); |
2151 | 2136 | ||
@@ -2153,9 +2138,9 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2153 | goto brd_input; | 2138 | goto brd_input; |
2154 | 2139 | ||
2155 | if (res.type == RTN_LOCAL) { | 2140 | if (res.type == RTN_LOCAL) { |
2156 | err = fib_validate_source(saddr, daddr, tos, | 2141 | err = fib_validate_source(skb, saddr, daddr, tos, |
2157 | net->loopback_dev->ifindex, | 2142 | net->loopback_dev->ifindex, |
2158 | dev, &spec_dst, &itag, skb->mark); | 2143 | dev, &spec_dst, &itag); |
2159 | if (err < 0) | 2144 | if (err < 0) |
2160 | goto martian_source_keep_err; | 2145 | goto martian_source_keep_err; |
2161 | if (err) | 2146 | if (err) |
@@ -2169,10 +2154,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2169 | if (res.type != RTN_UNICAST) | 2154 | if (res.type != RTN_UNICAST) |
2170 | goto martian_destination; | 2155 | goto martian_destination; |
2171 | 2156 | ||
2172 | err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); | 2157 | err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos); |
2173 | done: | ||
2174 | if (free_res) | ||
2175 | fib_res_put(&res); | ||
2176 | out: return err; | 2158 | out: return err; |
2177 | 2159 | ||
2178 | brd_input: | 2160 | brd_input: |
@@ -2182,8 +2164,8 @@ brd_input: | |||
2182 | if (ipv4_is_zeronet(saddr)) | 2164 | if (ipv4_is_zeronet(saddr)) |
2183 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); | 2165 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); |
2184 | else { | 2166 | else { |
2185 | err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, | 2167 | err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst, |
2186 | &itag, skb->mark); | 2168 | &itag); |
2187 | if (err < 0) | 2169 | if (err < 0) |
2188 | goto martian_source_keep_err; | 2170 | goto martian_source_keep_err; |
2189 | if (err) | 2171 | if (err) |
@@ -2194,45 +2176,48 @@ brd_input: | |||
2194 | RT_CACHE_STAT_INC(in_brd); | 2176 | RT_CACHE_STAT_INC(in_brd); |
2195 | 2177 | ||
2196 | local_input: | 2178 | local_input: |
2197 | rth = dst_alloc(&ipv4_dst_ops); | 2179 | rth = rt_dst_alloc(net->loopback_dev, |
2180 | IN_DEV_CONF_GET(in_dev, NOPOLICY), false); | ||
2198 | if (!rth) | 2181 | if (!rth) |
2199 | goto e_nobufs; | 2182 | goto e_nobufs; |
2200 | 2183 | ||
2184 | rth->dst.input= ip_local_deliver; | ||
2201 | rth->dst.output= ip_rt_bug; | 2185 | rth->dst.output= ip_rt_bug; |
2202 | rth->dst.obsolete = -1; | 2186 | #ifdef CONFIG_IP_ROUTE_CLASSID |
2203 | rth->rt_genid = rt_genid(net); | 2187 | rth->dst.tclassid = itag; |
2188 | #endif | ||
2204 | 2189 | ||
2205 | atomic_set(&rth->dst.__refcnt, 1); | 2190 | rth->rt_key_dst = daddr; |
2206 | rth->dst.flags= DST_HOST; | 2191 | rth->rt_key_src = saddr; |
2207 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | 2192 | rth->rt_genid = rt_genid(net); |
2208 | rth->dst.flags |= DST_NOPOLICY; | 2193 | rth->rt_flags = flags|RTCF_LOCAL; |
2209 | rth->fl.fl4_dst = daddr; | 2194 | rth->rt_type = res.type; |
2195 | rth->rt_key_tos = tos; | ||
2210 | rth->rt_dst = daddr; | 2196 | rth->rt_dst = daddr; |
2211 | rth->fl.fl4_tos = tos; | ||
2212 | rth->fl.mark = skb->mark; | ||
2213 | rth->fl.fl4_src = saddr; | ||
2214 | rth->rt_src = saddr; | 2197 | rth->rt_src = saddr; |
2215 | #ifdef CONFIG_NET_CLS_ROUTE | 2198 | #ifdef CONFIG_IP_ROUTE_CLASSID |
2216 | rth->dst.tclassid = itag; | 2199 | rth->dst.tclassid = itag; |
2217 | #endif | 2200 | #endif |
2218 | rth->rt_iif = | 2201 | rth->rt_route_iif = dev->ifindex; |
2219 | rth->fl.iif = dev->ifindex; | 2202 | rth->rt_iif = dev->ifindex; |
2220 | rth->dst.dev = net->loopback_dev; | 2203 | rth->rt_oif = 0; |
2221 | dev_hold(rth->dst.dev); | 2204 | rth->rt_mark = skb->mark; |
2222 | rth->idev = in_dev_get(rth->dst.dev); | ||
2223 | rth->rt_gateway = daddr; | 2205 | rth->rt_gateway = daddr; |
2224 | rth->rt_spec_dst= spec_dst; | 2206 | rth->rt_spec_dst= spec_dst; |
2225 | rth->dst.input= ip_local_deliver; | 2207 | rth->rt_peer_genid = 0; |
2226 | rth->rt_flags = flags|RTCF_LOCAL; | 2208 | rth->peer = NULL; |
2209 | rth->fi = NULL; | ||
2227 | if (res.type == RTN_UNREACHABLE) { | 2210 | if (res.type == RTN_UNREACHABLE) { |
2228 | rth->dst.input= ip_error; | 2211 | rth->dst.input= ip_error; |
2229 | rth->dst.error= -err; | 2212 | rth->dst.error= -err; |
2230 | rth->rt_flags &= ~RTCF_LOCAL; | 2213 | rth->rt_flags &= ~RTCF_LOCAL; |
2231 | } | 2214 | } |
2232 | rth->rt_type = res.type; | 2215 | hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net)); |
2233 | hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); | 2216 | rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif); |
2234 | err = rt_intern_hash(hash, rth, NULL, skb, fl.iif); | 2217 | err = 0; |
2235 | goto done; | 2218 | if (IS_ERR(rth)) |
2219 | err = PTR_ERR(rth); | ||
2220 | goto out; | ||
2236 | 2221 | ||
2237 | no_route: | 2222 | no_route: |
2238 | RT_CACHE_STAT_INC(in_no_route); | 2223 | RT_CACHE_STAT_INC(in_no_route); |
@@ -2255,21 +2240,21 @@ martian_destination: | |||
2255 | 2240 | ||
2256 | e_hostunreach: | 2241 | e_hostunreach: |
2257 | err = -EHOSTUNREACH; | 2242 | err = -EHOSTUNREACH; |
2258 | goto done; | 2243 | goto out; |
2259 | 2244 | ||
2260 | e_inval: | 2245 | e_inval: |
2261 | err = -EINVAL; | 2246 | err = -EINVAL; |
2262 | goto done; | 2247 | goto out; |
2263 | 2248 | ||
2264 | e_nobufs: | 2249 | e_nobufs: |
2265 | err = -ENOBUFS; | 2250 | err = -ENOBUFS; |
2266 | goto done; | 2251 | goto out; |
2267 | 2252 | ||
2268 | martian_source: | 2253 | martian_source: |
2269 | err = -EINVAL; | 2254 | err = -EINVAL; |
2270 | martian_source_keep_err: | 2255 | martian_source_keep_err: |
2271 | ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); | 2256 | ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); |
2272 | goto done; | 2257 | goto out; |
2273 | } | 2258 | } |
2274 | 2259 | ||
2275 | int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, | 2260 | int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, |
@@ -2293,12 +2278,12 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2293 | 2278 | ||
2294 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; | 2279 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; |
2295 | rth = rcu_dereference(rth->dst.rt_next)) { | 2280 | rth = rcu_dereference(rth->dst.rt_next)) { |
2296 | if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | | 2281 | if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) | |
2297 | ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | | 2282 | ((__force u32)rth->rt_key_src ^ (__force u32)saddr) | |
2298 | (rth->fl.iif ^ iif) | | 2283 | (rth->rt_iif ^ iif) | |
2299 | rth->fl.oif | | 2284 | rth->rt_oif | |
2300 | (rth->fl.fl4_tos ^ tos)) == 0 && | 2285 | (rth->rt_key_tos ^ tos)) == 0 && |
2301 | rth->fl.mark == skb->mark && | 2286 | rth->rt_mark == skb->mark && |
2302 | net_eq(dev_net(rth->dst.dev), net) && | 2287 | net_eq(dev_net(rth->dst.dev), net) && |
2303 | !rt_is_expired(rth)) { | 2288 | !rt_is_expired(rth)) { |
2304 | if (noref) { | 2289 | if (noref) { |
@@ -2331,8 +2316,8 @@ skip_cache: | |||
2331 | struct in_device *in_dev = __in_dev_get_rcu(dev); | 2316 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
2332 | 2317 | ||
2333 | if (in_dev) { | 2318 | if (in_dev) { |
2334 | int our = ip_check_mc(in_dev, daddr, saddr, | 2319 | int our = ip_check_mc_rcu(in_dev, daddr, saddr, |
2335 | ip_hdr(skb)->protocol); | 2320 | ip_hdr(skb)->protocol); |
2336 | if (our | 2321 | if (our |
2337 | #ifdef CONFIG_IP_MROUTE | 2322 | #ifdef CONFIG_IP_MROUTE |
2338 | || | 2323 | || |
@@ -2355,108 +2340,95 @@ skip_cache: | |||
2355 | } | 2340 | } |
2356 | EXPORT_SYMBOL(ip_route_input_common); | 2341 | EXPORT_SYMBOL(ip_route_input_common); |
2357 | 2342 | ||
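The cache-lookup loop above compares five key fields with a single branch: each pair is XORed (zero iff equal), the results are ORed together, and the whole expression is tested against zero once. That trades short-circuit evaluation for straight-line code in the hot path. Demonstrated on a small key:

	#include <stdint.h>
	#include <stdio.h>

	struct key {
		uint32_t dst, src, tos;
		int iif, oif;
	};

	/* One branch instead of five: OR of XORs is zero iff all fields match. */
	static int key_equal(const struct key *a, const struct key *b)
	{
		return ((a->dst ^ b->dst) |
			(a->src ^ b->src) |
			(a->tos ^ b->tos) |
			(uint32_t)(a->iif ^ b->iif) |
			(uint32_t)(a->oif ^ b->oif)) == 0;
	}

	int main(void)
	{
		struct key a = { 1, 2, 0, 3, 0 }, b = a;

		printf("%d\n", key_equal(&a, &b));	/* 1 */
		b.tos = 4;
		printf("%d\n", key_equal(&a, &b));	/* 0 */
		return 0;
	}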
2358 | static int __mkroute_output(struct rtable **result, | 2343 | /* called with rcu_read_lock() */ |
2359 | struct fib_result *res, | 2344 | static struct rtable *__mkroute_output(const struct fib_result *res, |
2360 | const struct flowi *fl, | 2345 | const struct flowi4 *fl4, |
2361 | const struct flowi *oldflp, | 2346 | __be32 orig_daddr, __be32 orig_saddr, |
2362 | struct net_device *dev_out, | 2347 | int orig_oif, struct net_device *dev_out, |
2363 | unsigned flags) | 2348 | unsigned int flags) |
2364 | { | 2349 | { |
2365 | struct rtable *rth; | 2350 | struct fib_info *fi = res->fi; |
2351 | u32 tos = RT_FL_TOS(fl4); | ||
2366 | struct in_device *in_dev; | 2352 | struct in_device *in_dev; |
2367 | u32 tos = RT_FL_TOS(oldflp); | 2353 | u16 type = res->type; |
2368 | int err = 0; | 2354 | struct rtable *rth; |
2369 | 2355 | ||
2370 | if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags&IFF_LOOPBACK)) | 2356 | if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK)) |
2371 | return -EINVAL; | 2357 | return ERR_PTR(-EINVAL); |
2372 | 2358 | ||
2373 | if (fl->fl4_dst == htonl(0xFFFFFFFF)) | 2359 | if (ipv4_is_lbcast(fl4->daddr)) |
2374 | res->type = RTN_BROADCAST; | 2360 | type = RTN_BROADCAST; |
2375 | else if (ipv4_is_multicast(fl->fl4_dst)) | 2361 | else if (ipv4_is_multicast(fl4->daddr)) |
2376 | res->type = RTN_MULTICAST; | 2362 | type = RTN_MULTICAST; |
2377 | else if (ipv4_is_lbcast(fl->fl4_dst) || ipv4_is_zeronet(fl->fl4_dst)) | 2363 | else if (ipv4_is_zeronet(fl4->daddr)) |
2378 | return -EINVAL; | 2364 | return ERR_PTR(-EINVAL); |
2379 | 2365 | ||
2380 | if (dev_out->flags & IFF_LOOPBACK) | 2366 | if (dev_out->flags & IFF_LOOPBACK) |
2381 | flags |= RTCF_LOCAL; | 2367 | flags |= RTCF_LOCAL; |
2382 | 2368 | ||
2383 | /* get work reference to inet device */ | 2369 | in_dev = __in_dev_get_rcu(dev_out); |
2384 | in_dev = in_dev_get(dev_out); | ||
2385 | if (!in_dev) | 2370 | if (!in_dev) |
2386 | return -EINVAL; | 2371 | return ERR_PTR(-EINVAL); |
2387 | 2372 | ||
2388 | if (res->type == RTN_BROADCAST) { | 2373 | if (type == RTN_BROADCAST) { |
2389 | flags |= RTCF_BROADCAST | RTCF_LOCAL; | 2374 | flags |= RTCF_BROADCAST | RTCF_LOCAL; |
2390 | if (res->fi) { | 2375 | fi = NULL; |
2391 | fib_info_put(res->fi); | 2376 | } else if (type == RTN_MULTICAST) { |
2392 | res->fi = NULL; | 2377 | flags |= RTCF_MULTICAST | RTCF_LOCAL; |
2393 | } | 2378 | if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr, |
2394 | } else if (res->type == RTN_MULTICAST) { | 2379 | fl4->flowi4_proto)) |
2395 | flags |= RTCF_MULTICAST|RTCF_LOCAL; | ||
2396 | if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src, | ||
2397 | oldflp->proto)) | ||
2398 | flags &= ~RTCF_LOCAL; | 2380 | flags &= ~RTCF_LOCAL; |
2399 | /* If multicast route do not exist use | 2381 | /* If multicast route do not exist use |
2400 | default one, but do not gateway in this case. | 2382 | * default one, but do not gateway in this case. |
2401 | Yes, it is hack. | 2383 | * Yes, it is hack. |
2402 | */ | 2384 | */ |
2403 | if (res->fi && res->prefixlen < 4) { | 2385 | if (fi && res->prefixlen < 4) |
2404 | fib_info_put(res->fi); | 2386 | fi = NULL; |
2405 | res->fi = NULL; | ||
2406 | } | ||
2407 | } | 2387 | } |
2408 | 2388 | ||
2389 | rth = rt_dst_alloc(dev_out, | ||
2390 | IN_DEV_CONF_GET(in_dev, NOPOLICY), | ||
2391 | IN_DEV_CONF_GET(in_dev, NOXFRM)); | ||
2392 | if (!rth) | ||
2393 | return ERR_PTR(-ENOBUFS); | ||
2409 | 2394 | ||
2410 | rth = dst_alloc(&ipv4_dst_ops); | 2395 | rth->dst.output = ip_output; |
2411 | if (!rth) { | ||
2412 | err = -ENOBUFS; | ||
2413 | goto cleanup; | ||
2414 | } | ||
2415 | 2396 | ||
2416 | atomic_set(&rth->dst.__refcnt, 1); | 2397 | rth->rt_key_dst = orig_daddr; |
2417 | rth->dst.flags= DST_HOST; | 2398 | rth->rt_key_src = orig_saddr; |
2418 | if (IN_DEV_CONF_GET(in_dev, NOXFRM)) | ||
2419 | rth->dst.flags |= DST_NOXFRM; | ||
2420 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | ||
2421 | rth->dst.flags |= DST_NOPOLICY; | ||
2422 | |||
2423 | rth->fl.fl4_dst = oldflp->fl4_dst; | ||
2424 | rth->fl.fl4_tos = tos; | ||
2425 | rth->fl.fl4_src = oldflp->fl4_src; | ||
2426 | rth->fl.oif = oldflp->oif; | ||
2427 | rth->fl.mark = oldflp->mark; | ||
2428 | rth->rt_dst = fl->fl4_dst; | ||
2429 | rth->rt_src = fl->fl4_src; | ||
2430 | rth->rt_iif = oldflp->oif ? : dev_out->ifindex; | ||
2431 | /* get references to the devices that are to be hold by the routing | ||
2432 | cache entry */ | ||
2433 | rth->dst.dev = dev_out; | ||
2434 | dev_hold(dev_out); | ||
2435 | rth->idev = in_dev_get(dev_out); | ||
2436 | rth->rt_gateway = fl->fl4_dst; | ||
2437 | rth->rt_spec_dst= fl->fl4_src; | ||
2438 | |||
2439 | rth->dst.output=ip_output; | ||
2440 | rth->dst.obsolete = -1; | ||
2441 | rth->rt_genid = rt_genid(dev_net(dev_out)); | 2399 | rth->rt_genid = rt_genid(dev_net(dev_out)); |
2400 | rth->rt_flags = flags; | ||
2401 | rth->rt_type = type; | ||
2402 | rth->rt_key_tos = tos; | ||
2403 | rth->rt_dst = fl4->daddr; | ||
2404 | rth->rt_src = fl4->saddr; | ||
2405 | rth->rt_route_iif = 0; | ||
2406 | rth->rt_iif = orig_oif ? : dev_out->ifindex; | ||
2407 | rth->rt_oif = orig_oif; | ||
2408 | rth->rt_mark = fl4->flowi4_mark; | ||
2409 | rth->rt_gateway = fl4->daddr; | ||
2410 | rth->rt_spec_dst= fl4->saddr; | ||
2411 | rth->rt_peer_genid = 0; | ||
2412 | rth->peer = NULL; | ||
2413 | rth->fi = NULL; | ||
2442 | 2414 | ||
2443 | RT_CACHE_STAT_INC(out_slow_tot); | 2415 | RT_CACHE_STAT_INC(out_slow_tot); |
2444 | 2416 | ||
2445 | if (flags & RTCF_LOCAL) { | 2417 | if (flags & RTCF_LOCAL) { |
2446 | rth->dst.input = ip_local_deliver; | 2418 | rth->dst.input = ip_local_deliver; |
2447 | rth->rt_spec_dst = fl->fl4_dst; | 2419 | rth->rt_spec_dst = fl4->daddr; |
2448 | } | 2420 | } |
2449 | if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { | 2421 | if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { |
2450 | rth->rt_spec_dst = fl->fl4_src; | 2422 | rth->rt_spec_dst = fl4->saddr; |
2451 | if (flags & RTCF_LOCAL && | 2423 | if (flags & RTCF_LOCAL && |
2452 | !(dev_out->flags & IFF_LOOPBACK)) { | 2424 | !(dev_out->flags & IFF_LOOPBACK)) { |
2453 | rth->dst.output = ip_mc_output; | 2425 | rth->dst.output = ip_mc_output; |
2454 | RT_CACHE_STAT_INC(out_slow_mc); | 2426 | RT_CACHE_STAT_INC(out_slow_mc); |
2455 | } | 2427 | } |
2456 | #ifdef CONFIG_IP_MROUTE | 2428 | #ifdef CONFIG_IP_MROUTE |
2457 | if (res->type == RTN_MULTICAST) { | 2429 | if (type == RTN_MULTICAST) { |
2458 | if (IN_DEV_MFORWARD(in_dev) && | 2430 | if (IN_DEV_MFORWARD(in_dev) && |
2459 | !ipv4_is_local_multicast(oldflp->fl4_dst)) { | 2431 | !ipv4_is_local_multicast(fl4->daddr)) { |
2460 | rth->dst.input = ip_mr_input; | 2432 | rth->dst.input = ip_mr_input; |
2461 | rth->dst.output = ip_mc_output; | 2433 | rth->dst.output = ip_mc_output; |
2462 | } | 2434 | } |
@@ -2464,73 +2436,47 @@ static int __mkroute_output(struct rtable **result, | |||
2464 | #endif | 2436 | #endif |
2465 | } | 2437 | } |
2466 | 2438 | ||
2467 | rt_set_nexthop(rth, res, 0); | 2439 | rt_set_nexthop(rth, fl4, res, fi, type, 0); |
2468 | |||
2469 | rth->rt_flags = flags; | ||
2470 | |||
2471 | *result = rth; | ||
2472 | cleanup: | ||
2473 | /* release work reference to inet device */ | ||
2474 | in_dev_put(in_dev); | ||
2475 | |||
2476 | return err; | ||
2477 | } | ||
2478 | |||
2479 | static int ip_mkroute_output(struct rtable **rp, | ||
2480 | struct fib_result *res, | ||
2481 | const struct flowi *fl, | ||
2482 | const struct flowi *oldflp, | ||
2483 | struct net_device *dev_out, | ||
2484 | unsigned flags) | ||
2485 | { | ||
2486 | struct rtable *rth = NULL; | ||
2487 | int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); | ||
2488 | unsigned hash; | ||
2489 | if (err == 0) { | ||
2490 | hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, | ||
2491 | rt_genid(dev_net(dev_out))); | ||
2492 | err = rt_intern_hash(hash, rth, rp, NULL, oldflp->oif); | ||
2493 | } | ||
2494 | 2440 | ||
2495 | return err; | 2441 | return rth; |
2496 | } | 2442 | } |
2497 | 2443 | ||
2498 | /* | 2444 | /* |
2499 | * Major route resolver routine. | 2445 | * Major route resolver routine. |
2446 | * called with rcu_read_lock(); | ||
2500 | */ | 2447 | */ |
2501 | 2448 | ||
2502 | static int ip_route_output_slow(struct net *net, struct rtable **rp, | 2449 | static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) |
2503 | const struct flowi *oldflp) | 2450 | { |
2504 | { | ||
2505 | u32 tos = RT_FL_TOS(oldflp); | ||
2506 | struct flowi fl = { .nl_u = { .ip4_u = | ||
2507 | { .daddr = oldflp->fl4_dst, | ||
2508 | .saddr = oldflp->fl4_src, | ||
2509 | .tos = tos & IPTOS_RT_MASK, | ||
2510 | .scope = ((tos & RTO_ONLINK) ? | ||
2511 | RT_SCOPE_LINK : | ||
2512 | RT_SCOPE_UNIVERSE), | ||
2513 | } }, | ||
2514 | .mark = oldflp->mark, | ||
2515 | .iif = net->loopback_dev->ifindex, | ||
2516 | .oif = oldflp->oif }; | ||
2517 | struct fib_result res; | ||
2518 | unsigned flags = 0; | ||
2519 | struct net_device *dev_out = NULL; | 2451 | struct net_device *dev_out = NULL; |
2520 | int free_res = 0; | 2452 | u32 tos = RT_FL_TOS(fl4); |
2521 | int err; | 2453 | unsigned int flags = 0; |
2522 | 2454 | struct fib_result res; | |
2455 | struct rtable *rth; | ||
2456 | __be32 orig_daddr; | ||
2457 | __be32 orig_saddr; | ||
2458 | int orig_oif; | ||
2523 | 2459 | ||
2524 | res.fi = NULL; | 2460 | res.fi = NULL; |
2525 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 2461 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
2526 | res.r = NULL; | 2462 | res.r = NULL; |
2527 | #endif | 2463 | #endif |
2528 | 2464 | ||
2529 | if (oldflp->fl4_src) { | 2465 | orig_daddr = fl4->daddr; |
2530 | err = -EINVAL; | 2466 | orig_saddr = fl4->saddr; |
2531 | if (ipv4_is_multicast(oldflp->fl4_src) || | 2467 | orig_oif = fl4->flowi4_oif; |
2532 | ipv4_is_lbcast(oldflp->fl4_src) || | 2468 | |
2533 | ipv4_is_zeronet(oldflp->fl4_src)) | 2469 | fl4->flowi4_iif = net->loopback_dev->ifindex; |
2470 | fl4->flowi4_tos = tos & IPTOS_RT_MASK; | ||
2471 | fl4->flowi4_scope = ((tos & RTO_ONLINK) ? | ||
2472 | RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); | ||
2473 | |||
2474 | rcu_read_lock(); | ||
2475 | if (fl4->saddr) { | ||
2476 | rth = ERR_PTR(-EINVAL); | ||
2477 | if (ipv4_is_multicast(fl4->saddr) || | ||
2478 | ipv4_is_lbcast(fl4->saddr) || | ||
2479 | ipv4_is_zeronet(fl4->saddr)) | ||
2534 | goto out; | 2480 | goto out; |
2535 | 2481 | ||
2536 | /* I removed check for oif == dev_out->oif here. | 2482 | /* I removed check for oif == dev_out->oif here. |
@@ -2541,11 +2487,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2541 | of another iface. --ANK | 2487 | of another iface. --ANK |
2542 | */ | 2488 | */ |
2543 | 2489 | ||
2544 | if (oldflp->oif == 0 && | 2490 | if (fl4->flowi4_oif == 0 && |
2545 | (ipv4_is_multicast(oldflp->fl4_dst) || | 2491 | (ipv4_is_multicast(fl4->daddr) || |
2546 | oldflp->fl4_dst == htonl(0xFFFFFFFF))) { | 2492 | ipv4_is_lbcast(fl4->daddr))) { |
2547 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ | 2493 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ |
2548 | dev_out = ip_dev_find(net, oldflp->fl4_src); | 2494 | dev_out = __ip_dev_find(net, fl4->saddr, false); |
2549 | if (dev_out == NULL) | 2495 | if (dev_out == NULL) |
2550 | goto out; | 2496 | goto out; |
2551 | 2497 | ||
@@ -2564,67 +2510,60 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2564 | Luckily, this hack is good workaround. | 2510 | Luckily, this hack is good workaround. |
2565 | */ | 2511 | */ |
2566 | 2512 | ||
2567 | fl.oif = dev_out->ifindex; | 2513 | fl4->flowi4_oif = dev_out->ifindex; |
2568 | goto make_route; | 2514 | goto make_route; |
2569 | } | 2515 | } |
2570 | 2516 | ||
2571 | if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) { | 2517 | if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) { |
2572 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ | 2518 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ |
2573 | dev_out = ip_dev_find(net, oldflp->fl4_src); | 2519 | if (!__ip_dev_find(net, fl4->saddr, false)) |
2574 | if (dev_out == NULL) | ||
2575 | goto out; | 2520 | goto out; |
2576 | dev_put(dev_out); | ||
2577 | dev_out = NULL; | ||
2578 | } | 2521 | } |
2579 | } | 2522 | } |
2580 | 2523 | ||
2581 | 2524 | ||
2582 | if (oldflp->oif) { | 2525 | if (fl4->flowi4_oif) { |
2583 | dev_out = dev_get_by_index(net, oldflp->oif); | 2526 | dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif); |
2584 | err = -ENODEV; | 2527 | rth = ERR_PTR(-ENODEV); |
2585 | if (dev_out == NULL) | 2528 | if (dev_out == NULL) |
2586 | goto out; | 2529 | goto out; |
2587 | 2530 | ||
2588 | /* RACE: Check return value of inet_select_addr instead. */ | 2531 | /* RACE: Check return value of inet_select_addr instead. */ |
2589 | if (__in_dev_get_rtnl(dev_out) == NULL) { | 2532 | if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) { |
2590 | dev_put(dev_out); | 2533 | rth = ERR_PTR(-ENETUNREACH); |
2591 | goto out; /* Wrong error code */ | 2534 | goto out; |
2592 | } | 2535 | } |
2593 | 2536 | if (ipv4_is_local_multicast(fl4->daddr) || | |
2594 | if (ipv4_is_local_multicast(oldflp->fl4_dst) || | 2537 | ipv4_is_lbcast(fl4->daddr)) { |
2595 | oldflp->fl4_dst == htonl(0xFFFFFFFF)) { | 2538 | if (!fl4->saddr) |
2596 | if (!fl.fl4_src) | 2539 | fl4->saddr = inet_select_addr(dev_out, 0, |
2597 | fl.fl4_src = inet_select_addr(dev_out, 0, | ||
2598 | RT_SCOPE_LINK); | 2540 | RT_SCOPE_LINK); |
2599 | goto make_route; | 2541 | goto make_route; |
2600 | } | 2542 | } |
2601 | if (!fl.fl4_src) { | 2543 | if (fl4->saddr) { |
2602 | if (ipv4_is_multicast(oldflp->fl4_dst)) | 2544 | if (ipv4_is_multicast(fl4->daddr)) |
2603 | fl.fl4_src = inet_select_addr(dev_out, 0, | 2545 | fl4->saddr = inet_select_addr(dev_out, 0, |
2604 | fl.fl4_scope); | 2546 | fl4->flowi4_scope); |
2605 | else if (!oldflp->fl4_dst) | 2547 | else if (!fl4->daddr) |
2606 | fl.fl4_src = inet_select_addr(dev_out, 0, | 2548 | fl4->saddr = inet_select_addr(dev_out, 0, |
2607 | RT_SCOPE_HOST); | 2549 | RT_SCOPE_HOST); |
2608 | } | 2550 | } |
2609 | } | 2551 | } |
2610 | 2552 | ||
2611 | if (!fl.fl4_dst) { | 2553 | if (!fl4->daddr) { |
2612 | fl.fl4_dst = fl.fl4_src; | 2554 | fl4->daddr = fl4->saddr; |
2613 | if (!fl.fl4_dst) | 2555 | if (!fl4->daddr) |
2614 | fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); | 2556 | fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK); |
2615 | if (dev_out) | ||
2616 | dev_put(dev_out); | ||
2617 | dev_out = net->loopback_dev; | 2557 | dev_out = net->loopback_dev; |
2618 | dev_hold(dev_out); | 2558 | fl4->flowi4_oif = net->loopback_dev->ifindex; |
2619 | fl.oif = net->loopback_dev->ifindex; | ||
2620 | res.type = RTN_LOCAL; | 2559 | res.type = RTN_LOCAL; |
2621 | flags |= RTCF_LOCAL; | 2560 | flags |= RTCF_LOCAL; |
2622 | goto make_route; | 2561 | goto make_route; |
2623 | } | 2562 | } |
2624 | 2563 | ||
2625 | if (fib_lookup(net, &fl, &res)) { | 2564 | if (fib_lookup(net, fl4, &res)) { |
2626 | res.fi = NULL; | 2565 | res.fi = NULL; |
2627 | if (oldflp->oif) { | 2566 | if (fl4->flowi4_oif) { |
2628 | /* Apparently, routing tables are wrong. Assume, | 2567 | /* Apparently, routing tables are wrong. Assume, |
2629 | that the destination is on link. | 2568 | that the destination is on link. |
2630 | 2569 | ||
@@ -2643,98 +2582,100 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2643 | likely IPv6, but we do not. | 2582 | likely IPv6, but we do not. |
2644 | */ | 2583 | */ |
2645 | 2584 | ||
2646 | if (fl.fl4_src == 0) | 2585 | if (fl4->saddr == 0) |
2647 | fl.fl4_src = inet_select_addr(dev_out, 0, | 2586 | fl4->saddr = inet_select_addr(dev_out, 0, |
2648 | RT_SCOPE_LINK); | 2587 | RT_SCOPE_LINK); |
2649 | res.type = RTN_UNICAST; | 2588 | res.type = RTN_UNICAST; |
2650 | goto make_route; | 2589 | goto make_route; |
2651 | } | 2590 | } |
2652 | if (dev_out) | 2591 | rth = ERR_PTR(-ENETUNREACH); |
2653 | dev_put(dev_out); | ||
2654 | err = -ENETUNREACH; | ||
2655 | goto out; | 2592 | goto out; |
2656 | } | 2593 | } |
2657 | free_res = 1; | ||
2658 | 2594 | ||
2659 | if (res.type == RTN_LOCAL) { | 2595 | if (res.type == RTN_LOCAL) { |
2660 | if (!fl.fl4_src) | 2596 | if (!fl4->saddr) { |
2661 | fl.fl4_src = fl.fl4_dst; | 2597 | if (res.fi->fib_prefsrc) |
2662 | if (dev_out) | 2598 | fl4->saddr = res.fi->fib_prefsrc; |
2663 | dev_put(dev_out); | 2599 | else |
2600 | fl4->saddr = fl4->daddr; | ||
2601 | } | ||
2664 | dev_out = net->loopback_dev; | 2602 | dev_out = net->loopback_dev; |
2665 | dev_hold(dev_out); | 2603 | fl4->flowi4_oif = dev_out->ifindex; |
2666 | fl.oif = dev_out->ifindex; | ||
2667 | if (res.fi) | ||
2668 | fib_info_put(res.fi); | ||
2669 | res.fi = NULL; | 2604 | res.fi = NULL; |
2670 | flags |= RTCF_LOCAL; | 2605 | flags |= RTCF_LOCAL; |
2671 | goto make_route; | 2606 | goto make_route; |
2672 | } | 2607 | } |
2673 | 2608 | ||
2674 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 2609 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
2675 | if (res.fi->fib_nhs > 1 && fl.oif == 0) | 2610 | if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0) |
2676 | fib_select_multipath(&fl, &res); | 2611 | fib_select_multipath(&res); |
2677 | else | 2612 | else |
2678 | #endif | 2613 | #endif |
2679 | if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif) | 2614 | if (!res.prefixlen && |
2680 | fib_select_default(net, &fl, &res); | 2615 | res.table->tb_num_default > 1 && |
2616 | res.type == RTN_UNICAST && !fl4->flowi4_oif) | ||
2617 | fib_select_default(&res); | ||
2681 | 2618 | ||
2682 | if (!fl.fl4_src) | 2619 | if (!fl4->saddr) |
2683 | fl.fl4_src = FIB_RES_PREFSRC(res); | 2620 | fl4->saddr = FIB_RES_PREFSRC(net, res); |
2684 | 2621 | ||
2685 | if (dev_out) | ||
2686 | dev_put(dev_out); | ||
2687 | dev_out = FIB_RES_DEV(res); | 2622 | dev_out = FIB_RES_DEV(res); |
2688 | dev_hold(dev_out); | 2623 | fl4->flowi4_oif = dev_out->ifindex; |
2689 | fl.oif = dev_out->ifindex; | ||
2690 | 2624 | ||
2691 | 2625 | ||
2692 | make_route: | 2626 | make_route: |
2693 | err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags); | 2627 | rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif, |
2628 | dev_out, flags); | ||
2629 | if (!IS_ERR(rth)) { | ||
2630 | unsigned int hash; | ||
2694 | 2631 | ||
2632 | hash = rt_hash(orig_daddr, orig_saddr, orig_oif, | ||
2633 | rt_genid(dev_net(dev_out))); | ||
2634 | rth = rt_intern_hash(hash, rth, NULL, orig_oif); | ||
2635 | } | ||
2695 | 2636 | ||
2696 | if (free_res) | 2637 | out: |
2697 | fib_res_put(&res); | 2638 | rcu_read_unlock(); |
2698 | if (dev_out) | 2639 | return rth; |
2699 | dev_put(dev_out); | ||
2700 | out: return err; | ||
2701 | } | 2640 | } |
2702 | 2641 | ||
2703 | int __ip_route_output_key(struct net *net, struct rtable **rp, | 2642 | struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4) |
2704 | const struct flowi *flp) | ||
2705 | { | 2643 | { |
2706 | unsigned hash; | ||
2707 | struct rtable *rth; | 2644 | struct rtable *rth; |
2645 | unsigned int hash; | ||
2708 | 2646 | ||
2709 | if (!rt_caching(net)) | 2647 | if (!rt_caching(net)) |
2710 | goto slow_output; | 2648 | goto slow_output; |
2711 | 2649 | ||
2712 | hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); | 2650 | hash = rt_hash(flp4->daddr, flp4->saddr, flp4->flowi4_oif, rt_genid(net)); |
2713 | 2651 | ||
2714 | rcu_read_lock_bh(); | 2652 | rcu_read_lock_bh(); |
2715 | for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; | 2653 | for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; |
2716 | rth = rcu_dereference_bh(rth->dst.rt_next)) { | 2654 | rth = rcu_dereference_bh(rth->dst.rt_next)) { |
2717 | if (rth->fl.fl4_dst == flp->fl4_dst && | 2655 | if (rth->rt_key_dst == flp4->daddr && |
2718 | rth->fl.fl4_src == flp->fl4_src && | 2656 | rth->rt_key_src == flp4->saddr && |
2719 | rth->fl.iif == 0 && | 2657 | rt_is_output_route(rth) && |
2720 | rth->fl.oif == flp->oif && | 2658 | rth->rt_oif == flp4->flowi4_oif && |
2721 | rth->fl.mark == flp->mark && | 2659 | rth->rt_mark == flp4->flowi4_mark && |
2722 | !((rth->fl.fl4_tos ^ flp->fl4_tos) & | 2660 | !((rth->rt_key_tos ^ flp4->flowi4_tos) & |
2723 | (IPTOS_RT_MASK | RTO_ONLINK)) && | 2661 | (IPTOS_RT_MASK | RTO_ONLINK)) && |
2724 | net_eq(dev_net(rth->dst.dev), net) && | 2662 | net_eq(dev_net(rth->dst.dev), net) && |
2725 | !rt_is_expired(rth)) { | 2663 | !rt_is_expired(rth)) { |
2726 | dst_use(&rth->dst, jiffies); | 2664 | dst_use(&rth->dst, jiffies); |
2727 | RT_CACHE_STAT_INC(out_hit); | 2665 | RT_CACHE_STAT_INC(out_hit); |
2728 | rcu_read_unlock_bh(); | 2666 | rcu_read_unlock_bh(); |
2729 | *rp = rth; | 2667 | if (!flp4->saddr) |
2730 | return 0; | 2668 | flp4->saddr = rth->rt_src; |
2669 | if (!flp4->daddr) | ||
2670 | flp4->daddr = rth->rt_dst; | ||
2671 | return rth; | ||
2731 | } | 2672 | } |
2732 | RT_CACHE_STAT_INC(out_hlist_search); | 2673 | RT_CACHE_STAT_INC(out_hlist_search); |
2733 | } | 2674 | } |
2734 | rcu_read_unlock_bh(); | 2675 | rcu_read_unlock_bh(); |
2735 | 2676 | ||
2736 | slow_output: | 2677 | slow_output: |
2737 | return ip_route_output_slow(net, rp, flp); | 2678 | return ip_route_output_slow(net, flp4); |
2738 | } | 2679 | } |
2739 | EXPORT_SYMBOL_GPL(__ip_route_output_key); | 2680 | EXPORT_SYMBOL_GPL(__ip_route_output_key); |
2740 | 2681 | ||
@@ -2743,95 +2684,96 @@ static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 coo | |||
2743 | return NULL; | 2684 | return NULL; |
2744 | } | 2685 | } |
2745 | 2686 | ||
2687 | static unsigned int ipv4_blackhole_default_mtu(const struct dst_entry *dst) | ||
2688 | { | ||
2689 | return 0; | ||
2690 | } | ||
2691 | |||
2746 | static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) | 2692 | static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) |
2747 | { | 2693 | { |
2748 | } | 2694 | } |
2749 | 2695 | ||
2696 | static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst, | ||
2697 | unsigned long old) | ||
2698 | { | ||
2699 | return NULL; | ||
2700 | } | ||
2701 | |||
2750 | static struct dst_ops ipv4_dst_blackhole_ops = { | 2702 | static struct dst_ops ipv4_dst_blackhole_ops = { |
2751 | .family = AF_INET, | 2703 | .family = AF_INET, |
2752 | .protocol = cpu_to_be16(ETH_P_IP), | 2704 | .protocol = cpu_to_be16(ETH_P_IP), |
2753 | .destroy = ipv4_dst_destroy, | 2705 | .destroy = ipv4_dst_destroy, |
2754 | .check = ipv4_blackhole_dst_check, | 2706 | .check = ipv4_blackhole_dst_check, |
2707 | .default_mtu = ipv4_blackhole_default_mtu, | ||
2708 | .default_advmss = ipv4_default_advmss, | ||
2755 | .update_pmtu = ipv4_rt_blackhole_update_pmtu, | 2709 | .update_pmtu = ipv4_rt_blackhole_update_pmtu, |
2756 | .entries = ATOMIC_INIT(0), | 2710 | .cow_metrics = ipv4_rt_blackhole_cow_metrics, |
2757 | }; | 2711 | }; |
2758 | 2712 | ||
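ipv4_dst_blackhole_ops above is the null-object version of the destination ops: default_mtu reports 0, cow_metrics refuses to hand out a writable array, and update_pmtu does nothing, so a blackholed route absorbs every event without touching shared state. The pattern in miniature (this ops struct is a stand-in for the kernel's struct dst_ops):

	#include <stddef.h>

	struct dst;	/* opaque in this sketch */

	struct dst_ops_sketch {
		unsigned int (*default_mtu)(const struct dst *);
		void (*update_pmtu)(struct dst *, unsigned int);
		unsigned int *(*cow_metrics)(struct dst *, unsigned long);
	};

	static unsigned int blackhole_mtu(const struct dst *d)
	{
		(void)d;
		return 0;	/* no usable MTU */
	}

	static void blackhole_update_pmtu(struct dst *d, unsigned int mtu)
	{
		(void)d; (void)mtu;	/* swallow PMTU updates */
	}

	static unsigned int *blackhole_cow(struct dst *d, unsigned long old)
	{
		(void)d; (void)old;
		return NULL;	/* never yield writable metrics */
	}

	static const struct dst_ops_sketch blackhole_ops = {
		.default_mtu = blackhole_mtu,
		.update_pmtu = blackhole_update_pmtu,
		.cow_metrics = blackhole_cow,
	};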
2759 | 2713 | struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) | |
2760 | static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi *flp) | ||
2761 | { | 2714 | { |
2762 | struct rtable *ort = *rp; | 2715 | struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, 0, 0); |
2763 | struct rtable *rt = (struct rtable *) | 2716 | struct rtable *ort = (struct rtable *) dst_orig; |
2764 | dst_alloc(&ipv4_dst_blackhole_ops); | ||
2765 | 2717 | ||
2766 | if (rt) { | 2718 | if (rt) { |
2767 | struct dst_entry *new = &rt->dst; | 2719 | struct dst_entry *new = &rt->dst; |
2768 | 2720 | ||
2769 | atomic_set(&new->__refcnt, 1); | ||
2770 | new->__use = 1; | 2721 | new->__use = 1; |
2771 | new->input = dst_discard; | 2722 | new->input = dst_discard; |
2772 | new->output = dst_discard; | 2723 | new->output = dst_discard; |
2773 | memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); | 2724 | dst_copy_metrics(new, &ort->dst); |
2774 | 2725 | ||
2775 | new->dev = ort->dst.dev; | 2726 | new->dev = ort->dst.dev; |
2776 | if (new->dev) | 2727 | if (new->dev) |
2777 | dev_hold(new->dev); | 2728 | dev_hold(new->dev); |
2778 | 2729 | ||
2779 | rt->fl = ort->fl; | 2730 | rt->rt_key_dst = ort->rt_key_dst; |
2731 | rt->rt_key_src = ort->rt_key_src; | ||
2732 | rt->rt_key_tos = ort->rt_key_tos; | ||
2733 | rt->rt_route_iif = ort->rt_route_iif; | ||
2734 | rt->rt_iif = ort->rt_iif; | ||
2735 | rt->rt_oif = ort->rt_oif; | ||
2736 | rt->rt_mark = ort->rt_mark; | ||
2780 | 2737 | ||
2781 | rt->idev = ort->idev; | ||
2782 | if (rt->idev) | ||
2783 | in_dev_hold(rt->idev); | ||
2784 | rt->rt_genid = rt_genid(net); | 2738 | rt->rt_genid = rt_genid(net); |
2785 | rt->rt_flags = ort->rt_flags; | 2739 | rt->rt_flags = ort->rt_flags; |
2786 | rt->rt_type = ort->rt_type; | 2740 | rt->rt_type = ort->rt_type; |
2787 | rt->rt_dst = ort->rt_dst; | 2741 | rt->rt_dst = ort->rt_dst; |
2788 | rt->rt_src = ort->rt_src; | 2742 | rt->rt_src = ort->rt_src; |
2789 | rt->rt_iif = ort->rt_iif; | ||
2790 | rt->rt_gateway = ort->rt_gateway; | 2743 | rt->rt_gateway = ort->rt_gateway; |
2791 | rt->rt_spec_dst = ort->rt_spec_dst; | 2744 | rt->rt_spec_dst = ort->rt_spec_dst; |
2792 | rt->peer = ort->peer; | 2745 | rt->peer = ort->peer; |
2793 | if (rt->peer) | 2746 | if (rt->peer) |
2794 | atomic_inc(&rt->peer->refcnt); | 2747 | atomic_inc(&rt->peer->refcnt); |
2748 | rt->fi = ort->fi; | ||
2749 | if (rt->fi) | ||
2750 | atomic_inc(&rt->fi->fib_clntref); | ||
2795 | 2751 | ||
2796 | dst_free(new); | 2752 | dst_free(new); |
2797 | } | 2753 | } |
2798 | 2754 | ||
2799 | dst_release(&(*rp)->dst); | 2755 | dst_release(dst_orig); |
2800 | *rp = rt; | 2756 | |
2801 | return (rt ? 0 : -ENOMEM); | 2757 | return rt ? &rt->dst : ERR_PTR(-ENOMEM); |
2802 | } | 2758 | } |
2803 | 2759 | ||
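ipv4_dst_blackhole() becomes ipv4_blackhole_route(): it returns the new dst directly (ERR_PTR-encoded on failure) instead of an int plus a struct rtable ** out-parameter, loses its static qualifier (presumably so the xfrm layer can request the blackhole conversion itself, replacing the -EREMOTE special case removed below), and copies the new rt_key_*/rt_mark routing-key fields plus the fib_info reference, while the idev hold disappears. Note also the extra dst_alloc() arguments: the initial reference is now taken by the allocator, which is why the explicit atomic_set() of __refcnt goes away. A caller-side sketch of the new convention (variable names hypothetical):

    struct dst_entry *blackhole;

    blackhole = ipv4_blackhole_route(net, &rt->dst);
    if (IS_ERR(blackhole))
            return PTR_ERR(blackhole);  /* -ENOMEM when dst_alloc() fails */
    /* the original dst reference has already been released on both paths */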
2804 | int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, | 2760 | struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, |
2805 | struct sock *sk, int flags) | 2761 | struct sock *sk) |
2806 | { | 2762 | { |
2807 | int err; | 2763 | struct rtable *rt = __ip_route_output_key(net, flp4); |
2808 | |||
2809 | if ((err = __ip_route_output_key(net, rp, flp)) != 0) | ||
2810 | return err; | ||
2811 | 2764 | ||
2812 | if (flp->proto) { | 2765 | if (IS_ERR(rt)) |
2813 | if (!flp->fl4_src) | 2766 | return rt; |
2814 | flp->fl4_src = (*rp)->rt_src; | ||
2815 | if (!flp->fl4_dst) | ||
2816 | flp->fl4_dst = (*rp)->rt_dst; | ||
2817 | err = __xfrm_lookup(net, (struct dst_entry **)rp, flp, sk, | ||
2818 | flags ? XFRM_LOOKUP_WAIT : 0); | ||
2819 | if (err == -EREMOTE) | ||
2820 | err = ipv4_dst_blackhole(net, rp, flp); | ||
2821 | 2767 | ||
2822 | return err; | 2768 | if (flp4->flowi4_proto) |
2823 | } | 2769 | rt = (struct rtable *) xfrm_lookup(net, &rt->dst, |
2770 | flowi4_to_flowi(flp4), | ||
2771 | sk, 0); | ||
2824 | 2772 | ||
2825 | return 0; | 2773 | return rt; |
2826 | } | 2774 | } |
2827 | EXPORT_SYMBOL_GPL(ip_route_output_flow); | 2775 | EXPORT_SYMBOL_GPL(ip_route_output_flow); |
2828 | 2776 | ||
2829 | int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp) | ||
2830 | { | ||
2831 | return ip_route_output_flow(net, rp, flp, NULL, 0); | ||
2832 | } | ||
2833 | EXPORT_SYMBOL(ip_route_output_key); | ||
2834 | |||
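ip_route_output_flow() follows the same pattern: it returns the struct rtable directly (ERR_PTR on failure), takes the typed struct flowi4 instead of the generic struct flowi, and drops the int flags parameter; the xfrm step collapses from __xfrm_lookup() with explicit XFRM_LOOKUP_WAIT and -EREMOTE handling into a plain xfrm_lookup() call. The ip_route_output_key() wrapper is deleted from this file, yet the getroute hunk below still calls it, so it presumably survives as an inline wrapper in a header. A sketch of the caller-side migration (fl / fl4 assumed declared appropriately):

    struct rtable *rt;

    /* old: int err = ip_route_output_key(net, &rt, &fl);
     *      if (err)
     *              return err;
     */
    rt = ip_route_output_key(net, &fl4);
    if (IS_ERR(rt))
            return PTR_ERR(rt);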
2835 | static int rt_fill_info(struct net *net, | 2777 | static int rt_fill_info(struct net *net, |
2836 | struct sk_buff *skb, u32 pid, u32 seq, int event, | 2778 | struct sk_buff *skb, u32 pid, u32 seq, int event, |
2837 | int nowait, unsigned int flags) | 2779 | int nowait, unsigned int flags) |
@@ -2839,7 +2781,8 @@ static int rt_fill_info(struct net *net, | |||
2839 | struct rtable *rt = skb_rtable(skb); | 2781 | struct rtable *rt = skb_rtable(skb); |
2840 | struct rtmsg *r; | 2782 | struct rtmsg *r; |
2841 | struct nlmsghdr *nlh; | 2783 | struct nlmsghdr *nlh; |
2842 | long expires; | 2784 | long expires = 0; |
2785 | const struct inet_peer *peer = rt->peer; | ||
2843 | u32 id = 0, ts = 0, tsage = 0, error; | 2786 | u32 id = 0, ts = 0, tsage = 0, error; |
2844 | 2787 | ||
2845 | nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); | 2788 | nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); |
@@ -2850,7 +2793,7 @@ static int rt_fill_info(struct net *net, | |||
2850 | r->rtm_family = AF_INET; | 2793 | r->rtm_family = AF_INET; |
2851 | r->rtm_dst_len = 32; | 2794 | r->rtm_dst_len = 32; |
2852 | r->rtm_src_len = 0; | 2795 | r->rtm_src_len = 0; |
2853 | r->rtm_tos = rt->fl.fl4_tos; | 2796 | r->rtm_tos = rt->rt_key_tos; |
2854 | r->rtm_table = RT_TABLE_MAIN; | 2797 | r->rtm_table = RT_TABLE_MAIN; |
2855 | NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); | 2798 | NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); |
2856 | r->rtm_type = rt->rt_type; | 2799 | r->rtm_type = rt->rt_type; |
@@ -2862,48 +2805,52 @@ static int rt_fill_info(struct net *net, | |||
2862 | 2805 | ||
2863 | NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst); | 2806 | NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst); |
2864 | 2807 | ||
2865 | if (rt->fl.fl4_src) { | 2808 | if (rt->rt_key_src) { |
2866 | r->rtm_src_len = 32; | 2809 | r->rtm_src_len = 32; |
2867 | NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); | 2810 | NLA_PUT_BE32(skb, RTA_SRC, rt->rt_key_src); |
2868 | } | 2811 | } |
2869 | if (rt->dst.dev) | 2812 | if (rt->dst.dev) |
2870 | NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); | 2813 | NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); |
2871 | #ifdef CONFIG_NET_CLS_ROUTE | 2814 | #ifdef CONFIG_IP_ROUTE_CLASSID |
2872 | if (rt->dst.tclassid) | 2815 | if (rt->dst.tclassid) |
2873 | NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); | 2816 | NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); |
2874 | #endif | 2817 | #endif |
2875 | if (rt->fl.iif) | 2818 | if (rt_is_input_route(rt)) |
2876 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); | 2819 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); |
2877 | else if (rt->rt_src != rt->fl.fl4_src) | 2820 | else if (rt->rt_src != rt->rt_key_src) |
2878 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src); | 2821 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src); |
2879 | 2822 | ||
2880 | if (rt->rt_dst != rt->rt_gateway) | 2823 | if (rt->rt_dst != rt->rt_gateway) |
2881 | NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); | 2824 | NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); |
2882 | 2825 | ||
2883 | if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) | 2826 | if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) |
2884 | goto nla_put_failure; | 2827 | goto nla_put_failure; |
2885 | 2828 | ||
2886 | if (rt->fl.mark) | 2829 | if (rt->rt_mark) |
2887 | NLA_PUT_BE32(skb, RTA_MARK, rt->fl.mark); | 2830 | NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark); |
2888 | 2831 | ||
2889 | error = rt->dst.error; | 2832 | error = rt->dst.error; |
2890 | expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; | 2833 | if (peer) { |
2891 | if (rt->peer) { | ||
2892 | inet_peer_refcheck(rt->peer); | 2834 | inet_peer_refcheck(rt->peer); |
2893 | id = atomic_read(&rt->peer->ip_id_count) & 0xffff; | 2835 | id = atomic_read(&peer->ip_id_count) & 0xffff; |
2894 | if (rt->peer->tcp_ts_stamp) { | 2836 | if (peer->tcp_ts_stamp) { |
2895 | ts = rt->peer->tcp_ts; | 2837 | ts = peer->tcp_ts; |
2896 | tsage = get_seconds() - rt->peer->tcp_ts_stamp; | 2838 | tsage = get_seconds() - peer->tcp_ts_stamp; |
2897 | } | 2839 | } |
2840 | expires = ACCESS_ONCE(peer->pmtu_expires); | ||
2841 | if (expires) | ||
2842 | expires -= jiffies; | ||
2898 | } | 2843 | } |
2899 | 2844 | ||
2900 | if (rt->fl.iif) { | 2845 | if (rt_is_input_route(rt)) { |
2901 | #ifdef CONFIG_IP_MROUTE | 2846 | #ifdef CONFIG_IP_MROUTE |
2902 | __be32 dst = rt->rt_dst; | 2847 | __be32 dst = rt->rt_dst; |
2903 | 2848 | ||
2904 | if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && | 2849 | if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && |
2905 | IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { | 2850 | IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { |
2906 | int err = ipmr_get_route(net, skb, r, nowait); | 2851 | int err = ipmr_get_route(net, skb, |
2852 | rt->rt_src, rt->rt_dst, | ||
2853 | r, nowait); | ||
2907 | if (err <= 0) { | 2854 | if (err <= 0) { |
2908 | if (!nowait) { | 2855 | if (!nowait) { |
2909 | if (err == 0) | 2856 | if (err == 0) |
@@ -2917,7 +2864,7 @@ static int rt_fill_info(struct net *net, | |||
2917 | } | 2864 | } |
2918 | } else | 2865 | } else |
2919 | #endif | 2866 | #endif |
2920 | NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); | 2867 | NLA_PUT_U32(skb, RTA_IIF, rt->rt_iif); |
2921 | } | 2868 | } |
2922 | 2869 | ||
2923 | if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, | 2870 | if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, |
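Three independent conversions run through this rt_fill_info() hunk: routing-key reads move from the embedded rt->fl flow to the new rt_key_* / rt_mark fields, input routes are detected with the rt_is_input_route() helper instead of peeking at fl.iif, and the expiry reported to userspace now comes from the peer's pmtu_expires rather than dst.expires (ipmr_get_route() also grows explicit source and destination parameters). A minimal sketch of the new expiry computation, assuming peer may be NULL:

    long expires = 0;

    if (peer) {
            /* pmtu_expires is an absolute jiffies value shared through the
             * inet_peer; snapshot it once, then convert it to the relative
             * form rtnl_put_cacheinfo() expects. */
            unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires);

            if (pmtu_expires)
                    expires = pmtu_expires - jiffies;
    }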
@@ -2991,18 +2938,18 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) | |||

2991 | if (err == 0 && rt->dst.error) | 2938 | if (err == 0 && rt->dst.error) |
2992 | err = -rt->dst.error; | 2939 | err = -rt->dst.error; |
2993 | } else { | 2940 | } else { |
2994 | struct flowi fl = { | 2941 | struct flowi4 fl4 = { |
2995 | .nl_u = { | 2942 | .daddr = dst, |
2996 | .ip4_u = { | 2943 | .saddr = src, |
2997 | .daddr = dst, | 2944 | .flowi4_tos = rtm->rtm_tos, |
2998 | .saddr = src, | 2945 | .flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, |
2999 | .tos = rtm->rtm_tos, | 2946 | .flowi4_mark = mark, |
3000 | }, | ||
3001 | }, | ||
3002 | .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, | ||
3003 | .mark = mark, | ||
3004 | }; | 2947 | }; |
3005 | err = ip_route_output_key(net, &rt, &fl); | 2948 | rt = ip_route_output_key(net, &fl4); |
2949 | |||
2950 | err = 0; | ||
2951 | if (IS_ERR(rt)) | ||
2952 | err = PTR_ERR(rt); | ||
3006 | } | 2953 | } |
3007 | 2954 | ||
3008 | if (err) | 2955 | if (err) |
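The flow-key construction in inet_rtm_getroute() illustrates the tree-wide flowi4 conversion: the IPv4 selectors move out of the nl_u.ip4_u union in the generic struct flowi into the flat struct flowi4, with the family-independent members (oif, mark, tos here) picking up a flowi4_ prefix. Side by side, with dst, src, and mark as in the surrounding code:

    /* before */
    struct flowi fl = {
            .nl_u = { .ip4_u = { .daddr = dst,
                                 .saddr = src,
                                 .tos   = rtm->rtm_tos } },
            .oif  = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
            .mark = mark,
    };

    /* after */
    struct flowi4 fl4 = {
            .daddr       = dst,
            .saddr       = src,
            .flowi4_tos  = rtm->rtm_tos,
            .flowi4_oif  = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
            .flowi4_mark = mark,
    };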
@@ -3285,6 +3232,8 @@ static __net_init int rt_genid_init(struct net *net) | |||
3285 | { | 3232 | { |
3286 | get_random_bytes(&net->ipv4.rt_genid, | 3233 | get_random_bytes(&net->ipv4.rt_genid, |
3287 | sizeof(net->ipv4.rt_genid)); | 3234 | sizeof(net->ipv4.rt_genid)); |
3235 | get_random_bytes(&net->ipv4.dev_addr_genid, | ||
3236 | sizeof(net->ipv4.dev_addr_genid)); | ||
3288 | return 0; | 3237 | return 0; |
3289 | } | 3238 | } |
3290 | 3239 | ||
@@ -3293,9 +3242,9 @@ static __net_initdata struct pernet_operations rt_genid_ops = { | |||
3293 | }; | 3242 | }; |
3294 | 3243 | ||
3295 | 3244 | ||
3296 | #ifdef CONFIG_NET_CLS_ROUTE | 3245 | #ifdef CONFIG_IP_ROUTE_CLASSID |
3297 | struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; | 3246 | struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; |
3298 | #endif /* CONFIG_NET_CLS_ROUTE */ | 3247 | #endif /* CONFIG_IP_ROUTE_CLASSID */ |
3299 | 3248 | ||
3300 | static __initdata unsigned long rhash_entries; | 3249 | static __initdata unsigned long rhash_entries; |
3301 | static int __init set_rhash_entries(char *str) | 3250 | static int __init set_rhash_entries(char *str) |
@@ -3311,7 +3260,7 @@ int __init ip_rt_init(void) | |||
3311 | { | 3260 | { |
3312 | int rc = 0; | 3261 | int rc = 0; |
3313 | 3262 | ||
3314 | #ifdef CONFIG_NET_CLS_ROUTE | 3263 | #ifdef CONFIG_IP_ROUTE_CLASSID |
3315 | ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); | 3264 | ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); |
3316 | if (!ip_rt_acct) | 3265 | if (!ip_rt_acct) |
3317 | panic("IP: failed to allocate ip_rt_acct\n"); | 3266 | panic("IP: failed to allocate ip_rt_acct\n"); |
@@ -3323,6 +3272,12 @@ int __init ip_rt_init(void) | |||
3323 | 3272 | ||
3324 | ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep; | 3273 | ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep; |
3325 | 3274 | ||
3275 | if (dst_entries_init(&ipv4_dst_ops) < 0) | ||
3276 | panic("IP: failed to allocate ipv4_dst_ops counter\n"); | ||
3277 | |||
3278 | if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0) | ||
3279 | panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n"); | ||
3280 | |||
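With the atomic .entries field gone from the dst_ops initializers (see the blackhole ops above), per-ops entry accounting lives in a percpu counter that has to be allocated explicitly; failure here is fatal for the routing subsystem, hence the panics. A hypothetical out-of-tree user would follow the same pattern (names invented for illustration):

    static struct dst_ops example_dst_ops = {
            .family   = AF_INET,
            .protocol = cpu_to_be16(ETH_P_IP),
    };

    static int __init example_init(void)
    {
            /* replaces the old static .entries = ATOMIC_INIT(0) field */
            if (dst_entries_init(&example_dst_ops) < 0)
                    return -ENOMEM;
            return 0;
    }

    static void __exit example_exit(void)
    {
            dst_entries_destroy(&example_dst_ops);  /* free the counter */
    }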
3326 | rt_hash_table = (struct rt_hash_bucket *) | 3281 | rt_hash_table = (struct rt_hash_bucket *) |
3327 | alloc_large_system_hash("IP route cache", | 3282 | alloc_large_system_hash("IP route cache", |
3328 | sizeof(struct rt_hash_bucket), | 3283 | sizeof(struct rt_hash_bucket), |
@@ -3342,14 +3297,6 @@ int __init ip_rt_init(void) | |||
3342 | devinet_init(); | 3297 | devinet_init(); |
3343 | ip_fib_init(); | 3298 | ip_fib_init(); |
3344 | 3299 | ||
3345 | /* All the timers, started at system startup tend | ||
3346 | to synchronize. Perturb it a bit. | ||
3347 | */ | ||
3348 | INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func); | ||
3349 | expires_ljiffies = jiffies; | ||
3350 | schedule_delayed_work(&expires_work, | ||
3351 | net_random() % ip_rt_gc_interval + ip_rt_gc_interval); | ||
3352 | |||
3353 | if (ip_rt_proc_init()) | 3300 | if (ip_rt_proc_init()) |
3354 | printk(KERN_ERR "Unable to create route proc files\n"); | 3301 | printk(KERN_ERR "Unable to create route proc files\n"); |
3355 | #ifdef CONFIG_XFRM | 3302 | #ifdef CONFIG_XFRM |