author		Linus Torvalds <torvalds@linux-foundation.org>	2011-03-16 19:29:25 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-03-16 19:29:25 -0400
commit		7a6362800cb7d1d618a697a650c7aaed3eb39320 (patch)
tree		087f9bc6c13ef1fad4b392c5cf9325cd28fa8523 /net/ipv4/route.c
parent		6445ced8670f37cfc2c5e24a9de9b413dbfc788d (diff)
parent		ceda86a108671294052cbf51660097b6534672f5 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6: (1480 commits)
bonding: enable netpoll without checking link status
xfrm: Refcount destination entry on xfrm_lookup
net: introduce rx_handler results and logic around that
bonding: get rid of IFF_SLAVE_INACTIVE netdev->priv_flag
bonding: wrap slave state work
net: get rid of multiple bond-related netdevice->priv_flags
bonding: register slave pointer for rx_handler
be2net: Bump up the version number
be2net: Copyright notice change. Update to Emulex instead of ServerEngines
e1000e: fix kconfig for crc32 dependency
netfilter ebtables: fix xt_AUDIT to work with ebtables
xen network backend driver
bonding: Improve syslog message at device creation time
bonding: Call netif_carrier_off after register_netdevice
bonding: Incorrect TX queue offset
net_sched: fix ip_tos2prio
xfrm: fix __xfrm_route_forward()
be2net: Fix UDP packet detected status in RX compl
Phonet: fix aligned-mode pipe socket buffer header reserve
netxen: support for GbE port settings
...
Fix up conflicts in drivers/staging/brcm80211/brcmsmac/wl_mac80211.c
with the staging updates.
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r--	net/ipv4/route.c	1181
1 file changed, 573 insertions(+), 608 deletions(-)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6ed6603c2f6d..870b5182ddd8 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -109,8 +109,8 @@
 #include <linux/sysctl.h>
 #endif
 
-#define RT_FL_TOS(oldflp) \
-	((u32)(oldflp->fl4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))
+#define RT_FL_TOS(oldflp4) \
+	((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))
 
 #define IP_MAX_MTU	0xFFF0
 
@@ -131,9 +131,6 @@ static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
 static int ip_rt_min_advmss __read_mostly = 256;
 static int rt_chain_length_max __read_mostly = 20;
 
-static struct delayed_work expires_work;
-static unsigned long expires_ljiffies;
-
 /*
  * Interface to generic destination cache.
  */
@@ -152,6 +149,41 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 {
 }
 
+static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
+{
+	struct rtable *rt = (struct rtable *) dst;
+	struct inet_peer *peer;
+	u32 *p = NULL;
+
+	if (!rt->peer)
+		rt_bind_peer(rt, 1);
+
+	peer = rt->peer;
+	if (peer) {
+		u32 *old_p = __DST_METRICS_PTR(old);
+		unsigned long prev, new;
+
+		p = peer->metrics;
+		if (inet_metrics_new(peer))
+			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+
+		new = (unsigned long) p;
+		prev = cmpxchg(&dst->_metrics, old, new);
+
+		if (prev != old) {
+			p = __DST_METRICS_PTR(prev);
+			if (prev & DST_METRICS_READ_ONLY)
+				p = NULL;
+		} else {
+			if (rt->fi) {
+				fib_info_put(rt->fi);
+				rt->fi = NULL;
+			}
+		}
+	}
+	return p;
+}
+
 static struct dst_ops ipv4_dst_ops = {
 	.family =		AF_INET,
 	.protocol =		cpu_to_be16(ETH_P_IP),
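The ipv4_cow_metrics() hunk above is the heart of the new dst_ops entry: it clones the read-only metrics into the inet_peer's array and publishes the writable copy with a single cmpxchg on dst->_metrics, so a racing CPU simply adopts whichever copy won. Below is a minimal userspace sketch of that publish-once copy-on-write pattern, using C11 atomics in place of the kernel's cmpxchg(); every name in it is illustrative, not kernel API:

```c
/* Sketch of the copy-on-write publish pattern in ipv4_cow_metrics():
 * clone a read-only array into writable storage and install the copy
 * with one compare-and-swap; a loser of the race adopts the winner's
 * copy instead. Userspace C11 illustration, not kernel code. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NMETRICS  4
#define READ_ONLY 0x1UL            /* low pointer bit tags the shared array */

static uint32_t defaults[NMETRICS] = { 1500, 0, 0, 0 };
static _Atomic uintptr_t metrics;  /* tagged pointer, like dst->_metrics */

static uint32_t *ptr_of(uintptr_t v)
{
	return (uint32_t *)(v & ~READ_ONLY);
}

/* Return a writable metrics array, cloning the read-only one on demand. */
static uint32_t *cow_metrics(uint32_t *private_buf)
{
	uintptr_t old = atomic_load(&metrics);

	if (!(old & READ_ONLY))                 /* already writable */
		return ptr_of(old);

	memcpy(private_buf, ptr_of(old), sizeof(uint32_t) * NMETRICS);

	/* Publish the copy with one CAS; on failure use the winner's. */
	if (atomic_compare_exchange_strong(&metrics, &old,
					   (uintptr_t) private_buf))
		return private_buf;

	return (old & READ_ONLY) ? NULL : ptr_of(old);
}

int main(void)
{
	static uint32_t buf[NMETRICS];
	uint32_t *p;

	atomic_store(&metrics, (uintptr_t) defaults | READ_ONLY);
	p = cow_metrics(buf);
	if (p) {
		p[0] = 1400;                    /* safe: no longer shared */
		printf("metric[0] is now %u\n", p[0]);
	}
	return 0;
}
```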
@@ -159,6 +191,7 @@ static struct dst_ops ipv4_dst_ops = {
 	.check =		ipv4_dst_check,
 	.default_advmss =	ipv4_default_advmss,
 	.default_mtu =		ipv4_default_mtu,
+	.cow_metrics =		ipv4_cow_metrics,
 	.destroy =		ipv4_dst_destroy,
 	.ifdown =		ipv4_dst_ifdown,
 	.negative_advice =	ipv4_negative_advice,
@@ -171,7 +204,7 @@ static struct dst_ops ipv4_dst_ops = {
 
 const __u8 ip_tos2prio[16] = {
 	TC_PRIO_BESTEFFORT,
-	ECN_OR_COST(FILLER),
+	ECN_OR_COST(BESTEFFORT),
 	TC_PRIO_BESTEFFORT,
 	ECN_OR_COST(BESTEFFORT),
 	TC_PRIO_BULK,
@@ -391,7 +424,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
 			dst_metric(&r->dst, RTAX_WINDOW),
 			(int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
 			      dst_metric(&r->dst, RTAX_RTTVAR)),
-			r->fl.fl4_tos,
+			r->rt_tos,
 			r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1,
 			r->dst.hh ? (r->dst.hh->hh_output ==
 				     dev_queue_xmit) : 0,
@@ -514,7 +547,7 @@ static const struct file_operations rt_cpu_seq_fops = {
 	.release = seq_release,
 };
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 static int rt_acct_proc_show(struct seq_file *m, void *v)
 {
 	struct ip_rt_acct *dst, *src;
@@ -567,14 +600,14 @@ static int __net_init ip_rt_do_proc_init(struct net *net)
 	if (!pde)
 		goto err2;
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
 	if (!pde)
 		goto err3;
 #endif
 	return 0;
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 err3:
 	remove_proc_entry("rt_cache", net->proc_net_stat);
 #endif
@@ -588,7 +621,7 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net)
 {
 	remove_proc_entry("rt_cache", net->proc_net_stat);
 	remove_proc_entry("rt_cache", net->proc_net);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	remove_proc_entry("rt_acct", net->proc_net);
 #endif
 }
@@ -632,7 +665,7 @@ static inline int rt_fast_clean(struct rtable *rth)
 static inline int rt_valuable(struct rtable *rth)
 {
 	return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
-		rth->dst.expires;
+		(rth->peer && rth->peer->pmtu_expires);
 }
 
 static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
@@ -643,13 +676,7 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t
 	if (atomic_read(&rth->dst.__refcnt))
 		goto out;
 
-	ret = 1;
-	if (rth->dst.expires &&
-	    time_after_eq(jiffies, rth->dst.expires))
-		goto out;
-
 	age = jiffies - rth->dst.lastuse;
-	ret = 0;
 	if ((age <= tmo1 && !rt_fast_clean(rth)) ||
 	    (age <= tmo2 && rt_valuable(rth)))
 		goto out;
@@ -684,22 +711,22 @@ static inline bool rt_caching(const struct net *net)
 		net->ipv4.sysctl_rt_cache_rebuild_count;
 }
 
-static inline bool compare_hash_inputs(const struct flowi *fl1,
-				       const struct flowi *fl2)
+static inline bool compare_hash_inputs(const struct rtable *rt1,
+				       const struct rtable *rt2)
 {
-	return ((((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
-		((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
-		(fl1->iif ^ fl2->iif)) == 0);
+	return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
+		((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
+		(rt1->rt_iif ^ rt2->rt_iif)) == 0);
 }
 
-static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
+static inline int compare_keys(struct rtable *rt1, struct rtable *rt2)
 {
-	return (((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
-		((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
-		(fl1->mark ^ fl2->mark) |
-		(*(u16 *)&fl1->fl4_tos ^ *(u16 *)&fl2->fl4_tos) |
-		(fl1->oif ^ fl2->oif) |
-		(fl1->iif ^ fl2->iif)) == 0;
+	return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
+		((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
+		(rt1->rt_mark ^ rt2->rt_mark) |
+		(rt1->rt_tos ^ rt2->rt_tos) |
+		(rt1->rt_oif ^ rt2->rt_oif) |
+		(rt1->rt_iif ^ rt2->rt_iif)) == 0;
 }
 
 static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
@@ -786,104 +813,13 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
 	const struct rtable *aux = head;
 
 	while (aux != rth) {
-		if (compare_hash_inputs(&aux->fl, &rth->fl))
+		if (compare_hash_inputs(aux, rth))
 			return 0;
 		aux = rcu_dereference_protected(aux->dst.rt_next, 1);
 	}
 	return ONE;
 }
 
-static void rt_check_expire(void)
-{
-	static unsigned int rover;
-	unsigned int i = rover, goal;
-	struct rtable *rth;
-	struct rtable __rcu **rthp;
-	unsigned long samples = 0;
-	unsigned long sum = 0, sum2 = 0;
-	unsigned long delta;
-	u64 mult;
-
-	delta = jiffies - expires_ljiffies;
-	expires_ljiffies = jiffies;
-	mult = ((u64)delta) << rt_hash_log;
-	if (ip_rt_gc_timeout > 1)
-		do_div(mult, ip_rt_gc_timeout);
-	goal = (unsigned int)mult;
-	if (goal > rt_hash_mask)
-		goal = rt_hash_mask + 1;
-	for (; goal > 0; goal--) {
-		unsigned long tmo = ip_rt_gc_timeout;
-		unsigned long length;
-
-		i = (i + 1) & rt_hash_mask;
-		rthp = &rt_hash_table[i].chain;
-
-		if (need_resched())
-			cond_resched();
-
-		samples++;
-
-		if (rcu_dereference_raw(*rthp) == NULL)
-			continue;
-		length = 0;
-		spin_lock_bh(rt_hash_lock_addr(i));
-		while ((rth = rcu_dereference_protected(*rthp,
-					lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
-			prefetch(rth->dst.rt_next);
-			if (rt_is_expired(rth)) {
-				*rthp = rth->dst.rt_next;
-				rt_free(rth);
-				continue;
-			}
-			if (rth->dst.expires) {
-				/* Entry is expired even if it is in use */
-				if (time_before_eq(jiffies, rth->dst.expires)) {
-nofree:
-					tmo >>= 1;
-					rthp = &rth->dst.rt_next;
-					/*
-					 * We only count entries on
-					 * a chain with equal hash inputs once
-					 * so that entries for different QOS
-					 * levels, and other non-hash input
-					 * attributes don't unfairly skew
-					 * the length computation
-					 */
-					length += has_noalias(rt_hash_table[i].chain, rth);
-					continue;
-				}
-			} else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
-				goto nofree;
-
-			/* Cleanup aged off entries. */
-			*rthp = rth->dst.rt_next;
-			rt_free(rth);
-		}
-		spin_unlock_bh(rt_hash_lock_addr(i));
-		sum += length;
-		sum2 += length*length;
-	}
-	if (samples) {
-		unsigned long avg = sum / samples;
-		unsigned long sd = int_sqrt(sum2 / samples - avg*avg);
-		rt_chain_length_max = max_t(unsigned long,
-					ip_rt_gc_elasticity,
-					(avg + 4*sd) >> FRACT_BITS);
-	}
-	rover = i;
-}
-
-/*
- * rt_worker_func() is run in process context.
- * we call rt_check_expire() to scan part of the hash table
- */
-static void rt_worker_func(struct work_struct *work)
-{
-	rt_check_expire();
-	schedule_delayed_work(&expires_work, ip_rt_gc_interval);
-}
-
 /*
  * Pertubation of rt_genid by a small quantity [1..256]
  * Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
@@ -1078,8 +1014,8 @@ static int slow_chain_length(const struct rtable *head)
 	return length >> FRACT_BITS;
 }
 
-static int rt_intern_hash(unsigned hash, struct rtable *rt,
-			  struct rtable **rp, struct sk_buff *skb, int ifindex)
+static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt,
+				     struct sk_buff *skb, int ifindex)
 {
 	struct rtable	*rth, *cand;
 	struct rtable __rcu **rthp, **candp;
@@ -1120,7 +1056,7 @@ restart:
 				printk(KERN_WARNING
 				    "Neighbour table failure & not caching routes.\n");
 				ip_rt_put(rt);
-				return err;
+				return ERR_PTR(err);
 			}
 		}
 
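This and the following hunks convert rt_intern_hash() from "int status plus a struct rtable **rp output parameter" to returning the rtable itself, with failures encoded in the pointer via ERR_PTR()/IS_ERR()/PTR_ERR(). A self-contained sketch of that convention follows, assuming (as the kernel does) that the top 4095 values of the address space never hold valid objects; the lowercase helpers are local stand-ins for the kernel macros, not the macros themselves:

```c
/* Sketch of the ERR_PTR convention adopted by rt_intern_hash(): one
 * return value carries either a valid pointer or a small negative errno
 * encoded in the unmappable top range of the address space. */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERRNO 4095

static inline void *err_ptr(long error)         /* kernel: ERR_PTR() */
{
	return (void *) error;
}

static inline int is_err(const void *ptr)       /* kernel: IS_ERR() */
{
	return (unsigned long) ptr >= (unsigned long) -MAX_ERRNO;
}

static inline long ptr_err(const void *ptr)     /* kernel: PTR_ERR() */
{
	return (long) ptr;
}

struct route { int id; };

static struct route *route_create(int id)
{
	if (id < 0)
		return err_ptr(-EINVAL);        /* failure in the pointer */
	struct route *r = malloc(sizeof(*r));
	if (!r)
		return err_ptr(-ENOBUFS);
	r->id = id;
	return r;                               /* success: real pointer */
}

int main(void)
{
	struct route *r = route_create(-1);
	if (is_err(r))
		printf("route_create failed: %ld\n", ptr_err(r));
	else
		free(r);
	return 0;
}
```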
@@ -1137,7 +1073,7 @@ restart:
 			rt_free(rth);
 			continue;
 		}
-		if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) {
+		if (compare_keys(rth, rt) && compare_netns(rth, rt)) {
 			/* Put it first */
 			*rthp = rth->dst.rt_next;
 			/*
@@ -1157,11 +1093,9 @@ restart:
 			spin_unlock_bh(rt_hash_lock_addr(hash));
 
 			rt_drop(rt);
-			if (rp)
-				*rp = rth;
-			else
+			if (skb)
 				skb_dst_set(skb, &rth->dst);
-			return 0;
+			return rth;
 		}
 
 		if (!atomic_read(&rth->dst.__refcnt)) {
@@ -1202,7 +1136,7 @@ restart:
 			rt_emergency_hash_rebuild(net);
 			spin_unlock_bh(rt_hash_lock_addr(hash));
 
-			hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
+			hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
 					ifindex, rt_genid(net));
 			goto restart;
 		}
@@ -1218,7 +1152,7 @@ restart:
 
 			if (err != -ENOBUFS) {
 				rt_drop(rt);
-				return err;
+				return ERR_PTR(err);
 			}
 
 			/* Neighbour tables are full and nothing
@@ -1239,7 +1173,7 @@ restart:
 			if (net_ratelimit())
 				printk(KERN_WARNING "ipv4: Neighbour table overflow.\n");
 			rt_drop(rt);
-			return -ENOBUFS;
+			return ERR_PTR(-ENOBUFS);
 		}
 	}
 
@@ -1265,11 +1199,16 @@ restart:
 	spin_unlock_bh(rt_hash_lock_addr(hash));
 
 skip_hashing:
-	if (rp)
-		*rp = rt;
-	else
+	if (skb)
 		skb_dst_set(skb, &rt->dst);
-	return 0;
+	return rt;
+}
+
+static atomic_t __rt_peer_genid = ATOMIC_INIT(0);
+
+static u32 rt_peer_genid(void)
+{
+	return atomic_read(&__rt_peer_genid);
 }
 
 void rt_bind_peer(struct rtable *rt, int create)
@@ -1280,6 +1219,8 @@ void rt_bind_peer(struct rtable *rt, int create)
 
 	if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
 		inet_putpeer(peer);
+	else
+		rt->rt_peer_genid = rt_peer_genid();
 }
 
 /*
@@ -1349,13 +1290,8 @@ static void rt_del(unsigned hash, struct rtable *rt)
 void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 		    __be32 saddr, struct net_device *dev)
 {
-	int i, k;
 	struct in_device *in_dev = __in_dev_get_rcu(dev);
-	struct rtable *rth;
-	struct rtable __rcu **rthp;
-	__be32 skeys[2] = { saddr, 0 };
-	int ikeys[2] = { dev->ifindex, 0 };
-	struct netevent_redirect netevent;
+	struct inet_peer *peer;
 	struct net *net;
 
 	if (!in_dev)
@@ -1367,9 +1303,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 	    ipv4_is_zeronet(new_gw))
 		goto reject_redirect;
 
-	if (!rt_caching(net))
-		goto reject_redirect;
-
 	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
 		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
 			goto reject_redirect;
@@ -1380,91 +1313,13 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 			goto reject_redirect;
 	}
 
-	for (i = 0; i < 2; i++) {
-		for (k = 0; k < 2; k++) {
-			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
-						rt_genid(net));
-
-			rthp = &rt_hash_table[hash].chain;
-
-			while ((rth = rcu_dereference(*rthp)) != NULL) {
-				struct rtable *rt;
-
-				if (rth->fl.fl4_dst != daddr ||
-				    rth->fl.fl4_src != skeys[i] ||
-				    rth->fl.oif != ikeys[k] ||
-				    rt_is_input_route(rth) ||
-				    rt_is_expired(rth) ||
-				    !net_eq(dev_net(rth->dst.dev), net)) {
-					rthp = &rth->dst.rt_next;
-					continue;
-				}
-
-				if (rth->rt_dst != daddr ||
-				    rth->rt_src != saddr ||
-				    rth->dst.error ||
-				    rth->rt_gateway != old_gw ||
-				    rth->dst.dev != dev)
-					break;
-
-				dst_hold(&rth->dst);
-
-				rt = dst_alloc(&ipv4_dst_ops);
-				if (rt == NULL) {
-					ip_rt_put(rth);
-					return;
-				}
-
-				/* Copy all the information. */
-				*rt = *rth;
-				rt->dst.__use = 1;
-				atomic_set(&rt->dst.__refcnt, 1);
-				rt->dst.child = NULL;
-				if (rt->dst.dev)
-					dev_hold(rt->dst.dev);
-				rt->dst.obsolete = -1;
-				rt->dst.lastuse = jiffies;
-				rt->dst.path = &rt->dst;
-				rt->dst.neighbour = NULL;
-				rt->dst.hh = NULL;
-#ifdef CONFIG_XFRM
-				rt->dst.xfrm = NULL;
-#endif
-				rt->rt_genid = rt_genid(net);
-				rt->rt_flags |= RTCF_REDIRECTED;
-
-				/* Gateway is different ... */
-				rt->rt_gateway = new_gw;
-
-				/* Redirect received -> path was valid */
-				dst_confirm(&rth->dst);
-
-				if (rt->peer)
-					atomic_inc(&rt->peer->refcnt);
-
-				if (arp_bind_neighbour(&rt->dst) ||
-				    !(rt->dst.neighbour->nud_state &
-					    NUD_VALID)) {
-					if (rt->dst.neighbour)
-						neigh_event_send(rt->dst.neighbour, NULL);
-					ip_rt_put(rth);
-					rt_drop(rt);
-					goto do_next;
-				}
+	peer = inet_getpeer_v4(daddr, 1);
+	if (peer) {
+		peer->redirect_learned.a4 = new_gw;
 
-				netevent.old = &rth->dst;
-				netevent.new = &rt->dst;
-				call_netevent_notifiers(NETEVENT_REDIRECT,
-							&netevent);
+		inet_putpeer(peer);
 
-				rt_del(hash, rth);
-				if (!rt_intern_hash(hash, rt, &rt, NULL, rt->fl.oif))
-					ip_rt_put(rt);
-				goto do_next;
-			}
-		do_next:
-			;
-		}
+		atomic_inc(&__rt_peer_genid);
 	}
 	return;
 
@@ -1488,18 +1343,24 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
 		if (dst->obsolete > 0) {
 			ip_rt_put(rt);
 			ret = NULL;
-		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
-			   (rt->dst.expires &&
-			    time_after_eq(jiffies, rt->dst.expires))) {
-			unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
-						rt->fl.oif,
+		} else if (rt->rt_flags & RTCF_REDIRECTED) {
+			unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
+						rt->rt_oif,
 						rt_genid(dev_net(dst->dev)));
 #if RT_CACHE_DEBUG >= 1
 			printk(KERN_DEBUG "ipv4_negative_advice: redirect to %pI4/%02x dropped\n",
-				&rt->rt_dst, rt->fl.fl4_tos);
+				&rt->rt_dst, rt->rt_tos);
 #endif
 			rt_del(hash, rt);
 			ret = NULL;
+		} else if (rt->peer &&
+			   rt->peer->pmtu_expires &&
+			   time_after_eq(jiffies, rt->peer->pmtu_expires)) {
+			unsigned long orig = rt->peer->pmtu_expires;
+
+			if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig)
+				dst_metric_set(dst, RTAX_MTU,
+					       rt->peer->pmtu_orig);
 		}
 	}
 	return ret;
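Note the cmpxchg(&rt->peer->pmtu_expires, orig, 0) in the new branch above; the same idiom reappears in ipv4_link_failure() later in this diff. Several CPUs may observe that a learned PMTU has expired, but only the one whose cmpxchg swings the timestamp to 0 performs the rollback to pmtu_orig. A userspace sketch of this claim-once reset, with illustrative names and C11 atomics standing in for the kernel's cmpxchg():

```c
/* Sketch of the claim-once reset applied to peer->pmtu_expires: many
 * threads may notice expiry, but only the winner of the CAS rolls the
 * MTU back. Userspace C11 illustration, not kernel code. */
#include <stdatomic.h>
#include <stdio.h>

struct peer {
	_Atomic unsigned long pmtu_expires;  /* 0 means "no learned PMTU" */
	unsigned int pmtu_orig;              /* value to restore on expiry */
	unsigned int mtu;
};

static void expire_pmtu(struct peer *p, unsigned long now)
{
	unsigned long orig = atomic_load(&p->pmtu_expires);

	if (!orig || now < orig)
		return;                  /* nothing learned, or still valid */

	/* Claim the expiry: only the winner of this CAS rolls back. */
	if (atomic_compare_exchange_strong(&p->pmtu_expires, &orig, 0UL))
		p->mtu = p->pmtu_orig;
}

int main(void)
{
	struct peer p = { .pmtu_orig = 1500, .mtu = 1400 };

	atomic_store(&p.pmtu_expires, 100);  /* learned PMTU expires at t=100 */
	expire_pmtu(&p, 150);                /* t=150: expired, roll back */
	printf("mtu restored to %u\n", p.mtu);
	return 0;
}
```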
@@ -1525,6 +1386,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 {
 	struct rtable *rt = skb_rtable(skb);
 	struct in_device *in_dev;
+	struct inet_peer *peer;
 	int log_martians;
 
 	rcu_read_lock();
@@ -1536,33 +1398,41 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
 	rcu_read_unlock();
 
+	if (!rt->peer)
+		rt_bind_peer(rt, 1);
+	peer = rt->peer;
+	if (!peer) {
+		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
+		return;
+	}
+
 	/* No redirected packets during ip_rt_redirect_silence;
 	 * reset the algorithm.
 	 */
-	if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence))
-		rt->dst.rate_tokens = 0;
+	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
+		peer->rate_tokens = 0;
 
 	/* Too many ignored redirects; do not send anything
 	 * set dst.rate_last to the last seen redirected packet.
 	 */
-	if (rt->dst.rate_tokens >= ip_rt_redirect_number) {
-		rt->dst.rate_last = jiffies;
+	if (peer->rate_tokens >= ip_rt_redirect_number) {
+		peer->rate_last = jiffies;
 		return;
 	}
 
 	/* Check for load limit; set rate_last to the latest sent
 	 * redirect.
 	 */
-	if (rt->dst.rate_tokens == 0 ||
+	if (peer->rate_tokens == 0 ||
 	    time_after(jiffies,
-		       (rt->dst.rate_last +
-			(ip_rt_redirect_load << rt->dst.rate_tokens)))) {
+		       (peer->rate_last +
+			(ip_rt_redirect_load << peer->rate_tokens)))) {
 		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
-		rt->dst.rate_last = jiffies;
-		++rt->dst.rate_tokens;
+		peer->rate_last = jiffies;
+		++peer->rate_tokens;
 #ifdef CONFIG_IP_ROUTE_VERBOSE
 		if (log_martians &&
-		    rt->dst.rate_tokens == ip_rt_redirect_number &&
+		    peer->rate_tokens == ip_rt_redirect_number &&
 		    net_ratelimit())
 			printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
 				&rt->rt_src, rt->rt_iif,
@@ -1574,7 +1444,9 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 static int ip_error(struct sk_buff *skb)
 {
 	struct rtable *rt = skb_rtable(skb);
+	struct inet_peer *peer;
 	unsigned long now;
+	bool send;
 	int code;
 
 	switch (rt->dst.error) {
@@ -1594,15 +1466,24 @@ static int ip_error(struct sk_buff *skb)
 		break;
 	}
 
-	now = jiffies;
-	rt->dst.rate_tokens += now - rt->dst.rate_last;
-	if (rt->dst.rate_tokens > ip_rt_error_burst)
-		rt->dst.rate_tokens = ip_rt_error_burst;
-	rt->dst.rate_last = now;
-	if (rt->dst.rate_tokens >= ip_rt_error_cost) {
-		rt->dst.rate_tokens -= ip_rt_error_cost;
-		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
+	if (!rt->peer)
+		rt_bind_peer(rt, 1);
+	peer = rt->peer;
+
+	send = true;
+	if (peer) {
+		now = jiffies;
+		peer->rate_tokens += now - peer->rate_last;
+		if (peer->rate_tokens > ip_rt_error_burst)
+			peer->rate_tokens = ip_rt_error_burst;
+		peer->rate_last = now;
+		if (peer->rate_tokens >= ip_rt_error_cost)
+			peer->rate_tokens -= ip_rt_error_cost;
+		else
+			send = false;
 	}
+	if (send)
+		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
 
 out:	kfree_skb(skb);
 	return 0;
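ip_error() keeps its token-bucket limiter for ICMP unreachables, but the bucket now lives in the shared inet_peer rather than in each cache entry: elapsed jiffies top up the tokens, ip_rt_error_burst caps the bucket, and each message sent costs ip_rt_error_cost. A compact sketch of that accounting, with made-up constants rather than the real sysctl defaults:

```c
/* Sketch of the token bucket ip_error() keeps in the inet_peer: elapsed
 * time refills the bucket, a burst cap bounds it, and each ICMP error
 * sent spends a fixed cost. Constants are illustrative only. */
#include <stdbool.h>
#include <stdio.h>

#define ERROR_COST   100UL   /* ticks consumed per message sent */
#define ERROR_BURST  500UL   /* bucket capacity: at most 5 back-to-back */

struct bucket {
	unsigned long tokens;    /* accumulated allowance, in ticks */
	unsigned long last;      /* timestamp of the previous refill */
};

static bool ratelimited_send(struct bucket *b, unsigned long now)
{
	b->tokens += now - b->last;          /* refill by elapsed time */
	if (b->tokens > ERROR_BURST)
		b->tokens = ERROR_BURST;     /* cap the burst */
	b->last = now;

	if (b->tokens >= ERROR_COST) {
		b->tokens -= ERROR_COST;     /* spend: allowed to send */
		return true;
	}
	return false;                        /* suppressed */
}

int main(void)
{
	struct bucket b = { .tokens = ERROR_BURST, .last = 0 };
	unsigned long t;

	/* A flood at t=0..9 gets 5 messages through, then is silenced. */
	for (t = 0; t < 10; t++)
		printf("t=%lu: %s\n", t, ratelimited_send(&b, t) ?
		       "send ICMP" : "suppressed");
	return 0;
}
```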
@@ -1630,88 +1511,142 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 				 unsigned short new_mtu,
 				 struct net_device *dev)
 {
-	int i, k;
 	unsigned short old_mtu = ntohs(iph->tot_len);
-	struct rtable *rth;
-	int ikeys[2] = { dev->ifindex, 0 };
-	__be32 skeys[2] = { iph->saddr, 0, };
-	__be32 daddr = iph->daddr;
 	unsigned short est_mtu = 0;
+	struct inet_peer *peer;
 
-	for (k = 0; k < 2; k++) {
-		for (i = 0; i < 2; i++) {
-			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
-						rt_genid(net));
-
-			rcu_read_lock();
-			for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
-			     rth = rcu_dereference(rth->dst.rt_next)) {
-				unsigned short mtu = new_mtu;
-
-				if (rth->fl.fl4_dst != daddr ||
-				    rth->fl.fl4_src != skeys[i] ||
-				    rth->rt_dst != daddr ||
-				    rth->rt_src != iph->saddr ||
-				    rth->fl.oif != ikeys[k] ||
-				    rt_is_input_route(rth) ||
-				    dst_metric_locked(&rth->dst, RTAX_MTU) ||
-				    !net_eq(dev_net(rth->dst.dev), net) ||
-				    rt_is_expired(rth))
-					continue;
+	peer = inet_getpeer_v4(iph->daddr, 1);
+	if (peer) {
+		unsigned short mtu = new_mtu;
 
 		if (new_mtu < 68 || new_mtu >= old_mtu) {
+			/* BSD 4.2 derived systems incorrectly adjust
+			 * tot_len by the IP header length, and report
+			 * a zero MTU in the ICMP message.
+			 */
+			if (mtu == 0 &&
+			    old_mtu >= 68 + (iph->ihl << 2))
+				old_mtu -= iph->ihl << 2;
+			mtu = guess_mtu(old_mtu);
+		}
 
-			/* BSD 4.2 compatibility hack :-( */
-			if (mtu == 0 &&
-			    old_mtu >= dst_mtu(&rth->dst) &&
-			    old_mtu >= 68 + (iph->ihl << 2))
-				old_mtu -= iph->ihl << 2;
+		if (mtu < ip_rt_min_pmtu)
+			mtu = ip_rt_min_pmtu;
+		if (!peer->pmtu_expires || mtu < peer->pmtu_learned) {
+			unsigned long pmtu_expires;
 
-			mtu = guess_mtu(old_mtu);
-		}
-		if (mtu <= dst_mtu(&rth->dst)) {
-			if (mtu < dst_mtu(&rth->dst)) {
-				dst_confirm(&rth->dst);
-				if (mtu < ip_rt_min_pmtu) {
-					u32 lock = dst_metric(&rth->dst,
-							      RTAX_LOCK);
-					mtu = ip_rt_min_pmtu;
-					lock |= (1 << RTAX_MTU);
-					dst_metric_set(&rth->dst, RTAX_LOCK,
-						       lock);
-				}
-				dst_metric_set(&rth->dst, RTAX_MTU, mtu);
-				dst_set_expires(&rth->dst,
-						ip_rt_mtu_expires);
-			}
-			est_mtu = mtu;
-		}
-			}
-			rcu_read_unlock();
+			pmtu_expires = jiffies + ip_rt_mtu_expires;
+			if (!pmtu_expires)
+				pmtu_expires = 1UL;
+
+			est_mtu = mtu;
+			peer->pmtu_learned = mtu;
+			peer->pmtu_expires = pmtu_expires;
 		}
+
+		inet_putpeer(peer);
+
+		atomic_inc(&__rt_peer_genid);
 	}
 	return est_mtu ? : new_mtu;
 }
 
+static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer)
+{
+	unsigned long expires = peer->pmtu_expires;
+
+	if (time_before(jiffies, expires)) {
+		u32 orig_dst_mtu = dst_mtu(dst);
+		if (peer->pmtu_learned < orig_dst_mtu) {
+			if (!peer->pmtu_orig)
+				peer->pmtu_orig = dst_metric_raw(dst, RTAX_MTU);
+			dst_metric_set(dst, RTAX_MTU, peer->pmtu_learned);
+		}
+	} else if (cmpxchg(&peer->pmtu_expires, expires, 0) == expires)
+		dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig);
+}
+
 static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 {
-	if (dst_mtu(dst) > mtu && mtu >= 68 &&
-	    !(dst_metric_locked(dst, RTAX_MTU))) {
-		if (mtu < ip_rt_min_pmtu) {
-			u32 lock = dst_metric(dst, RTAX_LOCK);
+	struct rtable *rt = (struct rtable *) dst;
+	struct inet_peer *peer;
+
+	dst_confirm(dst);
+
+	if (!rt->peer)
+		rt_bind_peer(rt, 1);
+	peer = rt->peer;
+	if (peer) {
+		if (mtu < ip_rt_min_pmtu)
 			mtu = ip_rt_min_pmtu;
-			dst_metric_set(dst, RTAX_LOCK, lock | (1 << RTAX_MTU));
+		if (!peer->pmtu_expires || mtu < peer->pmtu_learned) {
+			unsigned long pmtu_expires;
+
+			pmtu_expires = jiffies + ip_rt_mtu_expires;
+			if (!pmtu_expires)
+				pmtu_expires = 1UL;
+
+			peer->pmtu_learned = mtu;
+			peer->pmtu_expires = pmtu_expires;
+
+			atomic_inc(&__rt_peer_genid);
+			rt->rt_peer_genid = rt_peer_genid();
 		}
-		dst_metric_set(dst, RTAX_MTU, mtu);
-		dst_set_expires(dst, ip_rt_mtu_expires);
-		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
+		check_peer_pmtu(dst, peer);
+
+		inet_putpeer(peer);
+	}
+}
+
+static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
+{
+	struct rtable *rt = (struct rtable *) dst;
+	__be32 orig_gw = rt->rt_gateway;
+
+	dst_confirm(&rt->dst);
+
+	neigh_release(rt->dst.neighbour);
+	rt->dst.neighbour = NULL;
+
+	rt->rt_gateway = peer->redirect_learned.a4;
+	if (arp_bind_neighbour(&rt->dst) ||
+	    !(rt->dst.neighbour->nud_state & NUD_VALID)) {
+		if (rt->dst.neighbour)
+			neigh_event_send(rt->dst.neighbour, NULL);
+		rt->rt_gateway = orig_gw;
+		return -EAGAIN;
+	} else {
+		rt->rt_flags |= RTCF_REDIRECTED;
+		call_netevent_notifiers(NETEVENT_NEIGH_UPDATE,
+					rt->dst.neighbour);
 	}
+	return 0;
 }
 
 static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
 {
-	if (rt_is_expired((struct rtable *)dst))
+	struct rtable *rt = (struct rtable *) dst;
+
+	if (rt_is_expired(rt))
 		return NULL;
+	if (rt->rt_peer_genid != rt_peer_genid()) {
+		struct inet_peer *peer;
+
+		if (!rt->peer)
+			rt_bind_peer(rt, 0);
+
+		peer = rt->peer;
+		if (peer && peer->pmtu_expires)
+			check_peer_pmtu(dst, peer);
+
+		if (peer && peer->redirect_learned.a4 &&
+		    peer->redirect_learned.a4 != rt->rt_gateway) {
+			if (check_peer_redir(dst, peer))
+				return NULL;
+		}
+
+		rt->rt_peer_genid = rt_peer_genid();
+	}
 	return dst;
 }
 
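The rewritten ipv4_dst_check() above shows the scheme that replaces walking the hash table on every PMTU or redirect event: writers just bump the global __rt_peer_genid, and each cached rtable lazily revalidates against its peer the next time its recorded rt_peer_genid is found stale. A deliberately single-threaded sketch of that generation-counter invalidation (names are illustrative; the plain shared variable stands in for inet_peer state and would itself need synchronization in real concurrent code):

```c
/* Sketch of the generation-counter invalidation behind rt_peer_genid():
 * writers bump one global counter when any peer learns new state; each
 * cached entry re-checks only when its recorded generation is stale. */
#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned int peer_genid;      /* kernel: __rt_peer_genid */

struct entry {
	unsigned int genid;                  /* generation last validated at */
	unsigned int mtu;                    /* cached, possibly stale */
};

static unsigned int shared_mtu = 1500;       /* stands in for peer state */

/* Writer side: publish new state, then invalidate all cached entries. */
static void learn_new_mtu(unsigned int mtu)
{
	shared_mtu = mtu;
	atomic_fetch_add(&peer_genid, 1);    /* one bump outdates everyone */
}

/* Reader side: cheap check on every use, refresh only when stale. */
static unsigned int entry_mtu(struct entry *e)
{
	unsigned int g = atomic_load(&peer_genid);

	if (e->genid != g) {                 /* stale: revalidate once */
		e->mtu = shared_mtu;
		e->genid = g;
	}
	return e->mtu;
}

int main(void)
{
	struct entry e = { .genid = atomic_load(&peer_genid), .mtu = 1500 };

	printf("mtu=%u\n", entry_mtu(&e));   /* 1500, no refresh needed */
	learn_new_mtu(1400);                 /* e.g. ICMP frag-needed */
	printf("mtu=%u\n", entry_mtu(&e));   /* 1400 after lazy refresh */
	return 0;
}
```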
@@ -1720,6 +1655,10 @@ static void ipv4_dst_destroy(struct dst_entry *dst) | |||
1720 | struct rtable *rt = (struct rtable *) dst; | 1655 | struct rtable *rt = (struct rtable *) dst; |
1721 | struct inet_peer *peer = rt->peer; | 1656 | struct inet_peer *peer = rt->peer; |
1722 | 1657 | ||
1658 | if (rt->fi) { | ||
1659 | fib_info_put(rt->fi); | ||
1660 | rt->fi = NULL; | ||
1661 | } | ||
1723 | if (peer) { | 1662 | if (peer) { |
1724 | rt->peer = NULL; | 1663 | rt->peer = NULL; |
1725 | inet_putpeer(peer); | 1664 | inet_putpeer(peer); |
@@ -1734,8 +1673,14 @@ static void ipv4_link_failure(struct sk_buff *skb) | |||
1734 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); | 1673 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); |
1735 | 1674 | ||
1736 | rt = skb_rtable(skb); | 1675 | rt = skb_rtable(skb); |
1737 | if (rt) | 1676 | if (rt && |
1738 | dst_set_expires(&rt->dst, 0); | 1677 | rt->peer && |
1678 | rt->peer->pmtu_expires) { | ||
1679 | unsigned long orig = rt->peer->pmtu_expires; | ||
1680 | |||
1681 | if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig) | ||
1682 | dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig); | ||
1683 | } | ||
1739 | } | 1684 | } |
1740 | 1685 | ||
1741 | static int ip_rt_bug(struct sk_buff *skb) | 1686 | static int ip_rt_bug(struct sk_buff *skb) |
@@ -1764,8 +1709,17 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) | |||
1764 | if (rt_is_output_route(rt)) | 1709 | if (rt_is_output_route(rt)) |
1765 | src = rt->rt_src; | 1710 | src = rt->rt_src; |
1766 | else { | 1711 | else { |
1712 | struct flowi4 fl4 = { | ||
1713 | .daddr = rt->rt_key_dst, | ||
1714 | .saddr = rt->rt_key_src, | ||
1715 | .flowi4_tos = rt->rt_tos, | ||
1716 | .flowi4_oif = rt->rt_oif, | ||
1717 | .flowi4_iif = rt->rt_iif, | ||
1718 | .flowi4_mark = rt->rt_mark, | ||
1719 | }; | ||
1720 | |||
1767 | rcu_read_lock(); | 1721 | rcu_read_lock(); |
1768 | if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) | 1722 | if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) |
1769 | src = FIB_RES_PREFSRC(res); | 1723 | src = FIB_RES_PREFSRC(res); |
1770 | else | 1724 | else |
1771 | src = inet_select_addr(rt->dst.dev, rt->rt_gateway, | 1725 | src = inet_select_addr(rt->dst.dev, rt->rt_gateway, |
@@ -1775,7 +1729,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) | |||
1775 | memcpy(addr, &src, 4); | 1729 | memcpy(addr, &src, 4); |
1776 | } | 1730 | } |
1777 | 1731 | ||
1778 | #ifdef CONFIG_NET_CLS_ROUTE | 1732 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1779 | static void set_class_tag(struct rtable *rt, u32 tag) | 1733 | static void set_class_tag(struct rtable *rt, u32 tag) |
1780 | { | 1734 | { |
1781 | if (!(rt->dst.tclassid & 0xFFFF)) | 1735 | if (!(rt->dst.tclassid & 0xFFFF)) |
@@ -1815,17 +1769,54 @@ static unsigned int ipv4_default_mtu(const struct dst_entry *dst) | |||
1815 | return mtu; | 1769 | return mtu; |
1816 | } | 1770 | } |
1817 | 1771 | ||
1818 | static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) | 1772 | static void rt_init_metrics(struct rtable *rt, const struct flowi4 *oldflp4, |
1773 | struct fib_info *fi) | ||
1774 | { | ||
1775 | struct inet_peer *peer; | ||
1776 | int create = 0; | ||
1777 | |||
1778 | /* If a peer entry exists for this destination, we must hook | ||
1779 | * it up in order to get at cached metrics. | ||
1780 | */ | ||
1781 | if (oldflp4 && (oldflp4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) | ||
1782 | create = 1; | ||
1783 | |||
1784 | rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create); | ||
1785 | if (peer) { | ||
1786 | rt->rt_peer_genid = rt_peer_genid(); | ||
1787 | if (inet_metrics_new(peer)) | ||
1788 | memcpy(peer->metrics, fi->fib_metrics, | ||
1789 | sizeof(u32) * RTAX_MAX); | ||
1790 | dst_init_metrics(&rt->dst, peer->metrics, false); | ||
1791 | |||
1792 | if (peer->pmtu_expires) | ||
1793 | check_peer_pmtu(&rt->dst, peer); | ||
1794 | if (peer->redirect_learned.a4 && | ||
1795 | peer->redirect_learned.a4 != rt->rt_gateway) { | ||
1796 | rt->rt_gateway = peer->redirect_learned.a4; | ||
1797 | rt->rt_flags |= RTCF_REDIRECTED; | ||
1798 | } | ||
1799 | } else { | ||
1800 | if (fi->fib_metrics != (u32 *) dst_default_metrics) { | ||
1801 | rt->fi = fi; | ||
1802 | atomic_inc(&fi->fib_clntref); | ||
1803 | } | ||
1804 | dst_init_metrics(&rt->dst, fi->fib_metrics, true); | ||
1805 | } | ||
1806 | } | ||
1807 | |||
1808 | static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *oldflp4, | ||
1809 | const struct fib_result *res, | ||
1810 | struct fib_info *fi, u16 type, u32 itag) | ||
1819 | { | 1811 | { |
1820 | struct dst_entry *dst = &rt->dst; | 1812 | struct dst_entry *dst = &rt->dst; |
1821 | struct fib_info *fi = res->fi; | ||
1822 | 1813 | ||
1823 | if (fi) { | 1814 | if (fi) { |
1824 | if (FIB_RES_GW(*res) && | 1815 | if (FIB_RES_GW(*res) && |
1825 | FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) | 1816 | FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) |
1826 | rt->rt_gateway = FIB_RES_GW(*res); | 1817 | rt->rt_gateway = FIB_RES_GW(*res); |
1827 | dst_import_metrics(dst, fi->fib_metrics); | 1818 | rt_init_metrics(rt, oldflp4, fi); |
1828 | #ifdef CONFIG_NET_CLS_ROUTE | 1819 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1829 | dst->tclassid = FIB_RES_NH(*res).nh_tclassid; | 1820 | dst->tclassid = FIB_RES_NH(*res).nh_tclassid; |
1830 | #endif | 1821 | #endif |
1831 | } | 1822 | } |
@@ -1835,13 +1826,26 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) | |||
1835 | if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40) | 1826 | if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40) |
1836 | dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40); | 1827 | dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40); |
1837 | 1828 | ||
1838 | #ifdef CONFIG_NET_CLS_ROUTE | 1829 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1839 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 1830 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
1840 | set_class_tag(rt, fib_rules_tclass(res)); | 1831 | set_class_tag(rt, fib_rules_tclass(res)); |
1841 | #endif | 1832 | #endif |
1842 | set_class_tag(rt, itag); | 1833 | set_class_tag(rt, itag); |
1843 | #endif | 1834 | #endif |
1844 | rt->rt_type = res->type; | 1835 | rt->rt_type = type; |
1836 | } | ||
1837 | |||
1838 | static struct rtable *rt_dst_alloc(bool nopolicy, bool noxfrm) | ||
1839 | { | ||
1840 | struct rtable *rt = dst_alloc(&ipv4_dst_ops, 1); | ||
1841 | if (rt) { | ||
1842 | rt->dst.obsolete = -1; | ||
1843 | |||
1844 | rt->dst.flags = DST_HOST | | ||
1845 | (nopolicy ? DST_NOPOLICY : 0) | | ||
1846 | (noxfrm ? DST_NOXFRM : 0); | ||
1847 | } | ||
1848 | return rt; | ||
1845 | } | 1849 | } |
1846 | 1850 | ||
1847 | /* called in rcu_read_lock() section */ | 1851 | /* called in rcu_read_lock() section */ |
@@ -1874,31 +1878,25 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1874 | if (err < 0) | 1878 | if (err < 0) |
1875 | goto e_err; | 1879 | goto e_err; |
1876 | } | 1880 | } |
1877 | rth = dst_alloc(&ipv4_dst_ops); | 1881 | rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false); |
1878 | if (!rth) | 1882 | if (!rth) |
1879 | goto e_nobufs; | 1883 | goto e_nobufs; |
1880 | 1884 | ||
1881 | rth->dst.output = ip_rt_bug; | 1885 | rth->dst.output = ip_rt_bug; |
1882 | rth->dst.obsolete = -1; | ||
1883 | 1886 | ||
1884 | atomic_set(&rth->dst.__refcnt, 1); | 1887 | rth->rt_key_dst = daddr; |
1885 | rth->dst.flags= DST_HOST; | ||
1886 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | ||
1887 | rth->dst.flags |= DST_NOPOLICY; | ||
1888 | rth->fl.fl4_dst = daddr; | ||
1889 | rth->rt_dst = daddr; | 1888 | rth->rt_dst = daddr; |
1890 | rth->fl.fl4_tos = tos; | 1889 | rth->rt_tos = tos; |
1891 | rth->fl.mark = skb->mark; | 1890 | rth->rt_mark = skb->mark; |
1892 | rth->fl.fl4_src = saddr; | 1891 | rth->rt_key_src = saddr; |
1893 | rth->rt_src = saddr; | 1892 | rth->rt_src = saddr; |
1894 | #ifdef CONFIG_NET_CLS_ROUTE | 1893 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1895 | rth->dst.tclassid = itag; | 1894 | rth->dst.tclassid = itag; |
1896 | #endif | 1895 | #endif |
1897 | rth->rt_iif = | 1896 | rth->rt_iif = dev->ifindex; |
1898 | rth->fl.iif = dev->ifindex; | ||
1899 | rth->dst.dev = init_net.loopback_dev; | 1897 | rth->dst.dev = init_net.loopback_dev; |
1900 | dev_hold(rth->dst.dev); | 1898 | dev_hold(rth->dst.dev); |
1901 | rth->fl.oif = 0; | 1899 | rth->rt_oif = 0; |
1902 | rth->rt_gateway = daddr; | 1900 | rth->rt_gateway = daddr; |
1903 | rth->rt_spec_dst= spec_dst; | 1901 | rth->rt_spec_dst= spec_dst; |
1904 | rth->rt_genid = rt_genid(dev_net(dev)); | 1902 | rth->rt_genid = rt_genid(dev_net(dev)); |
@@ -1916,7 +1914,10 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1916 | RT_CACHE_STAT_INC(in_slow_mc); | 1914 | RT_CACHE_STAT_INC(in_slow_mc); |
1917 | 1915 | ||
1918 | hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); | 1916 | hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); |
1919 | return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex); | 1917 | rth = rt_intern_hash(hash, rth, skb, dev->ifindex); |
1918 | err = 0; | ||
1919 | if (IS_ERR(rth)) | ||
1920 | err = PTR_ERR(rth); | ||
1920 | 1921 | ||
1921 | e_nobufs: | 1922 | e_nobufs: |
1922 | return -ENOBUFS; | 1923 | return -ENOBUFS; |
@@ -1959,7 +1960,7 @@ static void ip_handle_martian_source(struct net_device *dev, | |||
1959 | 1960 | ||
1960 | /* called in rcu_read_lock() section */ | 1961 | /* called in rcu_read_lock() section */ |
1961 | static int __mkroute_input(struct sk_buff *skb, | 1962 | static int __mkroute_input(struct sk_buff *skb, |
1962 | struct fib_result *res, | 1963 | const struct fib_result *res, |
1963 | struct in_device *in_dev, | 1964 | struct in_device *in_dev, |
1964 | __be32 daddr, __be32 saddr, u32 tos, | 1965 | __be32 daddr, __be32 saddr, u32 tos, |
1965 | struct rtable **result) | 1966 | struct rtable **result) |
@@ -2013,39 +2014,31 @@ static int __mkroute_input(struct sk_buff *skb, | |||
2013 | } | 2014 | } |
2014 | } | 2015 | } |
2015 | 2016 | ||
2016 | 2017 | rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), | |
2017 | rth = dst_alloc(&ipv4_dst_ops); | 2018 | IN_DEV_CONF_GET(out_dev, NOXFRM)); |
2018 | if (!rth) { | 2019 | if (!rth) { |
2019 | err = -ENOBUFS; | 2020 | err = -ENOBUFS; |
2020 | goto cleanup; | 2021 | goto cleanup; |
2021 | } | 2022 | } |
2022 | 2023 | ||
2023 | atomic_set(&rth->dst.__refcnt, 1); | 2024 | rth->rt_key_dst = daddr; |
2024 | rth->dst.flags= DST_HOST; | ||
2025 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | ||
2026 | rth->dst.flags |= DST_NOPOLICY; | ||
2027 | if (IN_DEV_CONF_GET(out_dev, NOXFRM)) | ||
2028 | rth->dst.flags |= DST_NOXFRM; | ||
2029 | rth->fl.fl4_dst = daddr; | ||
2030 | rth->rt_dst = daddr; | 2025 | rth->rt_dst = daddr; |
2031 | rth->fl.fl4_tos = tos; | 2026 | rth->rt_tos = tos; |
2032 | rth->fl.mark = skb->mark; | 2027 | rth->rt_mark = skb->mark; |
2033 | rth->fl.fl4_src = saddr; | 2028 | rth->rt_key_src = saddr; |
2034 | rth->rt_src = saddr; | 2029 | rth->rt_src = saddr; |
2035 | rth->rt_gateway = daddr; | 2030 | rth->rt_gateway = daddr; |
2036 | rth->rt_iif = | 2031 | rth->rt_iif = in_dev->dev->ifindex; |
2037 | rth->fl.iif = in_dev->dev->ifindex; | ||
2038 | rth->dst.dev = (out_dev)->dev; | 2032 | rth->dst.dev = (out_dev)->dev; |
2039 | dev_hold(rth->dst.dev); | 2033 | dev_hold(rth->dst.dev); |
2040 | rth->fl.oif = 0; | 2034 | rth->rt_oif = 0; |
2041 | rth->rt_spec_dst= spec_dst; | 2035 | rth->rt_spec_dst= spec_dst; |
2042 | 2036 | ||
2043 | rth->dst.obsolete = -1; | ||
2044 | rth->dst.input = ip_forward; | 2037 | rth->dst.input = ip_forward; |
2045 | rth->dst.output = ip_output; | 2038 | rth->dst.output = ip_output; |
2046 | rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); | 2039 | rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); |
2047 | 2040 | ||
2048 | rt_set_nexthop(rth, res, itag); | 2041 | rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag); |
2049 | 2042 | ||
2050 | rth->rt_flags = flags; | 2043 | rth->rt_flags = flags; |
2051 | 2044 | ||
@@ -2057,7 +2050,7 @@ static int __mkroute_input(struct sk_buff *skb, | |||
2057 | 2050 | ||
2058 | static int ip_mkroute_input(struct sk_buff *skb, | 2051 | static int ip_mkroute_input(struct sk_buff *skb, |
2059 | struct fib_result *res, | 2052 | struct fib_result *res, |
2060 | const struct flowi *fl, | 2053 | const struct flowi4 *fl4, |
2061 | struct in_device *in_dev, | 2054 | struct in_device *in_dev, |
2062 | __be32 daddr, __be32 saddr, u32 tos) | 2055 | __be32 daddr, __be32 saddr, u32 tos) |
2063 | { | 2056 | { |
@@ -2066,8 +2059,8 @@ static int ip_mkroute_input(struct sk_buff *skb, | |||
2066 | unsigned hash; | 2059 | unsigned hash; |
2067 | 2060 | ||
2068 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 2061 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
2069 | if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0) | 2062 | if (res->fi && res->fi->fib_nhs > 1) |
2070 | fib_select_multipath(fl, res); | 2063 | fib_select_multipath(res); |
2071 | #endif | 2064 | #endif |
2072 | 2065 | ||
2073 | /* create a routing cache entry */ | 2066 | /* create a routing cache entry */ |
@@ -2076,9 +2069,12 @@ static int ip_mkroute_input(struct sk_buff *skb, | |||
2076 | return err; | 2069 | return err; |
2077 | 2070 | ||
2078 | /* put it into the cache */ | 2071 | /* put it into the cache */ |
2079 | hash = rt_hash(daddr, saddr, fl->iif, | 2072 | hash = rt_hash(daddr, saddr, fl4->flowi4_iif, |
2080 | rt_genid(dev_net(rth->dst.dev))); | 2073 | rt_genid(dev_net(rth->dst.dev))); |
2081 | return rt_intern_hash(hash, rth, NULL, skb, fl->iif); | 2074 | rth = rt_intern_hash(hash, rth, skb, fl4->flowi4_iif); |
2075 | if (IS_ERR(rth)) | ||
2076 | return PTR_ERR(rth); | ||
2077 | return 0; | ||
2082 | } | 2078 | } |
2083 | 2079 | ||
2084 | /* | 2080 | /* |
@@ -2097,12 +2093,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2097 | { | 2093 | { |
2098 | struct fib_result res; | 2094 | struct fib_result res; |
2099 | struct in_device *in_dev = __in_dev_get_rcu(dev); | 2095 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
2100 | struct flowi fl = { .fl4_dst = daddr, | 2096 | struct flowi4 fl4; |
2101 | .fl4_src = saddr, | ||
2102 | .fl4_tos = tos, | ||
2103 | .fl4_scope = RT_SCOPE_UNIVERSE, | ||
2104 | .mark = skb->mark, | ||
2105 | .iif = dev->ifindex }; | ||
2106 | unsigned flags = 0; | 2097 | unsigned flags = 0; |
2107 | u32 itag = 0; | 2098 | u32 itag = 0; |
2108 | struct rtable * rth; | 2099 | struct rtable * rth; |
@@ -2139,7 +2130,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2139 | /* | 2130 | /* |
2140 | * Now we are ready to route packet. | 2131 | * Now we are ready to route packet. |
2141 | */ | 2132 | */ |
2142 | err = fib_lookup(net, &fl, &res); | 2133 | fl4.flowi4_oif = 0; |
2134 | fl4.flowi4_iif = dev->ifindex; | ||
2135 | fl4.flowi4_mark = skb->mark; | ||
2136 | fl4.flowi4_tos = tos; | ||
2137 | fl4.flowi4_scope = RT_SCOPE_UNIVERSE; | ||
2138 | fl4.daddr = daddr; | ||
2139 | fl4.saddr = saddr; | ||
2140 | err = fib_lookup(net, &fl4, &res); | ||
2143 | if (err != 0) { | 2141 | if (err != 0) { |
2144 | if (!IN_DEV_FORWARD(in_dev)) | 2142 | if (!IN_DEV_FORWARD(in_dev)) |
2145 | goto e_hostunreach; | 2143 | goto e_hostunreach; |
@@ -2168,7 +2166,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2168 | if (res.type != RTN_UNICAST) | 2166 | if (res.type != RTN_UNICAST) |
2169 | goto martian_destination; | 2167 | goto martian_destination; |
2170 | 2168 | ||
2171 | err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); | 2169 | err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos); |
2172 | out: return err; | 2170 | out: return err; |
2173 | 2171 | ||
2174 | brd_input: | 2172 | brd_input: |
@@ -2190,29 +2188,23 @@ brd_input: | |||
2190 | RT_CACHE_STAT_INC(in_brd); | 2188 | RT_CACHE_STAT_INC(in_brd); |
2191 | 2189 | ||
2192 | local_input: | 2190 | local_input: |
2193 | rth = dst_alloc(&ipv4_dst_ops); | 2191 | rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false); |
2194 | if (!rth) | 2192 | if (!rth) |
2195 | goto e_nobufs; | 2193 | goto e_nobufs; |
2196 | 2194 | ||
2197 | rth->dst.output= ip_rt_bug; | 2195 | rth->dst.output= ip_rt_bug; |
2198 | rth->dst.obsolete = -1; | ||
2199 | rth->rt_genid = rt_genid(net); | 2196 | rth->rt_genid = rt_genid(net); |
2200 | 2197 | ||
2201 | atomic_set(&rth->dst.__refcnt, 1); | 2198 | rth->rt_key_dst = daddr; |
2202 | rth->dst.flags= DST_HOST; | ||
2203 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | ||
2204 | rth->dst.flags |= DST_NOPOLICY; | ||
2205 | rth->fl.fl4_dst = daddr; | ||
2206 | rth->rt_dst = daddr; | 2199 | rth->rt_dst = daddr; |
2207 | rth->fl.fl4_tos = tos; | 2200 | rth->rt_tos = tos; |
2208 | rth->fl.mark = skb->mark; | 2201 | rth->rt_mark = skb->mark; |
2209 | rth->fl.fl4_src = saddr; | 2202 | rth->rt_key_src = saddr; |
2210 | rth->rt_src = saddr; | 2203 | rth->rt_src = saddr; |
2211 | #ifdef CONFIG_NET_CLS_ROUTE | 2204 | #ifdef CONFIG_IP_ROUTE_CLASSID |
2212 | rth->dst.tclassid = itag; | 2205 | rth->dst.tclassid = itag; |
2213 | #endif | 2206 | #endif |
2214 | rth->rt_iif = | 2207 | rth->rt_iif = dev->ifindex; |
2215 | rth->fl.iif = dev->ifindex; | ||
2216 | rth->dst.dev = net->loopback_dev; | 2208 | rth->dst.dev = net->loopback_dev; |
2217 | dev_hold(rth->dst.dev); | 2209 | dev_hold(rth->dst.dev); |
2218 | rth->rt_gateway = daddr; | 2210 | rth->rt_gateway = daddr; |
@@ -2225,8 +2217,11 @@ local_input: | |||
2225 | rth->rt_flags &= ~RTCF_LOCAL; | 2217 | rth->rt_flags &= ~RTCF_LOCAL; |
2226 | } | 2218 | } |
2227 | rth->rt_type = res.type; | 2219 | rth->rt_type = res.type; |
2228 | hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); | 2220 | hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net)); |
2229 | err = rt_intern_hash(hash, rth, NULL, skb, fl.iif); | 2221 | rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif); |
2222 | err = 0; | ||
2223 | if (IS_ERR(rth)) | ||
2224 | err = PTR_ERR(rth); | ||
2230 | goto out; | 2225 | goto out; |
2231 | 2226 | ||
2232 | no_route: | 2227 | no_route: |
@@ -2288,12 +2283,12 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2288 | 2283 | ||
2289 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; | 2284 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; |
2290 | rth = rcu_dereference(rth->dst.rt_next)) { | 2285 | rth = rcu_dereference(rth->dst.rt_next)) { |
2291 | if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | | 2286 | if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) | |
2292 | ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | | 2287 | ((__force u32)rth->rt_key_src ^ (__force u32)saddr) | |
2293 | (rth->fl.iif ^ iif) | | 2288 | (rth->rt_iif ^ iif) | |
2294 | rth->fl.oif | | 2289 | rth->rt_oif | |
2295 | (rth->fl.fl4_tos ^ tos)) == 0 && | 2290 | (rth->rt_tos ^ tos)) == 0 && |
2296 | rth->fl.mark == skb->mark && | 2291 | rth->rt_mark == skb->mark && |
2297 | net_eq(dev_net(rth->dst.dev), net) && | 2292 | net_eq(dev_net(rth->dst.dev), net) && |
2298 | !rt_is_expired(rth)) { | 2293 | !rt_is_expired(rth)) { |
2299 | if (noref) { | 2294 | if (noref) { |
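The cache compare keeps its shape under the rename: XOR each pair of key fields and OR the results, so the expression is zero only on a full match and the hot loop pays a single conditional branch. A standalone sketch with a trimmed field set:

    #include <stdint.h>
    #include <stdio.h>

    struct key { uint32_t dst, src, iif, oif, tos; };

    static int keys_match(const struct key *a, const struct key *b)
    {
        /* zero iff every field matches; evaluated branch-free until the
         * single final test (the kernel ORs rth->rt_oif in raw, since a
         * cached input route must have oif == 0) */
        return ((a->dst ^ b->dst) |
                (a->src ^ b->src) |
                (a->iif ^ b->iif) |
                (a->oif ^ b->oif) |
                (a->tos ^ b->tos)) == 0;
    }

    int main(void)
    {
        struct key a = { 1, 2, 3, 0, 16 };
        struct key b = a;

        printf("%d\n", keys_match(&a, &b));   /* 1 */
        b.tos ^= 1;
        printf("%d\n", keys_match(&a, &b));   /* 0 */
        return 0;
    }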
@@ -2326,8 +2321,8 @@ skip_cache: | |||
2326 | struct in_device *in_dev = __in_dev_get_rcu(dev); | 2321 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
2327 | 2322 | ||
2328 | if (in_dev) { | 2323 | if (in_dev) { |
2329 | int our = ip_check_mc(in_dev, daddr, saddr, | 2324 | int our = ip_check_mc_rcu(in_dev, daddr, saddr, |
2330 | ip_hdr(skb)->protocol); | 2325 | ip_hdr(skb)->protocol); |
2331 | if (our | 2326 | if (our |
2332 | #ifdef CONFIG_IP_MROUTE | 2327 | #ifdef CONFIG_IP_MROUTE |
2333 | || | 2328 | || |
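The ip_check_mc() to ip_check_mc_rcu() rename encodes the locking contract in the name: the caller — already inside an __in_dev_get_rcu() section here — must hold the RCU read lock. A toy sketch of making such a contract checkable; the rcu_* stubs and the assert are stand-ins for the kernel's real RCU and lockdep machinery:

    #include <assert.h>
    #include <stdio.h>

    static int rcu_depth;                  /* stand-in for real RCU state */
    static void rcu_read_lock(void)   { rcu_depth++; }
    static void rcu_read_unlock(void) { rcu_depth--; }

    /* the _rcu suffix promises: only called under rcu_read_lock() */
    static int check_mc_rcu(unsigned int group)
    {
        assert(rcu_depth > 0);             /* contract, made checkable */
        return (group >> 28) == 0xe;       /* 224.0.0.0/4 is multicast */
    }

    int main(void)
    {
        rcu_read_lock();
        printf("multicast: %d\n", check_mc_rcu(0xe0000001)); /* 224.0.0.1 */
        rcu_read_unlock();
        return 0;
    }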
@@ -2351,98 +2346,91 @@ skip_cache: | |||
2351 | EXPORT_SYMBOL(ip_route_input_common); | 2346 | EXPORT_SYMBOL(ip_route_input_common); |
2352 | 2347 | ||
2353 | /* called with rcu_read_lock() */ | 2348 | /* called with rcu_read_lock() */ |
2354 | static int __mkroute_output(struct rtable **result, | 2349 | static struct rtable *__mkroute_output(const struct fib_result *res, |
2355 | struct fib_result *res, | 2350 | const struct flowi4 *fl4, |
2356 | const struct flowi *fl, | 2351 | const struct flowi4 *oldflp4, |
2357 | const struct flowi *oldflp, | 2352 | struct net_device *dev_out, |
2358 | struct net_device *dev_out, | 2353 | unsigned int flags) |
2359 | unsigned flags) | ||
2360 | { | 2354 | { |
2361 | struct rtable *rth; | 2355 | struct fib_info *fi = res->fi; |
2356 | u32 tos = RT_FL_TOS(oldflp4); | ||
2362 | struct in_device *in_dev; | 2357 | struct in_device *in_dev; |
2363 | u32 tos = RT_FL_TOS(oldflp); | 2358 | u16 type = res->type; |
2359 | struct rtable *rth; | ||
2364 | 2360 | ||
2365 | if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK)) | 2361 | if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK)) |
2366 | return -EINVAL; | 2362 | return ERR_PTR(-EINVAL); |
2367 | 2363 | ||
2368 | if (ipv4_is_lbcast(fl->fl4_dst)) | 2364 | if (ipv4_is_lbcast(fl4->daddr)) |
2369 | res->type = RTN_BROADCAST; | 2365 | type = RTN_BROADCAST; |
2370 | else if (ipv4_is_multicast(fl->fl4_dst)) | 2366 | else if (ipv4_is_multicast(fl4->daddr)) |
2371 | res->type = RTN_MULTICAST; | 2367 | type = RTN_MULTICAST; |
2372 | else if (ipv4_is_zeronet(fl->fl4_dst)) | 2368 | else if (ipv4_is_zeronet(fl4->daddr)) |
2373 | return -EINVAL; | 2369 | return ERR_PTR(-EINVAL); |
2374 | 2370 | ||
2375 | if (dev_out->flags & IFF_LOOPBACK) | 2371 | if (dev_out->flags & IFF_LOOPBACK) |
2376 | flags |= RTCF_LOCAL; | 2372 | flags |= RTCF_LOCAL; |
2377 | 2373 | ||
2378 | in_dev = __in_dev_get_rcu(dev_out); | 2374 | in_dev = __in_dev_get_rcu(dev_out); |
2379 | if (!in_dev) | 2375 | if (!in_dev) |
2380 | return -EINVAL; | 2376 | return ERR_PTR(-EINVAL); |
2381 | 2377 | ||
2382 | if (res->type == RTN_BROADCAST) { | 2378 | if (type == RTN_BROADCAST) { |
2383 | flags |= RTCF_BROADCAST | RTCF_LOCAL; | 2379 | flags |= RTCF_BROADCAST | RTCF_LOCAL; |
2384 | res->fi = NULL; | 2380 | fi = NULL; |
2385 | } else if (res->type == RTN_MULTICAST) { | 2381 | } else if (type == RTN_MULTICAST) { |
2386 | flags |= RTCF_MULTICAST | RTCF_LOCAL; | 2382 | flags |= RTCF_MULTICAST | RTCF_LOCAL; |
2387 | if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src, | 2383 | if (!ip_check_mc_rcu(in_dev, oldflp4->daddr, oldflp4->saddr, |
2388 | oldflp->proto)) | 2384 | oldflp4->flowi4_proto)) |
2389 | flags &= ~RTCF_LOCAL; | 2385 | flags &= ~RTCF_LOCAL; |
2390 | /* If multicast route do not exist use | 2386 | /* If multicast route do not exist use |
2391 | * default one, but do not gateway in this case. | 2387 | * default one, but do not gateway in this case. |
2392 | * Yes, it is hack. | 2388 | * Yes, it is hack. |
2393 | */ | 2389 | */ |
2394 | if (res->fi && res->prefixlen < 4) | 2390 | if (fi && res->prefixlen < 4) |
2395 | res->fi = NULL; | 2391 | fi = NULL; |
2396 | } | 2392 | } |
2397 | 2393 | ||
2398 | 2394 | rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), | |
2399 | rth = dst_alloc(&ipv4_dst_ops); | 2395 | IN_DEV_CONF_GET(in_dev, NOXFRM)); |
2400 | if (!rth) | 2396 | if (!rth) |
2401 | return -ENOBUFS; | 2397 | return ERR_PTR(-ENOBUFS); |
2402 | 2398 | ||
2403 | atomic_set(&rth->dst.__refcnt, 1); | 2399 | rth->rt_key_dst = oldflp4->daddr; |
2404 | rth->dst.flags= DST_HOST; | 2400 | rth->rt_tos = tos; |
2405 | if (IN_DEV_CONF_GET(in_dev, NOXFRM)) | 2401 | rth->rt_key_src = oldflp4->saddr; |
2406 | rth->dst.flags |= DST_NOXFRM; | 2402 | rth->rt_oif = oldflp4->flowi4_oif; |
2407 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | 2403 | rth->rt_mark = oldflp4->flowi4_mark; |
2408 | rth->dst.flags |= DST_NOPOLICY; | 2404 | rth->rt_dst = fl4->daddr; |
2409 | 2405 | rth->rt_src = fl4->saddr; | |
2410 | rth->fl.fl4_dst = oldflp->fl4_dst; | 2406 | rth->rt_iif = 0; |
2411 | rth->fl.fl4_tos = tos; | ||
2412 | rth->fl.fl4_src = oldflp->fl4_src; | ||
2413 | rth->fl.oif = oldflp->oif; | ||
2414 | rth->fl.mark = oldflp->mark; | ||
2415 | rth->rt_dst = fl->fl4_dst; | ||
2416 | rth->rt_src = fl->fl4_src; | ||
2417 | rth->rt_iif = oldflp->oif ? : dev_out->ifindex; | ||
2418 | /* get references to the devices that are to be hold by the routing | 2407 | /* get references to the devices that are to be hold by the routing |
2419 | cache entry */ | 2408 | cache entry */ |
2420 | rth->dst.dev = dev_out; | 2409 | rth->dst.dev = dev_out; |
2421 | dev_hold(dev_out); | 2410 | dev_hold(dev_out); |
2422 | rth->rt_gateway = fl->fl4_dst; | 2411 | rth->rt_gateway = fl4->daddr; |
2423 | rth->rt_spec_dst= fl->fl4_src; | 2412 | rth->rt_spec_dst= fl4->saddr; |
2424 | 2413 | ||
2425 | rth->dst.output=ip_output; | 2414 | rth->dst.output=ip_output; |
2426 | rth->dst.obsolete = -1; | ||
2427 | rth->rt_genid = rt_genid(dev_net(dev_out)); | 2415 | rth->rt_genid = rt_genid(dev_net(dev_out)); |
2428 | 2416 | ||
2429 | RT_CACHE_STAT_INC(out_slow_tot); | 2417 | RT_CACHE_STAT_INC(out_slow_tot); |
2430 | 2418 | ||
2431 | if (flags & RTCF_LOCAL) { | 2419 | if (flags & RTCF_LOCAL) { |
2432 | rth->dst.input = ip_local_deliver; | 2420 | rth->dst.input = ip_local_deliver; |
2433 | rth->rt_spec_dst = fl->fl4_dst; | 2421 | rth->rt_spec_dst = fl4->daddr; |
2434 | } | 2422 | } |
2435 | if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { | 2423 | if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { |
2436 | rth->rt_spec_dst = fl->fl4_src; | 2424 | rth->rt_spec_dst = fl4->saddr; |
2437 | if (flags & RTCF_LOCAL && | 2425 | if (flags & RTCF_LOCAL && |
2438 | !(dev_out->flags & IFF_LOOPBACK)) { | 2426 | !(dev_out->flags & IFF_LOOPBACK)) { |
2439 | rth->dst.output = ip_mc_output; | 2427 | rth->dst.output = ip_mc_output; |
2440 | RT_CACHE_STAT_INC(out_slow_mc); | 2428 | RT_CACHE_STAT_INC(out_slow_mc); |
2441 | } | 2429 | } |
2442 | #ifdef CONFIG_IP_MROUTE | 2430 | #ifdef CONFIG_IP_MROUTE |
2443 | if (res->type == RTN_MULTICAST) { | 2431 | if (type == RTN_MULTICAST) { |
2444 | if (IN_DEV_MFORWARD(in_dev) && | 2432 | if (IN_DEV_MFORWARD(in_dev) && |
2445 | !ipv4_is_local_multicast(oldflp->fl4_dst)) { | 2433 | !ipv4_is_local_multicast(oldflp4->daddr)) { |
2446 | rth->dst.input = ip_mr_input; | 2434 | rth->dst.input = ip_mr_input; |
2447 | rth->dst.output = ip_mc_output; | 2435 | rth->dst.output = ip_mc_output; |
2448 | } | 2436 | } |
@@ -2450,31 +2438,10 @@ static int __mkroute_output(struct rtable **result, | |||
2450 | #endif | 2438 | #endif |
2451 | } | 2439 | } |
2452 | 2440 | ||
2453 | rt_set_nexthop(rth, res, 0); | 2441 | rt_set_nexthop(rth, oldflp4, res, fi, type, 0); |
2454 | 2442 | ||
2455 | rth->rt_flags = flags; | 2443 | rth->rt_flags = flags; |
2456 | *result = rth; | 2444 | return rth; |
2457 | return 0; | ||
2458 | } | ||
2459 | |||
2460 | /* called with rcu_read_lock() */ | ||
2461 | static int ip_mkroute_output(struct rtable **rp, | ||
2462 | struct fib_result *res, | ||
2463 | const struct flowi *fl, | ||
2464 | const struct flowi *oldflp, | ||
2465 | struct net_device *dev_out, | ||
2466 | unsigned flags) | ||
2467 | { | ||
2468 | struct rtable *rth = NULL; | ||
2469 | int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); | ||
2470 | unsigned hash; | ||
2471 | if (err == 0) { | ||
2472 | hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, | ||
2473 | rt_genid(dev_net(dev_out))); | ||
2474 | err = rt_intern_hash(hash, rth, rp, NULL, oldflp->oif); | ||
2475 | } | ||
2476 | |||
2477 | return err; | ||
2478 | } | 2445 | } |
2479 | 2446 | ||
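Two related changes meet in this hunk: __mkroute_output() becomes an ERR_PTR-returning factory (letting the thin ip_mkroute_output() wrapper below it disappear), and it now takes the fib_result as const, shadowing res->fi and res->type in locals (fi, type) rather than writing through the pointer, so one lookup result can safely feed several builders. A small sketch of that shadow-the-result pattern, with invented names:

    #include <stdio.h>

    struct result { int type; const char *info; };

    /* takes the shared result read-only; overrides live in locals */
    static void build(const struct result *res, int is_broadcast)
    {
        int type = res->type;       /* instead of res->type = ... */
        const char *info = res->info;

        if (is_broadcast) {
            type = 3;               /* plays the role of RTN_BROADCAST */
            info = NULL;            /* plays the role of fi = NULL */
        }
        printf("type=%d info=%s\n", type, info ? info : "(none)");
    }

    int main(void)
    {
        struct result res = { 1, "fib" };

        build(&res, 1);
        build(&res, 0);             /* res itself was never modified */
        return 0;
    }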
2480 | /* | 2447 | /* |
@@ -2482,34 +2449,36 @@ static int ip_mkroute_output(struct rtable **rp, | |||
2482 | * called with rcu_read_lock(); | 2449 | * called with rcu_read_lock(); |
2483 | */ | 2450 | */ |
2484 | 2451 | ||
2485 | static int ip_route_output_slow(struct net *net, struct rtable **rp, | 2452 | static struct rtable *ip_route_output_slow(struct net *net, |
2486 | const struct flowi *oldflp) | 2453 | const struct flowi4 *oldflp4) |
2487 | { | 2454 | { |
2488 | u32 tos = RT_FL_TOS(oldflp); | 2455 | u32 tos = RT_FL_TOS(oldflp4); |
2489 | struct flowi fl = { .fl4_dst = oldflp->fl4_dst, | 2456 | struct flowi4 fl4; |
2490 | .fl4_src = oldflp->fl4_src, | ||
2491 | .fl4_tos = tos & IPTOS_RT_MASK, | ||
2492 | .fl4_scope = ((tos & RTO_ONLINK) ? | ||
2493 | RT_SCOPE_LINK : RT_SCOPE_UNIVERSE), | ||
2494 | .mark = oldflp->mark, | ||
2495 | .iif = net->loopback_dev->ifindex, | ||
2496 | .oif = oldflp->oif }; | ||
2497 | struct fib_result res; | 2457 | struct fib_result res; |
2498 | unsigned int flags = 0; | 2458 | unsigned int flags = 0; |
2499 | struct net_device *dev_out = NULL; | 2459 | struct net_device *dev_out = NULL; |
2500 | int err; | 2460 | struct rtable *rth; |
2501 | |||
2502 | 2461 | ||
2503 | res.fi = NULL; | 2462 | res.fi = NULL; |
2504 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 2463 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
2505 | res.r = NULL; | 2464 | res.r = NULL; |
2506 | #endif | 2465 | #endif |
2507 | 2466 | ||
2508 | if (oldflp->fl4_src) { | 2467 | fl4.flowi4_oif = oldflp4->flowi4_oif; |
2509 | err = -EINVAL; | 2468 | fl4.flowi4_iif = net->loopback_dev->ifindex; |
2510 | if (ipv4_is_multicast(oldflp->fl4_src) || | 2469 | fl4.flowi4_mark = oldflp4->flowi4_mark; |
2511 | ipv4_is_lbcast(oldflp->fl4_src) || | 2470 | fl4.daddr = oldflp4->daddr; |
2512 | ipv4_is_zeronet(oldflp->fl4_src)) | 2471 | fl4.saddr = oldflp4->saddr; |
2472 | fl4.flowi4_tos = tos & IPTOS_RT_MASK; | ||
2473 | fl4.flowi4_scope = ((tos & RTO_ONLINK) ? | ||
2474 | RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); | ||
2475 | |||
2476 | rcu_read_lock(); | ||
2477 | if (oldflp4->saddr) { | ||
2478 | rth = ERR_PTR(-EINVAL); | ||
2479 | if (ipv4_is_multicast(oldflp4->saddr) || | ||
2480 | ipv4_is_lbcast(oldflp4->saddr) || | ||
2481 | ipv4_is_zeronet(oldflp4->saddr)) | ||
2513 | goto out; | 2482 | goto out; |
2514 | 2483 | ||
2515 | /* I removed check for oif == dev_out->oif here. | 2484 | /* I removed check for oif == dev_out->oif here. |
@@ -2520,11 +2489,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2520 | of another iface. --ANK | 2489 | of another iface. --ANK |
2521 | */ | 2490 | */ |
2522 | 2491 | ||
2523 | if (oldflp->oif == 0 && | 2492 | if (oldflp4->flowi4_oif == 0 && |
2524 | (ipv4_is_multicast(oldflp->fl4_dst) || | 2493 | (ipv4_is_multicast(oldflp4->daddr) || |
2525 | ipv4_is_lbcast(oldflp->fl4_dst))) { | 2494 | ipv4_is_lbcast(oldflp4->daddr))) { |
2526 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ | 2495 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ |
2527 | dev_out = __ip_dev_find(net, oldflp->fl4_src, false); | 2496 | dev_out = __ip_dev_find(net, oldflp4->saddr, false); |
2528 | if (dev_out == NULL) | 2497 | if (dev_out == NULL) |
2529 | goto out; | 2498 | goto out; |
2530 | 2499 | ||
@@ -2543,60 +2512,60 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2543 | Luckily, this hack is good workaround. | 2512 | Luckily, this hack is good workaround. |
2544 | */ | 2513 | */ |
2545 | 2514 | ||
2546 | fl.oif = dev_out->ifindex; | 2515 | fl4.flowi4_oif = dev_out->ifindex; |
2547 | goto make_route; | 2516 | goto make_route; |
2548 | } | 2517 | } |
2549 | 2518 | ||
2550 | if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) { | 2519 | if (!(oldflp4->flowi4_flags & FLOWI_FLAG_ANYSRC)) { |
2551 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ | 2520 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ |
2552 | if (!__ip_dev_find(net, oldflp->fl4_src, false)) | 2521 | if (!__ip_dev_find(net, oldflp4->saddr, false)) |
2553 | goto out; | 2522 | goto out; |
2554 | } | 2523 | } |
2555 | } | 2524 | } |
2556 | 2525 | ||
2557 | 2526 | ||
2558 | if (oldflp->oif) { | 2527 | if (oldflp4->flowi4_oif) { |
2559 | dev_out = dev_get_by_index_rcu(net, oldflp->oif); | 2528 | dev_out = dev_get_by_index_rcu(net, oldflp4->flowi4_oif); |
2560 | err = -ENODEV; | 2529 | rth = ERR_PTR(-ENODEV); |
2561 | if (dev_out == NULL) | 2530 | if (dev_out == NULL) |
2562 | goto out; | 2531 | goto out; |
2563 | 2532 | ||
2564 | /* RACE: Check return value of inet_select_addr instead. */ | 2533 | /* RACE: Check return value of inet_select_addr instead. */ |
2565 | if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) { | 2534 | if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) { |
2566 | err = -ENETUNREACH; | 2535 | rth = ERR_PTR(-ENETUNREACH); |
2567 | goto out; | 2536 | goto out; |
2568 | } | 2537 | } |
2569 | if (ipv4_is_local_multicast(oldflp->fl4_dst) || | 2538 | if (ipv4_is_local_multicast(oldflp4->daddr) || |
2570 | ipv4_is_lbcast(oldflp->fl4_dst)) { | 2539 | ipv4_is_lbcast(oldflp4->daddr)) { |
2571 | if (!fl.fl4_src) | 2540 | if (!fl4.saddr) |
2572 | fl.fl4_src = inet_select_addr(dev_out, 0, | 2541 | fl4.saddr = inet_select_addr(dev_out, 0, |
2573 | RT_SCOPE_LINK); | 2542 | RT_SCOPE_LINK); |
2574 | goto make_route; | 2543 | goto make_route; |
2575 | } | 2544 | } |
2576 | if (!fl.fl4_src) { | 2545 | if (!fl4.saddr) { |
2577 | if (ipv4_is_multicast(oldflp->fl4_dst)) | 2546 | if (ipv4_is_multicast(oldflp4->daddr)) |
2578 | fl.fl4_src = inet_select_addr(dev_out, 0, | 2547 | fl4.saddr = inet_select_addr(dev_out, 0, |
2579 | fl.fl4_scope); | 2548 | fl4.flowi4_scope); |
2580 | else if (!oldflp->fl4_dst) | 2549 | else if (!oldflp4->daddr) |
2581 | fl.fl4_src = inet_select_addr(dev_out, 0, | 2550 | fl4.saddr = inet_select_addr(dev_out, 0, |
2582 | RT_SCOPE_HOST); | 2551 | RT_SCOPE_HOST); |
2583 | } | 2552 | } |
2584 | } | 2553 | } |
2585 | 2554 | ||
2586 | if (!fl.fl4_dst) { | 2555 | if (!fl4.daddr) { |
2587 | fl.fl4_dst = fl.fl4_src; | 2556 | fl4.daddr = fl4.saddr; |
2588 | if (!fl.fl4_dst) | 2557 | if (!fl4.daddr) |
2589 | fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); | 2558 | fl4.daddr = fl4.saddr = htonl(INADDR_LOOPBACK); |
2590 | dev_out = net->loopback_dev; | 2559 | dev_out = net->loopback_dev; |
2591 | fl.oif = net->loopback_dev->ifindex; | 2560 | fl4.flowi4_oif = net->loopback_dev->ifindex; |
2592 | res.type = RTN_LOCAL; | 2561 | res.type = RTN_LOCAL; |
2593 | flags |= RTCF_LOCAL; | 2562 | flags |= RTCF_LOCAL; |
2594 | goto make_route; | 2563 | goto make_route; |
2595 | } | 2564 | } |
2596 | 2565 | ||
2597 | if (fib_lookup(net, &fl, &res)) { | 2566 | if (fib_lookup(net, &fl4, &res)) { |
2598 | res.fi = NULL; | 2567 | res.fi = NULL; |
2599 | if (oldflp->oif) { | 2568 | if (oldflp4->flowi4_oif) { |
2600 | /* Apparently, routing tables are wrong. Assume, | 2569 | /* Apparently, routing tables are wrong. Assume, |
2601 | that the destination is on link. | 2570 | that the destination is on link. |
2602 | 2571 | ||
@@ -2615,90 +2584,93 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2615 | likely IPv6, but we do not. | 2584 | likely IPv6, but we do not. |
2616 | */ | 2585 | */ |
2617 | 2586 | ||
2618 | if (fl.fl4_src == 0) | 2587 | if (fl4.saddr == 0) |
2619 | fl.fl4_src = inet_select_addr(dev_out, 0, | 2588 | fl4.saddr = inet_select_addr(dev_out, 0, |
2620 | RT_SCOPE_LINK); | 2589 | RT_SCOPE_LINK); |
2621 | res.type = RTN_UNICAST; | 2590 | res.type = RTN_UNICAST; |
2622 | goto make_route; | 2591 | goto make_route; |
2623 | } | 2592 | } |
2624 | err = -ENETUNREACH; | 2593 | rth = ERR_PTR(-ENETUNREACH); |
2625 | goto out; | 2594 | goto out; |
2626 | } | 2595 | } |
2627 | 2596 | ||
2628 | if (res.type == RTN_LOCAL) { | 2597 | if (res.type == RTN_LOCAL) { |
2629 | if (!fl.fl4_src) { | 2598 | if (!fl4.saddr) { |
2630 | if (res.fi->fib_prefsrc) | 2599 | if (res.fi->fib_prefsrc) |
2631 | fl.fl4_src = res.fi->fib_prefsrc; | 2600 | fl4.saddr = res.fi->fib_prefsrc; |
2632 | else | 2601 | else |
2633 | fl.fl4_src = fl.fl4_dst; | 2602 | fl4.saddr = fl4.daddr; |
2634 | } | 2603 | } |
2635 | dev_out = net->loopback_dev; | 2604 | dev_out = net->loopback_dev; |
2636 | fl.oif = dev_out->ifindex; | 2605 | fl4.flowi4_oif = dev_out->ifindex; |
2637 | res.fi = NULL; | 2606 | res.fi = NULL; |
2638 | flags |= RTCF_LOCAL; | 2607 | flags |= RTCF_LOCAL; |
2639 | goto make_route; | 2608 | goto make_route; |
2640 | } | 2609 | } |
2641 | 2610 | ||
2642 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 2611 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
2643 | if (res.fi->fib_nhs > 1 && fl.oif == 0) | 2612 | if (res.fi->fib_nhs > 1 && fl4.flowi4_oif == 0) |
2644 | fib_select_multipath(&fl, &res); | 2613 | fib_select_multipath(&res); |
2645 | else | 2614 | else |
2646 | #endif | 2615 | #endif |
2647 | if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif) | 2616 | if (!res.prefixlen && res.type == RTN_UNICAST && !fl4.flowi4_oif) |
2648 | fib_select_default(net, &fl, &res); | 2617 | fib_select_default(&res); |
2649 | 2618 | ||
2650 | if (!fl.fl4_src) | 2619 | if (!fl4.saddr) |
2651 | fl.fl4_src = FIB_RES_PREFSRC(res); | 2620 | fl4.saddr = FIB_RES_PREFSRC(res); |
2652 | 2621 | ||
2653 | dev_out = FIB_RES_DEV(res); | 2622 | dev_out = FIB_RES_DEV(res); |
2654 | fl.oif = dev_out->ifindex; | 2623 | fl4.flowi4_oif = dev_out->ifindex; |
2655 | 2624 | ||
2656 | 2625 | ||
2657 | make_route: | 2626 | make_route: |
2658 | err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags); | 2627 | rth = __mkroute_output(&res, &fl4, oldflp4, dev_out, flags); |
2628 | if (!IS_ERR(rth)) { | ||
2629 | unsigned int hash; | ||
2659 | 2630 | ||
2660 | out: return err; | 2631 | hash = rt_hash(oldflp4->daddr, oldflp4->saddr, oldflp4->flowi4_oif, |
2632 | rt_genid(dev_net(dev_out))); | ||
2633 | rth = rt_intern_hash(hash, rth, NULL, oldflp4->flowi4_oif); | ||
2634 | } | ||
2635 | |||
2636 | out: | ||
2637 | rcu_read_unlock(); | ||
2638 | return rth; | ||
2661 | } | 2639 | } |
2662 | 2640 | ||
2663 | int __ip_route_output_key(struct net *net, struct rtable **rp, | 2641 | struct rtable *__ip_route_output_key(struct net *net, const struct flowi4 *flp4) |
2664 | const struct flowi *flp) | ||
2665 | { | 2642 | { |
2666 | unsigned int hash; | ||
2667 | int res; | ||
2668 | struct rtable *rth; | 2643 | struct rtable *rth; |
2644 | unsigned int hash; | ||
2669 | 2645 | ||
2670 | if (!rt_caching(net)) | 2646 | if (!rt_caching(net)) |
2671 | goto slow_output; | 2647 | goto slow_output; |
2672 | 2648 | ||
2673 | hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); | 2649 | hash = rt_hash(flp4->daddr, flp4->saddr, flp4->flowi4_oif, rt_genid(net)); |
2674 | 2650 | ||
2675 | rcu_read_lock_bh(); | 2651 | rcu_read_lock_bh(); |
2676 | for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; | 2652 | for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; |
2677 | rth = rcu_dereference_bh(rth->dst.rt_next)) { | 2653 | rth = rcu_dereference_bh(rth->dst.rt_next)) { |
2678 | if (rth->fl.fl4_dst == flp->fl4_dst && | 2654 | if (rth->rt_key_dst == flp4->daddr && |
2679 | rth->fl.fl4_src == flp->fl4_src && | 2655 | rth->rt_key_src == flp4->saddr && |
2680 | rt_is_output_route(rth) && | 2656 | rt_is_output_route(rth) && |
2681 | rth->fl.oif == flp->oif && | 2657 | rth->rt_oif == flp4->flowi4_oif && |
2682 | rth->fl.mark == flp->mark && | 2658 | rth->rt_mark == flp4->flowi4_mark && |
2683 | !((rth->fl.fl4_tos ^ flp->fl4_tos) & | 2659 | !((rth->rt_tos ^ flp4->flowi4_tos) & |
2684 | (IPTOS_RT_MASK | RTO_ONLINK)) && | 2660 | (IPTOS_RT_MASK | RTO_ONLINK)) && |
2685 | net_eq(dev_net(rth->dst.dev), net) && | 2661 | net_eq(dev_net(rth->dst.dev), net) && |
2686 | !rt_is_expired(rth)) { | 2662 | !rt_is_expired(rth)) { |
2687 | dst_use(&rth->dst, jiffies); | 2663 | dst_use(&rth->dst, jiffies); |
2688 | RT_CACHE_STAT_INC(out_hit); | 2664 | RT_CACHE_STAT_INC(out_hit); |
2689 | rcu_read_unlock_bh(); | 2665 | rcu_read_unlock_bh(); |
2690 | *rp = rth; | 2666 | return rth; |
2691 | return 0; | ||
2692 | } | 2667 | } |
2693 | RT_CACHE_STAT_INC(out_hlist_search); | 2668 | RT_CACHE_STAT_INC(out_hlist_search); |
2694 | } | 2669 | } |
2695 | rcu_read_unlock_bh(); | 2670 | rcu_read_unlock_bh(); |
2696 | 2671 | ||
2697 | slow_output: | 2672 | slow_output: |
2698 | rcu_read_lock(); | 2673 | return ip_route_output_slow(net, flp4); |
2699 | res = ip_route_output_slow(net, rp, flp); | ||
2700 | rcu_read_unlock(); | ||
2701 | return res; | ||
2702 | } | 2674 | } |
2703 | EXPORT_SYMBOL_GPL(__ip_route_output_key); | 2675 | EXPORT_SYMBOL_GPL(__ip_route_output_key); |
2704 | 2676 | ||
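__ip_route_output_key() now returns the cache hit directly instead of filling *rp. The fast path is a hash-chain walk over the stored lookup keys (rt_key_dst, rt_key_src, rt_oif, rt_mark, masked tos), performed in the kernel under rcu_read_lock_bh(); a simplified, single-threaded sketch of the walk:

    #include <stdint.h>
    #include <stdio.h>

    struct entry {
        uint32_t key_dst, key_src, oif, mark;
        struct entry *next;
    };

    /* compare the keys the route was created with, not the post-routing
     * addresses; return the hit itself so the caller needs no out-param */
    static struct entry *cache_lookup(struct entry *chain,
                                      uint32_t dst, uint32_t src,
                                      uint32_t oif, uint32_t mark)
    {
        struct entry *e;

        for (e = chain; e; e = e->next)
            if (e->key_dst == dst && e->key_src == src &&
                e->oif == oif && e->mark == mark)
                return e;
        return NULL;   /* caller falls through to ip_route_output_slow() */
    }

    int main(void)
    {
        struct entry b = { 8, 0, 2, 0, NULL };
        struct entry a = { 4, 0, 1, 0, &b };

        printf("%s\n", cache_lookup(&a, 8, 0, 2, 0) ? "hit" : "miss");
        return 0;
    }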
@@ -2726,17 +2698,14 @@ static struct dst_ops ipv4_dst_blackhole_ops = { | |||
2726 | .update_pmtu = ipv4_rt_blackhole_update_pmtu, | 2698 | .update_pmtu = ipv4_rt_blackhole_update_pmtu, |
2727 | }; | 2699 | }; |
2728 | 2700 | ||
2729 | 2701 | struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) | |
2730 | static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi *flp) | ||
2731 | { | 2702 | { |
2732 | struct rtable *ort = *rp; | 2703 | struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, 1); |
2733 | struct rtable *rt = (struct rtable *) | 2704 | struct rtable *ort = (struct rtable *) dst_orig; |
2734 | dst_alloc(&ipv4_dst_blackhole_ops); | ||
2735 | 2705 | ||
2736 | if (rt) { | 2706 | if (rt) { |
2737 | struct dst_entry *new = &rt->dst; | 2707 | struct dst_entry *new = &rt->dst; |
2738 | 2708 | ||
2739 | atomic_set(&new->__refcnt, 1); | ||
2740 | new->__use = 1; | 2709 | new->__use = 1; |
2741 | new->input = dst_discard; | 2710 | new->input = dst_discard; |
2742 | new->output = dst_discard; | 2711 | new->output = dst_discard; |
@@ -2746,7 +2715,12 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi | |||
2746 | if (new->dev) | 2715 | if (new->dev) |
2747 | dev_hold(new->dev); | 2716 | dev_hold(new->dev); |
2748 | 2717 | ||
2749 | rt->fl = ort->fl; | 2718 | rt->rt_key_dst = ort->rt_key_dst; |
2719 | rt->rt_key_src = ort->rt_key_src; | ||
2720 | rt->rt_tos = ort->rt_tos; | ||
2721 | rt->rt_iif = ort->rt_iif; | ||
2722 | rt->rt_oif = ort->rt_oif; | ||
2723 | rt->rt_mark = ort->rt_mark; | ||
2750 | 2724 | ||
2751 | rt->rt_genid = rt_genid(net); | 2725 | rt->rt_genid = rt_genid(net); |
2752 | rt->rt_flags = ort->rt_flags; | 2726 | rt->rt_flags = ort->rt_flags; |
@@ -2759,46 +2733,40 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi | |||
2759 | rt->peer = ort->peer; | 2733 | rt->peer = ort->peer; |
2760 | if (rt->peer) | 2734 | if (rt->peer) |
2761 | atomic_inc(&rt->peer->refcnt); | 2735 | atomic_inc(&rt->peer->refcnt); |
2736 | rt->fi = ort->fi; | ||
2737 | if (rt->fi) | ||
2738 | atomic_inc(&rt->fi->fib_clntref); | ||
2762 | 2739 | ||
2763 | dst_free(new); | 2740 | dst_free(new); |
2764 | } | 2741 | } |
2765 | 2742 | ||
2766 | dst_release(&(*rp)->dst); | 2743 | dst_release(dst_orig); |
2767 | *rp = rt; | 2744 | |
2768 | return rt ? 0 : -ENOMEM; | 2745 | return rt ? &rt->dst : ERR_PTR(-ENOMEM); |
2769 | } | 2746 | } |
2770 | 2747 | ||
2771 | int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, | 2748 | struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, |
2772 | struct sock *sk, int flags) | 2749 | struct sock *sk) |
2773 | { | 2750 | { |
2774 | int err; | 2751 | struct rtable *rt = __ip_route_output_key(net, flp4); |
2775 | 2752 | ||
2776 | if ((err = __ip_route_output_key(net, rp, flp)) != 0) | 2753 | if (IS_ERR(rt)) |
2777 | return err; | 2754 | return rt; |
2778 | 2755 | ||
2779 | if (flp->proto) { | 2756 | if (flp4->flowi4_proto) { |
2780 | if (!flp->fl4_src) | 2757 | if (!flp4->saddr) |
2781 | flp->fl4_src = (*rp)->rt_src; | 2758 | flp4->saddr = rt->rt_src; |
2782 | if (!flp->fl4_dst) | 2759 | if (!flp4->daddr) |
2783 | flp->fl4_dst = (*rp)->rt_dst; | 2760 | flp4->daddr = rt->rt_dst; |
2784 | err = __xfrm_lookup(net, (struct dst_entry **)rp, flp, sk, | 2761 | rt = (struct rtable *) xfrm_lookup(net, &rt->dst, |
2785 | flags ? XFRM_LOOKUP_WAIT : 0); | 2762 | flowi4_to_flowi(flp4), |
2786 | if (err == -EREMOTE) | 2763 | sk, 0); |
2787 | err = ipv4_dst_blackhole(net, rp, flp); | ||
2788 | |||
2789 | return err; | ||
2790 | } | 2764 | } |
2791 | 2765 | ||
2792 | return 0; | 2766 | return rt; |
2793 | } | 2767 | } |
2794 | EXPORT_SYMBOL_GPL(ip_route_output_flow); | 2768 | EXPORT_SYMBOL_GPL(ip_route_output_flow); |
2795 | 2769 | ||
2796 | int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp) | ||
2797 | { | ||
2798 | return ip_route_output_flow(net, rp, flp, NULL, 0); | ||
2799 | } | ||
2800 | EXPORT_SYMBOL(ip_route_output_key); | ||
2801 | |||
2802 | static int rt_fill_info(struct net *net, | 2770 | static int rt_fill_info(struct net *net, |
2803 | struct sk_buff *skb, u32 pid, u32 seq, int event, | 2771 | struct sk_buff *skb, u32 pid, u32 seq, int event, |
2804 | int nowait, unsigned int flags) | 2772 | int nowait, unsigned int flags) |
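ip_route_output_flow() now feeds xfrm_lookup() through flowi4_to_flowi(), which works because the IPv4 key and the generic flow key share their leading members, so family-agnostic code can read the common part of either. A rough userspace sketch of that layout trick; the type and field names are illustrative, not the kernel's exact definitions:

    #include <stdio.h>

    struct flowi_common { int oif, iif; unsigned int mark; };

    struct flowi4 {                 /* v4 key: common part first, then addrs */
        struct flowi_common common;
        unsigned int saddr, daddr;
    };

    union flowi {                   /* generic view shared by all families */
        struct flowi_common common;
        struct flowi4 ip4;
    };

    int main(void)
    {
        union flowi fl = { .ip4 = { .common = { .oif = 2 },
                                    .saddr  = 0x0a000001,
                                    .daddr  = 0x0a000002 } };

        /* family-agnostic code (xfrm_lookup's role) reads the common part */
        printf("oif=%d\n", fl.common.oif);
        /* v4-aware code sees the addresses as well */
        printf("daddr=%#x\n", fl.ip4.daddr);
        return 0;
    }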
@@ -2817,7 +2785,7 @@ static int rt_fill_info(struct net *net, | |||
2817 | r->rtm_family = AF_INET; | 2785 | r->rtm_family = AF_INET; |
2818 | r->rtm_dst_len = 32; | 2786 | r->rtm_dst_len = 32; |
2819 | r->rtm_src_len = 0; | 2787 | r->rtm_src_len = 0; |
2820 | r->rtm_tos = rt->fl.fl4_tos; | 2788 | r->rtm_tos = rt->rt_tos; |
2821 | r->rtm_table = RT_TABLE_MAIN; | 2789 | r->rtm_table = RT_TABLE_MAIN; |
2822 | NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); | 2790 | NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); |
2823 | r->rtm_type = rt->rt_type; | 2791 | r->rtm_type = rt->rt_type; |
@@ -2829,19 +2797,19 @@ static int rt_fill_info(struct net *net, | |||
2829 | 2797 | ||
2830 | NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst); | 2798 | NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst); |
2831 | 2799 | ||
2832 | if (rt->fl.fl4_src) { | 2800 | if (rt->rt_key_src) { |
2833 | r->rtm_src_len = 32; | 2801 | r->rtm_src_len = 32; |
2834 | NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); | 2802 | NLA_PUT_BE32(skb, RTA_SRC, rt->rt_key_src); |
2835 | } | 2803 | } |
2836 | if (rt->dst.dev) | 2804 | if (rt->dst.dev) |
2837 | NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); | 2805 | NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); |
2838 | #ifdef CONFIG_NET_CLS_ROUTE | 2806 | #ifdef CONFIG_IP_ROUTE_CLASSID |
2839 | if (rt->dst.tclassid) | 2807 | if (rt->dst.tclassid) |
2840 | NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); | 2808 | NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); |
2841 | #endif | 2809 | #endif |
2842 | if (rt_is_input_route(rt)) | 2810 | if (rt_is_input_route(rt)) |
2843 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); | 2811 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); |
2844 | else if (rt->rt_src != rt->fl.fl4_src) | 2812 | else if (rt->rt_src != rt->rt_key_src) |
2845 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src); | 2813 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src); |
2846 | 2814 | ||
2847 | if (rt->rt_dst != rt->rt_gateway) | 2815 | if (rt->rt_dst != rt->rt_gateway) |
@@ -2850,11 +2818,12 @@ static int rt_fill_info(struct net *net, | |||
2850 | if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) | 2818 | if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) |
2851 | goto nla_put_failure; | 2819 | goto nla_put_failure; |
2852 | 2820 | ||
2853 | if (rt->fl.mark) | 2821 | if (rt->rt_mark) |
2854 | NLA_PUT_BE32(skb, RTA_MARK, rt->fl.mark); | 2822 | NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark); |
2855 | 2823 | ||
2856 | error = rt->dst.error; | 2824 | error = rt->dst.error; |
2857 | expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; | 2825 | expires = (rt->peer && rt->peer->pmtu_expires) ? |
2826 | rt->peer->pmtu_expires - jiffies : 0; | ||
2858 | if (rt->peer) { | 2827 | if (rt->peer) { |
2859 | inet_peer_refcheck(rt->peer); | 2828 | inet_peer_refcheck(rt->peer); |
2860 | id = atomic_read(&rt->peer->ip_id_count) & 0xffff; | 2829 | id = atomic_read(&rt->peer->ip_id_count) & 0xffff; |
@@ -2884,7 +2853,7 @@ static int rt_fill_info(struct net *net, | |||
2884 | } | 2853 | } |
2885 | } else | 2854 | } else |
2886 | #endif | 2855 | #endif |
2887 | NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); | 2856 | NLA_PUT_U32(skb, RTA_IIF, rt->rt_iif); |
2888 | } | 2857 | } |
2889 | 2858 | ||
2890 | if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, | 2859 | if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, |
@@ -2958,14 +2927,18 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
2958 | if (err == 0 && rt->dst.error) | 2927 | if (err == 0 && rt->dst.error) |
2959 | err = -rt->dst.error; | 2928 | err = -rt->dst.error; |
2960 | } else { | 2929 | } else { |
2961 | struct flowi fl = { | 2930 | struct flowi4 fl4 = { |
2962 | .fl4_dst = dst, | 2931 | .daddr = dst, |
2963 | .fl4_src = src, | 2932 | .saddr = src, |
2964 | .fl4_tos = rtm->rtm_tos, | 2933 | .flowi4_tos = rtm->rtm_tos, |
2965 | .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, | 2934 | .flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, |
2966 | .mark = mark, | 2935 | .flowi4_mark = mark, |
2967 | }; | 2936 | }; |
2968 | err = ip_route_output_key(net, &rt, &fl); | 2937 | rt = ip_route_output_key(net, &fl4); |
2938 | |||
2939 | err = 0; | ||
2940 | if (IS_ERR(rt)) | ||
2941 | err = PTR_ERR(rt); | ||
2969 | } | 2942 | } |
2970 | 2943 | ||
2971 | if (err) | 2944 | if (err) |
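inet_rtm_getroute() now builds its lookup key as an on-stack flowi4 with designated initializers, so every field it does not name starts out zero, and the result goes straight into the pointer-returning ip_route_output_key(). A sketch of the same construction over an abbreviated, hypothetical struct:

    #include <stdio.h>

    struct flowi4_sketch {          /* illustrative subset of the real key */
        unsigned int daddr, saddr;
        unsigned char tos;
        int oif;
        unsigned int mark;
    };

    int main(void)
    {
        int have_oif = 0;

        /* unnamed fields (mark here) are zeroed, so the key starts clean */
        struct flowi4_sketch fl4 = {
            .daddr = 0x0a000001,
            .saddr = 0,
            .tos   = 0x10,
            .oif   = have_oif ? 3 : 0,
        };

        printf("daddr=%#x oif=%d mark=%u\n", fl4.daddr, fl4.oif, fl4.mark);
        return 0;
    }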
@@ -3256,9 +3229,9 @@ static __net_initdata struct pernet_operations rt_genid_ops = { | |||
3256 | }; | 3229 | }; |
3257 | 3230 | ||
3258 | 3231 | ||
3259 | #ifdef CONFIG_NET_CLS_ROUTE | 3232 | #ifdef CONFIG_IP_ROUTE_CLASSID |
3260 | struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; | 3233 | struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; |
3261 | #endif /* CONFIG_NET_CLS_ROUTE */ | 3234 | #endif /* CONFIG_IP_ROUTE_CLASSID */ |
3262 | 3235 | ||
3263 | static __initdata unsigned long rhash_entries; | 3236 | static __initdata unsigned long rhash_entries; |
3264 | static int __init set_rhash_entries(char *str) | 3237 | static int __init set_rhash_entries(char *str) |
@@ -3274,7 +3247,7 @@ int __init ip_rt_init(void) | |||
3274 | { | 3247 | { |
3275 | int rc = 0; | 3248 | int rc = 0; |
3276 | 3249 | ||
3277 | #ifdef CONFIG_NET_CLS_ROUTE | 3250 | #ifdef CONFIG_IP_ROUTE_CLASSID |
3278 | ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); | 3251 | ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); |
3279 | if (!ip_rt_acct) | 3252 | if (!ip_rt_acct) |
3280 | panic("IP: failed to allocate ip_rt_acct\n"); | 3253 | panic("IP: failed to allocate ip_rt_acct\n"); |
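ip_rt_acct remains a per-CPU allocation of 256 accounting slots, just gated by the renamed CONFIG_IP_ROUTE_CLASSID. The point of per-CPU data is that each CPU writes only its own copy, so the hot path never contends, and readers sum across copies. A userspace approximation with a fixed NCPU standing in for the kernel's per-CPU allocator:

    #include <stdio.h>
    #include <stdlib.h>

    #define NCPU 4                  /* stand-in for the real CPU count */

    struct acct { unsigned long bytes, packets; };

    static struct acct *acct_alloc(size_t slots)
    {
        return calloc((size_t)NCPU * slots, sizeof(struct acct));
    }

    /* readers pay the cross-CPU sum; writers stay contention-free */
    static unsigned long sum_bytes(const struct acct *a, size_t slots,
                                   size_t slot)
    {
        unsigned long total = 0;

        for (int cpu = 0; cpu < NCPU; cpu++)
            total += a[cpu * slots + slot].bytes;
        return total;
    }

    int main(void)
    {
        struct acct *a = acct_alloc(256);

        a[0 * 256 + 7].bytes = 100;   /* "CPU 0" accounts into slot 7 */
        a[3 * 256 + 7].bytes = 50;    /* "CPU 3" accounts into slot 7 */
        printf("slot 7: %lu bytes\n", sum_bytes(a, 256, 7));
        free(a);
        return 0;
    }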
@@ -3311,14 +3284,6 @@ int __init ip_rt_init(void) | |||
3311 | devinet_init(); | 3284 | devinet_init(); |
3312 | ip_fib_init(); | 3285 | ip_fib_init(); |
3313 | 3286 | ||
3314 | /* All the timers, started at system startup tend | ||
3315 | to synchronize. Perturb it a bit. | ||
3316 | */ | ||
3317 | INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func); | ||
3318 | expires_ljiffies = jiffies; | ||
3319 | schedule_delayed_work(&expires_work, | ||
3320 | net_random() % ip_rt_gc_interval + ip_rt_gc_interval); | ||
3321 | |||
3322 | if (ip_rt_proc_init()) | 3287 | if (ip_rt_proc_init()) |
3323 | printk(KERN_ERR "Unable to create route proc files\n"); | 3288 | printk(KERN_ERR "Unable to create route proc files\n"); |
3324 | #ifdef CONFIG_XFRM | 3289 | #ifdef CONFIG_XFRM |
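The deleted block at the end is the old expiry worker's startup jitter: scheduling the first run at net_random() % ip_rt_gc_interval + ip_rt_gc_interval picks a delay uniformly in [interval, 2*interval) so that periodic jobs started together at boot don't all fire in lockstep; with the worker itself removed by this series, the perturbation goes with it. A sketch of that jitter formula, with rand() standing in for net_random():

    #include <stdio.h>
    #include <stdlib.h>

    /* first firing lands uniformly in [interval, 2*interval) */
    static unsigned long first_delay(unsigned long interval)
    {
        return (unsigned long)rand() % interval + interval;
    }

    int main(void)
    {
        srand(42);
        for (int i = 0; i < 3; i++)
            printf("worker %d first fires after %lu ticks\n",
                   i, first_delay(60));
        return 0;
    }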