path: root/net/ipv4/route.c
author		Linus Torvalds <torvalds@linux-foundation.org>	2011-03-16 19:29:25 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-03-16 19:29:25 -0400
commit		7a6362800cb7d1d618a697a650c7aaed3eb39320 (patch)
tree		087f9bc6c13ef1fad4b392c5cf9325cd28fa8523 /net/ipv4/route.c
parent		6445ced8670f37cfc2c5e24a9de9b413dbfc788d (diff)
parent		ceda86a108671294052cbf51660097b6534672f5 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6: (1480 commits)
  bonding: enable netpoll without checking link status
  xfrm: Refcount destination entry on xfrm_lookup
  net: introduce rx_handler results and logic around that
  bonding: get rid of IFF_SLAVE_INACTIVE netdev->priv_flag
  bonding: wrap slave state work
  net: get rid of multiple bond-related netdevice->priv_flags
  bonding: register slave pointer for rx_handler
  be2net: Bump up the version number
  be2net: Copyright notice change. Update to Emulex instead of ServerEngines
  e1000e: fix kconfig for crc32 dependency
  netfilter ebtables: fix xt_AUDIT to work with ebtables
  xen network backend driver
  bonding: Improve syslog message at device creation time
  bonding: Call netif_carrier_off after register_netdevice
  bonding: Incorrect TX queue offset
  net_sched: fix ip_tos2prio
  xfrm: fix __xfrm_route_forward()
  be2net: Fix UDP packet detected status in RX compl
  Phonet: fix aligned-mode pipe socket buffer header reserve
  netxen: support for GbE port settings
  ...

Fix up conflicts in drivers/staging/brcm80211/brcmsmac/wl_mac80211.c with
the staging updates.
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r--	net/ipv4/route.c	1181
1 file changed, 573 insertions(+), 608 deletions(-)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6ed6603c2f6d..870b5182ddd8 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -109,8 +109,8 @@
 #include <linux/sysctl.h>
 #endif
 
-#define RT_FL_TOS(oldflp) \
-	((u32)(oldflp->fl4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))
+#define RT_FL_TOS(oldflp4) \
+	((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))
 
 #define IP_MAX_MTU	0xFFF0
 
@@ -131,9 +131,6 @@ static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
 static int ip_rt_min_advmss __read_mostly	= 256;
 static int rt_chain_length_max __read_mostly	= 20;
 
-static struct delayed_work expires_work;
-static unsigned long expires_ljiffies;
-
 /*
  *	Interface to generic destination cache.
  */
@@ -152,6 +149,41 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 {
 }
 
+static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
+{
+	struct rtable *rt = (struct rtable *) dst;
+	struct inet_peer *peer;
+	u32 *p = NULL;
+
+	if (!rt->peer)
+		rt_bind_peer(rt, 1);
+
+	peer = rt->peer;
+	if (peer) {
+		u32 *old_p = __DST_METRICS_PTR(old);
+		unsigned long prev, new;
+
+		p = peer->metrics;
+		if (inet_metrics_new(peer))
+			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+
+		new = (unsigned long) p;
+		prev = cmpxchg(&dst->_metrics, old, new);
+
+		if (prev != old) {
+			p = __DST_METRICS_PTR(prev);
+			if (prev & DST_METRICS_READ_ONLY)
+				p = NULL;
+		} else {
+			if (rt->fi) {
+				fib_info_put(rt->fi);
+				rt->fi = NULL;
+			}
+		}
+	}
+	return p;
+}
+
 static struct dst_ops ipv4_dst_ops = {
 	.family =		AF_INET,
 	.protocol =		cpu_to_be16(ETH_P_IP),
@@ -159,6 +191,7 @@ static struct dst_ops ipv4_dst_ops = {
 	.check =		ipv4_dst_check,
 	.default_advmss =	ipv4_default_advmss,
 	.default_mtu =		ipv4_default_mtu,
+	.cow_metrics =		ipv4_cow_metrics,
 	.destroy =		ipv4_dst_destroy,
 	.ifdown =		ipv4_dst_ifdown,
 	.negative_advice =	ipv4_negative_advice,
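The ipv4_cow_metrics() hook added above is a copy-on-write step: the route's shared, read-only metrics array is copied into the peer's private slot, and a cmpxchg() on dst->_metrics publishes the copy so that exactly one writer wins while losers adopt the winning pointer. A minimal userspace sketch of the same publish step, using C11 atomics in place of the kernel's cmpxchg() (all names here are ours, not the kernel's):

	#include <stdatomic.h>
	#include <string.h>

	#define N_METRICS 16              /* stands in for RTAX_MAX */
	#define READ_ONLY_BIT 0x1UL       /* low tag bit marks a shared array */

	/* Copy the old (read-only) array into 'priv', then try to publish it.
	 * On failure somebody else already installed a pointer; use theirs. */
	static unsigned long cow_publish(_Atomic unsigned long *slot,
	                                 unsigned long old,
	                                 unsigned int priv[N_METRICS])
	{
		memcpy(priv, (void *)(old & ~READ_ONLY_BIT),
		       sizeof(unsigned int) * N_METRICS);
		unsigned long new = (unsigned long)priv; /* untagged => writable */
		if (atomic_compare_exchange_strong(slot, &old, new))
			return new;   /* our copy is now the live array */
		return old;           /* lost the race; 'old' holds the winner */
	}

Note that atomic_compare_exchange_strong() reloads 'old' with the observed value on failure, which mirrors how ipv4_cow_metrics() re-reads prev and falls back to it.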
@@ -171,7 +204,7 @@ static struct dst_ops ipv4_dst_ops = {
 
 const __u8 ip_tos2prio[16] = {
 	TC_PRIO_BESTEFFORT,
-	ECN_OR_COST(FILLER),
+	ECN_OR_COST(BESTEFFORT),
 	TC_PRIO_BESTEFFORT,
 	ECN_OR_COST(BESTEFFORT),
 	TC_PRIO_BULK,
@@ -391,7 +424,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
 			dst_metric(&r->dst, RTAX_WINDOW),
 			(int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
 			      dst_metric(&r->dst, RTAX_RTTVAR)),
-			r->fl.fl4_tos,
+			r->rt_tos,
 			r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1,
 			r->dst.hh ? (r->dst.hh->hh_output ==
				     dev_queue_xmit) : 0,
@@ -514,7 +547,7 @@ static const struct file_operations rt_cpu_seq_fops = {
 	.release = seq_release,
 };
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 static int rt_acct_proc_show(struct seq_file *m, void *v)
 {
 	struct ip_rt_acct *dst, *src;
@@ -567,14 +600,14 @@ static int __net_init ip_rt_do_proc_init(struct net *net)
 	if (!pde)
 		goto err2;
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
 	if (!pde)
 		goto err3;
 #endif
 	return 0;
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 err3:
 	remove_proc_entry("rt_cache", net->proc_net_stat);
 #endif
@@ -588,7 +621,7 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net)
 {
 	remove_proc_entry("rt_cache", net->proc_net_stat);
 	remove_proc_entry("rt_cache", net->proc_net);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	remove_proc_entry("rt_acct", net->proc_net);
 #endif
 }
@@ -632,7 +665,7 @@ static inline int rt_fast_clean(struct rtable *rth)
 static inline int rt_valuable(struct rtable *rth)
 {
 	return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
-		rth->dst.expires;
+		(rth->peer && rth->peer->pmtu_expires);
 }
 
 static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
@@ -643,13 +676,7 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t
 	if (atomic_read(&rth->dst.__refcnt))
 		goto out;
 
-	ret = 1;
-	if (rth->dst.expires &&
-	    time_after_eq(jiffies, rth->dst.expires))
-		goto out;
-
 	age = jiffies - rth->dst.lastuse;
-	ret = 0;
 	if ((age <= tmo1 && !rt_fast_clean(rth)) ||
 	    (age <= tmo2 && rt_valuable(rth)))
 		goto out;
@@ -684,22 +711,22 @@ static inline bool rt_caching(const struct net *net)
 		net->ipv4.sysctl_rt_cache_rebuild_count;
 }
 
-static inline bool compare_hash_inputs(const struct flowi *fl1,
-				       const struct flowi *fl2)
+static inline bool compare_hash_inputs(const struct rtable *rt1,
+				       const struct rtable *rt2)
 {
-	return ((((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
-		((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
-		(fl1->iif ^ fl2->iif)) == 0);
+	return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
+		((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
+		(rt1->rt_iif ^ rt2->rt_iif)) == 0);
 }
 
-static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
+static inline int compare_keys(struct rtable *rt1, struct rtable *rt2)
 {
-	return (((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
-		((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
-		(fl1->mark ^ fl2->mark) |
-		(*(u16 *)&fl1->fl4_tos ^ *(u16 *)&fl2->fl4_tos) |
-		(fl1->oif ^ fl2->oif) |
-		(fl1->iif ^ fl2->iif)) == 0;
+	return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
+		((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
+		(rt1->rt_mark ^ rt2->rt_mark) |
+		(rt1->rt_tos ^ rt2->rt_tos) |
+		(rt1->rt_oif ^ rt2->rt_oif) |
+		(rt1->rt_iif ^ rt2->rt_iif)) == 0;
 }
 
 static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
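compare_keys() and compare_hash_inputs() above deliberately avoid short-circuit logic: each field pair is XORed and the partial results are ORed together, so equality reduces to a single branch-free test against zero. The same trick in a self-contained form (struct and field names are ours, not the kernel's):

	#include <stdint.h>

	struct rt_key {
		uint32_t dst, src, mark, oif, iif;
		uint8_t  tos;
	};

	/* Equal iff every XOR is zero, i.e. the OR of all XORs is zero. */
	static int keys_equal(const struct rt_key *a, const struct rt_key *b)
	{
		return ((a->dst ^ b->dst) |
			(a->src ^ b->src) |
			(a->mark ^ b->mark) |
			(uint32_t)(a->tos ^ b->tos) |
			(a->oif ^ b->oif) |
			(a->iif ^ b->iif)) == 0;
	}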
@@ -786,104 +813,13 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
 	const struct rtable *aux = head;
 
 	while (aux != rth) {
-		if (compare_hash_inputs(&aux->fl, &rth->fl))
+		if (compare_hash_inputs(aux, rth))
 			return 0;
 		aux = rcu_dereference_protected(aux->dst.rt_next, 1);
 	}
 	return 1;
 }
 
-static void rt_check_expire(void)
-{
-	static unsigned int rover;
-	unsigned int i = rover, goal;
-	struct rtable *rth;
-	struct rtable __rcu **rthp;
-	unsigned long samples = 0;
-	unsigned long sum = 0, sum2 = 0;
-	unsigned long delta;
-	u64 mult;
-
-	delta = jiffies - expires_ljiffies;
-	expires_ljiffies = jiffies;
-	mult = ((u64)delta) << rt_hash_log;
-	if (ip_rt_gc_timeout > 1)
-		do_div(mult, ip_rt_gc_timeout);
-	goal = (unsigned int)mult;
-	if (goal > rt_hash_mask)
-		goal = rt_hash_mask + 1;
-	for (; goal > 0; goal--) {
-		unsigned long tmo = ip_rt_gc_timeout;
-		unsigned long length;
-
-		i = (i + 1) & rt_hash_mask;
-		rthp = &rt_hash_table[i].chain;
-
-		if (need_resched())
-			cond_resched();
-
-		samples++;
-
-		if (rcu_dereference_raw(*rthp) == NULL)
-			continue;
-		length = 0;
-		spin_lock_bh(rt_hash_lock_addr(i));
-		while ((rth = rcu_dereference_protected(*rthp,
-					lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
-			prefetch(rth->dst.rt_next);
-			if (rt_is_expired(rth)) {
-				*rthp = rth->dst.rt_next;
-				rt_free(rth);
-				continue;
-			}
-			if (rth->dst.expires) {
-				/* Entry is expired even if it is in use */
-				if (time_before_eq(jiffies, rth->dst.expires)) {
-nofree:
-					tmo >>= 1;
-					rthp = &rth->dst.rt_next;
-					/*
-					 * We only count entries on
-					 * a chain with equal hash inputs once
-					 * so that entries for different QOS
-					 * levels, and other non-hash input
-					 * attributes don't unfairly skew
-					 * the length computation
-					 */
-					length += has_noalias(rt_hash_table[i].chain, rth);
-					continue;
-				}
-			} else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
-				goto nofree;
-
-			/* Cleanup aged off entries. */
-			*rthp = rth->dst.rt_next;
-			rt_free(rth);
-		}
-		spin_unlock_bh(rt_hash_lock_addr(i));
-		sum += length;
-		sum2 += length*length;
-	}
-	if (samples) {
-		unsigned long avg = sum / samples;
-		unsigned long sd = int_sqrt(sum2 / samples - avg*avg);
-		rt_chain_length_max = max_t(unsigned long,
-					    ip_rt_gc_elasticity,
-					    (avg + 4*sd) >> FRACT_BITS);
-	}
-	rover = i;
-}
-
-/*
- * rt_worker_func() is run in process context.
- * we call rt_check_expire() to scan part of the hash table
- */
-static void rt_worker_func(struct work_struct *work)
-{
-	rt_check_expire();
-	schedule_delayed_work(&expires_work, ip_rt_gc_interval);
-}
-
 /*
  * Pertubation of rt_genid by a small quantity [1..256]
  * Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
@@ -1078,8 +1014,8 @@ static int slow_chain_length(const struct rtable *head)
 	return length >> FRACT_BITS;
 }
 
-static int rt_intern_hash(unsigned hash, struct rtable *rt,
-			  struct rtable **rp, struct sk_buff *skb, int ifindex)
+static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt,
+				     struct sk_buff *skb, int ifindex)
 {
 	struct rtable	*rth, *cand;
 	struct rtable __rcu **rthp, **candp;
@@ -1120,7 +1056,7 @@ restart:
 			printk(KERN_WARNING
 				"Neighbour table failure & not caching routes.\n");
 			ip_rt_put(rt);
-			return err;
+			return ERR_PTR(err);
 		}
 	}
 
@@ -1137,7 +1073,7 @@ restart:
 			rt_free(rth);
 			continue;
 		}
-		if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) {
+		if (compare_keys(rth, rt) && compare_netns(rth, rt)) {
 			/* Put it first */
 			*rthp = rth->dst.rt_next;
 			/*
@@ -1157,11 +1093,9 @@ restart:
 			spin_unlock_bh(rt_hash_lock_addr(hash));
 
 			rt_drop(rt);
-			if (rp)
-				*rp = rth;
-			else
+			if (skb)
 				skb_dst_set(skb, &rth->dst);
-			return 0;
+			return rth;
 		}
 
 		if (!atomic_read(&rth->dst.__refcnt)) {
@@ -1202,7 +1136,7 @@ restart:
 			rt_emergency_hash_rebuild(net);
 			spin_unlock_bh(rt_hash_lock_addr(hash));
 
-			hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
+			hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
 					ifindex, rt_genid(net));
 			goto restart;
 		}
@@ -1218,7 +1152,7 @@ restart:
 
 		if (err != -ENOBUFS) {
 			rt_drop(rt);
-			return err;
+			return ERR_PTR(err);
 		}
 
 		/* Neighbour tables are full and nothing
@@ -1239,7 +1173,7 @@ restart:
 			if (net_ratelimit())
 				printk(KERN_WARNING "ipv4: Neighbour table overflow.\n");
 			rt_drop(rt);
-			return -ENOBUFS;
+			return ERR_PTR(-ENOBUFS);
 		}
 	}
 
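From here on rt_intern_hash() reports failure through the returned pointer itself instead of the old (int err, struct rtable **rp) pair, using the kernel's ERR_PTR()/IS_ERR()/PTR_ERR() idiom: small negative errno values are smuggled into the top page of the address space. A hedged userspace rendition of that encoding (the helper names and MAX_ERRNO value follow the kernel convention, but this is an illustration, not the kernel implementation):

	#define MAX_ERRNO 4095

	static inline void *err_ptr(long err)      { return (void *)err; }
	static inline long  ptr_err(const void *p) { return (long)p; }
	static inline int   is_err(const void *p)
	{
		return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
	}

	/* Callers then test once instead of threading a separate int:
	 *
	 *	struct rtable *rth = rt_intern_hash(hash, rt, skb, ifindex);
	 *	if (is_err(rth))
	 *		return ptr_err(rth);
	 */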
@@ -1265,11 +1199,16 @@ restart:
 	spin_unlock_bh(rt_hash_lock_addr(hash));
 
 skip_hashing:
-	if (rp)
-		*rp = rt;
-	else
+	if (skb)
 		skb_dst_set(skb, &rt->dst);
-	return 0;
+	return rt;
+}
+
+static atomic_t __rt_peer_genid = ATOMIC_INIT(0);
+
+static u32 rt_peer_genid(void)
+{
+	return atomic_read(&__rt_peer_genid);
 }
 
 void rt_bind_peer(struct rtable *rt, int create)
@@ -1280,6 +1219,8 @@ void rt_bind_peer(struct rtable *rt, int create)
 
 	if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
 		inet_putpeer(peer);
+	else
+		rt->rt_peer_genid = rt_peer_genid();
 }
 
 /*
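__rt_peer_genid, introduced just above, is a global generation counter: the redirect and PMTU paths bump it once, and every cached rtable that stamped itself with an older value notices the mismatch lazily, the next time ipv4_dst_check() runs. A small sketch of the pattern with invented names:

	#include <stdatomic.h>
	#include <stdbool.h>

	static _Atomic unsigned int genid;     /* global generation */

	struct cache_entry {
		unsigned int genid_seen;       /* generation this entry last saw */
	};

	/* O(1) invalidation of every entry: just bump the counter. */
	static void invalidate_all(void)
	{
		atomic_fetch_add(&genid, 1);
	}

	/* Readers revalidate lazily, as ipv4_dst_check() does. */
	static bool entry_current(struct cache_entry *e)
	{
		unsigned int now = atomic_load(&genid);
		if (e->genid_seen == now)
			return true;
		/* ...recheck peer-derived state here, then re-stamp... */
		e->genid_seen = now;
		return false;
	}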
@@ -1349,13 +1290,8 @@ static void rt_del(unsigned hash, struct rtable *rt)
 void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 		    __be32 saddr, struct net_device *dev)
 {
-	int i, k;
 	struct in_device *in_dev = __in_dev_get_rcu(dev);
-	struct rtable *rth;
-	struct rtable __rcu **rthp;
-	__be32 skeys[2] = { saddr, 0 };
-	int ikeys[2] = { dev->ifindex, 0 };
-	struct netevent_redirect netevent;
+	struct inet_peer *peer;
 	struct net *net;
 
 	if (!in_dev)
@@ -1367,9 +1303,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 	    ipv4_is_zeronet(new_gw))
 		goto reject_redirect;
 
-	if (!rt_caching(net))
-		goto reject_redirect;
-
 	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
 		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
 			goto reject_redirect;
@@ -1380,91 +1313,13 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 			goto reject_redirect;
 	}
 
-	for (i = 0; i < 2; i++) {
-		for (k = 0; k < 2; k++) {
-			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
-						rt_genid(net));
-
-			rthp = &rt_hash_table[hash].chain;
-
-			while ((rth = rcu_dereference(*rthp)) != NULL) {
-				struct rtable *rt;
-
-				if (rth->fl.fl4_dst != daddr ||
-				    rth->fl.fl4_src != skeys[i] ||
-				    rth->fl.oif != ikeys[k] ||
-				    rt_is_input_route(rth) ||
-				    rt_is_expired(rth) ||
-				    !net_eq(dev_net(rth->dst.dev), net)) {
-					rthp = &rth->dst.rt_next;
-					continue;
-				}
-
-				if (rth->rt_dst != daddr ||
-				    rth->rt_src != saddr ||
-				    rth->dst.error ||
-				    rth->rt_gateway != old_gw ||
-				    rth->dst.dev != dev)
-					break;
-
-				dst_hold(&rth->dst);
-
-				rt = dst_alloc(&ipv4_dst_ops);
-				if (rt == NULL) {
-					ip_rt_put(rth);
-					return;
-				}
-
-				/* Copy all the information. */
-				*rt = *rth;
-				rt->dst.__use = 1;
-				atomic_set(&rt->dst.__refcnt, 1);
-				rt->dst.child = NULL;
-				if (rt->dst.dev)
-					dev_hold(rt->dst.dev);
-				rt->dst.obsolete = -1;
-				rt->dst.lastuse = jiffies;
-				rt->dst.path = &rt->dst;
-				rt->dst.neighbour = NULL;
-				rt->dst.hh = NULL;
-#ifdef CONFIG_XFRM
-				rt->dst.xfrm = NULL;
-#endif
-				rt->rt_genid = rt_genid(net);
-				rt->rt_flags |= RTCF_REDIRECTED;
-
-				/* Gateway is different ... */
-				rt->rt_gateway = new_gw;
-
-				/* Redirect received -> path was valid */
-				dst_confirm(&rth->dst);
-
-				if (rt->peer)
-					atomic_inc(&rt->peer->refcnt);
-
-				if (arp_bind_neighbour(&rt->dst) ||
-				    !(rt->dst.neighbour->nud_state &
-					    NUD_VALID)) {
-					if (rt->dst.neighbour)
-						neigh_event_send(rt->dst.neighbour, NULL);
-					ip_rt_put(rth);
-					rt_drop(rt);
-					goto do_next;
-				}
+	peer = inet_getpeer_v4(daddr, 1);
+	if (peer) {
+		peer->redirect_learned.a4 = new_gw;
 
-				netevent.old = &rth->dst;
-				netevent.new = &rt->dst;
-				call_netevent_notifiers(NETEVENT_REDIRECT,
-							&netevent);
+		inet_putpeer(peer);
 
-				rt_del(hash, rth);
-				if (!rt_intern_hash(hash, rt, &rt, NULL, rt->fl.oif))
-					ip_rt_put(rt);
-				goto do_next;
-			}
-		do_next:
-			;
-		}
+		atomic_inc(&__rt_peer_genid);
 	}
 	return;
 
@@ -1488,18 +1343,24 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
 		if (dst->obsolete > 0) {
 			ip_rt_put(rt);
 			ret = NULL;
-		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
-			   (rt->dst.expires &&
-			    time_after_eq(jiffies, rt->dst.expires))) {
-			unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
-						rt->fl.oif,
+		} else if (rt->rt_flags & RTCF_REDIRECTED) {
+			unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
+						rt->rt_oif,
 						rt_genid(dev_net(dst->dev)));
 #if RT_CACHE_DEBUG >= 1
 			printk(KERN_DEBUG "ipv4_negative_advice: redirect to %pI4/%02x dropped\n",
-				&rt->rt_dst, rt->fl.fl4_tos);
+				&rt->rt_dst, rt->rt_tos);
 #endif
 			rt_del(hash, rt);
 			ret = NULL;
+		} else if (rt->peer &&
+			   rt->peer->pmtu_expires &&
+			   time_after_eq(jiffies, rt->peer->pmtu_expires)) {
+			unsigned long orig = rt->peer->pmtu_expires;
+
+			if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig)
+				dst_metric_set(dst, RTAX_MTU,
+					       rt->peer->pmtu_orig);
 		}
 	}
 	return ret;
@@ -1525,6 +1386,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 {
 	struct rtable *rt = skb_rtable(skb);
 	struct in_device *in_dev;
+	struct inet_peer *peer;
 	int log_martians;
 
 	rcu_read_lock();
@@ -1536,33 +1398,41 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
 	rcu_read_unlock();
 
+	if (!rt->peer)
+		rt_bind_peer(rt, 1);
+	peer = rt->peer;
+	if (!peer) {
+		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
+		return;
+	}
+
 	/* No redirected packets during ip_rt_redirect_silence;
 	 * reset the algorithm.
 	 */
-	if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence))
-		rt->dst.rate_tokens = 0;
+	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
+		peer->rate_tokens = 0;
 
 	/* Too many ignored redirects; do not send anything
 	 * set dst.rate_last to the last seen redirected packet.
 	 */
-	if (rt->dst.rate_tokens >= ip_rt_redirect_number) {
-		rt->dst.rate_last = jiffies;
+	if (peer->rate_tokens >= ip_rt_redirect_number) {
+		peer->rate_last = jiffies;
 		return;
 	}
 
 	/* Check for load limit; set rate_last to the latest sent
 	 * redirect.
 	 */
-	if (rt->dst.rate_tokens == 0 ||
+	if (peer->rate_tokens == 0 ||
 	    time_after(jiffies,
-		       (rt->dst.rate_last +
-			(ip_rt_redirect_load << rt->dst.rate_tokens)))) {
+		       (peer->rate_last +
+			(ip_rt_redirect_load << peer->rate_tokens)))) {
 		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
-		rt->dst.rate_last = jiffies;
-		++rt->dst.rate_tokens;
+		peer->rate_last = jiffies;
+		++peer->rate_tokens;
 #ifdef CONFIG_IP_ROUTE_VERBOSE
 		if (log_martians &&
-		    rt->dst.rate_tokens == ip_rt_redirect_number &&
+		    peer->rate_tokens == ip_rt_redirect_number &&
 		    net_ratelimit())
 			printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
 				&rt->rt_src, rt->rt_iif,
@@ -1574,7 +1444,9 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 static int ip_error(struct sk_buff *skb)
 {
 	struct rtable *rt = skb_rtable(skb);
+	struct inet_peer *peer;
 	unsigned long now;
+	bool send;
 	int code;
 
 	switch (rt->dst.error) {
@@ -1594,15 +1466,24 @@ static int ip_error(struct sk_buff *skb)
 		break;
 	}
 
-	now = jiffies;
-	rt->dst.rate_tokens += now - rt->dst.rate_last;
-	if (rt->dst.rate_tokens > ip_rt_error_burst)
-		rt->dst.rate_tokens = ip_rt_error_burst;
-	rt->dst.rate_last = now;
-	if (rt->dst.rate_tokens >= ip_rt_error_cost) {
-		rt->dst.rate_tokens -= ip_rt_error_cost;
-		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
+	if (!rt->peer)
+		rt_bind_peer(rt, 1);
+	peer = rt->peer;
+
+	send = true;
+	if (peer) {
+		now = jiffies;
+		peer->rate_tokens += now - peer->rate_last;
+		if (peer->rate_tokens > ip_rt_error_burst)
+			peer->rate_tokens = ip_rt_error_burst;
+		peer->rate_last = now;
+		if (peer->rate_tokens >= ip_rt_error_cost)
+			peer->rate_tokens -= ip_rt_error_cost;
+		else
+			send = false;
 	}
+	if (send)
+		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
 
 out:	kfree_skb(skb);
 	return 0;
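With this change the ICMP-unreachable rate limiter in ip_error() lives on the shared inet_peer, so all cached routes toward one destination draw from a single token bucket instead of one bucket per dst entry. A standalone sketch of the bucket's arithmetic, with made-up constants standing in for the ip_rt_error_burst and ip_rt_error_cost sysctls:

	#include <stdbool.h>

	#define ERROR_BURST 500   /* assumption: stand-in for ip_rt_error_burst */
	#define ERROR_COST  100   /* assumption: stand-in for ip_rt_error_cost */

	struct bucket {
		unsigned long rate_tokens;
		unsigned long rate_last;   /* time of last refill, in ticks */
	};

	/* Refill by elapsed time, clamp to the burst, pay one cost per send. */
	static bool may_send(struct bucket *b, unsigned long now)
	{
		b->rate_tokens += now - b->rate_last;
		if (b->rate_tokens > ERROR_BURST)
			b->rate_tokens = ERROR_BURST;
		b->rate_last = now;
		if (b->rate_tokens >= ERROR_COST) {
			b->rate_tokens -= ERROR_COST;
			return true;
		}
		return false;
	}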
@@ -1630,88 +1511,142 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 				 unsigned short new_mtu,
 				 struct net_device *dev)
 {
-	int i, k;
 	unsigned short old_mtu = ntohs(iph->tot_len);
-	struct rtable *rth;
-	int ikeys[2] = { dev->ifindex, 0 };
-	__be32 skeys[2] = { iph->saddr, 0, };
-	__be32 daddr = iph->daddr;
 	unsigned short est_mtu = 0;
+	struct inet_peer *peer;
 
-	for (k = 0; k < 2; k++) {
-		for (i = 0; i < 2; i++) {
-			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
-						rt_genid(net));
-
-			rcu_read_lock();
-			for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
-			     rth = rcu_dereference(rth->dst.rt_next)) {
-				unsigned short mtu = new_mtu;
-
-				if (rth->fl.fl4_dst != daddr ||
-				    rth->fl.fl4_src != skeys[i] ||
-				    rth->rt_dst != daddr ||
-				    rth->rt_src != iph->saddr ||
-				    rth->fl.oif != ikeys[k] ||
-				    rt_is_input_route(rth) ||
-				    dst_metric_locked(&rth->dst, RTAX_MTU) ||
-				    !net_eq(dev_net(rth->dst.dev), net) ||
-				    rt_is_expired(rth))
-					continue;
+	peer = inet_getpeer_v4(iph->daddr, 1);
+	if (peer) {
+		unsigned short mtu = new_mtu;
 
-				if (new_mtu < 68 || new_mtu >= old_mtu) {
+		if (new_mtu < 68 || new_mtu >= old_mtu) {
+			/* BSD 4.2 derived systems incorrectly adjust
+			 * tot_len by the IP header length, and report
+			 * a zero MTU in the ICMP message.
+			 */
+			if (mtu == 0 &&
+			    old_mtu >= 68 + (iph->ihl << 2))
+				old_mtu -= iph->ihl << 2;
+			mtu = guess_mtu(old_mtu);
+		}
 
-					/* BSD 4.2 compatibility hack :-( */
-					if (mtu == 0 &&
-					    old_mtu >= dst_mtu(&rth->dst) &&
-					    old_mtu >= 68 + (iph->ihl << 2))
-						old_mtu -= iph->ihl << 2;
-
-					mtu = guess_mtu(old_mtu);
-				}
-				if (mtu <= dst_mtu(&rth->dst)) {
-					if (mtu < dst_mtu(&rth->dst)) {
-						dst_confirm(&rth->dst);
-						if (mtu < ip_rt_min_pmtu) {
-							u32 lock = dst_metric(&rth->dst,
-									      RTAX_LOCK);
-							mtu = ip_rt_min_pmtu;
-							lock |= (1 << RTAX_MTU);
-							dst_metric_set(&rth->dst, RTAX_LOCK,
-								       lock);
-						}
-						dst_metric_set(&rth->dst, RTAX_MTU, mtu);
-						dst_set_expires(&rth->dst,
-								ip_rt_mtu_expires);
-					}
-					est_mtu = mtu;
-				}
-			}
-			rcu_read_unlock();
+		if (mtu < ip_rt_min_pmtu)
+			mtu = ip_rt_min_pmtu;
+		if (!peer->pmtu_expires || mtu < peer->pmtu_learned) {
+			unsigned long pmtu_expires;
+
+			pmtu_expires = jiffies + ip_rt_mtu_expires;
+			if (!pmtu_expires)
+				pmtu_expires = 1UL;
+
+			est_mtu = mtu;
+			peer->pmtu_learned = mtu;
+			peer->pmtu_expires = pmtu_expires;
 		}
+
+		inet_putpeer(peer);
+
+		atomic_inc(&__rt_peer_genid);
 	}
 	return est_mtu ? : new_mtu;
 }
 
+static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer)
+{
+	unsigned long expires = peer->pmtu_expires;
+
+	if (time_before(jiffies, expires)) {
+		u32 orig_dst_mtu = dst_mtu(dst);
+		if (peer->pmtu_learned < orig_dst_mtu) {
+			if (!peer->pmtu_orig)
+				peer->pmtu_orig = dst_metric_raw(dst, RTAX_MTU);
+			dst_metric_set(dst, RTAX_MTU, peer->pmtu_learned);
+		}
+	} else if (cmpxchg(&peer->pmtu_expires, expires, 0) == expires)
+		dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig);
+}
+
 static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 {
-	if (dst_mtu(dst) > mtu && mtu >= 68 &&
-	    !(dst_metric_locked(dst, RTAX_MTU))) {
-		if (mtu < ip_rt_min_pmtu) {
-			u32 lock = dst_metric(dst, RTAX_LOCK);
+	struct rtable *rt = (struct rtable *) dst;
+	struct inet_peer *peer;
+
+	dst_confirm(dst);
+
+	if (!rt->peer)
+		rt_bind_peer(rt, 1);
+	peer = rt->peer;
+	if (peer) {
+		if (mtu < ip_rt_min_pmtu)
 			mtu = ip_rt_min_pmtu;
-			dst_metric_set(dst, RTAX_LOCK, lock | (1 << RTAX_MTU));
+		if (!peer->pmtu_expires || mtu < peer->pmtu_learned) {
+			unsigned long pmtu_expires;
+
+			pmtu_expires = jiffies + ip_rt_mtu_expires;
+			if (!pmtu_expires)
+				pmtu_expires = 1UL;
+
+			peer->pmtu_learned = mtu;
+			peer->pmtu_expires = pmtu_expires;
+
+			atomic_inc(&__rt_peer_genid);
+			rt->rt_peer_genid = rt_peer_genid();
 		}
-		dst_metric_set(dst, RTAX_MTU, mtu);
-		dst_set_expires(dst, ip_rt_mtu_expires);
-		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
+		check_peer_pmtu(dst, peer);
+
+		inet_putpeer(peer);
+	}
+}
+
+static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
+{
+	struct rtable *rt = (struct rtable *) dst;
+	__be32 orig_gw = rt->rt_gateway;
+
+	dst_confirm(&rt->dst);
+
+	neigh_release(rt->dst.neighbour);
+	rt->dst.neighbour = NULL;
+
+	rt->rt_gateway = peer->redirect_learned.a4;
+	if (arp_bind_neighbour(&rt->dst) ||
+	    !(rt->dst.neighbour->nud_state & NUD_VALID)) {
+		if (rt->dst.neighbour)
+			neigh_event_send(rt->dst.neighbour, NULL);
+		rt->rt_gateway = orig_gw;
+		return -EAGAIN;
+	} else {
+		rt->rt_flags |= RTCF_REDIRECTED;
+		call_netevent_notifiers(NETEVENT_NEIGH_UPDATE,
+					rt->dst.neighbour);
 	}
+	return 0;
 }
 
 static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
 {
-	if (rt_is_expired((struct rtable *)dst))
+	struct rtable *rt = (struct rtable *) dst;
+
+	if (rt_is_expired(rt))
 		return NULL;
+	if (rt->rt_peer_genid != rt_peer_genid()) {
+		struct inet_peer *peer;
+
+		if (!rt->peer)
+			rt_bind_peer(rt, 0);
+
+		peer = rt->peer;
+		if (peer && peer->pmtu_expires)
+			check_peer_pmtu(dst, peer);
+
+		if (peer && peer->redirect_learned.a4 &&
+		    peer->redirect_learned.a4 != rt->rt_gateway) {
+			if (check_peer_redir(dst, peer))
+				return NULL;
+		}
+
+		rt->rt_peer_genid = rt_peer_genid();
+	}
 	return dst;
 }
 
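check_peer_pmtu() above encodes "no learned PMTU" as pmtu_expires == 0 (the learn paths force 1UL if jiffies + ip_rt_mtu_expires happens to wrap to zero) and clears an expired entry with cmpxchg(), so the original MTU is restored exactly once however many CPUs race past the deadline. An illustrative userspace version of that expiry check (names and types are ours):

	#include <stdatomic.h>

	struct pmtu_state {
		_Atomic unsigned long expires;   /* 0 means nothing learned */
		unsigned int learned, orig;
	};

	/* Returns the MTU to honour at time 'now'. Only the cmpxchg winner
	 * performs the one-shot restore to 'orig'. */
	static unsigned int effective_mtu(struct pmtu_state *p,
	                                  unsigned long now,
	                                  unsigned int dst_mtu)
	{
		unsigned long exp = atomic_load(&p->expires);

		if (exp && now < exp)
			return p->learned < dst_mtu ? p->learned : dst_mtu;
		if (exp && atomic_compare_exchange_strong(&p->expires, &exp, 0))
			return p->orig;          /* we did the reset */
		return dst_mtu;                  /* nothing learned, or lost race */
	}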
@@ -1720,6 +1655,10 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
 	struct rtable *rt = (struct rtable *) dst;
 	struct inet_peer *peer = rt->peer;
 
+	if (rt->fi) {
+		fib_info_put(rt->fi);
+		rt->fi = NULL;
+	}
 	if (peer) {
 		rt->peer = NULL;
 		inet_putpeer(peer);
@@ -1734,8 +1673,14 @@ static void ipv4_link_failure(struct sk_buff *skb)
 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
 
 	rt = skb_rtable(skb);
-	if (rt)
-		dst_set_expires(&rt->dst, 0);
+	if (rt &&
+	    rt->peer &&
+	    rt->peer->pmtu_expires) {
+		unsigned long orig = rt->peer->pmtu_expires;
+
+		if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig)
+			dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig);
+	}
 }
 
 static int ip_rt_bug(struct sk_buff *skb)
@@ -1764,8 +1709,17 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
 	if (rt_is_output_route(rt))
 		src = rt->rt_src;
 	else {
+		struct flowi4 fl4 = {
+			.daddr = rt->rt_key_dst,
+			.saddr = rt->rt_key_src,
+			.flowi4_tos = rt->rt_tos,
+			.flowi4_oif = rt->rt_oif,
+			.flowi4_iif = rt->rt_iif,
+			.flowi4_mark = rt->rt_mark,
+		};
+
 		rcu_read_lock();
-		if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0)
+		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
 			src = FIB_RES_PREFSRC(res);
 		else
 			src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
@@ -1775,7 +1729,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
 	memcpy(addr, &src, 4);
 }
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 static void set_class_tag(struct rtable *rt, u32 tag)
 {
 	if (!(rt->dst.tclassid & 0xFFFF))
@@ -1815,17 +1769,54 @@ static unsigned int ipv4_default_mtu(const struct dst_entry *dst)
 	return mtu;
 }
 
-static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
+static void rt_init_metrics(struct rtable *rt, const struct flowi4 *oldflp4,
+			    struct fib_info *fi)
+{
+	struct inet_peer *peer;
+	int create = 0;
+
+	/* If a peer entry exists for this destination, we must hook
+	 * it up in order to get at cached metrics.
+	 */
+	if (oldflp4 && (oldflp4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS))
+		create = 1;
+
+	rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create);
+	if (peer) {
+		rt->rt_peer_genid = rt_peer_genid();
+		if (inet_metrics_new(peer))
+			memcpy(peer->metrics, fi->fib_metrics,
+			       sizeof(u32) * RTAX_MAX);
+		dst_init_metrics(&rt->dst, peer->metrics, false);
+
+		if (peer->pmtu_expires)
+			check_peer_pmtu(&rt->dst, peer);
+		if (peer->redirect_learned.a4 &&
+		    peer->redirect_learned.a4 != rt->rt_gateway) {
+			rt->rt_gateway = peer->redirect_learned.a4;
+			rt->rt_flags |= RTCF_REDIRECTED;
+		}
+	} else {
+		if (fi->fib_metrics != (u32 *) dst_default_metrics) {
+			rt->fi = fi;
+			atomic_inc(&fi->fib_clntref);
+		}
+		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
+	}
+}
+
+static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *oldflp4,
+			   const struct fib_result *res,
+			   struct fib_info *fi, u16 type, u32 itag)
 {
 	struct dst_entry *dst = &rt->dst;
-	struct fib_info *fi = res->fi;
 
 	if (fi) {
 		if (FIB_RES_GW(*res) &&
 		    FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
 			rt->rt_gateway = FIB_RES_GW(*res);
-		dst_import_metrics(dst, fi->fib_metrics);
-#ifdef CONFIG_NET_CLS_ROUTE
+		rt_init_metrics(rt, oldflp4, fi);
+#ifdef CONFIG_IP_ROUTE_CLASSID
 		dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
 #endif
 	}
@@ -1835,13 +1826,26 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
 	if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40)
 		dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40);
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	set_class_tag(rt, fib_rules_tclass(res));
 #endif
 	set_class_tag(rt, itag);
 #endif
-	rt->rt_type = res->type;
+	rt->rt_type = type;
+}
+
+static struct rtable *rt_dst_alloc(bool nopolicy, bool noxfrm)
+{
+	struct rtable *rt = dst_alloc(&ipv4_dst_ops, 1);
+	if (rt) {
+		rt->dst.obsolete = -1;
+
+		rt->dst.flags = DST_HOST |
+			(nopolicy ? DST_NOPOLICY : 0) |
+			(noxfrm ? DST_NOXFRM : 0);
+	}
+	return rt;
 }
 
 /* called in rcu_read_lock() section */
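rt_dst_alloc(), added above, centralizes what every call site used to repeat: allocate the dst, mark it obsolete = -1, and fold the per-device NOPOLICY/NOXFRM predicates into the flags word with conditional ORs rather than if-statements. A toy equivalent of that constructor (structure and names invented for illustration):

	#include <stdlib.h>

	#define F_HOST     0x1
	#define F_NOPOLICY 0x2
	#define F_NOXFRM   0x4

	struct toy_rt { int flags; int obsolete; };

	static struct toy_rt *toy_rt_alloc(int nopolicy, int noxfrm)
	{
		struct toy_rt *rt = calloc(1, sizeof(*rt));
		if (rt) {
			rt->obsolete = -1;
			rt->flags = F_HOST |
				    (nopolicy ? F_NOPOLICY : 0) |
				    (noxfrm ? F_NOXFRM : 0);
		}
		return rt;
	}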
@@ -1874,31 +1878,25 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		if (err < 0)
 			goto e_err;
 	}
-	rth = dst_alloc(&ipv4_dst_ops);
+	rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
 	if (!rth)
 		goto e_nobufs;
 
 	rth->dst.output = ip_rt_bug;
-	rth->dst.obsolete = -1;
 
-	atomic_set(&rth->dst.__refcnt, 1);
-	rth->dst.flags= DST_HOST;
-	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->dst.flags |= DST_NOPOLICY;
-	rth->fl.fl4_dst	= daddr;
+	rth->rt_key_dst	= daddr;
 	rth->rt_dst	= daddr;
-	rth->fl.fl4_tos	= tos;
-	rth->fl.mark	= skb->mark;
-	rth->fl.fl4_src	= saddr;
+	rth->rt_tos	= tos;
+	rth->rt_mark	= skb->mark;
+	rth->rt_key_src	= saddr;
 	rth->rt_src	= saddr;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	rth->dst.tclassid = itag;
 #endif
-	rth->rt_iif	=
-	rth->fl.iif	= dev->ifindex;
+	rth->rt_iif	= dev->ifindex;
 	rth->dst.dev	= init_net.loopback_dev;
 	dev_hold(rth->dst.dev);
-	rth->fl.oif	= 0;
+	rth->rt_oif	= 0;
 	rth->rt_gateway	= daddr;
 	rth->rt_spec_dst= spec_dst;
 	rth->rt_genid	= rt_genid(dev_net(dev));
@@ -1916,7 +1914,10 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	RT_CACHE_STAT_INC(in_slow_mc);
 
 	hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
-	return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex);
+	rth = rt_intern_hash(hash, rth, skb, dev->ifindex);
+	err = 0;
+	if (IS_ERR(rth))
+		err = PTR_ERR(rth);
 
 e_nobufs:
 	return -ENOBUFS;
@@ -1959,7 +1960,7 @@ static void ip_handle_martian_source(struct net_device *dev,
 
 /* called in rcu_read_lock() section */
 static int __mkroute_input(struct sk_buff *skb,
-			   struct fib_result *res,
+			   const struct fib_result *res,
			   struct in_device *in_dev,
			   __be32 daddr, __be32 saddr, u32 tos,
			   struct rtable **result)
@@ -2013,39 +2014,31 @@ static int __mkroute_input(struct sk_buff *skb,
 		}
 	}
 
-
-	rth = dst_alloc(&ipv4_dst_ops);
+	rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY),
+			   IN_DEV_CONF_GET(out_dev, NOXFRM));
 	if (!rth) {
 		err = -ENOBUFS;
 		goto cleanup;
 	}
 
-	atomic_set(&rth->dst.__refcnt, 1);
-	rth->dst.flags= DST_HOST;
-	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->dst.flags |= DST_NOPOLICY;
-	if (IN_DEV_CONF_GET(out_dev, NOXFRM))
-		rth->dst.flags |= DST_NOXFRM;
-	rth->fl.fl4_dst	= daddr;
+	rth->rt_key_dst	= daddr;
 	rth->rt_dst	= daddr;
-	rth->fl.fl4_tos	= tos;
-	rth->fl.mark	= skb->mark;
-	rth->fl.fl4_src	= saddr;
+	rth->rt_tos	= tos;
+	rth->rt_mark	= skb->mark;
+	rth->rt_key_src	= saddr;
 	rth->rt_src	= saddr;
 	rth->rt_gateway	= daddr;
-	rth->rt_iif	=
-	rth->fl.iif	= in_dev->dev->ifindex;
+	rth->rt_iif	= in_dev->dev->ifindex;
 	rth->dst.dev	= (out_dev)->dev;
 	dev_hold(rth->dst.dev);
-	rth->fl.oif	= 0;
+	rth->rt_oif	= 0;
 	rth->rt_spec_dst= spec_dst;
 
-	rth->dst.obsolete = -1;
 	rth->dst.input = ip_forward;
 	rth->dst.output = ip_output;
 	rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
 
-	rt_set_nexthop(rth, res, itag);
+	rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag);
 
 	rth->rt_flags = flags;
 
@@ -2057,7 +2050,7 @@ static int __mkroute_input(struct sk_buff *skb,
 
 static int ip_mkroute_input(struct sk_buff *skb,
 			    struct fib_result *res,
-			    const struct flowi *fl,
+			    const struct flowi4 *fl4,
			    struct in_device *in_dev,
			    __be32 daddr, __be32 saddr, u32 tos)
 {
@@ -2066,8 +2059,8 @@ static int ip_mkroute_input(struct sk_buff *skb,
 	unsigned hash;
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-	if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0)
-		fib_select_multipath(fl, res);
+	if (res->fi && res->fi->fib_nhs > 1)
+		fib_select_multipath(res);
 #endif
 
 	/* create a routing cache entry */
@@ -2076,9 +2069,12 @@ static int ip_mkroute_input(struct sk_buff *skb,
 		return err;
 
 	/* put it into the cache */
-	hash = rt_hash(daddr, saddr, fl->iif,
+	hash = rt_hash(daddr, saddr, fl4->flowi4_iif,
		       rt_genid(dev_net(rth->dst.dev)));
-	return rt_intern_hash(hash, rth, NULL, skb, fl->iif);
+	rth = rt_intern_hash(hash, rth, skb, fl4->flowi4_iif);
+	if (IS_ERR(rth))
+		return PTR_ERR(rth);
+	return 0;
 }
 
 /*
@@ -2097,12 +2093,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 {
 	struct fib_result res;
 	struct in_device *in_dev = __in_dev_get_rcu(dev);
-	struct flowi fl = { .fl4_dst = daddr,
-			    .fl4_src = saddr,
-			    .fl4_tos = tos,
-			    .fl4_scope = RT_SCOPE_UNIVERSE,
-			    .mark = skb->mark,
-			    .iif = dev->ifindex };
+	struct flowi4	fl4;
 	unsigned	flags = 0;
 	u32		itag = 0;
 	struct rtable	*rth;
@@ -2139,7 +2130,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	/*
 	 *	Now we are ready to route packet.
 	 */
-	err = fib_lookup(net, &fl, &res);
+	fl4.flowi4_oif = 0;
+	fl4.flowi4_iif = dev->ifindex;
+	fl4.flowi4_mark = skb->mark;
+	fl4.flowi4_tos = tos;
+	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
+	fl4.daddr = daddr;
+	fl4.saddr = saddr;
+	err = fib_lookup(net, &fl4, &res);
 	if (err != 0) {
 		if (!IN_DEV_FORWARD(in_dev))
 			goto e_hostunreach;
@@ -2168,7 +2166,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	if (res.type != RTN_UNICAST)
 		goto martian_destination;
 
-	err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
+	err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
 out:	return err;
 
 brd_input:
@@ -2190,29 +2188,23 @@ brd_input:
 	RT_CACHE_STAT_INC(in_brd);
 
 local_input:
-	rth = dst_alloc(&ipv4_dst_ops);
+	rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
 	if (!rth)
 		goto e_nobufs;
 
 	rth->dst.output= ip_rt_bug;
-	rth->dst.obsolete = -1;
 	rth->rt_genid = rt_genid(net);
 
-	atomic_set(&rth->dst.__refcnt, 1);
-	rth->dst.flags= DST_HOST;
-	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->dst.flags |= DST_NOPOLICY;
-	rth->fl.fl4_dst	= daddr;
+	rth->rt_key_dst	= daddr;
 	rth->rt_dst	= daddr;
-	rth->fl.fl4_tos	= tos;
-	rth->fl.mark	= skb->mark;
-	rth->fl.fl4_src	= saddr;
+	rth->rt_tos	= tos;
+	rth->rt_mark	= skb->mark;
+	rth->rt_key_src	= saddr;
 	rth->rt_src	= saddr;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	rth->dst.tclassid = itag;
 #endif
-	rth->rt_iif	=
-	rth->fl.iif	= dev->ifindex;
+	rth->rt_iif	= dev->ifindex;
 	rth->dst.dev	= net->loopback_dev;
 	dev_hold(rth->dst.dev);
 	rth->rt_gateway	= daddr;
@@ -2225,8 +2217,11 @@ local_input:
 		rth->rt_flags	&= ~RTCF_LOCAL;
 	}
 	rth->rt_type	= res.type;
-	hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net));
-	err = rt_intern_hash(hash, rth, NULL, skb, fl.iif);
+	hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net));
+	rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif);
+	err = 0;
+	if (IS_ERR(rth))
+		err = PTR_ERR(rth);
 	goto out;
 
 no_route:
@@ -2288,12 +2283,12 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 
 	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
 	     rth = rcu_dereference(rth->dst.rt_next)) {
-		if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
-		     ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
-		     (rth->fl.iif ^ iif) |
-		     rth->fl.oif |
-		     (rth->fl.fl4_tos ^ tos)) == 0 &&
-		    rth->fl.mark == skb->mark &&
+		if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) |
+		     ((__force u32)rth->rt_key_src ^ (__force u32)saddr) |
+		     (rth->rt_iif ^ iif) |
+		     rth->rt_oif |
+		     (rth->rt_tos ^ tos)) == 0 &&
+		    rth->rt_mark == skb->mark &&
		    net_eq(dev_net(rth->dst.dev), net) &&
		    !rt_is_expired(rth)) {
			if (noref) {
@@ -2326,8 +2321,8 @@ skip_cache:
 		struct in_device *in_dev = __in_dev_get_rcu(dev);
 
 		if (in_dev) {
-			int our = ip_check_mc(in_dev, daddr, saddr,
-					      ip_hdr(skb)->protocol);
+			int our = ip_check_mc_rcu(in_dev, daddr, saddr,
+						  ip_hdr(skb)->protocol);
 			if (our
 #ifdef CONFIG_IP_MROUTE
 				||
2351EXPORT_SYMBOL(ip_route_input_common); 2346EXPORT_SYMBOL(ip_route_input_common);
2352 2347
2353/* called with rcu_read_lock() */ 2348/* called with rcu_read_lock() */
2354static int __mkroute_output(struct rtable **result, 2349static struct rtable *__mkroute_output(const struct fib_result *res,
2355 struct fib_result *res, 2350 const struct flowi4 *fl4,
2356 const struct flowi *fl, 2351 const struct flowi4 *oldflp4,
2357 const struct flowi *oldflp, 2352 struct net_device *dev_out,
2358 struct net_device *dev_out, 2353 unsigned int flags)
2359 unsigned flags)
2360{ 2354{
2361 struct rtable *rth; 2355 struct fib_info *fi = res->fi;
2356 u32 tos = RT_FL_TOS(oldflp4);
2362 struct in_device *in_dev; 2357 struct in_device *in_dev;
2363 u32 tos = RT_FL_TOS(oldflp); 2358 u16 type = res->type;
2359 struct rtable *rth;
2364 2360
2365 if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK)) 2361 if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
2366 return -EINVAL; 2362 return ERR_PTR(-EINVAL);
2367 2363
2368 if (ipv4_is_lbcast(fl->fl4_dst)) 2364 if (ipv4_is_lbcast(fl4->daddr))
2369 res->type = RTN_BROADCAST; 2365 type = RTN_BROADCAST;
2370 else if (ipv4_is_multicast(fl->fl4_dst)) 2366 else if (ipv4_is_multicast(fl4->daddr))
2371 res->type = RTN_MULTICAST; 2367 type = RTN_MULTICAST;
2372 else if (ipv4_is_zeronet(fl->fl4_dst)) 2368 else if (ipv4_is_zeronet(fl4->daddr))
2373 return -EINVAL; 2369 return ERR_PTR(-EINVAL);
2374 2370
2375 if (dev_out->flags & IFF_LOOPBACK) 2371 if (dev_out->flags & IFF_LOOPBACK)
2376 flags |= RTCF_LOCAL; 2372 flags |= RTCF_LOCAL;
2377 2373
2378 in_dev = __in_dev_get_rcu(dev_out); 2374 in_dev = __in_dev_get_rcu(dev_out);
2379 if (!in_dev) 2375 if (!in_dev)
2380 return -EINVAL; 2376 return ERR_PTR(-EINVAL);
2381 2377
2382 if (res->type == RTN_BROADCAST) { 2378 if (type == RTN_BROADCAST) {
2383 flags |= RTCF_BROADCAST | RTCF_LOCAL; 2379 flags |= RTCF_BROADCAST | RTCF_LOCAL;
2384 res->fi = NULL; 2380 fi = NULL;
2385 } else if (res->type == RTN_MULTICAST) { 2381 } else if (type == RTN_MULTICAST) {
2386 flags |= RTCF_MULTICAST | RTCF_LOCAL; 2382 flags |= RTCF_MULTICAST | RTCF_LOCAL;
2387 if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src, 2383 if (!ip_check_mc_rcu(in_dev, oldflp4->daddr, oldflp4->saddr,
2388 oldflp->proto)) 2384 oldflp4->flowi4_proto))
2389 flags &= ~RTCF_LOCAL; 2385 flags &= ~RTCF_LOCAL;
2390 /* If multicast route do not exist use 2386 /* If multicast route do not exist use
2391 * default one, but do not gateway in this case. 2387 * default one, but do not gateway in this case.
2392 * Yes, it is hack. 2388 * Yes, it is hack.
2393 */ 2389 */
2394 if (res->fi && res->prefixlen < 4) 2390 if (fi && res->prefixlen < 4)
2395 res->fi = NULL; 2391 fi = NULL;
2396 } 2392 }
2397 2393
2398 2394 rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY),
2399 rth = dst_alloc(&ipv4_dst_ops); 2395 IN_DEV_CONF_GET(in_dev, NOXFRM));
2400 if (!rth) 2396 if (!rth)
2401 return -ENOBUFS; 2397 return ERR_PTR(-ENOBUFS);
2402 2398
2403 atomic_set(&rth->dst.__refcnt, 1); 2399 rth->rt_key_dst = oldflp4->daddr;
2404 rth->dst.flags= DST_HOST; 2400 rth->rt_tos = tos;
2405 if (IN_DEV_CONF_GET(in_dev, NOXFRM)) 2401 rth->rt_key_src = oldflp4->saddr;
2406 rth->dst.flags |= DST_NOXFRM; 2402 rth->rt_oif = oldflp4->flowi4_oif;
2407 if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) 2403 rth->rt_mark = oldflp4->flowi4_mark;
2408 rth->dst.flags |= DST_NOPOLICY; 2404 rth->rt_dst = fl4->daddr;
2409 2405 rth->rt_src = fl4->saddr;
2410 rth->fl.fl4_dst = oldflp->fl4_dst; 2406 rth->rt_iif = 0;
2411 rth->fl.fl4_tos = tos;
2412 rth->fl.fl4_src = oldflp->fl4_src;
2413 rth->fl.oif = oldflp->oif;
2414 rth->fl.mark = oldflp->mark;
2415 rth->rt_dst = fl->fl4_dst;
2416 rth->rt_src = fl->fl4_src;
2417 rth->rt_iif = oldflp->oif ? : dev_out->ifindex;
2418 /* get references to the devices that are to be hold by the routing 2407 /* get references to the devices that are to be hold by the routing
2419 cache entry */ 2408 cache entry */
2420 rth->dst.dev = dev_out; 2409 rth->dst.dev = dev_out;
2421 dev_hold(dev_out); 2410 dev_hold(dev_out);
2422 rth->rt_gateway = fl->fl4_dst; 2411 rth->rt_gateway = fl4->daddr;
2423 rth->rt_spec_dst= fl->fl4_src; 2412 rth->rt_spec_dst= fl4->saddr;
2424 2413
2425 rth->dst.output=ip_output; 2414 rth->dst.output=ip_output;
2426 rth->dst.obsolete = -1;
2427 rth->rt_genid = rt_genid(dev_net(dev_out)); 2415 rth->rt_genid = rt_genid(dev_net(dev_out));
2428 2416
2429 RT_CACHE_STAT_INC(out_slow_tot); 2417 RT_CACHE_STAT_INC(out_slow_tot);
2430 2418
2431 if (flags & RTCF_LOCAL) { 2419 if (flags & RTCF_LOCAL) {
2432 rth->dst.input = ip_local_deliver; 2420 rth->dst.input = ip_local_deliver;
2433 rth->rt_spec_dst = fl->fl4_dst; 2421 rth->rt_spec_dst = fl4->daddr;
2434 } 2422 }
2435 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { 2423 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
2436 rth->rt_spec_dst = fl->fl4_src; 2424 rth->rt_spec_dst = fl4->saddr;
2437 if (flags & RTCF_LOCAL && 2425 if (flags & RTCF_LOCAL &&
2438 !(dev_out->flags & IFF_LOOPBACK)) { 2426 !(dev_out->flags & IFF_LOOPBACK)) {
2439 rth->dst.output = ip_mc_output; 2427 rth->dst.output = ip_mc_output;
2440 RT_CACHE_STAT_INC(out_slow_mc); 2428 RT_CACHE_STAT_INC(out_slow_mc);
2441 } 2429 }
2442#ifdef CONFIG_IP_MROUTE 2430#ifdef CONFIG_IP_MROUTE
2443 if (res->type == RTN_MULTICAST) { 2431 if (type == RTN_MULTICAST) {
2444 if (IN_DEV_MFORWARD(in_dev) && 2432 if (IN_DEV_MFORWARD(in_dev) &&
2445 !ipv4_is_local_multicast(oldflp->fl4_dst)) { 2433 !ipv4_is_local_multicast(oldflp4->daddr)) {
2446 rth->dst.input = ip_mr_input; 2434 rth->dst.input = ip_mr_input;
2447 rth->dst.output = ip_mc_output; 2435 rth->dst.output = ip_mc_output;
2448 } 2436 }
@@ -2450,31 +2438,10 @@ static int __mkroute_output(struct rtable **result,
2450#endif 2438#endif
2451 } 2439 }
2452 2440
2453 rt_set_nexthop(rth, res, 0); 2441 rt_set_nexthop(rth, oldflp4, res, fi, type, 0);
2454 2442
2455 rth->rt_flags = flags; 2443 rth->rt_flags = flags;
2456 *result = rth; 2444 return rth;
2457 return 0;
2458}
2459
2460/* called with rcu_read_lock() */
2461static int ip_mkroute_output(struct rtable **rp,
2462 struct fib_result *res,
2463 const struct flowi *fl,
2464 const struct flowi *oldflp,
2465 struct net_device *dev_out,
2466 unsigned flags)
2467{
2468 struct rtable *rth = NULL;
2469 int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
2470 unsigned hash;
2471 if (err == 0) {
2472 hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif,
2473 rt_genid(dev_net(dev_out)));
2474 err = rt_intern_hash(hash, rth, rp, NULL, oldflp->oif);
2475 }
2476
2477 return err;
2478} 2445}
2479 2446
2480/* 2447/*
@@ -2482,34 +2449,36 @@ static int ip_mkroute_output(struct rtable **rp,
  * called with rcu_read_lock();
  */
 
-static int ip_route_output_slow(struct net *net, struct rtable **rp,
-				const struct flowi *oldflp)
+static struct rtable *ip_route_output_slow(struct net *net,
+					   const struct flowi4 *oldflp4)
 {
-	u32 tos	= RT_FL_TOS(oldflp);
-	struct flowi fl = { .fl4_dst = oldflp->fl4_dst,
-			    .fl4_src = oldflp->fl4_src,
-			    .fl4_tos = tos & IPTOS_RT_MASK,
-			    .fl4_scope = ((tos & RTO_ONLINK) ?
-					  RT_SCOPE_LINK : RT_SCOPE_UNIVERSE),
-			    .mark = oldflp->mark,
-			    .iif = net->loopback_dev->ifindex,
-			    .oif = oldflp->oif };
+	u32 tos	= RT_FL_TOS(oldflp4);
+	struct flowi4 fl4;
 	struct fib_result res;
 	unsigned int flags = 0;
 	struct net_device *dev_out = NULL;
-	int err;
-
+	struct rtable *rth;
 
 	res.fi = NULL;
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	res.r = NULL;
 #endif
 
-	if (oldflp->fl4_src) {
-		err = -EINVAL;
-		if (ipv4_is_multicast(oldflp->fl4_src) ||
-		    ipv4_is_lbcast(oldflp->fl4_src) ||
-		    ipv4_is_zeronet(oldflp->fl4_src))
+	fl4.flowi4_oif = oldflp4->flowi4_oif;
+	fl4.flowi4_iif = net->loopback_dev->ifindex;
+	fl4.flowi4_mark = oldflp4->flowi4_mark;
+	fl4.daddr = oldflp4->daddr;
+	fl4.saddr = oldflp4->saddr;
+	fl4.flowi4_tos = tos & IPTOS_RT_MASK;
+	fl4.flowi4_scope = ((tos & RTO_ONLINK) ?
+			 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
+
+	rcu_read_lock();
+	if (oldflp4->saddr) {
+		rth = ERR_PTR(-EINVAL);
+		if (ipv4_is_multicast(oldflp4->saddr) ||
+		    ipv4_is_lbcast(oldflp4->saddr) ||
+		    ipv4_is_zeronet(oldflp4->saddr))
 			goto out;
 
 		/* I removed check for oif == dev_out->oif here.
@@ -2520,11 +2489,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 		   of another iface. --ANK
 		*/
 
-		if (oldflp->oif == 0 &&
-		    (ipv4_is_multicast(oldflp->fl4_dst) ||
-		     ipv4_is_lbcast(oldflp->fl4_dst))) {
+		if (oldflp4->flowi4_oif == 0 &&
+		    (ipv4_is_multicast(oldflp4->daddr) ||
+		     ipv4_is_lbcast(oldflp4->daddr))) {
 			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
-			dev_out = __ip_dev_find(net, oldflp->fl4_src, false);
+			dev_out = __ip_dev_find(net, oldflp4->saddr, false);
 			if (dev_out == NULL)
 				goto out;
 
@@ -2543,60 +2512,60 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 			   Luckily, this hack is good workaround.
 			*/
 
-			fl.oif = dev_out->ifindex;
+			fl4.flowi4_oif = dev_out->ifindex;
 			goto make_route;
 		}
 
-		if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) {
+		if (!(oldflp4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
 			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
-			if (!__ip_dev_find(net, oldflp->fl4_src, false))
+			if (!__ip_dev_find(net, oldflp4->saddr, false))
 				goto out;
 		}
 	}
 
 
-	if (oldflp->oif) {
-		dev_out = dev_get_by_index_rcu(net, oldflp->oif);
-		err = -ENODEV;
+	if (oldflp4->flowi4_oif) {
+		dev_out = dev_get_by_index_rcu(net, oldflp4->flowi4_oif);
+		rth = ERR_PTR(-ENODEV);
 		if (dev_out == NULL)
 			goto out;
 
 		/* RACE: Check return value of inet_select_addr instead. */
 		if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
-			err = -ENETUNREACH;
+			rth = ERR_PTR(-ENETUNREACH);
 			goto out;
 		}
-		if (ipv4_is_local_multicast(oldflp->fl4_dst) ||
-		    ipv4_is_lbcast(oldflp->fl4_dst)) {
-			if (!fl.fl4_src)
-				fl.fl4_src = inet_select_addr(dev_out, 0,
-							      RT_SCOPE_LINK);
+		if (ipv4_is_local_multicast(oldflp4->daddr) ||
+		    ipv4_is_lbcast(oldflp4->daddr)) {
+			if (!fl4.saddr)
+				fl4.saddr = inet_select_addr(dev_out, 0,
+							     RT_SCOPE_LINK);
 			goto make_route;
 		}
-		if (!fl.fl4_src) {
-			if (ipv4_is_multicast(oldflp->fl4_dst))
-				fl.fl4_src = inet_select_addr(dev_out, 0,
-							      fl.fl4_scope);
-			else if (!oldflp->fl4_dst)
-				fl.fl4_src = inet_select_addr(dev_out, 0,
-							      RT_SCOPE_HOST);
+		if (!fl4.saddr) {
+			if (ipv4_is_multicast(oldflp4->daddr))
+				fl4.saddr = inet_select_addr(dev_out, 0,
+							     fl4.flowi4_scope);
+			else if (!oldflp4->daddr)
+				fl4.saddr = inet_select_addr(dev_out, 0,
+							     RT_SCOPE_HOST);
 		}
 	}
 
-	if (!fl.fl4_dst) {
-		fl.fl4_dst = fl.fl4_src;
-		if (!fl.fl4_dst)
-			fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
+	if (!fl4.daddr) {
+		fl4.daddr = fl4.saddr;
+		if (!fl4.daddr)
+			fl4.daddr = fl4.saddr = htonl(INADDR_LOOPBACK);
 		dev_out = net->loopback_dev;
-		fl.oif = net->loopback_dev->ifindex;
+		fl4.flowi4_oif = net->loopback_dev->ifindex;
 		res.type = RTN_LOCAL;
 		flags |= RTCF_LOCAL;
 		goto make_route;
 	}
 
-	if (fib_lookup(net, &fl, &res)) {
+	if (fib_lookup(net, &fl4, &res)) {
 		res.fi = NULL;
-		if (oldflp->oif) {
+		if (oldflp4->flowi4_oif) {
 			/* Apparently, routing tables are wrong. Assume,
 			   that the destination is on link.
 
@@ -2615,90 +2584,93 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 			   likely IPv6, but we do not.
 			*/
 
-			if (fl.fl4_src == 0)
-				fl.fl4_src = inet_select_addr(dev_out, 0,
-							      RT_SCOPE_LINK);
+			if (fl4.saddr == 0)
+				fl4.saddr = inet_select_addr(dev_out, 0,
+							     RT_SCOPE_LINK);
 			res.type = RTN_UNICAST;
 			goto make_route;
 		}
-		err = -ENETUNREACH;
+		rth = ERR_PTR(-ENETUNREACH);
 		goto out;
 	}
 
 	if (res.type == RTN_LOCAL) {
-		if (!fl.fl4_src) {
+		if (!fl4.saddr) {
 			if (res.fi->fib_prefsrc)
-				fl.fl4_src = res.fi->fib_prefsrc;
+				fl4.saddr = res.fi->fib_prefsrc;
 			else
-				fl.fl4_src = fl.fl4_dst;
+				fl4.saddr = fl4.daddr;
 		}
 		dev_out = net->loopback_dev;
-		fl.oif = dev_out->ifindex;
+		fl4.flowi4_oif = dev_out->ifindex;
 		res.fi = NULL;
 		flags |= RTCF_LOCAL;
 		goto make_route;
 	}
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-	if (res.fi->fib_nhs > 1 && fl.oif == 0)
-		fib_select_multipath(&fl, &res);
+	if (res.fi->fib_nhs > 1 && fl4.flowi4_oif == 0)
+		fib_select_multipath(&res);
 	else
 #endif
-	if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
-		fib_select_default(net, &fl, &res);
+	if (!res.prefixlen && res.type == RTN_UNICAST && !fl4.flowi4_oif)
+		fib_select_default(&res);
 
-	if (!fl.fl4_src)
-		fl.fl4_src = FIB_RES_PREFSRC(res);
+	if (!fl4.saddr)
+		fl4.saddr = FIB_RES_PREFSRC(res);
 
 	dev_out = FIB_RES_DEV(res);
-	fl.oif = dev_out->ifindex;
+	fl4.flowi4_oif = dev_out->ifindex;
 
 
 make_route:
-	err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags);
+	rth = __mkroute_output(&res, &fl4, oldflp4, dev_out, flags);
+	if (!IS_ERR(rth)) {
+		unsigned int hash;
 
-out:	return err;
+		hash = rt_hash(oldflp4->daddr, oldflp4->saddr, oldflp4->flowi4_oif,
+			       rt_genid(dev_net(dev_out)));
+		rth = rt_intern_hash(hash, rth, NULL, oldflp4->flowi4_oif);
+	}
+
+out:
+	rcu_read_unlock();
+	return rth;
 }
 
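For reference while reading this function, the flowi-to-flowi4 renames it applies consistently (every pair below is taken from the hunks above; a reading aid, not an exhaustive description of struct flowi4):

/* struct flowi (old)      ->  struct flowi4 (new)
 * fl.fl4_dst              ->  fl4.daddr
 * fl.fl4_src              ->  fl4.saddr
 * fl.fl4_tos              ->  fl4.flowi4_tos
 * fl.fl4_scope            ->  fl4.flowi4_scope
 * fl.mark                 ->  fl4.flowi4_mark
 * fl.iif / fl.oif         ->  fl4.flowi4_iif / fl4.flowi4_oif
 * fl.proto                ->  fl4.flowi4_proto
 * fl.flags                ->  fl4.flowi4_flags
 */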
-int __ip_route_output_key(struct net *net, struct rtable **rp,
-			  const struct flowi *flp)
+struct rtable *__ip_route_output_key(struct net *net, const struct flowi4 *flp4)
 {
-	unsigned int hash;
-	int res;
 	struct rtable *rth;
+	unsigned int hash;
 
 	if (!rt_caching(net))
 		goto slow_output;
 
-	hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net));
+	hash = rt_hash(flp4->daddr, flp4->saddr, flp4->flowi4_oif, rt_genid(net));
 
 	rcu_read_lock_bh();
 	for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth;
 	     rth = rcu_dereference_bh(rth->dst.rt_next)) {
-		if (rth->fl.fl4_dst == flp->fl4_dst &&
-		    rth->fl.fl4_src == flp->fl4_src &&
+		if (rth->rt_key_dst == flp4->daddr &&
+		    rth->rt_key_src == flp4->saddr &&
 		    rt_is_output_route(rth) &&
-		    rth->fl.oif == flp->oif &&
-		    rth->fl.mark == flp->mark &&
-		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
+		    rth->rt_oif == flp4->flowi4_oif &&
+		    rth->rt_mark == flp4->flowi4_mark &&
+		    !((rth->rt_tos ^ flp4->flowi4_tos) &
 		      (IPTOS_RT_MASK | RTO_ONLINK)) &&
 		    net_eq(dev_net(rth->dst.dev), net) &&
 		    !rt_is_expired(rth)) {
 			dst_use(&rth->dst, jiffies);
 			RT_CACHE_STAT_INC(out_hit);
 			rcu_read_unlock_bh();
-			*rp = rth;
-			return 0;
+			return rth;
 		}
 		RT_CACHE_STAT_INC(out_hlist_search);
 	}
 	rcu_read_unlock_bh();
 
 slow_output:
-	rcu_read_lock();
-	res = ip_route_output_slow(net, rp, flp);
-	rcu_read_unlock();
-	return res;
+	return ip_route_output_slow(net, flp4);
 }
 EXPORT_SYMBOL_GPL(__ip_route_output_key);
 
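Callers of the rewritten __ip_route_output_key() follow the same pointer-encoded-error pattern; a hedged fragment (the variable names and error handling are illustrative, not from this patch):

	struct flowi4 fl4 = {
		.daddr		= daddr,
		.flowi4_oif	= oif,
	};
	struct rtable *rt = __ip_route_output_key(net, &fl4);

	if (IS_ERR(rt))
		return PTR_ERR(rt);	/* the error rides in the pointer */
	/* ... use rt, then drop the reference with ip_rt_put(rt) ... */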
@@ -2726,17 +2698,14 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
 	.update_pmtu		= ipv4_rt_blackhole_update_pmtu,
 };
 
-
-static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi *flp)
+struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 {
-	struct rtable *ort = *rp;
-	struct rtable *rt = (struct rtable *)
-		dst_alloc(&ipv4_dst_blackhole_ops);
+	struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, 1);
+	struct rtable *ort = (struct rtable *) dst_orig;
 
 	if (rt) {
 		struct dst_entry *new = &rt->dst;
 
-		atomic_set(&new->__refcnt, 1);
 		new->__use = 1;
 		new->input = dst_discard;
 		new->output = dst_discard;
@@ -2746,7 +2715,12 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
 		if (new->dev)
 			dev_hold(new->dev);
 
-		rt->fl = ort->fl;
+		rt->rt_key_dst = ort->rt_key_dst;
+		rt->rt_key_src = ort->rt_key_src;
+		rt->rt_tos = ort->rt_tos;
+		rt->rt_iif = ort->rt_iif;
+		rt->rt_oif = ort->rt_oif;
+		rt->rt_mark = ort->rt_mark;
 
 		rt->rt_genid = rt_genid(net);
 		rt->rt_flags = ort->rt_flags;
@@ -2759,46 +2733,40 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
 		rt->peer = ort->peer;
 		if (rt->peer)
 			atomic_inc(&rt->peer->refcnt);
+		rt->fi = ort->fi;
+		if (rt->fi)
+			atomic_inc(&rt->fi->fib_clntref);
 
 		dst_free(new);
 	}
 
-	dst_release(&(*rp)->dst);
-	*rp = rt;
-	return rt ? 0 : -ENOMEM;
+	dst_release(dst_orig);
+
+	return rt ? &rt->dst : ERR_PTR(-ENOMEM);
 }
 
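ipv4_dst_blackhole() becomes the non-static ipv4_blackhole_route(), trading the **rp out-parameter for a returned struct dst_entry * and consuming the reference on dst_orig. The old -EREMOTE fallback in ip_route_output_flow() below disappears; with the xfrm_lookup() changes merged in this series, the xfrm layer is presumably expected to request the blackhole route itself. A hedged sketch of that caller-side shape (hypothetical fragment, not from this patch):

	/* Trade the original dst for a blackhole dst; ipv4_blackhole_route()
	 * releases dst_orig, so the caller must not touch it afterwards. */
	struct dst_entry *dst = ipv4_blackhole_route(net, dst_orig);

	if (IS_ERR(dst))	/* only ERR_PTR(-ENOMEM), per the code above */
		return dst;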
-int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
-			 struct sock *sk, int flags)
+struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
+				    struct sock *sk)
 {
-	int err;
+	struct rtable *rt = __ip_route_output_key(net, flp4);
 
-	if ((err = __ip_route_output_key(net, rp, flp)) != 0)
-		return err;
+	if (IS_ERR(rt))
+		return rt;
 
-	if (flp->proto) {
-		if (!flp->fl4_src)
-			flp->fl4_src = (*rp)->rt_src;
-		if (!flp->fl4_dst)
-			flp->fl4_dst = (*rp)->rt_dst;
-		err = __xfrm_lookup(net, (struct dst_entry **)rp, flp, sk,
-				    flags ? XFRM_LOOKUP_WAIT : 0);
-		if (err == -EREMOTE)
-			err = ipv4_dst_blackhole(net, rp, flp);
-
-		return err;
+	if (flp4->flowi4_proto) {
+		if (!flp4->saddr)
+			flp4->saddr = rt->rt_src;
+		if (!flp4->daddr)
+			flp4->daddr = rt->rt_dst;
+		rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
+						   flowi4_to_flowi(flp4),
+						   sk, 0);
 	}
 
-	return 0;
+	return rt;
 }
 EXPORT_SYMBOL_GPL(ip_route_output_flow);
 
-int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp)
-{
-	return ip_route_output_flow(net, rp, flp, NULL, 0);
-}
-EXPORT_SYMBOL(ip_route_output_key);
-
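The out-of-line ip_route_output_key() wrapper is deleted above, yet inet_rtm_getroute() further down still calls it with the new flowi4 signature, so the wrapper presumably survives as a static inline in a header (include/net/route.h in this series); roughly:

static inline struct rtable *ip_route_output_key(struct net *net,
						 struct flowi4 *flp)
{
	return ip_route_output_flow(net, flp, NULL);
}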
 static int rt_fill_info(struct net *net,
 			struct sk_buff *skb, u32 pid, u32 seq, int event,
 			int nowait, unsigned int flags)
@@ -2817,7 +2785,7 @@ static int rt_fill_info(struct net *net,
 	r->rtm_family	 = AF_INET;
 	r->rtm_dst_len	= 32;
 	r->rtm_src_len	= 0;
-	r->rtm_tos	= rt->fl.fl4_tos;
+	r->rtm_tos	= rt->rt_tos;
 	r->rtm_table	= RT_TABLE_MAIN;
 	NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
 	r->rtm_type	= rt->rt_type;
@@ -2829,19 +2797,19 @@ static int rt_fill_info(struct net *net,
 
 	NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst);
 
-	if (rt->fl.fl4_src) {
+	if (rt->rt_key_src) {
 		r->rtm_src_len = 32;
-		NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src);
+		NLA_PUT_BE32(skb, RTA_SRC, rt->rt_key_src);
 	}
 	if (rt->dst.dev)
 		NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	if (rt->dst.tclassid)
 		NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
 #endif
 	if (rt_is_input_route(rt))
 		NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
-	else if (rt->rt_src != rt->fl.fl4_src)
+	else if (rt->rt_src != rt->rt_key_src)
 		NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src);
 
 	if (rt->rt_dst != rt->rt_gateway)
@@ -2850,11 +2818,12 @@ static int rt_fill_info(struct net *net,
 	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
 		goto nla_put_failure;
 
-	if (rt->fl.mark)
-		NLA_PUT_BE32(skb, RTA_MARK, rt->fl.mark);
+	if (rt->rt_mark)
+		NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark);
 
 	error = rt->dst.error;
-	expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
+	expires = (rt->peer && rt->peer->pmtu_expires) ?
+		rt->peer->pmtu_expires - jiffies : 0;
 	if (rt->peer) {
 		inet_peer_refcheck(rt->peer);
 		id = atomic_read(&rt->peer->ip_id_count) & 0xffff;
@@ -2884,7 +2853,7 @@ static int rt_fill_info(struct net *net,
 		}
 	} else
 #endif
-		NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif);
+		NLA_PUT_U32(skb, RTA_IIF, rt->rt_iif);
 	}
 
 	if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage,
@@ -2958,14 +2927,18 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 		if (err == 0 && rt->dst.error)
 			err = -rt->dst.error;
 	} else {
-		struct flowi fl = {
-			.fl4_dst = dst,
-			.fl4_src = src,
-			.fl4_tos = rtm->rtm_tos,
-			.oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
-			.mark = mark,
+		struct flowi4 fl4 = {
+			.daddr = dst,
+			.saddr = src,
+			.flowi4_tos = rtm->rtm_tos,
+			.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
+			.flowi4_mark = mark,
 		};
-		err = ip_route_output_key(net, &rt, &fl);
+		rt = ip_route_output_key(net, &fl4);
+
+		err = 0;
+		if (IS_ERR(rt))
+			err = PTR_ERR(rt);
 	}
 
 	if (err)
@@ -3256,9 +3229,9 @@ static __net_initdata struct pernet_operations rt_genid_ops = {
 };
 
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
-#endif /* CONFIG_NET_CLS_ROUTE */
+#endif /* CONFIG_IP_ROUTE_CLASSID */
 
 static __initdata unsigned long rhash_entries;
 static int __init set_rhash_entries(char *str)
@@ -3274,7 +3247,7 @@ int __init ip_rt_init(void)
 {
 	int rc = 0;
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
 	if (!ip_rt_acct)
 		panic("IP: failed to allocate ip_rt_acct\n");
@@ -3311,14 +3284,6 @@ int __init ip_rt_init(void)
 	devinet_init();
 	ip_fib_init();
 
-	/* All the timers, started at system startup tend
-	   to synchronize. Perturb it a bit.
-	 */
-	INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func);
-	expires_ljiffies = jiffies;
-	schedule_delayed_work(&expires_work,
-			net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
-
 	if (ip_rt_proc_init())
 		printk(KERN_ERR "Unable to create route proc files\n");
 #ifdef CONFIG_XFRM