path: root/net/ipv4/route.c
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r--  net/ipv4/route.c  | 1200
1 file changed, 588 insertions(+), 612 deletions(-)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6ed6603c2f6d..99e6e4bb1c72 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -109,8 +109,8 @@
109#include <linux/sysctl.h> 109#include <linux/sysctl.h>
110#endif 110#endif
111 111
112#define RT_FL_TOS(oldflp) \ 112#define RT_FL_TOS(oldflp4) \
113 ((u32)(oldflp->fl4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) 113 ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))
114 114
115#define IP_MAX_MTU 0xFFF0 115#define IP_MAX_MTU 0xFFF0
116 116
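
The macro rename tracks the flowi -> flowi4 split (the TOS byte now lives in flowi4_tos); what it computes is unchanged: the routing-relevant TOS bits plus the ONLINK flag. A minimal userspace sketch, assuming the era's header values (IPTOS_RT_MASK == (IPTOS_TOS_MASK & ~3) == 0x1C, RTO_ONLINK == 0x01):

        #include <stdint.h>
        #include <stdio.h>

        #define IPTOS_RT_MASK 0x1C  /* assumed: IPTOS_TOS_MASK (0x1E) & ~3 */
        #define RTO_ONLINK    0x01  /* assumed value from net/route.h */

        int main(void)
        {
                uint8_t flowi4_tos = 0xB5;  /* arbitrary example TOS byte */
                uint32_t tos = flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK);

                printf("routing TOS bits 0x%02x, ONLINK %s\n",
                       tos & IPTOS_RT_MASK, (tos & RTO_ONLINK) ? "yes" : "no");
                return 0;
        }

The ONLINK bit is translated into RT_SCOPE_LINK by ip_route_output_slow() in a hunk near the end of this diff.
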
@@ -131,9 +131,6 @@ static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
131static int ip_rt_min_advmss __read_mostly = 256; 131static int ip_rt_min_advmss __read_mostly = 256;
132static int rt_chain_length_max __read_mostly = 20; 132static int rt_chain_length_max __read_mostly = 20;
133 133
134static struct delayed_work expires_work;
135static unsigned long expires_ljiffies;
136
137/* 134/*
138 * Interface to generic destination cache. 135 * Interface to generic destination cache.
139 */ 136 */
@@ -152,6 +149,41 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
152{ 149{
153} 150}
154 151
152static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
153{
154 struct rtable *rt = (struct rtable *) dst;
155 struct inet_peer *peer;
156 u32 *p = NULL;
157
158 if (!rt->peer)
159 rt_bind_peer(rt, 1);
160
161 peer = rt->peer;
162 if (peer) {
163 u32 *old_p = __DST_METRICS_PTR(old);
164 unsigned long prev, new;
165
166 p = peer->metrics;
167 if (inet_metrics_new(peer))
168 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
169
170 new = (unsigned long) p;
171 prev = cmpxchg(&dst->_metrics, old, new);
172
173 if (prev != old) {
174 p = __DST_METRICS_PTR(prev);
175 if (prev & DST_METRICS_READ_ONLY)
176 p = NULL;
177 } else {
178 if (rt->fi) {
179 fib_info_put(rt->fi);
180 rt->fi = NULL;
181 }
182 }
183 }
184 return p;
185}
186
155static struct dst_ops ipv4_dst_ops = { 187static struct dst_ops ipv4_dst_ops = {
156 .family = AF_INET, 188 .family = AF_INET,
157 .protocol = cpu_to_be16(ETH_P_IP), 189 .protocol = cpu_to_be16(ETH_P_IP),
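
The new ipv4_cow_metrics() gives dst metrics copy-on-write semantics: a route starts out pointing at a shared read-only array, and the first writer copies it into the bound inet_peer's storage, then publishes the copy with cmpxchg(). A self-contained userspace sketch of the publish step; RTAX_MAX and the read-only flag bit stand in for the kernel definitions and are assumptions here:

        #include <stdatomic.h>
        #include <string.h>

        #define RTAX_MAX              16    /* assumed array size */
        #define DST_METRICS_READ_ONLY 0x1UL /* low pointer bit = shared/read-only */

        static unsigned int *metrics_ptr(unsigned long v)
        {
                return (unsigned int *)(v & ~DST_METRICS_READ_ONLY);
        }

        /* 'slot' plays the role of dst->_metrics; 'mine' is the peer's array. */
        unsigned int *cow_metrics(_Atomic unsigned long *slot, unsigned long old,
                                  unsigned int *mine)
        {
                unsigned long expected = old;

                memcpy(mine, metrics_ptr(old), sizeof(unsigned int) * RTAX_MAX);
                if (atomic_compare_exchange_strong(slot, &expected,
                                                   (unsigned long)mine))
                        return mine;    /* our copy is now live */
                /* Lost the race: adopt the winner's copy unless it is read-only. */
                return (expected & DST_METRICS_READ_ONLY) ? NULL
                                                          : metrics_ptr(expected);
        }

When the cmpxchg() loses, the loser adopts whichever pointer won, exactly as the hunk above does via __DST_METRICS_PTR(prev).
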
@@ -159,6 +191,7 @@ static struct dst_ops ipv4_dst_ops = {
159 .check = ipv4_dst_check, 191 .check = ipv4_dst_check,
160 .default_advmss = ipv4_default_advmss, 192 .default_advmss = ipv4_default_advmss,
161 .default_mtu = ipv4_default_mtu, 193 .default_mtu = ipv4_default_mtu,
194 .cow_metrics = ipv4_cow_metrics,
162 .destroy = ipv4_dst_destroy, 195 .destroy = ipv4_dst_destroy,
163 .ifdown = ipv4_dst_ifdown, 196 .ifdown = ipv4_dst_ifdown,
164 .negative_advice = ipv4_negative_advice, 197 .negative_advice = ipv4_negative_advice,
@@ -171,7 +204,7 @@ static struct dst_ops ipv4_dst_ops = {
171 204
172const __u8 ip_tos2prio[16] = { 205const __u8 ip_tos2prio[16] = {
173 TC_PRIO_BESTEFFORT, 206 TC_PRIO_BESTEFFORT,
174 ECN_OR_COST(FILLER), 207 ECN_OR_COST(BESTEFFORT),
175 TC_PRIO_BESTEFFORT, 208 TC_PRIO_BESTEFFORT,
176 ECN_OR_COST(BESTEFFORT), 209 ECN_OR_COST(BESTEFFORT),
177 TC_PRIO_BULK, 210 TC_PRIO_BULK,
@@ -391,7 +424,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
391 dst_metric(&r->dst, RTAX_WINDOW), 424 dst_metric(&r->dst, RTAX_WINDOW),
392 (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + 425 (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
393 dst_metric(&r->dst, RTAX_RTTVAR)), 426 dst_metric(&r->dst, RTAX_RTTVAR)),
394 r->fl.fl4_tos, 427 r->rt_tos,
395 r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, 428 r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1,
396 r->dst.hh ? (r->dst.hh->hh_output == 429 r->dst.hh ? (r->dst.hh->hh_output ==
397 dev_queue_xmit) : 0, 430 dev_queue_xmit) : 0,
@@ -514,7 +547,7 @@ static const struct file_operations rt_cpu_seq_fops = {
514 .release = seq_release, 547 .release = seq_release,
515}; 548};
516 549
517#ifdef CONFIG_NET_CLS_ROUTE 550#ifdef CONFIG_IP_ROUTE_CLASSID
518static int rt_acct_proc_show(struct seq_file *m, void *v) 551static int rt_acct_proc_show(struct seq_file *m, void *v)
519{ 552{
520 struct ip_rt_acct *dst, *src; 553 struct ip_rt_acct *dst, *src;
@@ -567,14 +600,14 @@ static int __net_init ip_rt_do_proc_init(struct net *net)
567 if (!pde) 600 if (!pde)
568 goto err2; 601 goto err2;
569 602
570#ifdef CONFIG_NET_CLS_ROUTE 603#ifdef CONFIG_IP_ROUTE_CLASSID
571 pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops); 604 pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
572 if (!pde) 605 if (!pde)
573 goto err3; 606 goto err3;
574#endif 607#endif
575 return 0; 608 return 0;
576 609
577#ifdef CONFIG_NET_CLS_ROUTE 610#ifdef CONFIG_IP_ROUTE_CLASSID
578err3: 611err3:
579 remove_proc_entry("rt_cache", net->proc_net_stat); 612 remove_proc_entry("rt_cache", net->proc_net_stat);
580#endif 613#endif
@@ -588,7 +621,7 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net)
588{ 621{
589 remove_proc_entry("rt_cache", net->proc_net_stat); 622 remove_proc_entry("rt_cache", net->proc_net_stat);
590 remove_proc_entry("rt_cache", net->proc_net); 623 remove_proc_entry("rt_cache", net->proc_net);
591#ifdef CONFIG_NET_CLS_ROUTE 624#ifdef CONFIG_IP_ROUTE_CLASSID
592 remove_proc_entry("rt_acct", net->proc_net); 625 remove_proc_entry("rt_acct", net->proc_net);
593#endif 626#endif
594} 627}
@@ -632,7 +665,7 @@ static inline int rt_fast_clean(struct rtable *rth)
632static inline int rt_valuable(struct rtable *rth) 665static inline int rt_valuable(struct rtable *rth)
633{ 666{
634 return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || 667 return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
635 rth->dst.expires; 668 (rth->peer && rth->peer->pmtu_expires);
636} 669}
637 670
638static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) 671static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
@@ -643,13 +676,7 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t
643 if (atomic_read(&rth->dst.__refcnt)) 676 if (atomic_read(&rth->dst.__refcnt))
644 goto out; 677 goto out;
645 678
646 ret = 1;
647 if (rth->dst.expires &&
648 time_after_eq(jiffies, rth->dst.expires))
649 goto out;
650
651 age = jiffies - rth->dst.lastuse; 679 age = jiffies - rth->dst.lastuse;
652 ret = 0;
653 if ((age <= tmo1 && !rt_fast_clean(rth)) || 680 if ((age <= tmo1 && !rt_fast_clean(rth)) ||
654 (age <= tmo2 && rt_valuable(rth))) 681 (age <= tmo2 && rt_valuable(rth)))
655 goto out; 682 goto out;
@@ -684,22 +711,22 @@ static inline bool rt_caching(const struct net *net)
684 net->ipv4.sysctl_rt_cache_rebuild_count; 711 net->ipv4.sysctl_rt_cache_rebuild_count;
685} 712}
686 713
687static inline bool compare_hash_inputs(const struct flowi *fl1, 714static inline bool compare_hash_inputs(const struct rtable *rt1,
688 const struct flowi *fl2) 715 const struct rtable *rt2)
689{ 716{
690 return ((((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) | 717 return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
691 ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) | 718 ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
692 (fl1->iif ^ fl2->iif)) == 0); 719 (rt1->rt_iif ^ rt2->rt_iif)) == 0);
693} 720}
694 721
695static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) 722static inline int compare_keys(struct rtable *rt1, struct rtable *rt2)
696{ 723{
697 return (((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) | 724 return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
698 ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) | 725 ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
699 (fl1->mark ^ fl2->mark) | 726 (rt1->rt_mark ^ rt2->rt_mark) |
700 (*(u16 *)&fl1->fl4_tos ^ *(u16 *)&fl2->fl4_tos) | 727 (rt1->rt_tos ^ rt2->rt_tos) |
701 (fl1->oif ^ fl2->oif) | 728 (rt1->rt_oif ^ rt2->rt_oif) |
702 (fl1->iif ^ fl2->iif)) == 0; 729 (rt1->rt_iif ^ rt2->rt_iif)) == 0;
703} 730}
704 731
705static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) 732static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
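
compare_hash_inputs() and compare_keys() now read the lookup key from fields cached on the rtable itself (rt_key_dst, rt_key_src, rt_mark, ...) rather than an embedded struct flowi, but keep the XOR/OR idiom: all field deltas are OR-merged so the whole comparison is branch-free until one final test against zero. A reduced, self-contained illustration (the field set is illustrative, not the full kernel key):

        #include <stdbool.h>
        #include <stdint.h>

        struct key {
                uint32_t dst, src, mark;
                int      iif, oif;
                uint8_t  tos;
        };

        static bool keys_equal(const struct key *a, const struct key *b)
        {
                return ((a->dst ^ b->dst) |
                        (a->src ^ b->src) |
                        (a->mark ^ b->mark) |
                        (uint32_t)(a->iif ^ b->iif) |
                        (uint32_t)(a->oif ^ b->oif) |
                        (uint32_t)(a->tos ^ b->tos)) == 0;
        }
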
@@ -786,106 +813,15 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
786 const struct rtable *aux = head; 813 const struct rtable *aux = head;
787 814
788 while (aux != rth) { 815 while (aux != rth) {
789 if (compare_hash_inputs(&aux->fl, &rth->fl)) 816 if (compare_hash_inputs(aux, rth))
790 return 0; 817 return 0;
791 aux = rcu_dereference_protected(aux->dst.rt_next, 1); 818 aux = rcu_dereference_protected(aux->dst.rt_next, 1);
792 } 819 }
793 return ONE; 820 return ONE;
794} 821}
795 822
796static void rt_check_expire(void)
797{
798 static unsigned int rover;
799 unsigned int i = rover, goal;
800 struct rtable *rth;
801 struct rtable __rcu **rthp;
802 unsigned long samples = 0;
803 unsigned long sum = 0, sum2 = 0;
804 unsigned long delta;
805 u64 mult;
806
807 delta = jiffies - expires_ljiffies;
808 expires_ljiffies = jiffies;
809 mult = ((u64)delta) << rt_hash_log;
810 if (ip_rt_gc_timeout > 1)
811 do_div(mult, ip_rt_gc_timeout);
812 goal = (unsigned int)mult;
813 if (goal > rt_hash_mask)
814 goal = rt_hash_mask + 1;
815 for (; goal > 0; goal--) {
816 unsigned long tmo = ip_rt_gc_timeout;
817 unsigned long length;
818
819 i = (i + 1) & rt_hash_mask;
820 rthp = &rt_hash_table[i].chain;
821
822 if (need_resched())
823 cond_resched();
824
825 samples++;
826
827 if (rcu_dereference_raw(*rthp) == NULL)
828 continue;
829 length = 0;
830 spin_lock_bh(rt_hash_lock_addr(i));
831 while ((rth = rcu_dereference_protected(*rthp,
832 lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
833 prefetch(rth->dst.rt_next);
834 if (rt_is_expired(rth)) {
835 *rthp = rth->dst.rt_next;
836 rt_free(rth);
837 continue;
838 }
839 if (rth->dst.expires) {
840 /* Entry is expired even if it is in use */
841 if (time_before_eq(jiffies, rth->dst.expires)) {
842nofree:
843 tmo >>= 1;
844 rthp = &rth->dst.rt_next;
845 /*
846 * We only count entries on
847 * a chain with equal hash inputs once
848 * so that entries for different QOS
849 * levels, and other non-hash input
850 * attributes don't unfairly skew
851 * the length computation
852 */
853 length += has_noalias(rt_hash_table[i].chain, rth);
854 continue;
855 }
856 } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
857 goto nofree;
858
859 /* Cleanup aged off entries. */
860 *rthp = rth->dst.rt_next;
861 rt_free(rth);
862 }
863 spin_unlock_bh(rt_hash_lock_addr(i));
864 sum += length;
865 sum2 += length*length;
866 }
867 if (samples) {
868 unsigned long avg = sum / samples;
869 unsigned long sd = int_sqrt(sum2 / samples - avg*avg);
870 rt_chain_length_max = max_t(unsigned long,
871 ip_rt_gc_elasticity,
872 (avg + 4*sd) >> FRACT_BITS);
873 }
874 rover = i;
875}
876
877/*
878 * rt_worker_func() is run in process context.
879 * we call rt_check_expire() to scan part of the hash table
880 */
881static void rt_worker_func(struct work_struct *work)
882{
883 rt_check_expire();
884 schedule_delayed_work(&expires_work, ip_rt_gc_interval);
885}
886
887/* 823/*
888 * Pertubation of rt_genid by a small quantity [1..256] 824 * Perturbation of rt_genid by a small quantity [1..256]
889 * Using 8 bits of shuffling ensures we can call rt_cache_invalidate() 825
890 * many times (2^24) without giving recent rt_genid. 826
891 * Jenkins hash is strong enough that little changes of rt_genid are OK. 827
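
The deleted rt_check_expire()/rt_worker_func() pair was the periodic cache scanner; with expiry state moving onto the inet_peer (see the rt_valuable() hunk above), the scan is no longer needed. For reference, the statistic it maintained: has_noalias() contributes ONE (1UL << FRACT_BITS) per counted entry, so chain lengths are kept in 3-bit fixed point, and rt_chain_length_max was mean + 4 standard deviations, floored at ip_rt_gc_elasticity. Restated in plain C, with libm sqrt() standing in for the kernel's int_sqrt():

        #include <math.h>

        #define FRACT_BITS 3
        #define ONE        (1UL << FRACT_BITS)  /* 1.0 in fixed point */

        /* sum/sum2 accumulate per-chain lengths in units of ONE, exactly as
         * the deleted loop did via has_noalias(). */
        unsigned long chain_length_max(unsigned long sum, unsigned long sum2,
                                       unsigned long samples,
                                       unsigned long elasticity)
        {
                unsigned long avg = sum / samples;
                unsigned long sd  = (unsigned long)sqrt((double)(sum2 / samples - avg * avg));
                unsigned long max = (avg + 4 * sd) >> FRACT_BITS;

                return max > elasticity ? max : elasticity;
        }
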
@@ -1078,8 +1014,8 @@ static int slow_chain_length(const struct rtable *head)
1078 return length >> FRACT_BITS; 1014 return length >> FRACT_BITS;
1079} 1015}
1080 1016
1081static int rt_intern_hash(unsigned hash, struct rtable *rt, 1017static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt,
1082 struct rtable **rp, struct sk_buff *skb, int ifindex) 1018 struct sk_buff *skb, int ifindex)
1083{ 1019{
1084 struct rtable *rth, *cand; 1020 struct rtable *rth, *cand;
1085 struct rtable __rcu **rthp, **candp; 1021 struct rtable __rcu **rthp, **candp;
@@ -1120,7 +1056,7 @@ restart:
1120 printk(KERN_WARNING 1056 printk(KERN_WARNING
1121 "Neighbour table failure & not caching routes.\n"); 1057 "Neighbour table failure & not caching routes.\n");
1122 ip_rt_put(rt); 1058 ip_rt_put(rt);
1123 return err; 1059 return ERR_PTR(err);
1124 } 1060 }
1125 } 1061 }
1126 1062
@@ -1137,7 +1073,7 @@ restart:
1137 rt_free(rth); 1073 rt_free(rth);
1138 continue; 1074 continue;
1139 } 1075 }
1140 if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) { 1076 if (compare_keys(rth, rt) && compare_netns(rth, rt)) {
1141 /* Put it first */ 1077 /* Put it first */
1142 *rthp = rth->dst.rt_next; 1078 *rthp = rth->dst.rt_next;
1143 /* 1079 /*
@@ -1157,11 +1093,9 @@ restart:
1157 spin_unlock_bh(rt_hash_lock_addr(hash)); 1093 spin_unlock_bh(rt_hash_lock_addr(hash));
1158 1094
1159 rt_drop(rt); 1095 rt_drop(rt);
1160 if (rp) 1096 if (skb)
1161 *rp = rth;
1162 else
1163 skb_dst_set(skb, &rth->dst); 1097 skb_dst_set(skb, &rth->dst);
1164 return 0; 1098 return rth;
1165 } 1099 }
1166 1100
1167 if (!atomic_read(&rth->dst.__refcnt)) { 1101 if (!atomic_read(&rth->dst.__refcnt)) {
@@ -1202,7 +1136,7 @@ restart:
1202 rt_emergency_hash_rebuild(net); 1136 rt_emergency_hash_rebuild(net);
1203 spin_unlock_bh(rt_hash_lock_addr(hash)); 1137 spin_unlock_bh(rt_hash_lock_addr(hash));
1204 1138
1205 hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, 1139 hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
1206 ifindex, rt_genid(net)); 1140 ifindex, rt_genid(net));
1207 goto restart; 1141 goto restart;
1208 } 1142 }
@@ -1218,7 +1152,7 @@ restart:
1218 1152
1219 if (err != -ENOBUFS) { 1153 if (err != -ENOBUFS) {
1220 rt_drop(rt); 1154 rt_drop(rt);
1221 return err; 1155 return ERR_PTR(err);
1222 } 1156 }
1223 1157
1224 /* Neighbour tables are full and nothing 1158 /* Neighbour tables are full and nothing
@@ -1239,7 +1173,7 @@ restart:
1239 if (net_ratelimit()) 1173 if (net_ratelimit())
1240 printk(KERN_WARNING "ipv4: Neighbour table overflow.\n"); 1174 printk(KERN_WARNING "ipv4: Neighbour table overflow.\n");
1241 rt_drop(rt); 1175 rt_drop(rt);
1242 return -ENOBUFS; 1176 return ERR_PTR(-ENOBUFS);
1243 } 1177 }
1244 } 1178 }
1245 1179
@@ -1257,7 +1191,7 @@ restart:
1257#endif 1191#endif
1258 /* 1192 /*
1259 * Since lookup is lockfree, we must make sure 1193 * Since lookup is lockfree, we must make sure
1260 * previous writes to rt are comitted to memory 1194 * previous writes to rt are committed to memory
1261 * before making rt visible to other CPUS. 1195 * before making rt visible to other CPUS.
1262 */ 1196 */
1263 rcu_assign_pointer(rt_hash_table[hash].chain, rt); 1197 rcu_assign_pointer(rt_hash_table[hash].chain, rt);
@@ -1265,11 +1199,16 @@ restart:
1265 spin_unlock_bh(rt_hash_lock_addr(hash)); 1199 spin_unlock_bh(rt_hash_lock_addr(hash));
1266 1200
1267skip_hashing: 1201skip_hashing:
1268 if (rp) 1202 if (skb)
1269 *rp = rt;
1270 else
1271 skb_dst_set(skb, &rt->dst); 1203 skb_dst_set(skb, &rt->dst);
1272 return 0; 1204 return rt;
1205}
1206
1207static atomic_t __rt_peer_genid = ATOMIC_INIT(0);
1208
1209static u32 rt_peer_genid(void)
1210{
1211 return atomic_read(&__rt_peer_genid);
1273} 1212}
1274 1213
1275void rt_bind_peer(struct rtable *rt, int create) 1214void rt_bind_peer(struct rtable *rt, int create)
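
rt_intern_hash() now returns the cached rtable itself and encodes failures with the kernel's ERR_PTR convention: error codes occupy the top page of the address space, so one pointer carries both outcomes and IS_ERR() is a single compare. The real helpers live in <linux/err.h>; these userspace-compilable versions use the standard bodies:

        #define MAX_ERRNO 4095

        static inline void *ERR_PTR(long error)    { return (void *)error; }
        static inline long  PTR_ERR(const void *p) { return (long)p; }
        static inline int   IS_ERR(const void *p)
        {
                return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
        }
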
@@ -1280,6 +1219,8 @@ void rt_bind_peer(struct rtable *rt, int create)
1280 1219
1281 if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) 1220 if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
1282 inet_putpeer(peer); 1221 inet_putpeer(peer);
1222 else
1223 rt->rt_peer_genid = rt_peer_genid();
1283} 1224}
1284 1225
1285/* 1226/*
@@ -1349,13 +1290,8 @@ static void rt_del(unsigned hash, struct rtable *rt)
1349void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, 1290void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1350 __be32 saddr, struct net_device *dev) 1291 __be32 saddr, struct net_device *dev)
1351{ 1292{
1352 int i, k;
1353 struct in_device *in_dev = __in_dev_get_rcu(dev); 1293 struct in_device *in_dev = __in_dev_get_rcu(dev);
1354 struct rtable *rth; 1294 struct inet_peer *peer;
1355 struct rtable __rcu **rthp;
1356 __be32 skeys[2] = { saddr, 0 };
1357 int ikeys[2] = { dev->ifindex, 0 };
1358 struct netevent_redirect netevent;
1359 struct net *net; 1295 struct net *net;
1360 1296
1361 if (!in_dev) 1297 if (!in_dev)
@@ -1367,9 +1303,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1367 ipv4_is_zeronet(new_gw)) 1303 ipv4_is_zeronet(new_gw))
1368 goto reject_redirect; 1304 goto reject_redirect;
1369 1305
1370 if (!rt_caching(net))
1371 goto reject_redirect;
1372
1373 if (!IN_DEV_SHARED_MEDIA(in_dev)) { 1306 if (!IN_DEV_SHARED_MEDIA(in_dev)) {
1374 if (!inet_addr_onlink(in_dev, new_gw, old_gw)) 1307 if (!inet_addr_onlink(in_dev, new_gw, old_gw))
1375 goto reject_redirect; 1308 goto reject_redirect;
@@ -1380,91 +1313,13 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1380 goto reject_redirect; 1313 goto reject_redirect;
1381 } 1314 }
1382 1315
1383 for (i = 0; i < 2; i++) { 1316 peer = inet_getpeer_v4(daddr, 1);
1384 for (k = 0; k < 2; k++) { 1317 if (peer) {
1385 unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], 1318 peer->redirect_learned.a4 = new_gw;
1386 rt_genid(net));
1387
1388 rthp = &rt_hash_table[hash].chain;
1389
1390 while ((rth = rcu_dereference(*rthp)) != NULL) {
1391 struct rtable *rt;
1392
1393 if (rth->fl.fl4_dst != daddr ||
1394 rth->fl.fl4_src != skeys[i] ||
1395 rth->fl.oif != ikeys[k] ||
1396 rt_is_input_route(rth) ||
1397 rt_is_expired(rth) ||
1398 !net_eq(dev_net(rth->dst.dev), net)) {
1399 rthp = &rth->dst.rt_next;
1400 continue;
1401 }
1402
1403 if (rth->rt_dst != daddr ||
1404 rth->rt_src != saddr ||
1405 rth->dst.error ||
1406 rth->rt_gateway != old_gw ||
1407 rth->dst.dev != dev)
1408 break;
1409
1410 dst_hold(&rth->dst);
1411
1412 rt = dst_alloc(&ipv4_dst_ops);
1413 if (rt == NULL) {
1414 ip_rt_put(rth);
1415 return;
1416 }
1417
1418 /* Copy all the information. */
1419 *rt = *rth;
1420 rt->dst.__use = 1;
1421 atomic_set(&rt->dst.__refcnt, 1);
1422 rt->dst.child = NULL;
1423 if (rt->dst.dev)
1424 dev_hold(rt->dst.dev);
1425 rt->dst.obsolete = -1;
1426 rt->dst.lastuse = jiffies;
1427 rt->dst.path = &rt->dst;
1428 rt->dst.neighbour = NULL;
1429 rt->dst.hh = NULL;
1430#ifdef CONFIG_XFRM
1431 rt->dst.xfrm = NULL;
1432#endif
1433 rt->rt_genid = rt_genid(net);
1434 rt->rt_flags |= RTCF_REDIRECTED;
1435
1436 /* Gateway is different ... */
1437 rt->rt_gateway = new_gw;
1438
1439 /* Redirect received -> path was valid */
1440 dst_confirm(&rth->dst);
1441
1442 if (rt->peer)
1443 atomic_inc(&rt->peer->refcnt);
1444
1445 if (arp_bind_neighbour(&rt->dst) ||
1446 !(rt->dst.neighbour->nud_state &
1447 NUD_VALID)) {
1448 if (rt->dst.neighbour)
1449 neigh_event_send(rt->dst.neighbour, NULL);
1450 ip_rt_put(rth);
1451 rt_drop(rt);
1452 goto do_next;
1453 }
1454 1319
1455 netevent.old = &rth->dst; 1320 inet_putpeer(peer);
1456 netevent.new = &rt->dst;
1457 call_netevent_notifiers(NETEVENT_REDIRECT,
1458 &netevent);
1459 1321
1460 rt_del(hash, rth); 1322 atomic_inc(&__rt_peer_genid);
1461 if (!rt_intern_hash(hash, rt, &rt, NULL, rt->fl.oif))
1462 ip_rt_put(rt);
1463 goto do_next;
1464 }
1465 do_next:
1466 ;
1467 }
1468 } 1323 }
1469 return; 1324 return;
1470 1325
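
This is the heart of the rewrite: instead of walking hash chains and cloning a cache entry per (source, ifindex) combination, a learned redirect is stored once on the shared inet_peer and __rt_peer_genid is bumped; every cached route then revalidates lazily when ipv4_dst_check() (hunk further down) notices a stale rt_peer_genid. A self-contained sketch of that generation-counter scheme:

        #include <stdatomic.h>
        #include <stdbool.h>

        static _Atomic unsigned int peer_genid;

        struct cache_entry {
                unsigned int genid;     /* snapshot taken when last validated */
        };

        void invalidate_all(void)
        {
                atomic_fetch_add(&peer_genid, 1);  /* cf. atomic_inc(&__rt_peer_genid) */
        }

        /* True when the entry may be used as-is; false means the caller must
         * re-check the shared peer data (PMTU, learned gateway) first. */
        bool entry_current(struct cache_entry *e)
        {
                unsigned int now = atomic_load(&peer_genid);

                if (e->genid == now)
                        return true;
                e->genid = now;
                return false;
        }
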
@@ -1488,18 +1343,24 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1488 if (dst->obsolete > 0) { 1343 if (dst->obsolete > 0) {
1489 ip_rt_put(rt); 1344 ip_rt_put(rt);
1490 ret = NULL; 1345 ret = NULL;
1491 } else if ((rt->rt_flags & RTCF_REDIRECTED) || 1346 } else if (rt->rt_flags & RTCF_REDIRECTED) {
1492 (rt->dst.expires && 1347 unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
1493 time_after_eq(jiffies, rt->dst.expires))) { 1348 rt->rt_oif,
1494 unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
1495 rt->fl.oif,
1496 rt_genid(dev_net(dst->dev))); 1349 rt_genid(dev_net(dst->dev)));
1497#if RT_CACHE_DEBUG >= 1 1350#if RT_CACHE_DEBUG >= 1
1498 printk(KERN_DEBUG "ipv4_negative_advice: redirect to %pI4/%02x dropped\n", 1351 printk(KERN_DEBUG "ipv4_negative_advice: redirect to %pI4/%02x dropped\n",
1499 &rt->rt_dst, rt->fl.fl4_tos); 1352 &rt->rt_dst, rt->rt_tos);
1500#endif 1353#endif
1501 rt_del(hash, rt); 1354 rt_del(hash, rt);
1502 ret = NULL; 1355 ret = NULL;
1356 } else if (rt->peer &&
1357 rt->peer->pmtu_expires &&
1358 time_after_eq(jiffies, rt->peer->pmtu_expires)) {
1359 unsigned long orig = rt->peer->pmtu_expires;
1360
1361 if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig)
1362 dst_metric_set(dst, RTAX_MTU,
1363 rt->peer->pmtu_orig);
1503 } 1364 }
1504 } 1365 }
1505 return ret; 1366 return ret;
@@ -1525,6 +1386,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1525{ 1386{
1526 struct rtable *rt = skb_rtable(skb); 1387 struct rtable *rt = skb_rtable(skb);
1527 struct in_device *in_dev; 1388 struct in_device *in_dev;
1389 struct inet_peer *peer;
1528 int log_martians; 1390 int log_martians;
1529 1391
1530 rcu_read_lock(); 1392 rcu_read_lock();
@@ -1536,33 +1398,41 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1536 log_martians = IN_DEV_LOG_MARTIANS(in_dev); 1398 log_martians = IN_DEV_LOG_MARTIANS(in_dev);
1537 rcu_read_unlock(); 1399 rcu_read_unlock();
1538 1400
1401 if (!rt->peer)
1402 rt_bind_peer(rt, 1);
1403 peer = rt->peer;
1404 if (!peer) {
1405 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
1406 return;
1407 }
1408
1539 /* No redirected packets during ip_rt_redirect_silence; 1409 /* No redirected packets during ip_rt_redirect_silence;
1540 * reset the algorithm. 1410 * reset the algorithm.
1541 */ 1411 */
1542 if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence)) 1412 if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
1543 rt->dst.rate_tokens = 0; 1413 peer->rate_tokens = 0;
1544 1414
1545 /* Too many ignored redirects; do not send anything 1415 /* Too many ignored redirects; do not send anything
1546 * set dst.rate_last to the last seen redirected packet. 1416 * set dst.rate_last to the last seen redirected packet.
1547 */ 1417 */
1548 if (rt->dst.rate_tokens >= ip_rt_redirect_number) { 1418 if (peer->rate_tokens >= ip_rt_redirect_number) {
1549 rt->dst.rate_last = jiffies; 1419 peer->rate_last = jiffies;
1550 return; 1420 return;
1551 } 1421 }
1552 1422
1553 /* Check for load limit; set rate_last to the latest sent 1423 /* Check for load limit; set rate_last to the latest sent
1554 * redirect. 1424 * redirect.
1555 */ 1425 */
1556 if (rt->dst.rate_tokens == 0 || 1426 if (peer->rate_tokens == 0 ||
1557 time_after(jiffies, 1427 time_after(jiffies,
1558 (rt->dst.rate_last + 1428 (peer->rate_last +
1559 (ip_rt_redirect_load << rt->dst.rate_tokens)))) { 1429 (ip_rt_redirect_load << peer->rate_tokens)))) {
1560 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); 1430 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
1561 rt->dst.rate_last = jiffies; 1431 peer->rate_last = jiffies;
1562 ++rt->dst.rate_tokens; 1432 ++peer->rate_tokens;
1563#ifdef CONFIG_IP_ROUTE_VERBOSE 1433#ifdef CONFIG_IP_ROUTE_VERBOSE
1564 if (log_martians && 1434 if (log_martians &&
1565 rt->dst.rate_tokens == ip_rt_redirect_number && 1435 peer->rate_tokens == ip_rt_redirect_number &&
1566 net_ratelimit()) 1436 net_ratelimit())
1567 printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", 1437 printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
1568 &rt->rt_src, rt->rt_iif, 1438 &rt->rt_src, rt->rt_iif,
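
Moving rate_last/rate_tokens onto the inet_peer makes redirect throttling per destination host rather than per cache entry. The load check is an exponential backoff: the required quiet interval (ip_rt_redirect_load << rate_tokens) doubles with each redirect sent, until ip_rt_redirect_number silences the peer entirely. A sketch assuming non-wrapping timestamps (the kernel uses time_after() to stay wrap-safe):

        #include <stdbool.h>

        struct redirect_state {
                unsigned long rate_last;    /* timestamp of last event */
                unsigned int  rate_tokens;  /* redirects sent so far */
        };

        bool may_send_redirect(struct redirect_state *p, unsigned long now,
                               unsigned long load, unsigned int max_sends)
        {
                if (p->rate_tokens >= max_sends) {
                        p->rate_last = now;     /* remember it, but stay silent */
                        return false;
                }
                if (p->rate_tokens == 0 ||
                    now > p->rate_last + (load << p->rate_tokens)) {
                        p->rate_last = now;
                        p->rate_tokens++;
                        return true;
                }
                return false;
        }
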
@@ -1574,7 +1444,9 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1574static int ip_error(struct sk_buff *skb) 1444static int ip_error(struct sk_buff *skb)
1575{ 1445{
1576 struct rtable *rt = skb_rtable(skb); 1446 struct rtable *rt = skb_rtable(skb);
1447 struct inet_peer *peer;
1577 unsigned long now; 1448 unsigned long now;
1449 bool send;
1578 int code; 1450 int code;
1579 1451
1580 switch (rt->dst.error) { 1452 switch (rt->dst.error) {
@@ -1594,15 +1466,24 @@ static int ip_error(struct sk_buff *skb)
1594 break; 1466 break;
1595 } 1467 }
1596 1468
1597 now = jiffies; 1469 if (!rt->peer)
1598 rt->dst.rate_tokens += now - rt->dst.rate_last; 1470 rt_bind_peer(rt, 1);
1599 if (rt->dst.rate_tokens > ip_rt_error_burst) 1471 peer = rt->peer;
1600 rt->dst.rate_tokens = ip_rt_error_burst; 1472
1601 rt->dst.rate_last = now; 1473 send = true;
1602 if (rt->dst.rate_tokens >= ip_rt_error_cost) { 1474 if (peer) {
1603 rt->dst.rate_tokens -= ip_rt_error_cost; 1475 now = jiffies;
1604 icmp_send(skb, ICMP_DEST_UNREACH, code, 0); 1476 peer->rate_tokens += now - peer->rate_last;
1477 if (peer->rate_tokens > ip_rt_error_burst)
1478 peer->rate_tokens = ip_rt_error_burst;
1479 peer->rate_last = now;
1480 if (peer->rate_tokens >= ip_rt_error_cost)
1481 peer->rate_tokens -= ip_rt_error_cost;
1482 else
1483 send = false;
1605 } 1484 }
1485 if (send)
1486 icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
1606 1487
1607out: kfree_skb(skb); 1488out: kfree_skb(skb);
1608 return 0; 1489 return 0;
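
ip_error()'s limiter is a classic token bucket, likewise relocated to the inet_peer: tokens accrue one per elapsed jiffy up to ip_rt_error_burst, and each ICMP_DEST_UNREACH costs ip_rt_error_cost; with no peer available, send stays true and the hunk emits unconditionally. A self-contained restatement:

        #include <stdbool.h>

        struct err_bucket {
                unsigned long tokens;
                unsigned long last;     /* timestamp of last update */
        };

        bool may_send_error(struct err_bucket *b, unsigned long now,
                            unsigned long burst, unsigned long cost)
        {
                b->tokens += now - b->last;     /* one token per tick elapsed */
                if (b->tokens > burst)
                        b->tokens = burst;
                b->last = now;
                if (b->tokens >= cost) {
                        b->tokens -= cost;
                        return true;            /* OK to send the ICMP error */
                }
                return false;
        }
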
@@ -1630,88 +1511,140 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1630 unsigned short new_mtu, 1511 unsigned short new_mtu,
1631 struct net_device *dev) 1512 struct net_device *dev)
1632{ 1513{
1633 int i, k;
1634 unsigned short old_mtu = ntohs(iph->tot_len); 1514 unsigned short old_mtu = ntohs(iph->tot_len);
1635 struct rtable *rth;
1636 int ikeys[2] = { dev->ifindex, 0 };
1637 __be32 skeys[2] = { iph->saddr, 0, };
1638 __be32 daddr = iph->daddr;
1639 unsigned short est_mtu = 0; 1515 unsigned short est_mtu = 0;
1516 struct inet_peer *peer;
1640 1517
1641 for (k = 0; k < 2; k++) { 1518 peer = inet_getpeer_v4(iph->daddr, 1);
1642 for (i = 0; i < 2; i++) { 1519 if (peer) {
1643 unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], 1520 unsigned short mtu = new_mtu;
1644 rt_genid(net));
1645
1646 rcu_read_lock();
1647 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
1648 rth = rcu_dereference(rth->dst.rt_next)) {
1649 unsigned short mtu = new_mtu;
1650
1651 if (rth->fl.fl4_dst != daddr ||
1652 rth->fl.fl4_src != skeys[i] ||
1653 rth->rt_dst != daddr ||
1654 rth->rt_src != iph->saddr ||
1655 rth->fl.oif != ikeys[k] ||
1656 rt_is_input_route(rth) ||
1657 dst_metric_locked(&rth->dst, RTAX_MTU) ||
1658 !net_eq(dev_net(rth->dst.dev), net) ||
1659 rt_is_expired(rth))
1660 continue;
1661 1521
1662 if (new_mtu < 68 || new_mtu >= old_mtu) { 1522 if (new_mtu < 68 || new_mtu >= old_mtu) {
1523 /* BSD 4.2 derived systems incorrectly adjust
1524 * tot_len by the IP header length, and report
1525 * a zero MTU in the ICMP message.
1526 */
1527 if (mtu == 0 &&
1528 old_mtu >= 68 + (iph->ihl << 2))
1529 old_mtu -= iph->ihl << 2;
1530 mtu = guess_mtu(old_mtu);
1531 }
1663 1532
1664 /* BSD 4.2 compatibility hack :-( */ 1533 if (mtu < ip_rt_min_pmtu)
1665 if (mtu == 0 && 1534 mtu = ip_rt_min_pmtu;
1666 old_mtu >= dst_mtu(&rth->dst) && 1535 if (!peer->pmtu_expires || mtu < peer->pmtu_learned) {
1667 old_mtu >= 68 + (iph->ihl << 2)) 1536 unsigned long pmtu_expires;
1668 old_mtu -= iph->ihl << 2;
1669 1537
1670 mtu = guess_mtu(old_mtu); 1538 pmtu_expires = jiffies + ip_rt_mtu_expires;
1671 } 1539 if (!pmtu_expires)
1672 if (mtu <= dst_mtu(&rth->dst)) { 1540 pmtu_expires = 1UL;
1673 if (mtu < dst_mtu(&rth->dst)) { 1541
1674 dst_confirm(&rth->dst); 1542 est_mtu = mtu;
1675 if (mtu < ip_rt_min_pmtu) { 1543 peer->pmtu_learned = mtu;
1676 u32 lock = dst_metric(&rth->dst, 1544 peer->pmtu_expires = pmtu_expires;
1677 RTAX_LOCK);
1678 mtu = ip_rt_min_pmtu;
1679 lock |= (1 << RTAX_MTU);
1680 dst_metric_set(&rth->dst, RTAX_LOCK,
1681 lock);
1682 }
1683 dst_metric_set(&rth->dst, RTAX_MTU, mtu);
1684 dst_set_expires(&rth->dst,
1685 ip_rt_mtu_expires);
1686 }
1687 est_mtu = mtu;
1688 }
1689 }
1690 rcu_read_unlock();
1691 } 1545 }
1546
1547 inet_putpeer(peer);
1548
1549 atomic_inc(&__rt_peer_genid);
1692 } 1550 }
1693 return est_mtu ? : new_mtu; 1551 return est_mtu ? : new_mtu;
1694} 1552}
1695 1553
1554static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer)
1555{
1556 unsigned long expires = peer->pmtu_expires;
1557
1558 if (time_before(jiffies, expires)) {
1559 u32 orig_dst_mtu = dst_mtu(dst);
1560 if (peer->pmtu_learned < orig_dst_mtu) {
1561 if (!peer->pmtu_orig)
1562 peer->pmtu_orig = dst_metric_raw(dst, RTAX_MTU);
1563 dst_metric_set(dst, RTAX_MTU, peer->pmtu_learned);
1564 }
1565 } else if (cmpxchg(&peer->pmtu_expires, expires, 0) == expires)
1566 dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig);
1567}
1568
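
Several CPUs can observe the same expired pmtu_expires, so check_peer_pmtu(), like the matching hunks in ipv4_negative_advice() and ipv4_link_failure(), clears it with cmpxchg() so that exactly one of them restores pmtu_orig. Note that 0 doubles as the "never learned" sentinel, which is why the learning paths nudge a computed expiry of 0 up to 1UL. A minimal userspace equivalent of the claim step:

        #include <stdatomic.h>
        #include <stdbool.h>

        /* 'expires' plays the role of peer->pmtu_expires; 0 means "unset". */
        bool claim_pmtu_expiry(_Atomic unsigned long *expires, unsigned long seen)
        {
                unsigned long expected = seen;

                /* Succeeds for exactly one caller; the rest see 0 and do
                 * nothing, so the original MTU is written back only once. */
                return atomic_compare_exchange_strong(expires, &expected, 0UL);
        }
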
1696static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) 1569static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1697{ 1570{
1698 if (dst_mtu(dst) > mtu && mtu >= 68 && 1571 struct rtable *rt = (struct rtable *) dst;
1699 !(dst_metric_locked(dst, RTAX_MTU))) { 1572 struct inet_peer *peer;
1700 if (mtu < ip_rt_min_pmtu) { 1573
1701 u32 lock = dst_metric(dst, RTAX_LOCK); 1574 dst_confirm(dst);
1575
1576 if (!rt->peer)
1577 rt_bind_peer(rt, 1);
1578 peer = rt->peer;
1579 if (peer) {
1580 if (mtu < ip_rt_min_pmtu)
1702 mtu = ip_rt_min_pmtu; 1581 mtu = ip_rt_min_pmtu;
1703 dst_metric_set(dst, RTAX_LOCK, lock | (1 << RTAX_MTU)); 1582 if (!peer->pmtu_expires || mtu < peer->pmtu_learned) {
1583 unsigned long pmtu_expires;
1584
1585 pmtu_expires = jiffies + ip_rt_mtu_expires;
1586 if (!pmtu_expires)
1587 pmtu_expires = 1UL;
1588
1589 peer->pmtu_learned = mtu;
1590 peer->pmtu_expires = pmtu_expires;
1591
1592 atomic_inc(&__rt_peer_genid);
1593 rt->rt_peer_genid = rt_peer_genid();
1704 } 1594 }
1705 dst_metric_set(dst, RTAX_MTU, mtu); 1595 check_peer_pmtu(dst, peer);
1706 dst_set_expires(dst, ip_rt_mtu_expires); 1596 }
1707 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); 1597}
1598
1599static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
1600{
1601 struct rtable *rt = (struct rtable *) dst;
1602 __be32 orig_gw = rt->rt_gateway;
1603
1604 dst_confirm(&rt->dst);
1605
1606 neigh_release(rt->dst.neighbour);
1607 rt->dst.neighbour = NULL;
1608
1609 rt->rt_gateway = peer->redirect_learned.a4;
1610 if (arp_bind_neighbour(&rt->dst) ||
1611 !(rt->dst.neighbour->nud_state & NUD_VALID)) {
1612 if (rt->dst.neighbour)
1613 neigh_event_send(rt->dst.neighbour, NULL);
1614 rt->rt_gateway = orig_gw;
1615 return -EAGAIN;
1616 } else {
1617 rt->rt_flags |= RTCF_REDIRECTED;
1618 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE,
1619 rt->dst.neighbour);
1708 } 1620 }
1621 return 0;
1709} 1622}
1710 1623
1711static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) 1624static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1712{ 1625{
1713 if (rt_is_expired((struct rtable *)dst)) 1626 struct rtable *rt = (struct rtable *) dst;
1627
1628 if (rt_is_expired(rt))
1714 return NULL; 1629 return NULL;
1630 if (rt->rt_peer_genid != rt_peer_genid()) {
1631 struct inet_peer *peer;
1632
1633 if (!rt->peer)
1634 rt_bind_peer(rt, 0);
1635
1636 peer = rt->peer;
1637 if (peer && peer->pmtu_expires)
1638 check_peer_pmtu(dst, peer);
1639
1640 if (peer && peer->redirect_learned.a4 &&
1641 peer->redirect_learned.a4 != rt->rt_gateway) {
1642 if (check_peer_redir(dst, peer))
1643 return NULL;
1644 }
1645
1646 rt->rt_peer_genid = rt_peer_genid();
1647 }
1715 return dst; 1648 return dst;
1716} 1649}
1717 1650
@@ -1720,6 +1653,10 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
1720 struct rtable *rt = (struct rtable *) dst; 1653 struct rtable *rt = (struct rtable *) dst;
1721 struct inet_peer *peer = rt->peer; 1654 struct inet_peer *peer = rt->peer;
1722 1655
1656 if (rt->fi) {
1657 fib_info_put(rt->fi);
1658 rt->fi = NULL;
1659 }
1723 if (peer) { 1660 if (peer) {
1724 rt->peer = NULL; 1661 rt->peer = NULL;
1725 inet_putpeer(peer); 1662 inet_putpeer(peer);
@@ -1734,8 +1671,14 @@ static void ipv4_link_failure(struct sk_buff *skb)
1734 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); 1671 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
1735 1672
1736 rt = skb_rtable(skb); 1673 rt = skb_rtable(skb);
1737 if (rt) 1674 if (rt &&
1738 dst_set_expires(&rt->dst, 0); 1675 rt->peer &&
1676 rt->peer->pmtu_expires) {
1677 unsigned long orig = rt->peer->pmtu_expires;
1678
1679 if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig)
1680 dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig);
1681 }
1739} 1682}
1740 1683
1741static int ip_rt_bug(struct sk_buff *skb) 1684static int ip_rt_bug(struct sk_buff *skb)
@@ -1764,9 +1707,18 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
1764 if (rt_is_output_route(rt)) 1707 if (rt_is_output_route(rt))
1765 src = rt->rt_src; 1708 src = rt->rt_src;
1766 else { 1709 else {
1710 struct flowi4 fl4 = {
1711 .daddr = rt->rt_key_dst,
1712 .saddr = rt->rt_key_src,
1713 .flowi4_tos = rt->rt_tos,
1714 .flowi4_oif = rt->rt_oif,
1715 .flowi4_iif = rt->rt_iif,
1716 .flowi4_mark = rt->rt_mark,
1717 };
1718
1767 rcu_read_lock(); 1719 rcu_read_lock();
1768 if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) 1720 if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
1769 src = FIB_RES_PREFSRC(res); 1721 src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
1770 else 1722 else
1771 src = inet_select_addr(rt->dst.dev, rt->rt_gateway, 1723 src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
1772 RT_SCOPE_UNIVERSE); 1724 RT_SCOPE_UNIVERSE);
@@ -1775,7 +1727,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
1775 memcpy(addr, &src, 4); 1727 memcpy(addr, &src, 4);
1776} 1728}
1777 1729
1778#ifdef CONFIG_NET_CLS_ROUTE 1730#ifdef CONFIG_IP_ROUTE_CLASSID
1779static void set_class_tag(struct rtable *rt, u32 tag) 1731static void set_class_tag(struct rtable *rt, u32 tag)
1780{ 1732{
1781 if (!(rt->dst.tclassid & 0xFFFF)) 1733 if (!(rt->dst.tclassid & 0xFFFF))
@@ -1815,17 +1767,54 @@ static unsigned int ipv4_default_mtu(const struct dst_entry *dst)
1815 return mtu; 1767 return mtu;
1816} 1768}
1817 1769
1818static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) 1770static void rt_init_metrics(struct rtable *rt, const struct flowi4 *oldflp4,
1771 struct fib_info *fi)
1772{
1773 struct inet_peer *peer;
1774 int create = 0;
1775
1776 /* If a peer entry exists for this destination, we must hook
1777 * it up in order to get at cached metrics.
1778 */
1779 if (oldflp4 && (oldflp4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS))
1780 create = 1;
1781
1782 rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create);
1783 if (peer) {
1784 rt->rt_peer_genid = rt_peer_genid();
1785 if (inet_metrics_new(peer))
1786 memcpy(peer->metrics, fi->fib_metrics,
1787 sizeof(u32) * RTAX_MAX);
1788 dst_init_metrics(&rt->dst, peer->metrics, false);
1789
1790 if (peer->pmtu_expires)
1791 check_peer_pmtu(&rt->dst, peer);
1792 if (peer->redirect_learned.a4 &&
1793 peer->redirect_learned.a4 != rt->rt_gateway) {
1794 rt->rt_gateway = peer->redirect_learned.a4;
1795 rt->rt_flags |= RTCF_REDIRECTED;
1796 }
1797 } else {
1798 if (fi->fib_metrics != (u32 *) dst_default_metrics) {
1799 rt->fi = fi;
1800 atomic_inc(&fi->fib_clntref);
1801 }
1802 dst_init_metrics(&rt->dst, fi->fib_metrics, true);
1803 }
1804}
1805
1806static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *oldflp4,
1807 const struct fib_result *res,
1808 struct fib_info *fi, u16 type, u32 itag)
1819{ 1809{
1820 struct dst_entry *dst = &rt->dst; 1810 struct dst_entry *dst = &rt->dst;
1821 struct fib_info *fi = res->fi;
1822 1811
1823 if (fi) { 1812 if (fi) {
1824 if (FIB_RES_GW(*res) && 1813 if (FIB_RES_GW(*res) &&
1825 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) 1814 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
1826 rt->rt_gateway = FIB_RES_GW(*res); 1815 rt->rt_gateway = FIB_RES_GW(*res);
1827 dst_import_metrics(dst, fi->fib_metrics); 1816 rt_init_metrics(rt, oldflp4, fi);
1828#ifdef CONFIG_NET_CLS_ROUTE 1817#ifdef CONFIG_IP_ROUTE_CLASSID
1829 dst->tclassid = FIB_RES_NH(*res).nh_tclassid; 1818 dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
1830#endif 1819#endif
1831 } 1820 }
@@ -1835,13 +1824,26 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
1835 if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40) 1824 if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40)
1836 dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40); 1825 dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40);
1837 1826
1838#ifdef CONFIG_NET_CLS_ROUTE 1827#ifdef CONFIG_IP_ROUTE_CLASSID
1839#ifdef CONFIG_IP_MULTIPLE_TABLES 1828#ifdef CONFIG_IP_MULTIPLE_TABLES
1840 set_class_tag(rt, fib_rules_tclass(res)); 1829 set_class_tag(rt, fib_rules_tclass(res));
1841#endif 1830#endif
1842 set_class_tag(rt, itag); 1831 set_class_tag(rt, itag);
1843#endif 1832#endif
1844 rt->rt_type = res->type; 1833 rt->rt_type = type;
1834}
1835
1836static struct rtable *rt_dst_alloc(bool nopolicy, bool noxfrm)
1837{
1838 struct rtable *rt = dst_alloc(&ipv4_dst_ops, 1);
1839 if (rt) {
1840 rt->dst.obsolete = -1;
1841
1842 rt->dst.flags = DST_HOST |
1843 (nopolicy ? DST_NOPOLICY : 0) |
1844 (noxfrm ? DST_NOXFRM : 0);
1845 }
1846 return rt;
1845} 1847}
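
rt_dst_alloc() centralizes what each allocation site used to open-code. Judging by the atomic_set(&rth->dst.__refcnt, 1) calls deleted below, the second argument to dst_alloc() is presumably the initial reference count, and obsolete = -1 plus the DST_HOST/DST_NOPOLICY/DST_NOXFRM flag word move here as well. Callers pass the two per-device predicates directly, as in the __mkroute_input hunk below:

        rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY),
                           IN_DEV_CONF_GET(out_dev, NOXFRM));
        if (!rth) {
                err = -ENOBUFS;         /* the error path varies by caller */
                goto cleanup;
        }
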
1846 1848
1847/* called in rcu_read_lock() section */ 1849/* called in rcu_read_lock() section */
@@ -1874,31 +1876,26 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1874 if (err < 0) 1876 if (err < 0)
1875 goto e_err; 1877 goto e_err;
1876 } 1878 }
1877 rth = dst_alloc(&ipv4_dst_ops); 1879 rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
1878 if (!rth) 1880 if (!rth)
1879 goto e_nobufs; 1881 goto e_nobufs;
1880 1882
1881 rth->dst.output = ip_rt_bug; 1883 rth->dst.output = ip_rt_bug;
1882 rth->dst.obsolete = -1;
1883 1884
1884 atomic_set(&rth->dst.__refcnt, 1); 1885 rth->rt_key_dst = daddr;
1885 rth->dst.flags= DST_HOST;
1886 if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
1887 rth->dst.flags |= DST_NOPOLICY;
1888 rth->fl.fl4_dst = daddr;
1889 rth->rt_dst = daddr; 1886 rth->rt_dst = daddr;
1890 rth->fl.fl4_tos = tos; 1887 rth->rt_tos = tos;
1891 rth->fl.mark = skb->mark; 1888 rth->rt_mark = skb->mark;
1892 rth->fl.fl4_src = saddr; 1889 rth->rt_key_src = saddr;
1893 rth->rt_src = saddr; 1890 rth->rt_src = saddr;
1894#ifdef CONFIG_NET_CLS_ROUTE 1891#ifdef CONFIG_IP_ROUTE_CLASSID
1895 rth->dst.tclassid = itag; 1892 rth->dst.tclassid = itag;
1896#endif 1893#endif
1897 rth->rt_iif = 1894 rth->rt_route_iif = dev->ifindex;
1898 rth->fl.iif = dev->ifindex; 1895 rth->rt_iif = dev->ifindex;
1899 rth->dst.dev = init_net.loopback_dev; 1896 rth->dst.dev = init_net.loopback_dev;
1900 dev_hold(rth->dst.dev); 1897 dev_hold(rth->dst.dev);
1901 rth->fl.oif = 0; 1898 rth->rt_oif = 0;
1902 rth->rt_gateway = daddr; 1899 rth->rt_gateway = daddr;
1903 rth->rt_spec_dst= spec_dst; 1900 rth->rt_spec_dst= spec_dst;
1904 rth->rt_genid = rt_genid(dev_net(dev)); 1901 rth->rt_genid = rt_genid(dev_net(dev));
@@ -1916,7 +1913,10 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1916 RT_CACHE_STAT_INC(in_slow_mc); 1913 RT_CACHE_STAT_INC(in_slow_mc);
1917 1914
1918 hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); 1915 hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
1919 return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex); 1916 rth = rt_intern_hash(hash, rth, skb, dev->ifindex);
1917 err = 0;
1918 if (IS_ERR(rth))
1919 err = PTR_ERR(rth);
1920 1920
1921e_nobufs: 1921e_nobufs:
1922 return -ENOBUFS; 1922 return -ENOBUFS;
@@ -1959,7 +1959,7 @@ static void ip_handle_martian_source(struct net_device *dev,
1959 1959
1960/* called in rcu_read_lock() section */ 1960/* called in rcu_read_lock() section */
1961static int __mkroute_input(struct sk_buff *skb, 1961static int __mkroute_input(struct sk_buff *skb,
1962 struct fib_result *res, 1962 const struct fib_result *res,
1963 struct in_device *in_dev, 1963 struct in_device *in_dev,
1964 __be32 daddr, __be32 saddr, u32 tos, 1964 __be32 daddr, __be32 saddr, u32 tos,
1965 struct rtable **result) 1965 struct rtable **result)
@@ -2013,39 +2013,32 @@ static int __mkroute_input(struct sk_buff *skb,
2013 } 2013 }
2014 } 2014 }
2015 2015
2016 2016 rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY),
2017 rth = dst_alloc(&ipv4_dst_ops); 2017 IN_DEV_CONF_GET(out_dev, NOXFRM));
2018 if (!rth) { 2018 if (!rth) {
2019 err = -ENOBUFS; 2019 err = -ENOBUFS;
2020 goto cleanup; 2020 goto cleanup;
2021 } 2021 }
2022 2022
2023 atomic_set(&rth->dst.__refcnt, 1); 2023 rth->rt_key_dst = daddr;
2024 rth->dst.flags= DST_HOST;
2025 if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
2026 rth->dst.flags |= DST_NOPOLICY;
2027 if (IN_DEV_CONF_GET(out_dev, NOXFRM))
2028 rth->dst.flags |= DST_NOXFRM;
2029 rth->fl.fl4_dst = daddr;
2030 rth->rt_dst = daddr; 2024 rth->rt_dst = daddr;
2031 rth->fl.fl4_tos = tos; 2025 rth->rt_tos = tos;
2032 rth->fl.mark = skb->mark; 2026 rth->rt_mark = skb->mark;
2033 rth->fl.fl4_src = saddr; 2027 rth->rt_key_src = saddr;
2034 rth->rt_src = saddr; 2028 rth->rt_src = saddr;
2035 rth->rt_gateway = daddr; 2029 rth->rt_gateway = daddr;
2036 rth->rt_iif = 2030 rth->rt_route_iif = in_dev->dev->ifindex;
2037 rth->fl.iif = in_dev->dev->ifindex; 2031 rth->rt_iif = in_dev->dev->ifindex;
2038 rth->dst.dev = (out_dev)->dev; 2032 rth->dst.dev = (out_dev)->dev;
2039 dev_hold(rth->dst.dev); 2033 dev_hold(rth->dst.dev);
2040 rth->fl.oif = 0; 2034 rth->rt_oif = 0;
2041 rth->rt_spec_dst= spec_dst; 2035 rth->rt_spec_dst= spec_dst;
2042 2036
2043 rth->dst.obsolete = -1;
2044 rth->dst.input = ip_forward; 2037 rth->dst.input = ip_forward;
2045 rth->dst.output = ip_output; 2038 rth->dst.output = ip_output;
2046 rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); 2039 rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
2047 2040
2048 rt_set_nexthop(rth, res, itag); 2041 rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag);
2049 2042
2050 rth->rt_flags = flags; 2043 rth->rt_flags = flags;
2051 2044
@@ -2057,7 +2050,7 @@ static int __mkroute_input(struct sk_buff *skb,
2057 2050
2058static int ip_mkroute_input(struct sk_buff *skb, 2051static int ip_mkroute_input(struct sk_buff *skb,
2059 struct fib_result *res, 2052 struct fib_result *res,
2060 const struct flowi *fl, 2053 const struct flowi4 *fl4,
2061 struct in_device *in_dev, 2054 struct in_device *in_dev,
2062 __be32 daddr, __be32 saddr, u32 tos) 2055 __be32 daddr, __be32 saddr, u32 tos)
2063{ 2056{
@@ -2066,8 +2059,8 @@ static int ip_mkroute_input(struct sk_buff *skb,
2066 unsigned hash; 2059 unsigned hash;
2067 2060
2068#ifdef CONFIG_IP_ROUTE_MULTIPATH 2061#ifdef CONFIG_IP_ROUTE_MULTIPATH
2069 if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0) 2062 if (res->fi && res->fi->fib_nhs > 1)
2070 fib_select_multipath(fl, res); 2063 fib_select_multipath(res);
2071#endif 2064#endif
2072 2065
2073 /* create a routing cache entry */ 2066 /* create a routing cache entry */
@@ -2076,9 +2069,12 @@ static int ip_mkroute_input(struct sk_buff *skb,
2076 return err; 2069 return err;
2077 2070
2078 /* put it into the cache */ 2071 /* put it into the cache */
2079 hash = rt_hash(daddr, saddr, fl->iif, 2072 hash = rt_hash(daddr, saddr, fl4->flowi4_iif,
2080 rt_genid(dev_net(rth->dst.dev))); 2073 rt_genid(dev_net(rth->dst.dev)));
2081 return rt_intern_hash(hash, rth, NULL, skb, fl->iif); 2074 rth = rt_intern_hash(hash, rth, skb, fl4->flowi4_iif);
2075 if (IS_ERR(rth))
2076 return PTR_ERR(rth);
2077 return 0;
2082} 2078}
2083 2079
2084/* 2080/*
@@ -2097,12 +2093,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2097{ 2093{
2098 struct fib_result res; 2094 struct fib_result res;
2099 struct in_device *in_dev = __in_dev_get_rcu(dev); 2095 struct in_device *in_dev = __in_dev_get_rcu(dev);
2100 struct flowi fl = { .fl4_dst = daddr, 2096 struct flowi4 fl4;
2101 .fl4_src = saddr,
2102 .fl4_tos = tos,
2103 .fl4_scope = RT_SCOPE_UNIVERSE,
2104 .mark = skb->mark,
2105 .iif = dev->ifindex };
2106 unsigned flags = 0; 2097 unsigned flags = 0;
2107 u32 itag = 0; 2098 u32 itag = 0;
2108 struct rtable * rth; 2099 struct rtable * rth;
@@ -2139,7 +2130,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2139 /* 2130 /*
2140 * Now we are ready to route packet. 2131 * Now we are ready to route packet.
2141 */ 2132 */
2142 err = fib_lookup(net, &fl, &res); 2133 fl4.flowi4_oif = 0;
2134 fl4.flowi4_iif = dev->ifindex;
2135 fl4.flowi4_mark = skb->mark;
2136 fl4.flowi4_tos = tos;
2137 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
2138 fl4.daddr = daddr;
2139 fl4.saddr = saddr;
2140 err = fib_lookup(net, &fl4, &res);
2143 if (err != 0) { 2141 if (err != 0) {
2144 if (!IN_DEV_FORWARD(in_dev)) 2142 if (!IN_DEV_FORWARD(in_dev))
2145 goto e_hostunreach; 2143 goto e_hostunreach;
@@ -2168,7 +2166,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2168 if (res.type != RTN_UNICAST) 2166 if (res.type != RTN_UNICAST)
2169 goto martian_destination; 2167 goto martian_destination;
2170 2168
2171 err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); 2169 err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
2172out: return err; 2170out: return err;
2173 2171
2174brd_input: 2172brd_input:
@@ -2190,29 +2188,24 @@ brd_input:
2190 RT_CACHE_STAT_INC(in_brd); 2188 RT_CACHE_STAT_INC(in_brd);
2191 2189
2192local_input: 2190local_input:
2193 rth = dst_alloc(&ipv4_dst_ops); 2191 rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
2194 if (!rth) 2192 if (!rth)
2195 goto e_nobufs; 2193 goto e_nobufs;
2196 2194
2197 rth->dst.output= ip_rt_bug; 2195 rth->dst.output= ip_rt_bug;
2198 rth->dst.obsolete = -1;
2199 rth->rt_genid = rt_genid(net); 2196 rth->rt_genid = rt_genid(net);
2200 2197
2201 atomic_set(&rth->dst.__refcnt, 1); 2198 rth->rt_key_dst = daddr;
2202 rth->dst.flags= DST_HOST;
2203 if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
2204 rth->dst.flags |= DST_NOPOLICY;
2205 rth->fl.fl4_dst = daddr;
2206 rth->rt_dst = daddr; 2199 rth->rt_dst = daddr;
2207 rth->fl.fl4_tos = tos; 2200 rth->rt_tos = tos;
2208 rth->fl.mark = skb->mark; 2201 rth->rt_mark = skb->mark;
2209 rth->fl.fl4_src = saddr; 2202 rth->rt_key_src = saddr;
2210 rth->rt_src = saddr; 2203 rth->rt_src = saddr;
2211#ifdef CONFIG_NET_CLS_ROUTE 2204#ifdef CONFIG_IP_ROUTE_CLASSID
2212 rth->dst.tclassid = itag; 2205 rth->dst.tclassid = itag;
2213#endif 2206#endif
2214 rth->rt_iif = 2207 rth->rt_route_iif = dev->ifindex;
2215 rth->fl.iif = dev->ifindex; 2208 rth->rt_iif = dev->ifindex;
2216 rth->dst.dev = net->loopback_dev; 2209 rth->dst.dev = net->loopback_dev;
2217 dev_hold(rth->dst.dev); 2210 dev_hold(rth->dst.dev);
2218 rth->rt_gateway = daddr; 2211 rth->rt_gateway = daddr;
@@ -2225,8 +2218,11 @@ local_input:
2225 rth->rt_flags &= ~RTCF_LOCAL; 2218 rth->rt_flags &= ~RTCF_LOCAL;
2226 } 2219 }
2227 rth->rt_type = res.type; 2220 rth->rt_type = res.type;
2228 hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); 2221 hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net));
2229 err = rt_intern_hash(hash, rth, NULL, skb, fl.iif); 2222 rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif);
2223 err = 0;
2224 if (IS_ERR(rth))
2225 err = PTR_ERR(rth);
2230 goto out; 2226 goto out;
2231 2227
2232no_route: 2228no_route:
@@ -2288,12 +2284,12 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2288 2284
2289 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; 2285 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
2290 rth = rcu_dereference(rth->dst.rt_next)) { 2286 rth = rcu_dereference(rth->dst.rt_next)) {
2291 if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | 2287 if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) |
2292 ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | 2288 ((__force u32)rth->rt_key_src ^ (__force u32)saddr) |
2293 (rth->fl.iif ^ iif) | 2289 (rth->rt_iif ^ iif) |
2294 rth->fl.oif | 2290 rth->rt_oif |
2295 (rth->fl.fl4_tos ^ tos)) == 0 && 2291 (rth->rt_tos ^ tos)) == 0 &&
2296 rth->fl.mark == skb->mark && 2292 rth->rt_mark == skb->mark &&
2297 net_eq(dev_net(rth->dst.dev), net) && 2293 net_eq(dev_net(rth->dst.dev), net) &&
2298 !rt_is_expired(rth)) { 2294 !rt_is_expired(rth)) {
2299 if (noref) { 2295 if (noref) {
@@ -2326,8 +2322,8 @@ skip_cache:
2326 struct in_device *in_dev = __in_dev_get_rcu(dev); 2322 struct in_device *in_dev = __in_dev_get_rcu(dev);
2327 2323
2328 if (in_dev) { 2324 if (in_dev) {
2329 int our = ip_check_mc(in_dev, daddr, saddr, 2325 int our = ip_check_mc_rcu(in_dev, daddr, saddr,
2330 ip_hdr(skb)->protocol); 2326 ip_hdr(skb)->protocol);
2331 if (our 2327 if (our
2332#ifdef CONFIG_IP_MROUTE 2328#ifdef CONFIG_IP_MROUTE
2333 || 2329 ||
@@ -2351,98 +2347,92 @@ skip_cache:
2351EXPORT_SYMBOL(ip_route_input_common); 2347EXPORT_SYMBOL(ip_route_input_common);
2352 2348
2353/* called with rcu_read_lock() */ 2349/* called with rcu_read_lock() */
2354static int __mkroute_output(struct rtable **result, 2350static struct rtable *__mkroute_output(const struct fib_result *res,
2355 struct fib_result *res, 2351 const struct flowi4 *fl4,
2356 const struct flowi *fl, 2352 const struct flowi4 *oldflp4,
2357 const struct flowi *oldflp, 2353 struct net_device *dev_out,
2358 struct net_device *dev_out, 2354 unsigned int flags)
2359 unsigned flags)
2360{ 2355{
2361 struct rtable *rth; 2356 struct fib_info *fi = res->fi;
2357 u32 tos = RT_FL_TOS(oldflp4);
2362 struct in_device *in_dev; 2358 struct in_device *in_dev;
2363 u32 tos = RT_FL_TOS(oldflp); 2359 u16 type = res->type;
2360 struct rtable *rth;
2364 2361
2365 if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK)) 2362 if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
2366 return -EINVAL; 2363 return ERR_PTR(-EINVAL);
2367 2364
2368 if (ipv4_is_lbcast(fl->fl4_dst)) 2365 if (ipv4_is_lbcast(fl4->daddr))
2369 res->type = RTN_BROADCAST; 2366 type = RTN_BROADCAST;
2370 else if (ipv4_is_multicast(fl->fl4_dst)) 2367 else if (ipv4_is_multicast(fl4->daddr))
2371 res->type = RTN_MULTICAST; 2368 type = RTN_MULTICAST;
2372 else if (ipv4_is_zeronet(fl->fl4_dst)) 2369 else if (ipv4_is_zeronet(fl4->daddr))
2373 return -EINVAL; 2370 return ERR_PTR(-EINVAL);
2374 2371
2375 if (dev_out->flags & IFF_LOOPBACK) 2372 if (dev_out->flags & IFF_LOOPBACK)
2376 flags |= RTCF_LOCAL; 2373 flags |= RTCF_LOCAL;
2377 2374
2378 in_dev = __in_dev_get_rcu(dev_out); 2375 in_dev = __in_dev_get_rcu(dev_out);
2379 if (!in_dev) 2376 if (!in_dev)
2380 return -EINVAL; 2377 return ERR_PTR(-EINVAL);
2381 2378
2382 if (res->type == RTN_BROADCAST) { 2379 if (type == RTN_BROADCAST) {
2383 flags |= RTCF_BROADCAST | RTCF_LOCAL; 2380 flags |= RTCF_BROADCAST | RTCF_LOCAL;
2384 res->fi = NULL; 2381 fi = NULL;
2385 } else if (res->type == RTN_MULTICAST) { 2382 } else if (type == RTN_MULTICAST) {
2386 flags |= RTCF_MULTICAST | RTCF_LOCAL; 2383 flags |= RTCF_MULTICAST | RTCF_LOCAL;
2387 if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src, 2384 if (!ip_check_mc_rcu(in_dev, oldflp4->daddr, oldflp4->saddr,
2388 oldflp->proto)) 2385 oldflp4->flowi4_proto))
2389 flags &= ~RTCF_LOCAL; 2386 flags &= ~RTCF_LOCAL;
2390 /* If multicast route does not exist, use 2387 /* If multicast route does not exist, use
2391 * default one, but do not gateway in this case. 2388 * default one, but do not gateway in this case.
2392 * Yes, it is a hack. 2389 * Yes, it is a hack.
2393 */ 2390 */
2394 if (res->fi && res->prefixlen < 4) 2391 if (fi && res->prefixlen < 4)
2395 res->fi = NULL; 2392 fi = NULL;
2396 } 2393 }
2397 2394
2398 2395 rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY),
2399 rth = dst_alloc(&ipv4_dst_ops); 2396 IN_DEV_CONF_GET(in_dev, NOXFRM));
2400 if (!rth) 2397 if (!rth)
2401 return -ENOBUFS; 2398 return ERR_PTR(-ENOBUFS);
2402 2399
2403 atomic_set(&rth->dst.__refcnt, 1); 2400 rth->rt_key_dst = oldflp4->daddr;
2404 rth->dst.flags= DST_HOST; 2401 rth->rt_tos = tos;
2405 if (IN_DEV_CONF_GET(in_dev, NOXFRM)) 2402 rth->rt_key_src = oldflp4->saddr;
2406 rth->dst.flags |= DST_NOXFRM; 2403 rth->rt_oif = oldflp4->flowi4_oif;
2407 if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) 2404 rth->rt_mark = oldflp4->flowi4_mark;
2408 rth->dst.flags |= DST_NOPOLICY; 2405 rth->rt_dst = fl4->daddr;
2409 2406 rth->rt_src = fl4->saddr;
2410 rth->fl.fl4_dst = oldflp->fl4_dst; 2407 rth->rt_route_iif = 0;
2411 rth->fl.fl4_tos = tos; 2408 rth->rt_iif = oldflp4->flowi4_oif ? : dev_out->ifindex;
2412 rth->fl.fl4_src = oldflp->fl4_src;
2413 rth->fl.oif = oldflp->oif;
2414 rth->fl.mark = oldflp->mark;
2415 rth->rt_dst = fl->fl4_dst;
2416 rth->rt_src = fl->fl4_src;
2417 rth->rt_iif = oldflp->oif ? : dev_out->ifindex;
2418 /* get references to the devices that are to be held by the routing 2409 /* get references to the devices that are to be held by the routing
2419 cache entry */ 2410 cache entry */
2420 rth->dst.dev = dev_out; 2411 rth->dst.dev = dev_out;
2421 dev_hold(dev_out); 2412 dev_hold(dev_out);
2422 rth->rt_gateway = fl->fl4_dst; 2413 rth->rt_gateway = fl4->daddr;
2423 rth->rt_spec_dst= fl->fl4_src; 2414 rth->rt_spec_dst= fl4->saddr;
2424 2415
2425 rth->dst.output=ip_output; 2416 rth->dst.output=ip_output;
2426 rth->dst.obsolete = -1;
2427 rth->rt_genid = rt_genid(dev_net(dev_out)); 2417 rth->rt_genid = rt_genid(dev_net(dev_out));
2428 2418
2429 RT_CACHE_STAT_INC(out_slow_tot); 2419 RT_CACHE_STAT_INC(out_slow_tot);
2430 2420
2431 if (flags & RTCF_LOCAL) { 2421 if (flags & RTCF_LOCAL) {
2432 rth->dst.input = ip_local_deliver; 2422 rth->dst.input = ip_local_deliver;
2433 rth->rt_spec_dst = fl->fl4_dst; 2423 rth->rt_spec_dst = fl4->daddr;
2434 } 2424 }
2435 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { 2425 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
2436 rth->rt_spec_dst = fl->fl4_src; 2426 rth->rt_spec_dst = fl4->saddr;
2437 if (flags & RTCF_LOCAL && 2427 if (flags & RTCF_LOCAL &&
2438 !(dev_out->flags & IFF_LOOPBACK)) { 2428 !(dev_out->flags & IFF_LOOPBACK)) {
2439 rth->dst.output = ip_mc_output; 2429 rth->dst.output = ip_mc_output;
2440 RT_CACHE_STAT_INC(out_slow_mc); 2430 RT_CACHE_STAT_INC(out_slow_mc);
2441 } 2431 }
2442#ifdef CONFIG_IP_MROUTE 2432#ifdef CONFIG_IP_MROUTE
2443 if (res->type == RTN_MULTICAST) { 2433 if (type == RTN_MULTICAST) {
2444 if (IN_DEV_MFORWARD(in_dev) && 2434 if (IN_DEV_MFORWARD(in_dev) &&
2445 !ipv4_is_local_multicast(oldflp->fl4_dst)) { 2435 !ipv4_is_local_multicast(oldflp4->daddr)) {
2446 rth->dst.input = ip_mr_input; 2436 rth->dst.input = ip_mr_input;
2447 rth->dst.output = ip_mc_output; 2437 rth->dst.output = ip_mc_output;
2448 } 2438 }
@@ -2450,31 +2440,10 @@ static int __mkroute_output(struct rtable **result,
2450#endif 2440#endif
2451 } 2441 }
2452 2442
2453 rt_set_nexthop(rth, res, 0); 2443 rt_set_nexthop(rth, oldflp4, res, fi, type, 0);
2454 2444
2455 rth->rt_flags = flags; 2445 rth->rt_flags = flags;
2456 *result = rth; 2446 return rth;
2457 return 0;
2458}
2459
2460/* called with rcu_read_lock() */
2461static int ip_mkroute_output(struct rtable **rp,
2462 struct fib_result *res,
2463 const struct flowi *fl,
2464 const struct flowi *oldflp,
2465 struct net_device *dev_out,
2466 unsigned flags)
2467{
2468 struct rtable *rth = NULL;
2469 int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
2470 unsigned hash;
2471 if (err == 0) {
2472 hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif,
2473 rt_genid(dev_net(dev_out)));
2474 err = rt_intern_hash(hash, rth, rp, NULL, oldflp->oif);
2475 }
2476
2477 return err;
2478} 2447}
2479 2448
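
With ip_mkroute_output() folded away, __mkroute_output() hands the rtable (or an ERR_PTR) straight back, and the hash/intern step presumably moves into ip_route_output_slow()'s make_route path, mirroring ip_mkroute_input() above. The excerpt is cut off before that label, so treat this reconstruction as an assumption built only from the hunks shown here:

        rth = __mkroute_output(&res, &fl4, oldflp4, dev_out, flags);
        if (!IS_ERR(rth)) {
                unsigned int hash;

                hash = rt_hash(oldflp4->daddr, oldflp4->saddr,
                               oldflp4->flowi4_oif, rt_genid(dev_net(dev_out)));
                rth = rt_intern_hash(hash, rth, NULL, oldflp4->flowi4_oif);
        }
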
2480/* 2449/*
@@ -2482,34 +2451,36 @@ static int ip_mkroute_output(struct rtable **rp,
2482 * called with rcu_read_lock(); 2451 * called with rcu_read_lock();
2483 */ 2452 */
2484 2453
2485static int ip_route_output_slow(struct net *net, struct rtable **rp, 2454static struct rtable *ip_route_output_slow(struct net *net,
2486 const struct flowi *oldflp) 2455 const struct flowi4 *oldflp4)
2487{ 2456{
2488 u32 tos = RT_FL_TOS(oldflp); 2457 u32 tos = RT_FL_TOS(oldflp4);
2489 struct flowi fl = { .fl4_dst = oldflp->fl4_dst, 2458 struct flowi4 fl4;
2490 .fl4_src = oldflp->fl4_src,
2491 .fl4_tos = tos & IPTOS_RT_MASK,
2492 .fl4_scope = ((tos & RTO_ONLINK) ?
2493 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE),
2494 .mark = oldflp->mark,
2495 .iif = net->loopback_dev->ifindex,
2496 .oif = oldflp->oif };
2497 struct fib_result res; 2459 struct fib_result res;
2498 unsigned int flags = 0; 2460 unsigned int flags = 0;
2499 struct net_device *dev_out = NULL; 2461 struct net_device *dev_out = NULL;
2500 int err; 2462 struct rtable *rth;
2501
2502 2463
2503 res.fi = NULL; 2464 res.fi = NULL;
2504#ifdef CONFIG_IP_MULTIPLE_TABLES 2465#ifdef CONFIG_IP_MULTIPLE_TABLES
2505 res.r = NULL; 2466 res.r = NULL;
2506#endif 2467#endif
2507 2468
2508 if (oldflp->fl4_src) { 2469 fl4.flowi4_oif = oldflp4->flowi4_oif;
2509 err = -EINVAL; 2470 fl4.flowi4_iif = net->loopback_dev->ifindex;
2510 if (ipv4_is_multicast(oldflp->fl4_src) || 2471 fl4.flowi4_mark = oldflp4->flowi4_mark;
2511 ipv4_is_lbcast(oldflp->fl4_src) || 2472 fl4.daddr = oldflp4->daddr;
2512 ipv4_is_zeronet(oldflp->fl4_src)) 2473 fl4.saddr = oldflp4->saddr;
2474 fl4.flowi4_tos = tos & IPTOS_RT_MASK;
2475 fl4.flowi4_scope = ((tos & RTO_ONLINK) ?
2476 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
2477
2478 rcu_read_lock();
2479 if (oldflp4->saddr) {
2480 rth = ERR_PTR(-EINVAL);
2481 if (ipv4_is_multicast(oldflp4->saddr) ||
2482 ipv4_is_lbcast(oldflp4->saddr) ||
2483 ipv4_is_zeronet(oldflp4->saddr))
2513 goto out; 2484 goto out;
2514 2485
2515 /* I removed check for oif == dev_out->oif here. 2486 /* I removed check for oif == dev_out->oif here.
@@ -2520,11 +2491,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2520 of another iface. --ANK 2491 of another iface. --ANK
2521 */ 2492 */
2522 2493
2523 if (oldflp->oif == 0 && 2494 if (oldflp4->flowi4_oif == 0 &&
2524 (ipv4_is_multicast(oldflp->fl4_dst) || 2495 (ipv4_is_multicast(oldflp4->daddr) ||
2525 ipv4_is_lbcast(oldflp->fl4_dst))) { 2496 ipv4_is_lbcast(oldflp4->daddr))) {
2526 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ 2497 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2527 dev_out = __ip_dev_find(net, oldflp->fl4_src, false); 2498 dev_out = __ip_dev_find(net, oldflp4->saddr, false);
2528 if (dev_out == NULL) 2499 if (dev_out == NULL)
2529 goto out; 2500 goto out;
2530 2501
@@ -2543,60 +2514,60 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2543 Luckily, this hack is good workaround. 2514 Luckily, this hack is good workaround.
2544 */ 2515 */
2545 2516
2546 fl.oif = dev_out->ifindex; 2517 fl4.flowi4_oif = dev_out->ifindex;
2547 goto make_route; 2518 goto make_route;
2548 } 2519 }
2549 2520
2550 if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) { 2521 if (!(oldflp4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
2551 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ 2522 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2552 if (!__ip_dev_find(net, oldflp->fl4_src, false)) 2523 if (!__ip_dev_find(net, oldflp4->saddr, false))
2553 goto out; 2524 goto out;
2554 } 2525 }
2555 } 2526 }
2556 2527
2557 2528
2558 if (oldflp->oif) { 2529 if (oldflp4->flowi4_oif) {
2559 dev_out = dev_get_by_index_rcu(net, oldflp->oif); 2530 dev_out = dev_get_by_index_rcu(net, oldflp4->flowi4_oif);
2560 err = -ENODEV; 2531 rth = ERR_PTR(-ENODEV);
2561 if (dev_out == NULL) 2532 if (dev_out == NULL)
2562 goto out; 2533 goto out;
2563 2534
2564 /* RACE: Check return value of inet_select_addr instead. */ 2535 /* RACE: Check return value of inet_select_addr instead. */
2565 if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) { 2536 if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
2566 err = -ENETUNREACH; 2537 rth = ERR_PTR(-ENETUNREACH);
2567 goto out; 2538 goto out;
2568 } 2539 }
2569 if (ipv4_is_local_multicast(oldflp->fl4_dst) || 2540 if (ipv4_is_local_multicast(oldflp4->daddr) ||
2570 ipv4_is_lbcast(oldflp->fl4_dst)) { 2541 ipv4_is_lbcast(oldflp4->daddr)) {
2571 if (!fl.fl4_src) 2542 if (!fl4.saddr)
2572 fl.fl4_src = inet_select_addr(dev_out, 0, 2543 fl4.saddr = inet_select_addr(dev_out, 0,
2573 RT_SCOPE_LINK); 2544 RT_SCOPE_LINK);
2574 goto make_route; 2545 goto make_route;
2575 } 2546 }
2576 if (!fl.fl4_src) { 2547 if (!fl4.saddr) {
2577 if (ipv4_is_multicast(oldflp->fl4_dst)) 2548 if (ipv4_is_multicast(oldflp4->daddr))
2578 fl.fl4_src = inet_select_addr(dev_out, 0, 2549 fl4.saddr = inet_select_addr(dev_out, 0,
2579 fl.fl4_scope); 2550 fl4.flowi4_scope);
2580 else if (!oldflp->fl4_dst) 2551 else if (!oldflp4->daddr)
2581 fl.fl4_src = inet_select_addr(dev_out, 0, 2552 fl4.saddr = inet_select_addr(dev_out, 0,
2582 RT_SCOPE_HOST); 2553 RT_SCOPE_HOST);
2583 } 2554 }
2584 } 2555 }
2585 2556
2586 if (!fl.fl4_dst) { 2557 if (!fl4.daddr) {
2587 fl.fl4_dst = fl.fl4_src; 2558 fl4.daddr = fl4.saddr;
2588 if (!fl.fl4_dst) 2559 if (!fl4.daddr)
2589 fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); 2560 fl4.daddr = fl4.saddr = htonl(INADDR_LOOPBACK);
2590 dev_out = net->loopback_dev; 2561 dev_out = net->loopback_dev;
2591 fl.oif = net->loopback_dev->ifindex; 2562 fl4.flowi4_oif = net->loopback_dev->ifindex;
2592 res.type = RTN_LOCAL; 2563 res.type = RTN_LOCAL;
2593 flags |= RTCF_LOCAL; 2564 flags |= RTCF_LOCAL;
2594 goto make_route; 2565 goto make_route;
2595 } 2566 }
2596 2567
2597 if (fib_lookup(net, &fl, &res)) { 2568 if (fib_lookup(net, &fl4, &res)) {
2598 res.fi = NULL; 2569 res.fi = NULL;
2599 if (oldflp->oif) { 2570 if (oldflp4->flowi4_oif) {
2600 /* Apparently, routing tables are wrong. Assume, 2571 /* Apparently, routing tables are wrong. Assume,
2601 that the destination is on link. 2572 that the destination is on link.
2602 2573
@@ -2615,90 +2586,93 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
2615 likely IPv6, but we do not. 2586 likely IPv6, but we do not.
2616 */ 2587 */
2617 2588
2618 if (fl.fl4_src == 0) 2589 if (fl4.saddr == 0)
2619 fl.fl4_src = inet_select_addr(dev_out, 0, 2590 fl4.saddr = inet_select_addr(dev_out, 0,
2620 RT_SCOPE_LINK); 2591 RT_SCOPE_LINK);
2621 res.type = RTN_UNICAST; 2592 res.type = RTN_UNICAST;
2622 goto make_route; 2593 goto make_route;
2623 } 2594 }
2624 err = -ENETUNREACH; 2595 rth = ERR_PTR(-ENETUNREACH);
2625 goto out; 2596 goto out;
2626 } 2597 }
2627 2598
2628 if (res.type == RTN_LOCAL) { 2599 if (res.type == RTN_LOCAL) {
2629 if (!fl.fl4_src) { 2600 if (!fl4.saddr) {
2630 if (res.fi->fib_prefsrc) 2601 if (res.fi->fib_prefsrc)
2631 fl.fl4_src = res.fi->fib_prefsrc; 2602 fl4.saddr = res.fi->fib_prefsrc;
2632 else 2603 else
2633 fl.fl4_src = fl.fl4_dst; 2604 fl4.saddr = fl4.daddr;
2634 } 2605 }
2635 dev_out = net->loopback_dev; 2606 dev_out = net->loopback_dev;
2636 fl.oif = dev_out->ifindex; 2607 fl4.flowi4_oif = dev_out->ifindex;
2637 res.fi = NULL; 2608 res.fi = NULL;
2638 flags |= RTCF_LOCAL; 2609 flags |= RTCF_LOCAL;
2639 goto make_route; 2610 goto make_route;
2640 } 2611 }
2641 2612
2642#ifdef CONFIG_IP_ROUTE_MULTIPATH 2613#ifdef CONFIG_IP_ROUTE_MULTIPATH
2643 if (res.fi->fib_nhs > 1 && fl.oif == 0) 2614 if (res.fi->fib_nhs > 1 && fl4.flowi4_oif == 0)
2644 fib_select_multipath(&fl, &res); 2615 fib_select_multipath(&res);
2645 else 2616 else
2646#endif 2617#endif
2647 if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif) 2618 if (!res.prefixlen && res.type == RTN_UNICAST && !fl4.flowi4_oif)
2648 fib_select_default(net, &fl, &res); 2619 fib_select_default(&res);
2649 2620
2650 if (!fl.fl4_src) 2621 if (!fl4.saddr)
2651 fl.fl4_src = FIB_RES_PREFSRC(res); 2622 fl4.saddr = FIB_RES_PREFSRC(net, res);
2652 2623
2653 dev_out = FIB_RES_DEV(res); 2624 dev_out = FIB_RES_DEV(res);
2654 fl.oif = dev_out->ifindex; 2625 fl4.flowi4_oif = dev_out->ifindex;
2655 2626
2656 2627
2657make_route: 2628make_route:
2658 err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags); 2629 rth = __mkroute_output(&res, &fl4, oldflp4, dev_out, flags);
2630 if (!IS_ERR(rth)) {
2631 unsigned int hash;
2659 2632
2660out: return err; 2633 hash = rt_hash(oldflp4->daddr, oldflp4->saddr, oldflp4->flowi4_oif,
2634 rt_genid(dev_net(dev_out)));
2635 rth = rt_intern_hash(hash, rth, NULL, oldflp4->flowi4_oif);
2636 }
2637
2638out:
2639 rcu_read_unlock();
2640 return rth;
2661} 2641}
2662 2642
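Note: ip_route_output_slow() now fills an on-stack struct flowi4 field by field instead of building a struct flowi with a designated initializer, and hands the route (or an error pointer) straight back. A self-contained sketch of the tos/scope derivation it performs; the constant values are copied from the kernel headers as I read them and the stub type is hypothetical:

	#include <stdio.h>
	#include <stdint.h>

	#define IPTOS_RT_MASK		0x1C	/* assumed: IPTOS_TOS_MASK & ~3 */
	#define RTO_ONLINK		0x01
	#define RT_SCOPE_UNIVERSE	0
	#define RT_SCOPE_LINK		253

	struct flowi4_stub {			/* hypothetical cut-down key */
		uint32_t daddr, saddr, mark;
		int	 oif, iif;
		uint8_t	 tos, scope;
	};

	/* RTO_ONLINK piggybacks on the tos argument: it selects link scope
	 * and is then masked out of the tos used for route matching. */
	static void build_output_key(struct flowi4_stub *fl4, uint32_t dst,
				     uint32_t src, uint8_t tos, int oif,
				     uint32_t mark, int loopback_ifindex)
	{
		fl4->oif   = oif;
		fl4->iif   = loopback_ifindex;	/* output routes originate locally */
		fl4->mark  = mark;
		fl4->daddr = dst;
		fl4->saddr = src;
		fl4->tos   = tos & IPTOS_RT_MASK;
		fl4->scope = (tos & RTO_ONLINK) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
	}

	int main(void)
	{
		struct flowi4_stub fl4;

		build_output_key(&fl4, 0x0a000001, 0, 0x10 | RTO_ONLINK, 2, 0, 1);
		printf("tos=%#x scope=%d\n", fl4.tos, fl4.scope);	/* 0x10, 253 */
		return 0;
	}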
2663int __ip_route_output_key(struct net *net, struct rtable **rp, 2643struct rtable *__ip_route_output_key(struct net *net, const struct flowi4 *flp4)
2664 const struct flowi *flp)
2665{ 2644{
2666 unsigned int hash;
2667 int res;
2668 struct rtable *rth; 2645 struct rtable *rth;
2646 unsigned int hash;
2669 2647
2670 if (!rt_caching(net)) 2648 if (!rt_caching(net))
2671 goto slow_output; 2649 goto slow_output;
2672 2650
2673 hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); 2651 hash = rt_hash(flp4->daddr, flp4->saddr, flp4->flowi4_oif, rt_genid(net));
2674 2652
2675 rcu_read_lock_bh(); 2653 rcu_read_lock_bh();
2676 for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; 2654 for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth;
2677 rth = rcu_dereference_bh(rth->dst.rt_next)) { 2655 rth = rcu_dereference_bh(rth->dst.rt_next)) {
2678 if (rth->fl.fl4_dst == flp->fl4_dst && 2656 if (rth->rt_key_dst == flp4->daddr &&
2679 rth->fl.fl4_src == flp->fl4_src && 2657 rth->rt_key_src == flp4->saddr &&
2680 rt_is_output_route(rth) && 2658 rt_is_output_route(rth) &&
2681 rth->fl.oif == flp->oif && 2659 rth->rt_oif == flp4->flowi4_oif &&
2682 rth->fl.mark == flp->mark && 2660 rth->rt_mark == flp4->flowi4_mark &&
2683 !((rth->fl.fl4_tos ^ flp->fl4_tos) & 2661 !((rth->rt_tos ^ flp4->flowi4_tos) &
2684 (IPTOS_RT_MASK | RTO_ONLINK)) && 2662 (IPTOS_RT_MASK | RTO_ONLINK)) &&
2685 net_eq(dev_net(rth->dst.dev), net) && 2663 net_eq(dev_net(rth->dst.dev), net) &&
2686 !rt_is_expired(rth)) { 2664 !rt_is_expired(rth)) {
2687 dst_use(&rth->dst, jiffies); 2665 dst_use(&rth->dst, jiffies);
2688 RT_CACHE_STAT_INC(out_hit); 2666 RT_CACHE_STAT_INC(out_hit);
2689 rcu_read_unlock_bh(); 2667 rcu_read_unlock_bh();
2690 *rp = rth; 2668 return rth;
2691 return 0;
2692 } 2669 }
2693 RT_CACHE_STAT_INC(out_hlist_search); 2670 RT_CACHE_STAT_INC(out_hlist_search);
2694 } 2671 }
2695 rcu_read_unlock_bh(); 2672 rcu_read_unlock_bh();
2696 2673
2697slow_output: 2674slow_output:
2698 rcu_read_lock(); 2675 return ip_route_output_slow(net, flp4);
2699 res = ip_route_output_slow(net, rp, flp);
2700 rcu_read_unlock();
2701 return res;
2702} 2676}
2703EXPORT_SYMBOL_GPL(__ip_route_output_key); 2677EXPORT_SYMBOL_GPL(__ip_route_output_key);
2704 2678
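Note: the cache probe in __ip_route_output_key() now matches against keys duplicated into struct rtable itself (rt_key_dst, rt_key_src, rt_oif, rt_mark, rt_tos) rather than the flowi that used to be embedded in each entry. A stub-typed sketch of that chain walk, including the tos comparison that masks out everything except the routing bits and RTO_ONLINK (constant values assumed as in the sketch above):

	#include <stdint.h>
	#include <stddef.h>

	#define IPTOS_RT_MASK	0x1C
	#define RTO_ONLINK	0x01

	struct rt_cache_stub {			/* hypothetical cache entry */
		uint32_t rt_key_dst, rt_key_src, rt_mark;
		int	 rt_oif;
		uint8_t	 rt_tos;
		struct rt_cache_stub *next;	/* hash-chain link */
	};

	/* Walk one hash chain; tos bits outside the routing mask and the
	 * ONLINK flag are deliberately ignored by the comparison. */
	static struct rt_cache_stub *
	chain_lookup(struct rt_cache_stub *head, uint32_t daddr, uint32_t saddr,
		     int oif, uint32_t mark, uint8_t tos)
	{
		struct rt_cache_stub *rth;

		for (rth = head; rth; rth = rth->next) {
			if (rth->rt_key_dst == daddr &&
			    rth->rt_key_src == saddr &&
			    rth->rt_oif == oif &&
			    rth->rt_mark == mark &&
			    !((rth->rt_tos ^ tos) & (IPTOS_RT_MASK | RTO_ONLINK)))
				return rth;	/* hit: reuse the cached route */
		}
		return NULL;			/* miss: fall through to the slow path */
	}

	int main(void)
	{
		struct rt_cache_stub ent = { .rt_key_dst = 1, .rt_oif = 2 };

		return chain_lookup(&ent, 1, 0, 2, 0, 0) == &ent ? 0 : 1;
	}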
@@ -2716,6 +2690,12 @@ static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
2716{ 2690{
2717} 2691}
2718 2692
2693static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
2694 unsigned long old)
2695{
2696 return NULL;
2697}
2698
2719static struct dst_ops ipv4_dst_blackhole_ops = { 2699static struct dst_ops ipv4_dst_blackhole_ops = {
2720 .family = AF_INET, 2700 .family = AF_INET,
2721 .protocol = cpu_to_be16(ETH_P_IP), 2701 .protocol = cpu_to_be16(ETH_P_IP),
@@ -2724,19 +2704,17 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
2724 .default_mtu = ipv4_blackhole_default_mtu, 2704 .default_mtu = ipv4_blackhole_default_mtu,
2725 .default_advmss = ipv4_default_advmss, 2705 .default_advmss = ipv4_default_advmss,
2726 .update_pmtu = ipv4_rt_blackhole_update_pmtu, 2706 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
2707 .cow_metrics = ipv4_rt_blackhole_cow_metrics,
2727}; 2708};
2728 2709
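Note: the blackhole dst_ops gains a cow_metrics hook that always fails, so a blackhole route can never acquire writable metrics, consistent with its no-op update_pmtu. A sketch of the ops-table idea with hypothetical, simplified types (the real hook also takes the old metrics word as a second argument):

	#include <stdio.h>

	struct dst_stub;			/* opaque here */

	struct dst_ops_stub {			/* hypothetical cut-down ops table */
		unsigned int (*default_mtu)(const struct dst_stub *);
		unsigned int *(*cow_metrics)(struct dst_stub *);
	};

	static unsigned int blackhole_mtu(const struct dst_stub *dst)
	{
		(void)dst;
		return 1500;			/* placeholder, not the kernel's value */
	}

	/* NULL means "no writable metrics": callers stay on the shared
	 * read-only defaults instead of copying-on-write. */
	static unsigned int *blackhole_cow_metrics(struct dst_stub *dst)
	{
		(void)dst;
		return NULL;
	}

	static const struct dst_ops_stub blackhole_ops = {
		.default_mtu = blackhole_mtu,
		.cow_metrics = blackhole_cow_metrics,
	};

	int main(void)
	{
		printf("writable metrics: %s\n",
		       blackhole_ops.cow_metrics(NULL) ? "yes" : "no");
		return 0;
	}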
2729 2710struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
2730static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi *flp)
2731{ 2711{
2732 struct rtable *ort = *rp; 2712 struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, 1);
2733 struct rtable *rt = (struct rtable *) 2713 struct rtable *ort = (struct rtable *) dst_orig;
2734 dst_alloc(&ipv4_dst_blackhole_ops);
2735 2714
2736 if (rt) { 2715 if (rt) {
2737 struct dst_entry *new = &rt->dst; 2716 struct dst_entry *new = &rt->dst;
2738 2717
2739 atomic_set(&new->__refcnt, 1);
2740 new->__use = 1; 2718 new->__use = 1;
2741 new->input = dst_discard; 2719 new->input = dst_discard;
2742 new->output = dst_discard; 2720 new->output = dst_discard;
@@ -2746,59 +2724,58 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
2746 if (new->dev) 2724 if (new->dev)
2747 dev_hold(new->dev); 2725 dev_hold(new->dev);
2748 2726
2749 rt->fl = ort->fl; 2727 rt->rt_key_dst = ort->rt_key_dst;
2728 rt->rt_key_src = ort->rt_key_src;
2729 rt->rt_tos = ort->rt_tos;
2730 rt->rt_route_iif = ort->rt_route_iif;
2731 rt->rt_iif = ort->rt_iif;
2732 rt->rt_oif = ort->rt_oif;
2733 rt->rt_mark = ort->rt_mark;
2750 2734
2751 rt->rt_genid = rt_genid(net); 2735 rt->rt_genid = rt_genid(net);
2752 rt->rt_flags = ort->rt_flags; 2736 rt->rt_flags = ort->rt_flags;
2753 rt->rt_type = ort->rt_type; 2737 rt->rt_type = ort->rt_type;
2754 rt->rt_dst = ort->rt_dst; 2738 rt->rt_dst = ort->rt_dst;
2755 rt->rt_src = ort->rt_src; 2739 rt->rt_src = ort->rt_src;
2756 rt->rt_iif = ort->rt_iif;
2757 rt->rt_gateway = ort->rt_gateway; 2740 rt->rt_gateway = ort->rt_gateway;
2758 rt->rt_spec_dst = ort->rt_spec_dst; 2741 rt->rt_spec_dst = ort->rt_spec_dst;
2759 rt->peer = ort->peer; 2742 rt->peer = ort->peer;
2760 if (rt->peer) 2743 if (rt->peer)
2761 atomic_inc(&rt->peer->refcnt); 2744 atomic_inc(&rt->peer->refcnt);
2745 rt->fi = ort->fi;
2746 if (rt->fi)
2747 atomic_inc(&rt->fi->fib_clntref);
2762 2748
2763 dst_free(new); 2749 dst_free(new);
2764 } 2750 }
2765 2751
2766 dst_release(&(*rp)->dst); 2752 dst_release(dst_orig);
2767 *rp = rt; 2753
2768 return rt ? 0 : -ENOMEM; 2754 return rt ? &rt->dst : ERR_PTR(-ENOMEM);
2769} 2755}
2770 2756
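Note: ipv4_blackhole_route() now also copies the rt_key_* routing keys and takes a reference on rt->fi. Structurally the function allocates a dst whose input and output hooks discard all traffic, clones the identifying fields from the original, and drops the caller's reference. The same shape in a stub-typed userspace sketch (reference counting reduced to a plain int):

	#include <stdint.h>
	#include <stdlib.h>

	struct sk_buff_stub;			/* opaque packet stand-in */

	struct dst_stub {			/* hypothetical cut-down rtable */
		int (*input)(struct sk_buff_stub *);
		int (*output)(struct sk_buff_stub *);
		uint32_t rt_key_dst, rt_key_src;
		int refcnt;
	};

	static int discard(struct sk_buff_stub *skb)
	{
		(void)skb;
		return 0;			/* silently eat the packet */
	}

	static struct dst_stub *blackhole_clone(struct dst_stub *orig)
	{
		struct dst_stub *bh = calloc(1, sizeof(*bh));

		if (bh) {
			bh->input  = discard;			/* never deliver */
			bh->output = discard;			/* never transmit */
			bh->rt_key_dst = orig->rt_key_dst;	/* keep the lookup identity */
			bh->rt_key_src = orig->rt_key_src;
			bh->refcnt = 1;
		}
		orig->refcnt--;		/* plays the role of dst_release(dst_orig) */
		return bh;		/* NULL maps to ERR_PTR(-ENOMEM) above */
	}

	int main(void)
	{
		struct dst_stub orig = { .refcnt = 1, .rt_key_dst = 42 };
		struct dst_stub *bh = blackhole_clone(&orig);

		free(bh);
		return 0;
	}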
2771int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, 2757struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
2772 struct sock *sk, int flags) 2758 struct sock *sk)
2773{ 2759{
2774 int err; 2760 struct rtable *rt = __ip_route_output_key(net, flp4);
2775 2761
2776 if ((err = __ip_route_output_key(net, rp, flp)) != 0) 2762 if (IS_ERR(rt))
2777 return err; 2763 return rt;
2778 2764
2779 if (flp->proto) { 2765 if (flp4->flowi4_proto) {
2780 if (!flp->fl4_src) 2766 if (!flp4->saddr)
2781 flp->fl4_src = (*rp)->rt_src; 2767 flp4->saddr = rt->rt_src;
2782 if (!flp->fl4_dst) 2768 if (!flp4->daddr)
2783 flp->fl4_dst = (*rp)->rt_dst; 2769 flp4->daddr = rt->rt_dst;
2784 err = __xfrm_lookup(net, (struct dst_entry **)rp, flp, sk, 2770 rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
2785 flags ? XFRM_LOOKUP_WAIT : 0); 2771 flowi4_to_flowi(flp4),
2786 if (err == -EREMOTE) 2772 sk, 0);
2787 err = ipv4_dst_blackhole(net, rp, flp);
2788
2789 return err;
2790 } 2773 }
2791 2774
2792 return 0; 2775 return rt;
2793} 2776}
2794EXPORT_SYMBOL_GPL(ip_route_output_flow); 2777EXPORT_SYMBOL_GPL(ip_route_output_flow);
2795 2778
2796int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp)
2797{
2798 return ip_route_output_flow(net, rp, flp, NULL, 0);
2799}
2800EXPORT_SYMBOL(ip_route_output_key);
2801
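Note: the out-of-line ip_route_output_key() wrapper is deleted here, yet the getroute handler below still calls it, so the helper presumably survives as a header inline along these lines (an assumption inferred from the call site; this hunk does not show the header side):

	/* assumed header-side replacement; not part of this file */
	static inline struct rtable *ip_route_output_key(struct net *net,
							 struct flowi4 *flp4)
	{
		return ip_route_output_flow(net, flp4, NULL);
	}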
2802static int rt_fill_info(struct net *net, 2779static int rt_fill_info(struct net *net,
2803 struct sk_buff *skb, u32 pid, u32 seq, int event, 2780 struct sk_buff *skb, u32 pid, u32 seq, int event,
2804 int nowait, unsigned int flags) 2781 int nowait, unsigned int flags)
@@ -2817,7 +2794,7 @@ static int rt_fill_info(struct net *net,
2817 r->rtm_family = AF_INET; 2794 r->rtm_family = AF_INET;
2818 r->rtm_dst_len = 32; 2795 r->rtm_dst_len = 32;
2819 r->rtm_src_len = 0; 2796 r->rtm_src_len = 0;
2820 r->rtm_tos = rt->fl.fl4_tos; 2797 r->rtm_tos = rt->rt_tos;
2821 r->rtm_table = RT_TABLE_MAIN; 2798 r->rtm_table = RT_TABLE_MAIN;
2822 NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); 2799 NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
2823 r->rtm_type = rt->rt_type; 2800 r->rtm_type = rt->rt_type;
@@ -2829,19 +2806,19 @@ static int rt_fill_info(struct net *net,
2829 2806
2830 NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst); 2807 NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst);
2831 2808
2832 if (rt->fl.fl4_src) { 2809 if (rt->rt_key_src) {
2833 r->rtm_src_len = 32; 2810 r->rtm_src_len = 32;
2834 NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); 2811 NLA_PUT_BE32(skb, RTA_SRC, rt->rt_key_src);
2835 } 2812 }
2836 if (rt->dst.dev) 2813 if (rt->dst.dev)
2837 NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); 2814 NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
2838#ifdef CONFIG_NET_CLS_ROUTE 2815#ifdef CONFIG_IP_ROUTE_CLASSID
2839 if (rt->dst.tclassid) 2816 if (rt->dst.tclassid)
2840 NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); 2817 NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
2841#endif 2818#endif
2842 if (rt_is_input_route(rt)) 2819 if (rt_is_input_route(rt))
2843 NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); 2820 NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
2844 else if (rt->rt_src != rt->fl.fl4_src) 2821 else if (rt->rt_src != rt->rt_key_src)
2845 NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src); 2822 NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src);
2846 2823
2847 if (rt->rt_dst != rt->rt_gateway) 2824 if (rt->rt_dst != rt->rt_gateway)
@@ -2850,11 +2827,12 @@ static int rt_fill_info(struct net *net,
2850 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) 2827 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2851 goto nla_put_failure; 2828 goto nla_put_failure;
2852 2829
2853 if (rt->fl.mark) 2830 if (rt->rt_mark)
2854 NLA_PUT_BE32(skb, RTA_MARK, rt->fl.mark); 2831 NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark);
2855 2832
2856 error = rt->dst.error; 2833 error = rt->dst.error;
2857 expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; 2834 expires = (rt->peer && rt->peer->pmtu_expires) ?
2835 rt->peer->pmtu_expires - jiffies : 0;
2858 if (rt->peer) { 2836 if (rt->peer) {
2859 inet_peer_refcheck(rt->peer); 2837 inet_peer_refcheck(rt->peer);
2860 id = atomic_read(&rt->peer->ip_id_count) & 0xffff; 2838 id = atomic_read(&rt->peer->ip_id_count) & 0xffff;
@@ -2884,7 +2862,7 @@ static int rt_fill_info(struct net *net,
2884 } 2862 }
2885 } else 2863 } else
2886#endif 2864#endif
2887 NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); 2865 NLA_PUT_U32(skb, RTA_IIF, rt->rt_iif);
2888 } 2866 }
2889 2867
2890 if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, 2868 if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage,
@@ -2958,14 +2936,18 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2958 if (err == 0 && rt->dst.error) 2936 if (err == 0 && rt->dst.error)
2959 err = -rt->dst.error; 2937 err = -rt->dst.error;
2960 } else { 2938 } else {
2961 struct flowi fl = { 2939 struct flowi4 fl4 = {
2962 .fl4_dst = dst, 2940 .daddr = dst,
2963 .fl4_src = src, 2941 .saddr = src,
2964 .fl4_tos = rtm->rtm_tos, 2942 .flowi4_tos = rtm->rtm_tos,
2965 .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, 2943 .flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
2966 .mark = mark, 2944 .flowi4_mark = mark,
2967 }; 2945 };
2968 err = ip_route_output_key(net, &rt, &fl); 2946 rt = ip_route_output_key(net, &fl4);
2947
2948 err = 0;
2949 if (IS_ERR(rt))
2950 err = PTR_ERR(rt);
2969 } 2951 }
2970 2952
2971 if (err) 2953 if (err)
@@ -3248,6 +3230,8 @@ static __net_init int rt_genid_init(struct net *net)
3248{ 3230{
3249 get_random_bytes(&net->ipv4.rt_genid, 3231 get_random_bytes(&net->ipv4.rt_genid,
3250 sizeof(net->ipv4.rt_genid)); 3232 sizeof(net->ipv4.rt_genid));
3233 get_random_bytes(&net->ipv4.dev_addr_genid,
3234 sizeof(net->ipv4.dev_addr_genid));
3251 return 0; 3235 return 0;
3252} 3236}
3253 3237
@@ -3256,9 +3240,9 @@ static __net_initdata struct pernet_operations rt_genid_ops = {
3256}; 3240};
3257 3241
3258 3242
3259#ifdef CONFIG_NET_CLS_ROUTE 3243#ifdef CONFIG_IP_ROUTE_CLASSID
3260struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; 3244struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
3261#endif /* CONFIG_NET_CLS_ROUTE */ 3245#endif /* CONFIG_IP_ROUTE_CLASSID */
3262 3246
3263static __initdata unsigned long rhash_entries; 3247static __initdata unsigned long rhash_entries;
3264static int __init set_rhash_entries(char *str) 3248static int __init set_rhash_entries(char *str)
@@ -3274,7 +3258,7 @@ int __init ip_rt_init(void)
3274{ 3258{
3275 int rc = 0; 3259 int rc = 0;
3276 3260
3277#ifdef CONFIG_NET_CLS_ROUTE 3261#ifdef CONFIG_IP_ROUTE_CLASSID
3278 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); 3262 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
3279 if (!ip_rt_acct) 3263 if (!ip_rt_acct)
3280 panic("IP: failed to allocate ip_rt_acct\n"); 3264 panic("IP: failed to allocate ip_rt_acct\n");
@@ -3311,14 +3295,6 @@ int __init ip_rt_init(void)
3311 devinet_init(); 3295 devinet_init();
3312 ip_fib_init(); 3296 ip_fib_init();
3313 3297
3314 /* All the timers, started at system startup tend
3315 to synchronize. Perturb it a bit.
3316 */
3317 INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func);
3318 expires_ljiffies = jiffies;
3319 schedule_delayed_work(&expires_work,
3320 net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
3321
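Note: the deleted startup code staggered the first run of the (also removed) expiration worker: net_random() % ip_rt_gc_interval + ip_rt_gc_interval lands somewhere in [interval, 2*interval), so timers created together at boot drift apart instead of firing in lockstep. The same jitter idea in plain C with hypothetical names:

	#include <stdio.h>
	#include <stdlib.h>
	#include <time.h>

	/* Pick the first expiry in [period, 2*period) so several instances
	 * started together spread out instead of thundering in sync. */
	static long first_expiry_ms(long period_ms)
	{
		return period_ms + rand() % period_ms;
	}

	int main(void)
	{
		srand((unsigned)time(NULL));
		printf("first run in %ld ms\n", first_expiry_ms(60000));
		return 0;
	}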
3322 if (ip_rt_proc_init()) 3298 if (ip_rt_proc_init())
3323 printk(KERN_ERR "Unable to create route proc files\n"); 3299 printk(KERN_ERR "Unable to create route proc files\n");
3324#ifdef CONFIG_XFRM 3300#ifdef CONFIG_XFRM