about | summary | refs | log | tree | commit | diff | stats
path: root/net/ipv4/route.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r-- net/ipv4/route.c 511
1 files changed, 253 insertions, 258 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 560acc677ce4..562ce92de2a6 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -253,8 +253,7 @@ static unsigned rt_hash_mask __read_mostly;
253static unsigned int rt_hash_log __read_mostly; 253static unsigned int rt_hash_log __read_mostly;
254 254
255static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); 255static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
256#define RT_CACHE_STAT_INC(field) \ 256#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)
257 (__raw_get_cpu_var(rt_cache_stat).field++)
258 257
259static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx, 258static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx,
260 int genid) 259 int genid)
@@ -287,10 +286,10 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
287 rcu_read_lock_bh(); 286 rcu_read_lock_bh();
288 r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); 287 r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
289 while (r) { 288 while (r) {
290 if (dev_net(r->u.dst.dev) == seq_file_net(seq) && 289 if (dev_net(r->dst.dev) == seq_file_net(seq) &&
291 r->rt_genid == st->genid) 290 r->rt_genid == st->genid)
292 return r; 291 return r;
293 r = rcu_dereference_bh(r->u.dst.rt_next); 292 r = rcu_dereference_bh(r->dst.rt_next);
294 } 293 }
295 rcu_read_unlock_bh(); 294 rcu_read_unlock_bh();
296 } 295 }
@@ -302,7 +301,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq,
302{ 301{
303 struct rt_cache_iter_state *st = seq->private; 302 struct rt_cache_iter_state *st = seq->private;
304 303
305 r = r->u.dst.rt_next; 304 r = r->dst.rt_next;
306 while (!r) { 305 while (!r) {
307 rcu_read_unlock_bh(); 306 rcu_read_unlock_bh();
308 do { 307 do {
@@ -320,7 +319,7 @@ static struct rtable *rt_cache_get_next(struct seq_file *seq,
320{ 319{
321 struct rt_cache_iter_state *st = seq->private; 320 struct rt_cache_iter_state *st = seq->private;
322 while ((r = __rt_cache_get_next(seq, r)) != NULL) { 321 while ((r = __rt_cache_get_next(seq, r)) != NULL) {
323 if (dev_net(r->u.dst.dev) != seq_file_net(seq)) 322 if (dev_net(r->dst.dev) != seq_file_net(seq))
324 continue; 323 continue;
325 if (r->rt_genid == st->genid) 324 if (r->rt_genid == st->genid)
326 break; 325 break;
@@ -378,19 +377,19 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
378 377
379 seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" 378 seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t"
380 "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", 379 "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n",
381 r->u.dst.dev ? r->u.dst.dev->name : "*", 380 r->dst.dev ? r->dst.dev->name : "*",
382 (__force u32)r->rt_dst, 381 (__force u32)r->rt_dst,
383 (__force u32)r->rt_gateway, 382 (__force u32)r->rt_gateway,
384 r->rt_flags, atomic_read(&r->u.dst.__refcnt), 383 r->rt_flags, atomic_read(&r->dst.__refcnt),
385 r->u.dst.__use, 0, (__force u32)r->rt_src, 384 r->dst.__use, 0, (__force u32)r->rt_src,
386 (dst_metric(&r->u.dst, RTAX_ADVMSS) ? 385 (dst_metric(&r->dst, RTAX_ADVMSS) ?
387 (int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0), 386 (int)dst_metric(&r->dst, RTAX_ADVMSS) + 40 : 0),
388 dst_metric(&r->u.dst, RTAX_WINDOW), 387 dst_metric(&r->dst, RTAX_WINDOW),
389 (int)((dst_metric(&r->u.dst, RTAX_RTT) >> 3) + 388 (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
390 dst_metric(&r->u.dst, RTAX_RTTVAR)), 389 dst_metric(&r->dst, RTAX_RTTVAR)),
391 r->fl.fl4_tos, 390 r->fl.fl4_tos,
392 r->u.dst.hh ? atomic_read(&r->u.dst.hh->hh_refcnt) : -1, 391 r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1,
393 r->u.dst.hh ? (r->u.dst.hh->hh_output == 392 r->dst.hh ? (r->dst.hh->hh_output ==
394 dev_queue_xmit) : 0, 393 dev_queue_xmit) : 0,
395 r->rt_spec_dst, &len); 394 r->rt_spec_dst, &len);
396 395
@@ -609,13 +608,13 @@ static inline int ip_rt_proc_init(void)
609 608
610static inline void rt_free(struct rtable *rt) 609static inline void rt_free(struct rtable *rt)
611{ 610{
612 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); 611 call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
613} 612}
614 613
615static inline void rt_drop(struct rtable *rt) 614static inline void rt_drop(struct rtable *rt)
616{ 615{
617 ip_rt_put(rt); 616 ip_rt_put(rt);
618 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); 617 call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
619} 618}
620 619
621static inline int rt_fast_clean(struct rtable *rth) 620static inline int rt_fast_clean(struct rtable *rth)
@@ -623,13 +622,13 @@ static inline int rt_fast_clean(struct rtable *rth)
623 /* Kill broadcast/multicast entries very aggressively, if they 622 /* Kill broadcast/multicast entries very aggressively, if they
624 collide in hash table with more useful entries */ 623 collide in hash table with more useful entries */
625 return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) && 624 return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
626 rth->fl.iif && rth->u.dst.rt_next; 625 rth->fl.iif && rth->dst.rt_next;
627} 626}
628 627
629static inline int rt_valuable(struct rtable *rth) 628static inline int rt_valuable(struct rtable *rth)
630{ 629{
631 return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || 630 return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
632 rth->u.dst.expires; 631 rth->dst.expires;
633} 632}
634 633
635static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) 634static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
@@ -637,15 +636,15 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t
637 unsigned long age; 636 unsigned long age;
638 int ret = 0; 637 int ret = 0;
639 638
640 if (atomic_read(&rth->u.dst.__refcnt)) 639 if (atomic_read(&rth->dst.__refcnt))
641 goto out; 640 goto out;
642 641
643 ret = 1; 642 ret = 1;
644 if (rth->u.dst.expires && 643 if (rth->dst.expires &&
645 time_after_eq(jiffies, rth->u.dst.expires)) 644 time_after_eq(jiffies, rth->dst.expires))
646 goto out; 645 goto out;
647 646
648 age = jiffies - rth->u.dst.lastuse; 647 age = jiffies - rth->dst.lastuse;
649 ret = 0; 648 ret = 0;
650 if ((age <= tmo1 && !rt_fast_clean(rth)) || 649 if ((age <= tmo1 && !rt_fast_clean(rth)) ||
651 (age <= tmo2 && rt_valuable(rth))) 650 (age <= tmo2 && rt_valuable(rth)))
@@ -661,7 +660,7 @@ out: return ret;
661 */ 660 */
662static inline u32 rt_score(struct rtable *rt) 661static inline u32 rt_score(struct rtable *rt)
663{ 662{
664 u32 score = jiffies - rt->u.dst.lastuse; 663 u32 score = jiffies - rt->dst.lastuse;
665 664
666 score = ~score & ~(3<<30); 665 score = ~score & ~(3<<30);
667 666
@@ -701,12 +700,12 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
701 700
702static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) 701static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
703{ 702{
704 return net_eq(dev_net(rt1->u.dst.dev), dev_net(rt2->u.dst.dev)); 703 return net_eq(dev_net(rt1->dst.dev), dev_net(rt2->dst.dev));
705} 704}
706 705
707static inline int rt_is_expired(struct rtable *rth) 706static inline int rt_is_expired(struct rtable *rth)
708{ 707{
709 return rth->rt_genid != rt_genid(dev_net(rth->u.dst.dev)); 708 return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
710} 709}
711 710
712/* 711/*
@@ -735,7 +734,7 @@ static void rt_do_flush(int process_context)
735 rth = rt_hash_table[i].chain; 734 rth = rt_hash_table[i].chain;
736 735
737 /* defer releasing the head of the list after spin_unlock */ 736 /* defer releasing the head of the list after spin_unlock */
738 for (tail = rth; tail; tail = tail->u.dst.rt_next) 737 for (tail = rth; tail; tail = tail->dst.rt_next)
739 if (!rt_is_expired(tail)) 738 if (!rt_is_expired(tail))
740 break; 739 break;
741 if (rth != tail) 740 if (rth != tail)
@@ -744,9 +743,9 @@ static void rt_do_flush(int process_context)
744 /* call rt_free on entries after the tail requiring flush */ 743 /* call rt_free on entries after the tail requiring flush */
745 prev = &rt_hash_table[i].chain; 744 prev = &rt_hash_table[i].chain;
746 for (p = *prev; p; p = next) { 745 for (p = *prev; p; p = next) {
747 next = p->u.dst.rt_next; 746 next = p->dst.rt_next;
748 if (!rt_is_expired(p)) { 747 if (!rt_is_expired(p)) {
749 prev = &p->u.dst.rt_next; 748 prev = &p->dst.rt_next;
750 } else { 749 } else {
751 *prev = next; 750 *prev = next;
752 rt_free(p); 751 rt_free(p);
@@ -761,7 +760,7 @@ static void rt_do_flush(int process_context)
761 spin_unlock_bh(rt_hash_lock_addr(i)); 760 spin_unlock_bh(rt_hash_lock_addr(i));
762 761
763 for (; rth != tail; rth = next) { 762 for (; rth != tail; rth = next) {
764 next = rth->u.dst.rt_next; 763 next = rth->dst.rt_next;
765 rt_free(rth); 764 rt_free(rth);
766 } 765 }
767 } 766 }
@@ -792,7 +791,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
792 while (aux != rth) { 791 while (aux != rth) {
793 if (compare_hash_inputs(&aux->fl, &rth->fl)) 792 if (compare_hash_inputs(&aux->fl, &rth->fl))
794 return 0; 793 return 0;
795 aux = aux->u.dst.rt_next; 794 aux = aux->dst.rt_next;
796 } 795 }
797 return ONE; 796 return ONE;
798} 797}
@@ -832,18 +831,18 @@ static void rt_check_expire(void)
832 length = 0; 831 length = 0;
833 spin_lock_bh(rt_hash_lock_addr(i)); 832 spin_lock_bh(rt_hash_lock_addr(i));
834 while ((rth = *rthp) != NULL) { 833 while ((rth = *rthp) != NULL) {
835 prefetch(rth->u.dst.rt_next); 834 prefetch(rth->dst.rt_next);
836 if (rt_is_expired(rth)) { 835 if (rt_is_expired(rth)) {
837 *rthp = rth->u.dst.rt_next; 836 *rthp = rth->dst.rt_next;
838 rt_free(rth); 837 rt_free(rth);
839 continue; 838 continue;
840 } 839 }
841 if (rth->u.dst.expires) { 840 if (rth->dst.expires) {
842 /* Entry is expired even if it is in use */ 841 /* Entry is expired even if it is in use */
843 if (time_before_eq(jiffies, rth->u.dst.expires)) { 842 if (time_before_eq(jiffies, rth->dst.expires)) {
844nofree: 843nofree:
845 tmo >>= 1; 844 tmo >>= 1;
846 rthp = &rth->u.dst.rt_next; 845 rthp = &rth->dst.rt_next;
847 /* 846 /*
848 * We only count entries on 847 * We only count entries on
849 * a chain with equal hash inputs once 848 * a chain with equal hash inputs once
@@ -859,7 +858,7 @@ nofree:
859 goto nofree; 858 goto nofree;
860 859
861 /* Cleanup aged off entries. */ 860 /* Cleanup aged off entries. */
862 *rthp = rth->u.dst.rt_next; 861 *rthp = rth->dst.rt_next;
863 rt_free(rth); 862 rt_free(rth);
864 } 863 }
865 spin_unlock_bh(rt_hash_lock_addr(i)); 864 spin_unlock_bh(rt_hash_lock_addr(i));
@@ -1000,10 +999,10 @@ static int rt_garbage_collect(struct dst_ops *ops)
1000 if (!rt_is_expired(rth) && 999 if (!rt_is_expired(rth) &&
1001 !rt_may_expire(rth, tmo, expire)) { 1000 !rt_may_expire(rth, tmo, expire)) {
1002 tmo >>= 1; 1001 tmo >>= 1;
1003 rthp = &rth->u.dst.rt_next; 1002 rthp = &rth->dst.rt_next;
1004 continue; 1003 continue;
1005 } 1004 }
1006 *rthp = rth->u.dst.rt_next; 1005 *rthp = rth->dst.rt_next;
1007 rt_free(rth); 1006 rt_free(rth);
1008 goal--; 1007 goal--;
1009 } 1008 }
@@ -1069,7 +1068,7 @@ static int slow_chain_length(const struct rtable *head)
1069 1068
1070 while (rth) { 1069 while (rth) {
1071 length += has_noalias(head, rth); 1070 length += has_noalias(head, rth);
1072 rth = rth->u.dst.rt_next; 1071 rth = rth->dst.rt_next;
1073 } 1072 }
1074 return length >> FRACT_BITS; 1073 return length >> FRACT_BITS;
1075} 1074}
@@ -1091,7 +1090,7 @@ restart:
1091 candp = NULL; 1090 candp = NULL;
1092 now = jiffies; 1091 now = jiffies;
1093 1092
1094 if (!rt_caching(dev_net(rt->u.dst.dev))) { 1093 if (!rt_caching(dev_net(rt->dst.dev))) {
1095 /* 1094 /*
1096 * If we're not caching, just tell the caller we 1095 * If we're not caching, just tell the caller we
1097 * were successful and don't touch the route. The 1096 * were successful and don't touch the route. The
@@ -1109,7 +1108,7 @@ restart:
1109 */ 1108 */
1110 1109
1111 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { 1110 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
1112 int err = arp_bind_neighbour(&rt->u.dst); 1111 int err = arp_bind_neighbour(&rt->dst);
1113 if (err) { 1112 if (err) {
1114 if (net_ratelimit()) 1113 if (net_ratelimit())
1115 printk(KERN_WARNING 1114 printk(KERN_WARNING
@@ -1128,19 +1127,19 @@ restart:
1128 spin_lock_bh(rt_hash_lock_addr(hash)); 1127 spin_lock_bh(rt_hash_lock_addr(hash));
1129 while ((rth = *rthp) != NULL) { 1128 while ((rth = *rthp) != NULL) {
1130 if (rt_is_expired(rth)) { 1129 if (rt_is_expired(rth)) {
1131 *rthp = rth->u.dst.rt_next; 1130 *rthp = rth->dst.rt_next;
1132 rt_free(rth); 1131 rt_free(rth);
1133 continue; 1132 continue;
1134 } 1133 }
1135 if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) { 1134 if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) {
1136 /* Put it first */ 1135 /* Put it first */
1137 *rthp = rth->u.dst.rt_next; 1136 *rthp = rth->dst.rt_next;
1138 /* 1137 /*
1139 * Since lookup is lockfree, the deletion 1138 * Since lookup is lockfree, the deletion
1140 * must be visible to another weakly ordered CPU before 1139 * must be visible to another weakly ordered CPU before
1141 * the insertion at the start of the hash chain. 1140 * the insertion at the start of the hash chain.
1142 */ 1141 */
1143 rcu_assign_pointer(rth->u.dst.rt_next, 1142 rcu_assign_pointer(rth->dst.rt_next,
1144 rt_hash_table[hash].chain); 1143 rt_hash_table[hash].chain);
1145 /* 1144 /*
1146 * Since lookup is lockfree, the update writes 1145 * Since lookup is lockfree, the update writes
@@ -1148,18 +1147,18 @@ restart:
1148 */ 1147 */
1149 rcu_assign_pointer(rt_hash_table[hash].chain, rth); 1148 rcu_assign_pointer(rt_hash_table[hash].chain, rth);
1150 1149
1151 dst_use(&rth->u.dst, now); 1150 dst_use(&rth->dst, now);
1152 spin_unlock_bh(rt_hash_lock_addr(hash)); 1151 spin_unlock_bh(rt_hash_lock_addr(hash));
1153 1152
1154 rt_drop(rt); 1153 rt_drop(rt);
1155 if (rp) 1154 if (rp)
1156 *rp = rth; 1155 *rp = rth;
1157 else 1156 else
1158 skb_dst_set(skb, &rth->u.dst); 1157 skb_dst_set(skb, &rth->dst);
1159 return 0; 1158 return 0;
1160 } 1159 }
1161 1160
1162 if (!atomic_read(&rth->u.dst.__refcnt)) { 1161 if (!atomic_read(&rth->dst.__refcnt)) {
1163 u32 score = rt_score(rth); 1162 u32 score = rt_score(rth);
1164 1163
1165 if (score <= min_score) { 1164 if (score <= min_score) {
@@ -1171,7 +1170,7 @@ restart:
1171 1170
1172 chain_length++; 1171 chain_length++;
1173 1172
1174 rthp = &rth->u.dst.rt_next; 1173 rthp = &rth->dst.rt_next;
1175 } 1174 }
1176 1175
1177 if (cand) { 1176 if (cand) {
@@ -1182,17 +1181,17 @@ restart:
1182 * only 2 entries per bucket. We will see. 1181 * only 2 entries per bucket. We will see.
1183 */ 1182 */
1184 if (chain_length > ip_rt_gc_elasticity) { 1183 if (chain_length > ip_rt_gc_elasticity) {
1185 *candp = cand->u.dst.rt_next; 1184 *candp = cand->dst.rt_next;
1186 rt_free(cand); 1185 rt_free(cand);
1187 } 1186 }
1188 } else { 1187 } else {
1189 if (chain_length > rt_chain_length_max && 1188 if (chain_length > rt_chain_length_max &&
1190 slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) { 1189 slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) {
1191 struct net *net = dev_net(rt->u.dst.dev); 1190 struct net *net = dev_net(rt->dst.dev);
1192 int num = ++net->ipv4.current_rt_cache_rebuild_count; 1191 int num = ++net->ipv4.current_rt_cache_rebuild_count;
1193 if (!rt_caching(net)) { 1192 if (!rt_caching(net)) {
1194 printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n", 1193 printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n",
1195 rt->u.dst.dev->name, num); 1194 rt->dst.dev->name, num);
1196 } 1195 }
1197 rt_emergency_hash_rebuild(net); 1196 rt_emergency_hash_rebuild(net);
1198 spin_unlock_bh(rt_hash_lock_addr(hash)); 1197 spin_unlock_bh(rt_hash_lock_addr(hash));
@@ -1207,7 +1206,7 @@ restart:
1207 route or unicast forwarding path. 1206 route or unicast forwarding path.
1208 */ 1207 */
1209 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { 1208 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
1210 int err = arp_bind_neighbour(&rt->u.dst); 1209 int err = arp_bind_neighbour(&rt->dst);
1211 if (err) { 1210 if (err) {
1212 spin_unlock_bh(rt_hash_lock_addr(hash)); 1211 spin_unlock_bh(rt_hash_lock_addr(hash));
1213 1212
@@ -1238,14 +1237,14 @@ restart:
1238 } 1237 }
1239 } 1238 }
1240 1239
1241 rt->u.dst.rt_next = rt_hash_table[hash].chain; 1240 rt->dst.rt_next = rt_hash_table[hash].chain;
1242 1241
1243#if RT_CACHE_DEBUG >= 2 1242#if RT_CACHE_DEBUG >= 2
1244 if (rt->u.dst.rt_next) { 1243 if (rt->dst.rt_next) {
1245 struct rtable *trt; 1244 struct rtable *trt;
1246 printk(KERN_DEBUG "rt_cache @%02x: %pI4", 1245 printk(KERN_DEBUG "rt_cache @%02x: %pI4",
1247 hash, &rt->rt_dst); 1246 hash, &rt->rt_dst);
1248 for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) 1247 for (trt = rt->dst.rt_next; trt; trt = trt->dst.rt_next)
1249 printk(" . %pI4", &trt->rt_dst); 1248 printk(" . %pI4", &trt->rt_dst);
1250 printk("\n"); 1249 printk("\n");
1251 } 1250 }
@@ -1263,7 +1262,7 @@ skip_hashing:
1263 if (rp) 1262 if (rp)
1264 *rp = rt; 1263 *rp = rt;
1265 else 1264 else
1266 skb_dst_set(skb, &rt->u.dst); 1265 skb_dst_set(skb, &rt->dst);
1267 return 0; 1266 return 0;
1268} 1267}
1269 1268
@@ -1325,6 +1324,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
1325 1324
1326 ip_select_fb_ident(iph); 1325 ip_select_fb_ident(iph);
1327} 1326}
1327EXPORT_SYMBOL(__ip_select_ident);
1328 1328
1329static void rt_del(unsigned hash, struct rtable *rt) 1329static void rt_del(unsigned hash, struct rtable *rt)
1330{ 1330{
@@ -1335,20 +1335,21 @@ static void rt_del(unsigned hash, struct rtable *rt)
1335 ip_rt_put(rt); 1335 ip_rt_put(rt);
1336 while ((aux = *rthp) != NULL) { 1336 while ((aux = *rthp) != NULL) {
1337 if (aux == rt || rt_is_expired(aux)) { 1337 if (aux == rt || rt_is_expired(aux)) {
1338 *rthp = aux->u.dst.rt_next; 1338 *rthp = aux->dst.rt_next;
1339 rt_free(aux); 1339 rt_free(aux);
1340 continue; 1340 continue;
1341 } 1341 }
1342 rthp = &aux->u.dst.rt_next; 1342 rthp = &aux->dst.rt_next;
1343 } 1343 }
1344 spin_unlock_bh(rt_hash_lock_addr(hash)); 1344 spin_unlock_bh(rt_hash_lock_addr(hash));
1345} 1345}
1346 1346
1347/* called in rcu_read_lock() section */
1347void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, 1348void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1348 __be32 saddr, struct net_device *dev) 1349 __be32 saddr, struct net_device *dev)
1349{ 1350{
1350 int i, k; 1351 int i, k;
1351 struct in_device *in_dev = in_dev_get(dev); 1352 struct in_device *in_dev = __in_dev_get_rcu(dev);
1352 struct rtable *rth, **rthp; 1353 struct rtable *rth, **rthp;
1353 __be32 skeys[2] = { saddr, 0 }; 1354 __be32 skeys[2] = { saddr, 0 };
1354 int ikeys[2] = { dev->ifindex, 0 }; 1355 int ikeys[2] = { dev->ifindex, 0 };
@@ -1384,7 +1385,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1384 1385
1385 rthp=&rt_hash_table[hash].chain; 1386 rthp=&rt_hash_table[hash].chain;
1386 1387
1387 rcu_read_lock();
1388 while ((rth = rcu_dereference(*rthp)) != NULL) { 1388 while ((rth = rcu_dereference(*rthp)) != NULL) {
1389 struct rtable *rt; 1389 struct rtable *rt;
1390 1390
@@ -1393,44 +1393,42 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1393 rth->fl.oif != ikeys[k] || 1393 rth->fl.oif != ikeys[k] ||
1394 rth->fl.iif != 0 || 1394 rth->fl.iif != 0 ||
1395 rt_is_expired(rth) || 1395 rt_is_expired(rth) ||
1396 !net_eq(dev_net(rth->u.dst.dev), net)) { 1396 !net_eq(dev_net(rth->dst.dev), net)) {
1397 rthp = &rth->u.dst.rt_next; 1397 rthp = &rth->dst.rt_next;
1398 continue; 1398 continue;
1399 } 1399 }
1400 1400
1401 if (rth->rt_dst != daddr || 1401 if (rth->rt_dst != daddr ||
1402 rth->rt_src != saddr || 1402 rth->rt_src != saddr ||
1403 rth->u.dst.error || 1403 rth->dst.error ||
1404 rth->rt_gateway != old_gw || 1404 rth->rt_gateway != old_gw ||
1405 rth->u.dst.dev != dev) 1405 rth->dst.dev != dev)
1406 break; 1406 break;
1407 1407
1408 dst_hold(&rth->u.dst); 1408 dst_hold(&rth->dst);
1409 rcu_read_unlock();
1410 1409
1411 rt = dst_alloc(&ipv4_dst_ops); 1410 rt = dst_alloc(&ipv4_dst_ops);
1412 if (rt == NULL) { 1411 if (rt == NULL) {
1413 ip_rt_put(rth); 1412 ip_rt_put(rth);
1414 in_dev_put(in_dev);
1415 return; 1413 return;
1416 } 1414 }
1417 1415
1418 /* Copy all the information. */ 1416 /* Copy all the information. */
1419 *rt = *rth; 1417 *rt = *rth;
1420 rt->u.dst.__use = 1; 1418 rt->dst.__use = 1;
1421 atomic_set(&rt->u.dst.__refcnt, 1); 1419 atomic_set(&rt->dst.__refcnt, 1);
1422 rt->u.dst.child = NULL; 1420 rt->dst.child = NULL;
1423 if (rt->u.dst.dev) 1421 if (rt->dst.dev)
1424 dev_hold(rt->u.dst.dev); 1422 dev_hold(rt->dst.dev);
1425 if (rt->idev) 1423 if (rt->idev)
1426 in_dev_hold(rt->idev); 1424 in_dev_hold(rt->idev);
1427 rt->u.dst.obsolete = -1; 1425 rt->dst.obsolete = -1;
1428 rt->u.dst.lastuse = jiffies; 1426 rt->dst.lastuse = jiffies;
1429 rt->u.dst.path = &rt->u.dst; 1427 rt->dst.path = &rt->dst;
1430 rt->u.dst.neighbour = NULL; 1428 rt->dst.neighbour = NULL;
1431 rt->u.dst.hh = NULL; 1429 rt->dst.hh = NULL;
1432#ifdef CONFIG_XFRM 1430#ifdef CONFIG_XFRM
1433 rt->u.dst.xfrm = NULL; 1431 rt->dst.xfrm = NULL;
1434#endif 1432#endif
1435 rt->rt_genid = rt_genid(net); 1433 rt->rt_genid = rt_genid(net);
1436 rt->rt_flags |= RTCF_REDIRECTED; 1434 rt->rt_flags |= RTCF_REDIRECTED;
@@ -1439,23 +1437,23 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1439 rt->rt_gateway = new_gw; 1437 rt->rt_gateway = new_gw;
1440 1438
1441 /* Redirect received -> path was valid */ 1439 /* Redirect received -> path was valid */
1442 dst_confirm(&rth->u.dst); 1440 dst_confirm(&rth->dst);
1443 1441
1444 if (rt->peer) 1442 if (rt->peer)
1445 atomic_inc(&rt->peer->refcnt); 1443 atomic_inc(&rt->peer->refcnt);
1446 1444
1447 if (arp_bind_neighbour(&rt->u.dst) || 1445 if (arp_bind_neighbour(&rt->dst) ||
1448 !(rt->u.dst.neighbour->nud_state & 1446 !(rt->dst.neighbour->nud_state &
1449 NUD_VALID)) { 1447 NUD_VALID)) {
1450 if (rt->u.dst.neighbour) 1448 if (rt->dst.neighbour)
1451 neigh_event_send(rt->u.dst.neighbour, NULL); 1449 neigh_event_send(rt->dst.neighbour, NULL);
1452 ip_rt_put(rth); 1450 ip_rt_put(rth);
1453 rt_drop(rt); 1451 rt_drop(rt);
1454 goto do_next; 1452 goto do_next;
1455 } 1453 }
1456 1454
1457 netevent.old = &rth->u.dst; 1455 netevent.old = &rth->dst;
1458 netevent.new = &rt->u.dst; 1456 netevent.new = &rt->dst;
1459 call_netevent_notifiers(NETEVENT_REDIRECT, 1457 call_netevent_notifiers(NETEVENT_REDIRECT,
1460 &netevent); 1458 &netevent);
1461 1459
@@ -1464,12 +1462,10 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1464 ip_rt_put(rt); 1462 ip_rt_put(rt);
1465 goto do_next; 1463 goto do_next;
1466 } 1464 }
1467 rcu_read_unlock();
1468 do_next: 1465 do_next:
1469 ; 1466 ;
1470 } 1467 }
1471 } 1468 }
1472 in_dev_put(in_dev);
1473 return; 1469 return;
1474 1470
1475reject_redirect: 1471reject_redirect:
@@ -1480,7 +1476,7 @@ reject_redirect:
1480 &old_gw, dev->name, &new_gw, 1476 &old_gw, dev->name, &new_gw,
1481 &saddr, &daddr); 1477 &saddr, &daddr);
1482#endif 1478#endif
1483 in_dev_put(in_dev); 1479 ;
1484} 1480}
1485 1481
1486static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) 1482static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
@@ -1493,8 +1489,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1493 ip_rt_put(rt); 1489 ip_rt_put(rt);
1494 ret = NULL; 1490 ret = NULL;
1495 } else if ((rt->rt_flags & RTCF_REDIRECTED) || 1491 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
1496 (rt->u.dst.expires && 1492 (rt->dst.expires &&
1497 time_after_eq(jiffies, rt->u.dst.expires))) { 1493 time_after_eq(jiffies, rt->dst.expires))) {
1498 unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, 1494 unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
1499 rt->fl.oif, 1495 rt->fl.oif,
1500 rt_genid(dev_net(dst->dev))); 1496 rt_genid(dev_net(dst->dev)));
@@ -1532,7 +1528,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1532 int log_martians; 1528 int log_martians;
1533 1529
1534 rcu_read_lock(); 1530 rcu_read_lock();
1535 in_dev = __in_dev_get_rcu(rt->u.dst.dev); 1531 in_dev = __in_dev_get_rcu(rt->dst.dev);
1536 if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) { 1532 if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
1537 rcu_read_unlock(); 1533 rcu_read_unlock();
1538 return; 1534 return;
@@ -1543,30 +1539,30 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1543 /* No redirected packets during ip_rt_redirect_silence; 1539 /* No redirected packets during ip_rt_redirect_silence;
1544 * reset the algorithm. 1540 * reset the algorithm.
1545 */ 1541 */
1546 if (time_after(jiffies, rt->u.dst.rate_last + ip_rt_redirect_silence)) 1542 if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence))
1547 rt->u.dst.rate_tokens = 0; 1543 rt->dst.rate_tokens = 0;
1548 1544
1549 /* Too many ignored redirects; do not send anything 1545 /* Too many ignored redirects; do not send anything
1550 * set u.dst.rate_last to the last seen redirected packet. 1546 * set dst.rate_last to the last seen redirected packet.
1551 */ 1547 */
1552 if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) { 1548 if (rt->dst.rate_tokens >= ip_rt_redirect_number) {
1553 rt->u.dst.rate_last = jiffies; 1549 rt->dst.rate_last = jiffies;
1554 return; 1550 return;
1555 } 1551 }
1556 1552
1557 /* Check for load limit; set rate_last to the latest sent 1553 /* Check for load limit; set rate_last to the latest sent
1558 * redirect. 1554 * redirect.
1559 */ 1555 */
1560 if (rt->u.dst.rate_tokens == 0 || 1556 if (rt->dst.rate_tokens == 0 ||
1561 time_after(jiffies, 1557 time_after(jiffies,
1562 (rt->u.dst.rate_last + 1558 (rt->dst.rate_last +
1563 (ip_rt_redirect_load << rt->u.dst.rate_tokens)))) { 1559 (ip_rt_redirect_load << rt->dst.rate_tokens)))) {
1564 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); 1560 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
1565 rt->u.dst.rate_last = jiffies; 1561 rt->dst.rate_last = jiffies;
1566 ++rt->u.dst.rate_tokens; 1562 ++rt->dst.rate_tokens;
1567#ifdef CONFIG_IP_ROUTE_VERBOSE 1563#ifdef CONFIG_IP_ROUTE_VERBOSE
1568 if (log_martians && 1564 if (log_martians &&
1569 rt->u.dst.rate_tokens == ip_rt_redirect_number && 1565 rt->dst.rate_tokens == ip_rt_redirect_number &&
1570 net_ratelimit()) 1566 net_ratelimit())
1571 printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", 1567 printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
1572 &rt->rt_src, rt->rt_iif, 1568 &rt->rt_src, rt->rt_iif,
@@ -1581,7 +1577,7 @@ static int ip_error(struct sk_buff *skb)
1581 unsigned long now; 1577 unsigned long now;
1582 int code; 1578 int code;
1583 1579
1584 switch (rt->u.dst.error) { 1580 switch (rt->dst.error) {
1585 case EINVAL: 1581 case EINVAL:
1586 default: 1582 default:
1587 goto out; 1583 goto out;
@@ -1590,7 +1586,7 @@ static int ip_error(struct sk_buff *skb)
1590 break; 1586 break;
1591 case ENETUNREACH: 1587 case ENETUNREACH:
1592 code = ICMP_NET_UNREACH; 1588 code = ICMP_NET_UNREACH;
1593 IP_INC_STATS_BH(dev_net(rt->u.dst.dev), 1589 IP_INC_STATS_BH(dev_net(rt->dst.dev),
1594 IPSTATS_MIB_INNOROUTES); 1590 IPSTATS_MIB_INNOROUTES);
1595 break; 1591 break;
1596 case EACCES: 1592 case EACCES:
@@ -1599,12 +1595,12 @@ static int ip_error(struct sk_buff *skb)
1599 } 1595 }
1600 1596
1601 now = jiffies; 1597 now = jiffies;
1602 rt->u.dst.rate_tokens += now - rt->u.dst.rate_last; 1598 rt->dst.rate_tokens += now - rt->dst.rate_last;
1603 if (rt->u.dst.rate_tokens > ip_rt_error_burst) 1599 if (rt->dst.rate_tokens > ip_rt_error_burst)
1604 rt->u.dst.rate_tokens = ip_rt_error_burst; 1600 rt->dst.rate_tokens = ip_rt_error_burst;
1605 rt->u.dst.rate_last = now; 1601 rt->dst.rate_last = now;
1606 if (rt->u.dst.rate_tokens >= ip_rt_error_cost) { 1602 if (rt->dst.rate_tokens >= ip_rt_error_cost) {
1607 rt->u.dst.rate_tokens -= ip_rt_error_cost; 1603 rt->dst.rate_tokens -= ip_rt_error_cost;
1608 icmp_send(skb, ICMP_DEST_UNREACH, code, 0); 1604 icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
1609 } 1605 }
1610 1606
@@ -1649,7 +1645,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1649 1645
1650 rcu_read_lock(); 1646 rcu_read_lock();
1651 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; 1647 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
1652 rth = rcu_dereference(rth->u.dst.rt_next)) { 1648 rth = rcu_dereference(rth->dst.rt_next)) {
1653 unsigned short mtu = new_mtu; 1649 unsigned short mtu = new_mtu;
1654 1650
1655 if (rth->fl.fl4_dst != daddr || 1651 if (rth->fl.fl4_dst != daddr ||
@@ -1658,8 +1654,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1658 rth->rt_src != iph->saddr || 1654 rth->rt_src != iph->saddr ||
1659 rth->fl.oif != ikeys[k] || 1655 rth->fl.oif != ikeys[k] ||
1660 rth->fl.iif != 0 || 1656 rth->fl.iif != 0 ||
1661 dst_metric_locked(&rth->u.dst, RTAX_MTU) || 1657 dst_metric_locked(&rth->dst, RTAX_MTU) ||
1662 !net_eq(dev_net(rth->u.dst.dev), net) || 1658 !net_eq(dev_net(rth->dst.dev), net) ||
1663 rt_is_expired(rth)) 1659 rt_is_expired(rth))
1664 continue; 1660 continue;
1665 1661
@@ -1667,22 +1663,22 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1667 1663
1668 /* BSD 4.2 compatibility hack :-( */ 1664 /* BSD 4.2 compatibility hack :-( */
1669 if (mtu == 0 && 1665 if (mtu == 0 &&
1670 old_mtu >= dst_mtu(&rth->u.dst) && 1666 old_mtu >= dst_mtu(&rth->dst) &&
1671 old_mtu >= 68 + (iph->ihl << 2)) 1667 old_mtu >= 68 + (iph->ihl << 2))
1672 old_mtu -= iph->ihl << 2; 1668 old_mtu -= iph->ihl << 2;
1673 1669
1674 mtu = guess_mtu(old_mtu); 1670 mtu = guess_mtu(old_mtu);
1675 } 1671 }
1676 if (mtu <= dst_mtu(&rth->u.dst)) { 1672 if (mtu <= dst_mtu(&rth->dst)) {
1677 if (mtu < dst_mtu(&rth->u.dst)) { 1673 if (mtu < dst_mtu(&rth->dst)) {
1678 dst_confirm(&rth->u.dst); 1674 dst_confirm(&rth->dst);
1679 if (mtu < ip_rt_min_pmtu) { 1675 if (mtu < ip_rt_min_pmtu) {
1680 mtu = ip_rt_min_pmtu; 1676 mtu = ip_rt_min_pmtu;
1681 rth->u.dst.metrics[RTAX_LOCK-1] |= 1677 rth->dst.metrics[RTAX_LOCK-1] |=
1682 (1 << RTAX_MTU); 1678 (1 << RTAX_MTU);
1683 } 1679 }
1684 rth->u.dst.metrics[RTAX_MTU-1] = mtu; 1680 rth->dst.metrics[RTAX_MTU-1] = mtu;
1685 dst_set_expires(&rth->u.dst, 1681 dst_set_expires(&rth->dst,
1686 ip_rt_mtu_expires); 1682 ip_rt_mtu_expires);
1687 } 1683 }
1688 est_mtu = mtu; 1684 est_mtu = mtu;
@@ -1755,7 +1751,7 @@ static void ipv4_link_failure(struct sk_buff *skb)
1755 1751
1756 rt = skb_rtable(skb); 1752 rt = skb_rtable(skb);
1757 if (rt) 1753 if (rt)
1758 dst_set_expires(&rt->u.dst, 0); 1754 dst_set_expires(&rt->dst, 0);
1759} 1755}
1760 1756
1761static int ip_rt_bug(struct sk_buff *skb) 1757static int ip_rt_bug(struct sk_buff *skb)
@@ -1783,11 +1779,11 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
1783 1779
1784 if (rt->fl.iif == 0) 1780 if (rt->fl.iif == 0)
1785 src = rt->rt_src; 1781 src = rt->rt_src;
1786 else if (fib_lookup(dev_net(rt->u.dst.dev), &rt->fl, &res) == 0) { 1782 else if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) {
1787 src = FIB_RES_PREFSRC(res); 1783 src = FIB_RES_PREFSRC(res);
1788 fib_res_put(&res); 1784 fib_res_put(&res);
1789 } else 1785 } else
1790 src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway, 1786 src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
1791 RT_SCOPE_UNIVERSE); 1787 RT_SCOPE_UNIVERSE);
1792 memcpy(addr, &src, 4); 1788 memcpy(addr, &src, 4);
1793} 1789}
@@ -1795,10 +1791,10 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
1795#ifdef CONFIG_NET_CLS_ROUTE 1791#ifdef CONFIG_NET_CLS_ROUTE
1796static void set_class_tag(struct rtable *rt, u32 tag) 1792static void set_class_tag(struct rtable *rt, u32 tag)
1797{ 1793{
1798 if (!(rt->u.dst.tclassid & 0xFFFF)) 1794 if (!(rt->dst.tclassid & 0xFFFF))
1799 rt->u.dst.tclassid |= tag & 0xFFFF; 1795 rt->dst.tclassid |= tag & 0xFFFF;
1800 if (!(rt->u.dst.tclassid & 0xFFFF0000)) 1796 if (!(rt->dst.tclassid & 0xFFFF0000))
1801 rt->u.dst.tclassid |= tag & 0xFFFF0000; 1797 rt->dst.tclassid |= tag & 0xFFFF0000;
1802} 1798}
1803#endif 1799#endif
1804 1800
@@ -1810,30 +1806,30 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
1810 if (FIB_RES_GW(*res) && 1806 if (FIB_RES_GW(*res) &&
1811 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) 1807 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
1812 rt->rt_gateway = FIB_RES_GW(*res); 1808 rt->rt_gateway = FIB_RES_GW(*res);
1813 memcpy(rt->u.dst.metrics, fi->fib_metrics, 1809 memcpy(rt->dst.metrics, fi->fib_metrics,
1814 sizeof(rt->u.dst.metrics)); 1810 sizeof(rt->dst.metrics));
1815 if (fi->fib_mtu == 0) { 1811 if (fi->fib_mtu == 0) {
1816 rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; 1812 rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu;
1817 if (dst_metric_locked(&rt->u.dst, RTAX_MTU) && 1813 if (dst_metric_locked(&rt->dst, RTAX_MTU) &&
1818 rt->rt_gateway != rt->rt_dst && 1814 rt->rt_gateway != rt->rt_dst &&
1819 rt->u.dst.dev->mtu > 576) 1815 rt->dst.dev->mtu > 576)
1820 rt->u.dst.metrics[RTAX_MTU-1] = 576; 1816 rt->dst.metrics[RTAX_MTU-1] = 576;
1821 } 1817 }
1822#ifdef CONFIG_NET_CLS_ROUTE 1818#ifdef CONFIG_NET_CLS_ROUTE
1823 rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid; 1819 rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
1824#endif 1820#endif
1825 } else 1821 } else
1826 rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu; 1822 rt->dst.metrics[RTAX_MTU-1]= rt->dst.dev->mtu;
1827 1823
1828 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) 1824 if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
1829 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; 1825 rt->dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
1830 if (dst_mtu(&rt->u.dst) > IP_MAX_MTU) 1826 if (dst_mtu(&rt->dst) > IP_MAX_MTU)
1831 rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; 1827 rt->dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
1832 if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0) 1828 if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0)
1833 rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40, 1829 rt->dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->dst.dev->mtu - 40,
1834 ip_rt_min_advmss); 1830 ip_rt_min_advmss);
1835 if (dst_metric(&rt->u.dst, RTAX_ADVMSS) > 65535 - 40) 1831 if (dst_metric(&rt->dst, RTAX_ADVMSS) > 65535 - 40)
1836 rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; 1832 rt->dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;
1837 1833
1838#ifdef CONFIG_NET_CLS_ROUTE 1834#ifdef CONFIG_NET_CLS_ROUTE
1839#ifdef CONFIG_IP_MULTIPLE_TABLES 1835#ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -1844,14 +1840,16 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
1844 rt->rt_type = res->type; 1840 rt->rt_type = res->type;
1845} 1841}
1846 1842
1843/* called in rcu_read_lock() section */
1847static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, 1844static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1848 u8 tos, struct net_device *dev, int our) 1845 u8 tos, struct net_device *dev, int our)
1849{ 1846{
1850 unsigned hash; 1847 unsigned int hash;
1851 struct rtable *rth; 1848 struct rtable *rth;
1852 __be32 spec_dst; 1849 __be32 spec_dst;
1853 struct in_device *in_dev = in_dev_get(dev); 1850 struct in_device *in_dev = __in_dev_get_rcu(dev);
1854 u32 itag = 0; 1851 u32 itag = 0;
1852 int err;
1855 1853
1856 /* Primary sanity checks. */ 1854 /* Primary sanity checks. */
1857 1855
@@ -1866,21 +1864,23 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1866 if (!ipv4_is_local_multicast(daddr)) 1864 if (!ipv4_is_local_multicast(daddr))
1867 goto e_inval; 1865 goto e_inval;
1868 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); 1866 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
1869 } else if (fib_validate_source(saddr, 0, tos, 0, 1867 } else {
1870 dev, &spec_dst, &itag, 0) < 0) 1868 err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
1871 goto e_inval; 1869 &itag, 0);
1872 1870 if (err < 0)
1871 goto e_err;
1872 }
1873 rth = dst_alloc(&ipv4_dst_ops); 1873 rth = dst_alloc(&ipv4_dst_ops);
1874 if (!rth) 1874 if (!rth)
1875 goto e_nobufs; 1875 goto e_nobufs;
1876 1876
1877 rth->u.dst.output = ip_rt_bug; 1877 rth->dst.output = ip_rt_bug;
1878 rth->u.dst.obsolete = -1; 1878 rth->dst.obsolete = -1;
1879 1879
1880 atomic_set(&rth->u.dst.__refcnt, 1); 1880 atomic_set(&rth->dst.__refcnt, 1);
1881 rth->u.dst.flags= DST_HOST; 1881 rth->dst.flags= DST_HOST;
1882 if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) 1882 if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
1883 rth->u.dst.flags |= DST_NOPOLICY; 1883 rth->dst.flags |= DST_NOPOLICY;
1884 rth->fl.fl4_dst = daddr; 1884 rth->fl.fl4_dst = daddr;
1885 rth->rt_dst = daddr; 1885 rth->rt_dst = daddr;
1886 rth->fl.fl4_tos = tos; 1886 rth->fl.fl4_tos = tos;
@@ -1888,13 +1888,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1888 rth->fl.fl4_src = saddr; 1888 rth->fl.fl4_src = saddr;
1889 rth->rt_src = saddr; 1889 rth->rt_src = saddr;
1890#ifdef CONFIG_NET_CLS_ROUTE 1890#ifdef CONFIG_NET_CLS_ROUTE
1891 rth->u.dst.tclassid = itag; 1891 rth->dst.tclassid = itag;
1892#endif 1892#endif
1893 rth->rt_iif = 1893 rth->rt_iif =
1894 rth->fl.iif = dev->ifindex; 1894 rth->fl.iif = dev->ifindex;
1895 rth->u.dst.dev = init_net.loopback_dev; 1895 rth->dst.dev = init_net.loopback_dev;
1896 dev_hold(rth->u.dst.dev); 1896 dev_hold(rth->dst.dev);
1897 rth->idev = in_dev_get(rth->u.dst.dev); 1897 rth->idev = in_dev_get(rth->dst.dev);
1898 rth->fl.oif = 0; 1898 rth->fl.oif = 0;
1899 rth->rt_gateway = daddr; 1899 rth->rt_gateway = daddr;
1900 rth->rt_spec_dst= spec_dst; 1900 rth->rt_spec_dst= spec_dst;
@@ -1902,27 +1902,25 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1902 rth->rt_flags = RTCF_MULTICAST; 1902 rth->rt_flags = RTCF_MULTICAST;
1903 rth->rt_type = RTN_MULTICAST; 1903 rth->rt_type = RTN_MULTICAST;
1904 if (our) { 1904 if (our) {
1905 rth->u.dst.input= ip_local_deliver; 1905 rth->dst.input= ip_local_deliver;
1906 rth->rt_flags |= RTCF_LOCAL; 1906 rth->rt_flags |= RTCF_LOCAL;
1907 } 1907 }
1908 1908
1909#ifdef CONFIG_IP_MROUTE 1909#ifdef CONFIG_IP_MROUTE
1910 if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev)) 1910 if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
1911 rth->u.dst.input = ip_mr_input; 1911 rth->dst.input = ip_mr_input;
1912#endif 1912#endif
1913 RT_CACHE_STAT_INC(in_slow_mc); 1913 RT_CACHE_STAT_INC(in_slow_mc);
1914 1914
1915 in_dev_put(in_dev);
1916 hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); 1915 hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
1917 return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex); 1916 return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex);
1918 1917
1919e_nobufs: 1918e_nobufs:
1920 in_dev_put(in_dev);
1921 return -ENOBUFS; 1919 return -ENOBUFS;
1922
1923e_inval: 1920e_inval:
1924 in_dev_put(in_dev);
1925 return -EINVAL; 1921 return -EINVAL;
1922e_err:
1923 return err;
1926} 1924}
1927 1925
1928 1926
@@ -1956,22 +1954,22 @@ static void ip_handle_martian_source(struct net_device *dev,
1956#endif 1954#endif
1957} 1955}
1958 1956
1957/* called in rcu_read_lock() section */
1959static int __mkroute_input(struct sk_buff *skb, 1958static int __mkroute_input(struct sk_buff *skb,
1960 struct fib_result *res, 1959 struct fib_result *res,
1961 struct in_device *in_dev, 1960 struct in_device *in_dev,
1962 __be32 daddr, __be32 saddr, u32 tos, 1961 __be32 daddr, __be32 saddr, u32 tos,
1963 struct rtable **result) 1962 struct rtable **result)
1964{ 1963{
1965
1966 struct rtable *rth; 1964 struct rtable *rth;
1967 int err; 1965 int err;
1968 struct in_device *out_dev; 1966 struct in_device *out_dev;
1969 unsigned flags = 0; 1967 unsigned int flags = 0;
1970 __be32 spec_dst; 1968 __be32 spec_dst;
1971 u32 itag; 1969 u32 itag;
1972 1970
1973 /* get a working reference to the output device */ 1971 /* get a working reference to the output device */
1974 out_dev = in_dev_get(FIB_RES_DEV(*res)); 1972 out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
1975 if (out_dev == NULL) { 1973 if (out_dev == NULL) {
1976 if (net_ratelimit()) 1974 if (net_ratelimit())
1977 printk(KERN_CRIT "Bug in ip_route_input" \ 1975 printk(KERN_CRIT "Bug in ip_route_input" \
@@ -1986,7 +1984,6 @@ static int __mkroute_input(struct sk_buff *skb,
1986 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, 1984 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
1987 saddr); 1985 saddr);
1988 1986
1989 err = -EINVAL;
1990 goto cleanup; 1987 goto cleanup;
1991 } 1988 }
1992 1989
@@ -2020,12 +2017,12 @@ static int __mkroute_input(struct sk_buff *skb,
2020 goto cleanup; 2017 goto cleanup;
2021 } 2018 }
2022 2019
2023 atomic_set(&rth->u.dst.__refcnt, 1); 2020 atomic_set(&rth->dst.__refcnt, 1);
2024 rth->u.dst.flags= DST_HOST; 2021 rth->dst.flags= DST_HOST;
2025 if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) 2022 if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
2026 rth->u.dst.flags |= DST_NOPOLICY; 2023 rth->dst.flags |= DST_NOPOLICY;
2027 if (IN_DEV_CONF_GET(out_dev, NOXFRM)) 2024 if (IN_DEV_CONF_GET(out_dev, NOXFRM))
2028 rth->u.dst.flags |= DST_NOXFRM; 2025 rth->dst.flags |= DST_NOXFRM;
2029 rth->fl.fl4_dst = daddr; 2026 rth->fl.fl4_dst = daddr;
2030 rth->rt_dst = daddr; 2027 rth->rt_dst = daddr;
2031 rth->fl.fl4_tos = tos; 2028 rth->fl.fl4_tos = tos;
@@ -2035,16 +2032,16 @@ static int __mkroute_input(struct sk_buff *skb,
2035 rth->rt_gateway = daddr; 2032 rth->rt_gateway = daddr;
2036 rth->rt_iif = 2033 rth->rt_iif =
2037 rth->fl.iif = in_dev->dev->ifindex; 2034 rth->fl.iif = in_dev->dev->ifindex;
2038 rth->u.dst.dev = (out_dev)->dev; 2035 rth->dst.dev = (out_dev)->dev;
2039 dev_hold(rth->u.dst.dev); 2036 dev_hold(rth->dst.dev);
2040 rth->idev = in_dev_get(rth->u.dst.dev); 2037 rth->idev = in_dev_get(rth->dst.dev);
2041 rth->fl.oif = 0; 2038 rth->fl.oif = 0;
2042 rth->rt_spec_dst= spec_dst; 2039 rth->rt_spec_dst= spec_dst;
2043 2040
2044 rth->u.dst.obsolete = -1; 2041 rth->dst.obsolete = -1;
2045 rth->u.dst.input = ip_forward; 2042 rth->dst.input = ip_forward;
2046 rth->u.dst.output = ip_output; 2043 rth->dst.output = ip_output;
2047 rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev)); 2044 rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
2048 2045
2049 rt_set_nexthop(rth, res, itag); 2046 rt_set_nexthop(rth, res, itag);
2050 2047
@@ -2053,8 +2050,6 @@ static int __mkroute_input(struct sk_buff *skb,
2053 *result = rth; 2050 *result = rth;
2054 err = 0; 2051 err = 0;
2055 cleanup: 2052 cleanup:
2056 /* release the working reference to the output device */
2057 in_dev_put(out_dev);
2058 return err; 2053 return err;
2059} 2054}
2060 2055
@@ -2080,7 +2075,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
2080 2075
2081 /* put it into the cache */ 2076 /* put it into the cache */
2082 hash = rt_hash(daddr, saddr, fl->iif, 2077 hash = rt_hash(daddr, saddr, fl->iif,
2083 rt_genid(dev_net(rth->u.dst.dev))); 2078 rt_genid(dev_net(rth->dst.dev)));
2084 return rt_intern_hash(hash, rth, NULL, skb, fl->iif); 2079 return rt_intern_hash(hash, rth, NULL, skb, fl->iif);
2085} 2080}
2086 2081
@@ -2098,7 +2093,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2098 u8 tos, struct net_device *dev) 2093 u8 tos, struct net_device *dev)
2099{ 2094{
2100 struct fib_result res; 2095 struct fib_result res;
2101 struct in_device *in_dev = in_dev_get(dev); 2096 struct in_device *in_dev = __in_dev_get_rcu(dev);
2102 struct flowi fl = { .nl_u = { .ip4_u = 2097 struct flowi fl = { .nl_u = { .ip4_u =
2103 { .daddr = daddr, 2098 { .daddr = daddr,
2104 .saddr = saddr, 2099 .saddr = saddr,
@@ -2158,13 +2153,12 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2158 goto brd_input; 2153 goto brd_input;
2159 2154
2160 if (res.type == RTN_LOCAL) { 2155 if (res.type == RTN_LOCAL) {
2161 int result; 2156 err = fib_validate_source(saddr, daddr, tos,
2162 result = fib_validate_source(saddr, daddr, tos,
2163 net->loopback_dev->ifindex, 2157 net->loopback_dev->ifindex,
2164 dev, &spec_dst, &itag, skb->mark); 2158 dev, &spec_dst, &itag, skb->mark);
2165 if (result < 0) 2159 if (err < 0)
2166 goto martian_source; 2160 goto martian_source_keep_err;
2167 if (result) 2161 if (err)
2168 flags |= RTCF_DIRECTSRC; 2162 flags |= RTCF_DIRECTSRC;
2169 spec_dst = daddr; 2163 spec_dst = daddr;
2170 goto local_input; 2164 goto local_input;
@@ -2177,7 +2171,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2177 2171
2178 err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); 2172 err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
2179done: 2173done:
2180 in_dev_put(in_dev);
2181 if (free_res) 2174 if (free_res)
2182 fib_res_put(&res); 2175 fib_res_put(&res);
2183out: return err; 2176out: return err;
@@ -2192,7 +2185,7 @@ brd_input:
2192 err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, 2185 err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
2193 &itag, skb->mark); 2186 &itag, skb->mark);
2194 if (err < 0) 2187 if (err < 0)
2195 goto martian_source; 2188 goto martian_source_keep_err;
2196 if (err) 2189 if (err)
2197 flags |= RTCF_DIRECTSRC; 2190 flags |= RTCF_DIRECTSRC;
2198 } 2191 }
@@ -2205,14 +2198,14 @@ local_input:
2205 if (!rth) 2198 if (!rth)
2206 goto e_nobufs; 2199 goto e_nobufs;
2207 2200
2208 rth->u.dst.output= ip_rt_bug; 2201 rth->dst.output= ip_rt_bug;
2209 rth->u.dst.obsolete = -1; 2202 rth->dst.obsolete = -1;
2210 rth->rt_genid = rt_genid(net); 2203 rth->rt_genid = rt_genid(net);
2211 2204
2212 atomic_set(&rth->u.dst.__refcnt, 1); 2205 atomic_set(&rth->dst.__refcnt, 1);
2213 rth->u.dst.flags= DST_HOST; 2206 rth->dst.flags= DST_HOST;
2214 if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) 2207 if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
2215 rth->u.dst.flags |= DST_NOPOLICY; 2208 rth->dst.flags |= DST_NOPOLICY;
2216 rth->fl.fl4_dst = daddr; 2209 rth->fl.fl4_dst = daddr;
2217 rth->rt_dst = daddr; 2210 rth->rt_dst = daddr;
2218 rth->fl.fl4_tos = tos; 2211 rth->fl.fl4_tos = tos;
@@ -2220,20 +2213,20 @@ local_input:
2220 rth->fl.fl4_src = saddr; 2213 rth->fl.fl4_src = saddr;
2221 rth->rt_src = saddr; 2214 rth->rt_src = saddr;
2222#ifdef CONFIG_NET_CLS_ROUTE 2215#ifdef CONFIG_NET_CLS_ROUTE
2223 rth->u.dst.tclassid = itag; 2216 rth->dst.tclassid = itag;
2224#endif 2217#endif
2225 rth->rt_iif = 2218 rth->rt_iif =
2226 rth->fl.iif = dev->ifindex; 2219 rth->fl.iif = dev->ifindex;
2227 rth->u.dst.dev = net->loopback_dev; 2220 rth->dst.dev = net->loopback_dev;
2228 dev_hold(rth->u.dst.dev); 2221 dev_hold(rth->dst.dev);
2229 rth->idev = in_dev_get(rth->u.dst.dev); 2222 rth->idev = in_dev_get(rth->dst.dev);
2230 rth->rt_gateway = daddr; 2223 rth->rt_gateway = daddr;
2231 rth->rt_spec_dst= spec_dst; 2224 rth->rt_spec_dst= spec_dst;
2232 rth->u.dst.input= ip_local_deliver; 2225 rth->dst.input= ip_local_deliver;
2233 rth->rt_flags = flags|RTCF_LOCAL; 2226 rth->rt_flags = flags|RTCF_LOCAL;
2234 if (res.type == RTN_UNREACHABLE) { 2227 if (res.type == RTN_UNREACHABLE) {
2235 rth->u.dst.input= ip_error; 2228 rth->dst.input= ip_error;
2236 rth->u.dst.error= -err; 2229 rth->dst.error= -err;
2237 rth->rt_flags &= ~RTCF_LOCAL; 2230 rth->rt_flags &= ~RTCF_LOCAL;
2238 } 2231 }
2239 rth->rt_type = res.type; 2232 rth->rt_type = res.type;
@@ -2273,8 +2266,10 @@ e_nobufs:
2273 goto done; 2266 goto done;
2274 2267
2275martian_source: 2268martian_source:
2269 err = -EINVAL;
2270martian_source_keep_err:
2276 ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); 2271 ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
2277 goto e_inval; 2272 goto done;
2278} 2273}
2279 2274
2280int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, 2275int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
@@ -2284,32 +2279,34 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2284 unsigned hash; 2279 unsigned hash;
2285 int iif = dev->ifindex; 2280 int iif = dev->ifindex;
2286 struct net *net; 2281 struct net *net;
2282 int res;
2287 2283
2288 net = dev_net(dev); 2284 net = dev_net(dev);
2289 2285
2286 rcu_read_lock();
2287
2290 if (!rt_caching(net)) 2288 if (!rt_caching(net))
2291 goto skip_cache; 2289 goto skip_cache;
2292 2290
2293 tos &= IPTOS_RT_MASK; 2291 tos &= IPTOS_RT_MASK;
2294 hash = rt_hash(daddr, saddr, iif, rt_genid(net)); 2292 hash = rt_hash(daddr, saddr, iif, rt_genid(net));
2295 2293
2296 rcu_read_lock();
2297 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; 2294 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
2298 rth = rcu_dereference(rth->u.dst.rt_next)) { 2295 rth = rcu_dereference(rth->dst.rt_next)) {
2299 if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | 2296 if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
2300 ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | 2297 ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
2301 (rth->fl.iif ^ iif) | 2298 (rth->fl.iif ^ iif) |
2302 rth->fl.oif | 2299 rth->fl.oif |
2303 (rth->fl.fl4_tos ^ tos)) == 0 && 2300 (rth->fl.fl4_tos ^ tos)) == 0 &&
2304 rth->fl.mark == skb->mark && 2301 rth->fl.mark == skb->mark &&
2305 net_eq(dev_net(rth->u.dst.dev), net) && 2302 net_eq(dev_net(rth->dst.dev), net) &&
2306 !rt_is_expired(rth)) { 2303 !rt_is_expired(rth)) {
2307 if (noref) { 2304 if (noref) {
2308 dst_use_noref(&rth->u.dst, jiffies); 2305 dst_use_noref(&rth->dst, jiffies);
2309 skb_dst_set_noref(skb, &rth->u.dst); 2306 skb_dst_set_noref(skb, &rth->dst);
2310 } else { 2307 } else {
2311 dst_use(&rth->u.dst, jiffies); 2308 dst_use(&rth->dst, jiffies);
2312 skb_dst_set(skb, &rth->u.dst); 2309 skb_dst_set(skb, &rth->dst);
2313 } 2310 }
2314 RT_CACHE_STAT_INC(in_hit); 2311 RT_CACHE_STAT_INC(in_hit);
2315 rcu_read_unlock(); 2312 rcu_read_unlock();
@@ -2317,7 +2314,6 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2317 } 2314 }
2318 RT_CACHE_STAT_INC(in_hlist_search); 2315 RT_CACHE_STAT_INC(in_hlist_search);
2319 } 2316 }
2320 rcu_read_unlock();
2321 2317
2322skip_cache: 2318skip_cache:
2323 /* Multicast recognition logic is moved from route cache to here. 2319 /* Multicast recognition logic is moved from route cache to here.
@@ -2332,12 +2328,11 @@ skip_cache:
2332 route cache entry is created eventually. 2328 route cache entry is created eventually.
2333 */ 2329 */
2334 if (ipv4_is_multicast(daddr)) { 2330 if (ipv4_is_multicast(daddr)) {
2335 struct in_device *in_dev; 2331 struct in_device *in_dev = __in_dev_get_rcu(dev);
2336 2332
2337 rcu_read_lock(); 2333 if (in_dev) {
2338 if ((in_dev = __in_dev_get_rcu(dev)) != NULL) {
2339 int our = ip_check_mc(in_dev, daddr, saddr, 2334 int our = ip_check_mc(in_dev, daddr, saddr,
2340 ip_hdr(skb)->protocol); 2335 ip_hdr(skb)->protocol);
2341 if (our 2336 if (our
2342#ifdef CONFIG_IP_MROUTE 2337#ifdef CONFIG_IP_MROUTE
2343 || 2338 ||
@@ -2345,15 +2340,18 @@ skip_cache:
2345 IN_DEV_MFORWARD(in_dev)) 2340 IN_DEV_MFORWARD(in_dev))
2346#endif 2341#endif
2347 ) { 2342 ) {
2343 int res = ip_route_input_mc(skb, daddr, saddr,
2344 tos, dev, our);
2348 rcu_read_unlock(); 2345 rcu_read_unlock();
2349 return ip_route_input_mc(skb, daddr, saddr, 2346 return res;
2350 tos, dev, our);
2351 } 2347 }
2352 } 2348 }
2353 rcu_read_unlock(); 2349 rcu_read_unlock();
2354 return -EINVAL; 2350 return -EINVAL;
2355 } 2351 }
2356 return ip_route_input_slow(skb, daddr, saddr, tos, dev); 2352 res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
2353 rcu_read_unlock();
2354 return res;
2357} 2355}
2358EXPORT_SYMBOL(ip_route_input_common); 2356EXPORT_SYMBOL(ip_route_input_common);
2359 2357
@@ -2415,12 +2413,12 @@ static int __mkroute_output(struct rtable **result,
2415 goto cleanup; 2413 goto cleanup;
2416 } 2414 }
2417 2415
2418 atomic_set(&rth->u.dst.__refcnt, 1); 2416 atomic_set(&rth->dst.__refcnt, 1);
2419 rth->u.dst.flags= DST_HOST; 2417 rth->dst.flags= DST_HOST;
2420 if (IN_DEV_CONF_GET(in_dev, NOXFRM)) 2418 if (IN_DEV_CONF_GET(in_dev, NOXFRM))
2421 rth->u.dst.flags |= DST_NOXFRM; 2419 rth->dst.flags |= DST_NOXFRM;
2422 if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) 2420 if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
2423 rth->u.dst.flags |= DST_NOPOLICY; 2421 rth->dst.flags |= DST_NOPOLICY;
2424 2422
2425 rth->fl.fl4_dst = oldflp->fl4_dst; 2423 rth->fl.fl4_dst = oldflp->fl4_dst;
2426 rth->fl.fl4_tos = tos; 2424 rth->fl.fl4_tos = tos;
@@ -2432,35 +2430,35 @@ static int __mkroute_output(struct rtable **result,
2432 rth->rt_iif = oldflp->oif ? : dev_out->ifindex; 2430 rth->rt_iif = oldflp->oif ? : dev_out->ifindex;
2433 /* get references to the devices that are to be hold by the routing 2431 /* get references to the devices that are to be hold by the routing
2434 cache entry */ 2432 cache entry */
2435 rth->u.dst.dev = dev_out; 2433 rth->dst.dev = dev_out;
2436 dev_hold(dev_out); 2434 dev_hold(dev_out);
2437 rth->idev = in_dev_get(dev_out); 2435 rth->idev = in_dev_get(dev_out);
2438 rth->rt_gateway = fl->fl4_dst; 2436 rth->rt_gateway = fl->fl4_dst;
2439 rth->rt_spec_dst= fl->fl4_src; 2437 rth->rt_spec_dst= fl->fl4_src;
2440 2438
2441 rth->u.dst.output=ip_output; 2439 rth->dst.output=ip_output;
2442 rth->u.dst.obsolete = -1; 2440 rth->dst.obsolete = -1;
2443 rth->rt_genid = rt_genid(dev_net(dev_out)); 2441 rth->rt_genid = rt_genid(dev_net(dev_out));
2444 2442
2445 RT_CACHE_STAT_INC(out_slow_tot); 2443 RT_CACHE_STAT_INC(out_slow_tot);
2446 2444
2447 if (flags & RTCF_LOCAL) { 2445 if (flags & RTCF_LOCAL) {
2448 rth->u.dst.input = ip_local_deliver; 2446 rth->dst.input = ip_local_deliver;
2449 rth->rt_spec_dst = fl->fl4_dst; 2447 rth->rt_spec_dst = fl->fl4_dst;
2450 } 2448 }
2451 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { 2449 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
2452 rth->rt_spec_dst = fl->fl4_src; 2450 rth->rt_spec_dst = fl->fl4_src;
2453 if (flags & RTCF_LOCAL && 2451 if (flags & RTCF_LOCAL &&
2454 !(dev_out->flags & IFF_LOOPBACK)) { 2452 !(dev_out->flags & IFF_LOOPBACK)) {
2455 rth->u.dst.output = ip_mc_output; 2453 rth->dst.output = ip_mc_output;
2456 RT_CACHE_STAT_INC(out_slow_mc); 2454 RT_CACHE_STAT_INC(out_slow_mc);
2457 } 2455 }
2458#ifdef CONFIG_IP_MROUTE 2456#ifdef CONFIG_IP_MROUTE
2459 if (res->type == RTN_MULTICAST) { 2457 if (res->type == RTN_MULTICAST) {
2460 if (IN_DEV_MFORWARD(in_dev) && 2458 if (IN_DEV_MFORWARD(in_dev) &&
2461 !ipv4_is_local_multicast(oldflp->fl4_dst)) { 2459 !ipv4_is_local_multicast(oldflp->fl4_dst)) {
2462 rth->u.dst.input = ip_mr_input; 2460 rth->dst.input = ip_mr_input;
2463 rth->u.dst.output = ip_mc_output; 2461 rth->dst.output = ip_mc_output;
2464 } 2462 }
2465 } 2463 }
2466#endif 2464#endif
@@ -2715,7 +2713,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
2715 2713
2716 rcu_read_lock_bh(); 2714 rcu_read_lock_bh();
2717 for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; 2715 for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth;
2718 rth = rcu_dereference_bh(rth->u.dst.rt_next)) { 2716 rth = rcu_dereference_bh(rth->dst.rt_next)) {
2719 if (rth->fl.fl4_dst == flp->fl4_dst && 2717 if (rth->fl.fl4_dst == flp->fl4_dst &&
2720 rth->fl.fl4_src == flp->fl4_src && 2718 rth->fl.fl4_src == flp->fl4_src &&
2721 rth->fl.iif == 0 && 2719 rth->fl.iif == 0 &&
@@ -2723,9 +2721,9 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
2723 rth->fl.mark == flp->mark && 2721 rth->fl.mark == flp->mark &&
2724 !((rth->fl.fl4_tos ^ flp->fl4_tos) & 2722 !((rth->fl.fl4_tos ^ flp->fl4_tos) &
2725 (IPTOS_RT_MASK | RTO_ONLINK)) && 2723 (IPTOS_RT_MASK | RTO_ONLINK)) &&
2726 net_eq(dev_net(rth->u.dst.dev), net) && 2724 net_eq(dev_net(rth->dst.dev), net) &&
2727 !rt_is_expired(rth)) { 2725 !rt_is_expired(rth)) {
2728 dst_use(&rth->u.dst, jiffies); 2726 dst_use(&rth->dst, jiffies);
2729 RT_CACHE_STAT_INC(out_hit); 2727 RT_CACHE_STAT_INC(out_hit);
2730 rcu_read_unlock_bh(); 2728 rcu_read_unlock_bh();
2731 *rp = rth; 2729 *rp = rth;
@@ -2738,7 +2736,6 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
2738slow_output: 2736slow_output:
2739 return ip_route_output_slow(net, rp, flp); 2737 return ip_route_output_slow(net, rp, flp);
2740} 2738}
2741
2742EXPORT_SYMBOL_GPL(__ip_route_output_key); 2739EXPORT_SYMBOL_GPL(__ip_route_output_key);
2743 2740
2744static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) 2741static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -2762,15 +2759,15 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
2762 dst_alloc(&ipv4_dst_blackhole_ops); 2759 dst_alloc(&ipv4_dst_blackhole_ops);
2763 2760
2764 if (rt) { 2761 if (rt) {
2765 struct dst_entry *new = &rt->u.dst; 2762 struct dst_entry *new = &rt->dst;
2766 2763
2767 atomic_set(&new->__refcnt, 1); 2764 atomic_set(&new->__refcnt, 1);
2768 new->__use = 1; 2765 new->__use = 1;
2769 new->input = dst_discard; 2766 new->input = dst_discard;
2770 new->output = dst_discard; 2767 new->output = dst_discard;
2771 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); 2768 memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
2772 2769
2773 new->dev = ort->u.dst.dev; 2770 new->dev = ort->dst.dev;
2774 if (new->dev) 2771 if (new->dev)
2775 dev_hold(new->dev); 2772 dev_hold(new->dev);
2776 2773
@@ -2794,7 +2791,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
2794 dst_free(new); 2791 dst_free(new);
2795 } 2792 }
2796 2793
2797 dst_release(&(*rp)->u.dst); 2794 dst_release(&(*rp)->dst);
2798 *rp = rt; 2795 *rp = rt;
2799 return (rt ? 0 : -ENOMEM); 2796 return (rt ? 0 : -ENOMEM);
2800} 2797}
@@ -2822,13 +2819,13 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
2822 2819
2823 return 0; 2820 return 0;
2824} 2821}
2825
2826EXPORT_SYMBOL_GPL(ip_route_output_flow); 2822EXPORT_SYMBOL_GPL(ip_route_output_flow);
2827 2823
2828int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp) 2824int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp)
2829{ 2825{
2830 return ip_route_output_flow(net, rp, flp, NULL, 0); 2826 return ip_route_output_flow(net, rp, flp, NULL, 0);
2831} 2827}
2828EXPORT_SYMBOL(ip_route_output_key);
2832 2829
2833static int rt_fill_info(struct net *net, 2830static int rt_fill_info(struct net *net,
2834 struct sk_buff *skb, u32 pid, u32 seq, int event, 2831 struct sk_buff *skb, u32 pid, u32 seq, int event,
@@ -2864,11 +2861,11 @@ static int rt_fill_info(struct net *net,
2864 r->rtm_src_len = 32; 2861 r->rtm_src_len = 32;
2865 NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); 2862 NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src);
2866 } 2863 }
2867 if (rt->u.dst.dev) 2864 if (rt->dst.dev)
2868 NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex); 2865 NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
2869#ifdef CONFIG_NET_CLS_ROUTE 2866#ifdef CONFIG_NET_CLS_ROUTE
2870 if (rt->u.dst.tclassid) 2867 if (rt->dst.tclassid)
2871 NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid); 2868 NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
2872#endif 2869#endif
2873 if (rt->fl.iif) 2870 if (rt->fl.iif)
2874 NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); 2871 NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
@@ -2878,12 +2875,13 @@ static int rt_fill_info(struct net *net,
2878 if (rt->rt_dst != rt->rt_gateway) 2875 if (rt->rt_dst != rt->rt_gateway)
2879 NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); 2876 NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway);
2880 2877
2881 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) 2878 if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
2882 goto nla_put_failure; 2879 goto nla_put_failure;
2883 2880
2884 error = rt->u.dst.error; 2881 error = rt->dst.error;
2885 expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0; 2882 expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
2886 if (rt->peer) { 2883 if (rt->peer) {
2884 inet_peer_refcheck(rt->peer);
2887 id = atomic_read(&rt->peer->ip_id_count) & 0xffff; 2885 id = atomic_read(&rt->peer->ip_id_count) & 0xffff;
2888 if (rt->peer->tcp_ts_stamp) { 2886 if (rt->peer->tcp_ts_stamp) {
2889 ts = rt->peer->tcp_ts; 2887 ts = rt->peer->tcp_ts;
@@ -2914,7 +2912,7 @@ static int rt_fill_info(struct net *net,
2914 NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); 2912 NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif);
2915 } 2913 }
2916 2914
2917 if (rtnl_put_cacheinfo(skb, &rt->u.dst, id, ts, tsage, 2915 if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage,
2918 expires, error) < 0) 2916 expires, error) < 0)
2919 goto nla_put_failure; 2917 goto nla_put_failure;
2920 2918
@@ -2979,8 +2977,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2979 local_bh_enable(); 2977 local_bh_enable();
2980 2978
2981 rt = skb_rtable(skb); 2979 rt = skb_rtable(skb);
2982 if (err == 0 && rt->u.dst.error) 2980 if (err == 0 && rt->dst.error)
2983 err = -rt->u.dst.error; 2981 err = -rt->dst.error;
2984 } else { 2982 } else {
2985 struct flowi fl = { 2983 struct flowi fl = {
2986 .nl_u = { 2984 .nl_u = {
@@ -2998,7 +2996,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2998 if (err) 2996 if (err)
2999 goto errout_free; 2997 goto errout_free;
3000 2998
3001 skb_dst_set(skb, &rt->u.dst); 2999 skb_dst_set(skb, &rt->dst);
3002 if (rtm->rtm_flags & RTM_F_NOTIFY) 3000 if (rtm->rtm_flags & RTM_F_NOTIFY)
3003 rt->rt_flags |= RTCF_NOTIFY; 3001 rt->rt_flags |= RTCF_NOTIFY;
3004 3002
@@ -3034,12 +3032,12 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
3034 continue; 3032 continue;
3035 rcu_read_lock_bh(); 3033 rcu_read_lock_bh();
3036 for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt; 3034 for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt;
3037 rt = rcu_dereference_bh(rt->u.dst.rt_next), idx++) { 3035 rt = rcu_dereference_bh(rt->dst.rt_next), idx++) {
3038 if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx) 3036 if (!net_eq(dev_net(rt->dst.dev), net) || idx < s_idx)
3039 continue; 3037 continue;
3040 if (rt_is_expired(rt)) 3038 if (rt_is_expired(rt))
3041 continue; 3039 continue;
3042 skb_dst_set_noref(skb, &rt->u.dst); 3040 skb_dst_set_noref(skb, &rt->dst);
3043 if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid, 3041 if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid,
3044 cb->nlh->nlmsg_seq, RTM_NEWROUTE, 3042 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
3045 1, NLM_F_MULTI) <= 0) { 3043 1, NLM_F_MULTI) <= 0) {
@@ -3365,6 +3363,3 @@ void __init ip_static_sysctl_init(void)
3365 register_sysctl_paths(ipv4_path, ipv4_skeleton); 3363 register_sysctl_paths(ipv4_path, ipv4_skeleton);
3366} 3364}
3367#endif 3365#endif
3368
3369EXPORT_SYMBOL(__ip_select_ident);
3370EXPORT_SYMBOL(ip_route_output_key);