Diffstat (limited to 'net/ipv4/route.c')
 net/ipv4/route.c (-rw-r--r--) | 504
 1 file changed, 251 insertions(+), 253 deletions(-)
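
Nearly every hunk below is the same mechanical substitution: struct rtable used to wrap its embedded struct dst_entry in a union named u, and every access in this file went through its dst member as rt->u.dst; with the union dropped, the field is reached directly as rt->dst. Assuming dst was the union's only member, the in-memory layout is unchanged and the rename is purely textual. A minimal sketch of the shape of the change (member lists elided, not the full kernel definitions):

	/* Sketch only -- the real struct rtable carries many more members. */
	struct rtable_before {
		union {
			struct dst_entry dst;	/* spelled rt->u.dst at every use site */
		} u;
		/* ... */
	};

	struct rtable_after {
		struct dst_entry dst;		/* now simply rt->dst */
		/* ... */
	};

The first hunk additionally replaces the open-coded per-cpu counter bump, (__raw_get_cpu_var(rt_cache_stat).field++), with the equivalent __this_cpu_inc() helper.
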
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 560acc677ce4..03430de46166 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -253,8 +253,7 @@ static unsigned rt_hash_mask __read_mostly;
 static unsigned int rt_hash_log  __read_mostly;
 
 static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
-#define RT_CACHE_STAT_INC(field) \
-	(__raw_get_cpu_var(rt_cache_stat).field++)
+#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)
 
 static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx,
 				   int genid)
@@ -287,10 +286,10 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
 		rcu_read_lock_bh();
 		r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
 		while (r) {
-			if (dev_net(r->u.dst.dev) == seq_file_net(seq) &&
+			if (dev_net(r->dst.dev) == seq_file_net(seq) &&
 			    r->rt_genid == st->genid)
 				return r;
-			r = rcu_dereference_bh(r->u.dst.rt_next);
+			r = rcu_dereference_bh(r->dst.rt_next);
 		}
 		rcu_read_unlock_bh();
 	}
@@ -302,7 +301,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq,
 {
 	struct rt_cache_iter_state *st = seq->private;
 
-	r = r->u.dst.rt_next;
+	r = r->dst.rt_next;
 	while (!r) {
 		rcu_read_unlock_bh();
 		do {
@@ -320,7 +319,7 @@ static struct rtable *rt_cache_get_next(struct seq_file *seq,
 {
 	struct rt_cache_iter_state *st = seq->private;
 	while ((r = __rt_cache_get_next(seq, r)) != NULL) {
-		if (dev_net(r->u.dst.dev) != seq_file_net(seq))
+		if (dev_net(r->dst.dev) != seq_file_net(seq))
 			continue;
 		if (r->rt_genid == st->genid)
 			break;
@@ -378,19 +377,19 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
 
 		seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t"
 			   "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n",
-			   r->u.dst.dev ? r->u.dst.dev->name : "*",
+			   r->dst.dev ? r->dst.dev->name : "*",
 			   (__force u32)r->rt_dst,
 			   (__force u32)r->rt_gateway,
-			   r->rt_flags, atomic_read(&r->u.dst.__refcnt),
-			   r->u.dst.__use, 0, (__force u32)r->rt_src,
-			   (dst_metric(&r->u.dst, RTAX_ADVMSS) ?
-			    (int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0),
-			   dst_metric(&r->u.dst, RTAX_WINDOW),
-			   (int)((dst_metric(&r->u.dst, RTAX_RTT) >> 3) +
-				 dst_metric(&r->u.dst, RTAX_RTTVAR)),
+			   r->rt_flags, atomic_read(&r->dst.__refcnt),
+			   r->dst.__use, 0, (__force u32)r->rt_src,
+			   (dst_metric(&r->dst, RTAX_ADVMSS) ?
+			    (int)dst_metric(&r->dst, RTAX_ADVMSS) + 40 : 0),
+			   dst_metric(&r->dst, RTAX_WINDOW),
+			   (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
+				 dst_metric(&r->dst, RTAX_RTTVAR)),
 			   r->fl.fl4_tos,
-			   r->u.dst.hh ? atomic_read(&r->u.dst.hh->hh_refcnt) : -1,
-			   r->u.dst.hh ? (r->u.dst.hh->hh_output ==
+			   r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1,
+			   r->dst.hh ? (r->dst.hh->hh_output ==
 					  dev_queue_xmit) : 0,
 			   r->rt_spec_dst, &len);
 
@@ -609,13 +608,13 @@ static inline int ip_rt_proc_init(void)
 
 static inline void rt_free(struct rtable *rt)
 {
-	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
+	call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
 }
 
 static inline void rt_drop(struct rtable *rt)
 {
 	ip_rt_put(rt);
-	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
+	call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
 }
 
 static inline int rt_fast_clean(struct rtable *rth)
@@ -623,13 +622,13 @@ static inline int rt_fast_clean(struct rtable *rth)
 	/* Kill broadcast/multicast entries very aggresively, if they
 	   collide in hash table with more useful entries */
 	return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
-		rth->fl.iif && rth->u.dst.rt_next;
+		rth->fl.iif && rth->dst.rt_next;
 }
 
 static inline int rt_valuable(struct rtable *rth)
 {
 	return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
-		rth->u.dst.expires;
+		rth->dst.expires;
 }
 
 static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
@@ -637,15 +636,15 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t
 	unsigned long age;
 	int ret = 0;
 
-	if (atomic_read(&rth->u.dst.__refcnt))
+	if (atomic_read(&rth->dst.__refcnt))
 		goto out;
 
 	ret = 1;
-	if (rth->u.dst.expires &&
-	    time_after_eq(jiffies, rth->u.dst.expires))
+	if (rth->dst.expires &&
+	    time_after_eq(jiffies, rth->dst.expires))
 		goto out;
 
-	age = jiffies - rth->u.dst.lastuse;
+	age = jiffies - rth->dst.lastuse;
 	ret = 0;
 	if ((age <= tmo1 && !rt_fast_clean(rth)) ||
 	    (age <= tmo2 && rt_valuable(rth)))
@@ -661,7 +660,7 @@ out: return ret;
  */
 static inline u32 rt_score(struct rtable *rt)
 {
-	u32 score = jiffies - rt->u.dst.lastuse;
+	u32 score = jiffies - rt->dst.lastuse;
 
 	score = ~score & ~(3<<30);
 
@@ -701,12 +700,12 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
 
 static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
 {
-	return net_eq(dev_net(rt1->u.dst.dev), dev_net(rt2->u.dst.dev));
+	return net_eq(dev_net(rt1->dst.dev), dev_net(rt2->dst.dev));
 }
 
 static inline int rt_is_expired(struct rtable *rth)
 {
-	return rth->rt_genid != rt_genid(dev_net(rth->u.dst.dev));
+	return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
 }
 
 /*
@@ -735,7 +734,7 @@ static void rt_do_flush(int process_context)
 			rth = rt_hash_table[i].chain;
 
 			/* defer releasing the head of the list after spin_unlock */
-			for (tail = rth; tail; tail = tail->u.dst.rt_next)
+			for (tail = rth; tail; tail = tail->dst.rt_next)
 				if (!rt_is_expired(tail))
 					break;
 			if (rth != tail)
@@ -744,9 +743,9 @@ static void rt_do_flush(int process_context)
 			/* call rt_free on entries after the tail requiring flush */
 			prev = &rt_hash_table[i].chain;
 			for (p = *prev; p; p = next) {
-				next = p->u.dst.rt_next;
+				next = p->dst.rt_next;
 				if (!rt_is_expired(p)) {
-					prev = &p->u.dst.rt_next;
+					prev = &p->dst.rt_next;
 				} else {
 					*prev = next;
 					rt_free(p);
@@ -761,7 +760,7 @@ static void rt_do_flush(int process_context)
 		spin_unlock_bh(rt_hash_lock_addr(i));
 
 		for (; rth != tail; rth = next) {
-			next = rth->u.dst.rt_next;
+			next = rth->dst.rt_next;
 			rt_free(rth);
 		}
 	}
@@ -792,7 +791,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
 	while (aux != rth) {
 		if (compare_hash_inputs(&aux->fl, &rth->fl))
 			return 0;
-		aux = aux->u.dst.rt_next;
+		aux = aux->dst.rt_next;
 	}
 	return ONE;
 }
@@ -832,18 +831,18 @@ static void rt_check_expire(void)
 		length = 0;
 		spin_lock_bh(rt_hash_lock_addr(i));
 		while ((rth = *rthp) != NULL) {
-			prefetch(rth->u.dst.rt_next);
+			prefetch(rth->dst.rt_next);
 			if (rt_is_expired(rth)) {
-				*rthp = rth->u.dst.rt_next;
+				*rthp = rth->dst.rt_next;
 				rt_free(rth);
 				continue;
 			}
-			if (rth->u.dst.expires) {
+			if (rth->dst.expires) {
 				/* Entry is expired even if it is in use */
-				if (time_before_eq(jiffies, rth->u.dst.expires)) {
+				if (time_before_eq(jiffies, rth->dst.expires)) {
 nofree:
 					tmo >>= 1;
-					rthp = &rth->u.dst.rt_next;
+					rthp = &rth->dst.rt_next;
 					/*
 					 * We only count entries on
 					 * a chain with equal hash inputs once
@@ -859,7 +858,7 @@ nofree:
 				goto nofree;
 
 			/* Cleanup aged off entries. */
-			*rthp = rth->u.dst.rt_next;
+			*rthp = rth->dst.rt_next;
 			rt_free(rth);
 		}
 		spin_unlock_bh(rt_hash_lock_addr(i));
@@ -1000,10 +999,10 @@ static int rt_garbage_collect(struct dst_ops *ops)
 				if (!rt_is_expired(rth) &&
 					!rt_may_expire(rth, tmo, expire)) {
 					tmo >>= 1;
-					rthp = &rth->u.dst.rt_next;
+					rthp = &rth->dst.rt_next;
 					continue;
 				}
-				*rthp = rth->u.dst.rt_next;
+				*rthp = rth->dst.rt_next;
 				rt_free(rth);
 				goal--;
 			}
@@ -1069,7 +1068,7 @@ static int slow_chain_length(const struct rtable *head)
 
 	while (rth) {
 		length += has_noalias(head, rth);
-		rth = rth->u.dst.rt_next;
+		rth = rth->dst.rt_next;
 	}
 	return length >> FRACT_BITS;
 }
@@ -1091,7 +1090,7 @@ restart:
 	candp = NULL;
 	now = jiffies;
 
-	if (!rt_caching(dev_net(rt->u.dst.dev))) {
+	if (!rt_caching(dev_net(rt->dst.dev))) {
 		/*
 		 * If we're not caching, just tell the caller we
 		 * were successful and don't touch the route.  The
@@ -1109,7 +1108,7 @@ restart:
 	 */
 
 	if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
-		int err = arp_bind_neighbour(&rt->u.dst);
+		int err = arp_bind_neighbour(&rt->dst);
 		if (err) {
 			if (net_ratelimit())
 				printk(KERN_WARNING
@@ -1128,19 +1127,19 @@ restart:
 	spin_lock_bh(rt_hash_lock_addr(hash));
 	while ((rth = *rthp) != NULL) {
 		if (rt_is_expired(rth)) {
-			*rthp = rth->u.dst.rt_next;
+			*rthp = rth->dst.rt_next;
 			rt_free(rth);
 			continue;
 		}
 		if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) {
 			/* Put it first */
-			*rthp = rth->u.dst.rt_next;
+			*rthp = rth->dst.rt_next;
 			/*
 			 * Since lookup is lockfree, the deletion
 			 * must be visible to another weakly ordered CPU before
 			 * the insertion at the start of the hash chain.
 			 */
-			rcu_assign_pointer(rth->u.dst.rt_next,
+			rcu_assign_pointer(rth->dst.rt_next,
 					   rt_hash_table[hash].chain);
 			/*
 			 * Since lookup is lockfree, the update writes
@@ -1148,18 +1147,18 @@ restart:
 			 */
 			rcu_assign_pointer(rt_hash_table[hash].chain, rth);
 
-			dst_use(&rth->u.dst, now);
+			dst_use(&rth->dst, now);
 			spin_unlock_bh(rt_hash_lock_addr(hash));
 
 			rt_drop(rt);
 			if (rp)
 				*rp = rth;
 			else
-				skb_dst_set(skb, &rth->u.dst);
+				skb_dst_set(skb, &rth->dst);
 			return 0;
 		}
 
-		if (!atomic_read(&rth->u.dst.__refcnt)) {
+		if (!atomic_read(&rth->dst.__refcnt)) {
 			u32 score = rt_score(rth);
 
 			if (score <= min_score) {
@@ -1171,7 +1170,7 @@ restart:
 
 		chain_length++;
 
-		rthp = &rth->u.dst.rt_next;
+		rthp = &rth->dst.rt_next;
 	}
 
 	if (cand) {
@@ -1182,17 +1181,17 @@ restart:
 		 * only 2 entries per bucket. We will see.
 		 */
 		if (chain_length > ip_rt_gc_elasticity) {
-			*candp = cand->u.dst.rt_next;
+			*candp = cand->dst.rt_next;
 			rt_free(cand);
 		}
 	} else {
 		if (chain_length > rt_chain_length_max &&
 		    slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) {
-			struct net *net = dev_net(rt->u.dst.dev);
+			struct net *net = dev_net(rt->dst.dev);
 			int num = ++net->ipv4.current_rt_cache_rebuild_count;
 			if (!rt_caching(net)) {
 				printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n",
-					rt->u.dst.dev->name, num);
+					rt->dst.dev->name, num);
 			}
 			rt_emergency_hash_rebuild(net);
 			spin_unlock_bh(rt_hash_lock_addr(hash));
@@ -1207,7 +1206,7 @@ restart:
 	   route or unicast forwarding path.
 	 */
 	if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
-		int err = arp_bind_neighbour(&rt->u.dst);
+		int err = arp_bind_neighbour(&rt->dst);
 		if (err) {
 			spin_unlock_bh(rt_hash_lock_addr(hash));
 
@@ -1238,14 +1237,14 @@ restart:
 		}
 	}
 
-	rt->u.dst.rt_next = rt_hash_table[hash].chain;
+	rt->dst.rt_next = rt_hash_table[hash].chain;
 
 #if RT_CACHE_DEBUG >= 2
-	if (rt->u.dst.rt_next) {
+	if (rt->dst.rt_next) {
 		struct rtable *trt;
 		printk(KERN_DEBUG "rt_cache @%02x: %pI4",
 		       hash, &rt->rt_dst);
-		for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next)
+		for (trt = rt->dst.rt_next; trt; trt = trt->dst.rt_next)
 			printk(" . %pI4", &trt->rt_dst);
 		printk("\n");
 	}
@@ -1263,7 +1262,7 @@ skip_hashing:
 	if (rp)
 		*rp = rt;
 	else
-		skb_dst_set(skb, &rt->u.dst);
+		skb_dst_set(skb, &rt->dst);
 	return 0;
 }
 
@@ -1335,20 +1334,21 @@ static void rt_del(unsigned hash, struct rtable *rt)
 	ip_rt_put(rt);
 	while ((aux = *rthp) != NULL) {
 		if (aux == rt || rt_is_expired(aux)) {
-			*rthp = aux->u.dst.rt_next;
+			*rthp = aux->dst.rt_next;
 			rt_free(aux);
 			continue;
 		}
-		rthp = &aux->u.dst.rt_next;
+		rthp = &aux->dst.rt_next;
 	}
 	spin_unlock_bh(rt_hash_lock_addr(hash));
 }
 
+/* called in rcu_read_lock() section */
 void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 		    __be32 saddr, struct net_device *dev)
 {
 	int i, k;
-	struct in_device *in_dev = in_dev_get(dev);
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
 	struct rtable *rth, **rthp;
 	__be32 skeys[2] = { saddr, 0 };
 	int ikeys[2] = { dev->ifindex, 0 };
@@ -1384,7 +1384,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 
 			rthp=&rt_hash_table[hash].chain;
 
-			rcu_read_lock();
 			while ((rth = rcu_dereference(*rthp)) != NULL) {
 				struct rtable *rt;
 
@@ -1393,44 +1392,42 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 				    rth->fl.oif != ikeys[k] ||
 				    rth->fl.iif != 0 ||
 				    rt_is_expired(rth) ||
-				    !net_eq(dev_net(rth->u.dst.dev), net)) {
-					rthp = &rth->u.dst.rt_next;
+				    !net_eq(dev_net(rth->dst.dev), net)) {
+					rthp = &rth->dst.rt_next;
 					continue;
 				}
 
 				if (rth->rt_dst != daddr ||
 				    rth->rt_src != saddr ||
-				    rth->u.dst.error ||
+				    rth->dst.error ||
 				    rth->rt_gateway != old_gw ||
-				    rth->u.dst.dev != dev)
+				    rth->dst.dev != dev)
 					break;
 
-				dst_hold(&rth->u.dst);
-				rcu_read_unlock();
+				dst_hold(&rth->dst);
 
 				rt = dst_alloc(&ipv4_dst_ops);
 				if (rt == NULL) {
 					ip_rt_put(rth);
-					in_dev_put(in_dev);
 					return;
 				}
 
 				/* Copy all the information. */
 				*rt = *rth;
-				rt->u.dst.__use = 1;
-				atomic_set(&rt->u.dst.__refcnt, 1);
-				rt->u.dst.child = NULL;
-				if (rt->u.dst.dev)
-					dev_hold(rt->u.dst.dev);
+				rt->dst.__use = 1;
+				atomic_set(&rt->dst.__refcnt, 1);
+				rt->dst.child = NULL;
+				if (rt->dst.dev)
+					dev_hold(rt->dst.dev);
 				if (rt->idev)
 					in_dev_hold(rt->idev);
-				rt->u.dst.obsolete = -1;
-				rt->u.dst.lastuse = jiffies;
-				rt->u.dst.path = &rt->u.dst;
-				rt->u.dst.neighbour = NULL;
-				rt->u.dst.hh = NULL;
+				rt->dst.obsolete = -1;
+				rt->dst.lastuse = jiffies;
+				rt->dst.path = &rt->dst;
+				rt->dst.neighbour = NULL;
+				rt->dst.hh = NULL;
 #ifdef CONFIG_XFRM
-				rt->u.dst.xfrm = NULL;
+				rt->dst.xfrm = NULL;
 #endif
 				rt->rt_genid = rt_genid(net);
 				rt->rt_flags |= RTCF_REDIRECTED;
@@ -1439,23 +1436,23 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 				rt->rt_gateway = new_gw;
 
 				/* Redirect received -> path was valid */
-				dst_confirm(&rth->u.dst);
+				dst_confirm(&rth->dst);
 
 				if (rt->peer)
 					atomic_inc(&rt->peer->refcnt);
 
-				if (arp_bind_neighbour(&rt->u.dst) ||
-				    !(rt->u.dst.neighbour->nud_state &
+				if (arp_bind_neighbour(&rt->dst) ||
+				    !(rt->dst.neighbour->nud_state &
 					    NUD_VALID)) {
-					if (rt->u.dst.neighbour)
-						neigh_event_send(rt->u.dst.neighbour, NULL);
+					if (rt->dst.neighbour)
+						neigh_event_send(rt->dst.neighbour, NULL);
 					ip_rt_put(rth);
 					rt_drop(rt);
 					goto do_next;
 				}
 
-				netevent.old = &rth->u.dst;
-				netevent.new = &rt->u.dst;
+				netevent.old = &rth->dst;
+				netevent.new = &rt->dst;
 				call_netevent_notifiers(NETEVENT_REDIRECT,
 							&netevent);
 
@@ -1464,12 +1461,10 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 					ip_rt_put(rt);
 				goto do_next;
 			}
-			rcu_read_unlock();
 		do_next:
 			;
 		}
 	}
-	in_dev_put(in_dev);
 	return;
 
 reject_redirect:
@@ -1480,7 +1475,7 @@ reject_redirect:
 			&old_gw, dev->name, &new_gw,
 			&saddr, &daddr);
 #endif
-	in_dev_put(in_dev);
+	;
 }
 
 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
@@ -1493,8 +1488,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
 			ip_rt_put(rt);
 			ret = NULL;
 		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
-			   (rt->u.dst.expires &&
-			    time_after_eq(jiffies, rt->u.dst.expires))) {
+			   (rt->dst.expires &&
+			    time_after_eq(jiffies, rt->dst.expires))) {
 			unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
 						rt->fl.oif,
 						rt_genid(dev_net(dst->dev)));
@@ -1532,7 +1527,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	int log_martians;
 
 	rcu_read_lock();
-	in_dev = __in_dev_get_rcu(rt->u.dst.dev);
+	in_dev = __in_dev_get_rcu(rt->dst.dev);
 	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
 		rcu_read_unlock();
 		return;
@@ -1543,30 +1538,30 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	/* No redirected packets during ip_rt_redirect_silence;
 	 * reset the algorithm.
 	 */
-	if (time_after(jiffies, rt->u.dst.rate_last + ip_rt_redirect_silence))
-		rt->u.dst.rate_tokens = 0;
+	if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence))
+		rt->dst.rate_tokens = 0;
 
 	/* Too many ignored redirects; do not send anything
-	 * set u.dst.rate_last to the last seen redirected packet.
+	 * set dst.rate_last to the last seen redirected packet.
 	 */
-	if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) {
-		rt->u.dst.rate_last = jiffies;
+	if (rt->dst.rate_tokens >= ip_rt_redirect_number) {
+		rt->dst.rate_last = jiffies;
 		return;
 	}
 
 	/* Check for load limit; set rate_last to the latest sent
 	 * redirect.
 	 */
-	if (rt->u.dst.rate_tokens == 0 ||
+	if (rt->dst.rate_tokens == 0 ||
 	    time_after(jiffies,
-		       (rt->u.dst.rate_last +
-			(ip_rt_redirect_load << rt->u.dst.rate_tokens)))) {
+		       (rt->dst.rate_last +
+			(ip_rt_redirect_load << rt->dst.rate_tokens)))) {
 		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
-		rt->u.dst.rate_last = jiffies;
-		++rt->u.dst.rate_tokens;
+		rt->dst.rate_last = jiffies;
+		++rt->dst.rate_tokens;
 #ifdef CONFIG_IP_ROUTE_VERBOSE
 		if (log_martians &&
-		    rt->u.dst.rate_tokens == ip_rt_redirect_number &&
+		    rt->dst.rate_tokens == ip_rt_redirect_number &&
 		    net_ratelimit())
 			printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
 				&rt->rt_src, rt->rt_iif,
@@ -1581,7 +1576,7 @@ static int ip_error(struct sk_buff *skb)
 	unsigned long now;
 	int code;
 
-	switch (rt->u.dst.error) {
+	switch (rt->dst.error) {
 	case EINVAL:
 	default:
 		goto out;
@@ -1590,7 +1585,7 @@ static int ip_error(struct sk_buff *skb)
 		break;
 	case ENETUNREACH:
 		code = ICMP_NET_UNREACH;
-		IP_INC_STATS_BH(dev_net(rt->u.dst.dev),
+		IP_INC_STATS_BH(dev_net(rt->dst.dev),
 				IPSTATS_MIB_INNOROUTES);
 		break;
 	case EACCES:
@@ -1599,12 +1594,12 @@ static int ip_error(struct sk_buff *skb)
 	}
 
 	now = jiffies;
-	rt->u.dst.rate_tokens += now - rt->u.dst.rate_last;
-	if (rt->u.dst.rate_tokens > ip_rt_error_burst)
-		rt->u.dst.rate_tokens = ip_rt_error_burst;
-	rt->u.dst.rate_last = now;
-	if (rt->u.dst.rate_tokens >= ip_rt_error_cost) {
-		rt->u.dst.rate_tokens -= ip_rt_error_cost;
+	rt->dst.rate_tokens += now - rt->dst.rate_last;
+	if (rt->dst.rate_tokens > ip_rt_error_burst)
+		rt->dst.rate_tokens = ip_rt_error_burst;
+	rt->dst.rate_last = now;
+	if (rt->dst.rate_tokens >= ip_rt_error_cost) {
+		rt->dst.rate_tokens -= ip_rt_error_cost;
 		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
 	}
 
@@ -1649,7 +1644,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 
 		rcu_read_lock();
 		for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
-		     rth = rcu_dereference(rth->u.dst.rt_next)) {
+		     rth = rcu_dereference(rth->dst.rt_next)) {
 			unsigned short mtu = new_mtu;
 
 			if (rth->fl.fl4_dst != daddr ||
@@ -1658,8 +1653,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 			    rth->rt_src != iph->saddr ||
 			    rth->fl.oif != ikeys[k] ||
 			    rth->fl.iif != 0 ||
-			    dst_metric_locked(&rth->u.dst, RTAX_MTU) ||
-			    !net_eq(dev_net(rth->u.dst.dev), net) ||
+			    dst_metric_locked(&rth->dst, RTAX_MTU) ||
+			    !net_eq(dev_net(rth->dst.dev), net) ||
 			    rt_is_expired(rth))
 				continue;
 
@@ -1667,22 +1662,22 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 
 				/* BSD 4.2 compatibility hack :-( */
 				if (mtu == 0 &&
-				    old_mtu >= dst_mtu(&rth->u.dst) &&
+				    old_mtu >= dst_mtu(&rth->dst) &&
 				    old_mtu >= 68 + (iph->ihl << 2))
 					old_mtu -= iph->ihl << 2;
 
 				mtu = guess_mtu(old_mtu);
 			}
-			if (mtu <= dst_mtu(&rth->u.dst)) {
-				if (mtu < dst_mtu(&rth->u.dst)) {
-					dst_confirm(&rth->u.dst);
+			if (mtu <= dst_mtu(&rth->dst)) {
+				if (mtu < dst_mtu(&rth->dst)) {
+					dst_confirm(&rth->dst);
 					if (mtu < ip_rt_min_pmtu) {
 						mtu = ip_rt_min_pmtu;
-						rth->u.dst.metrics[RTAX_LOCK-1] |=
+						rth->dst.metrics[RTAX_LOCK-1] |=
 							(1 << RTAX_MTU);
 					}
-					rth->u.dst.metrics[RTAX_MTU-1] = mtu;
-					dst_set_expires(&rth->u.dst,
+					rth->dst.metrics[RTAX_MTU-1] = mtu;
+					dst_set_expires(&rth->dst,
 							ip_rt_mtu_expires);
 				}
 				est_mtu = mtu;
@@ -1755,7 +1750,7 @@ static void ipv4_link_failure(struct sk_buff *skb)
 
 	rt = skb_rtable(skb);
 	if (rt)
-		dst_set_expires(&rt->u.dst, 0);
+		dst_set_expires(&rt->dst, 0);
 }
 
 static int ip_rt_bug(struct sk_buff *skb)
@@ -1783,11 +1778,11 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
 
 	if (rt->fl.iif == 0)
 		src = rt->rt_src;
-	else if (fib_lookup(dev_net(rt->u.dst.dev), &rt->fl, &res) == 0) {
+	else if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) {
 		src = FIB_RES_PREFSRC(res);
 		fib_res_put(&res);
 	} else
-		src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway,
+		src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
 					RT_SCOPE_UNIVERSE);
 	memcpy(addr, &src, 4);
 }
@@ -1795,10 +1790,10 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
 #ifdef CONFIG_NET_CLS_ROUTE
 static void set_class_tag(struct rtable *rt, u32 tag)
 {
-	if (!(rt->u.dst.tclassid & 0xFFFF))
-		rt->u.dst.tclassid |= tag & 0xFFFF;
-	if (!(rt->u.dst.tclassid & 0xFFFF0000))
-		rt->u.dst.tclassid |= tag & 0xFFFF0000;
+	if (!(rt->dst.tclassid & 0xFFFF))
+		rt->dst.tclassid |= tag & 0xFFFF;
+	if (!(rt->dst.tclassid & 0xFFFF0000))
+		rt->dst.tclassid |= tag & 0xFFFF0000;
 }
 #endif
 
@@ -1810,30 +1805,30 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
 		if (FIB_RES_GW(*res) &&
 		    FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
 			rt->rt_gateway = FIB_RES_GW(*res);
-		memcpy(rt->u.dst.metrics, fi->fib_metrics,
-		       sizeof(rt->u.dst.metrics));
+		memcpy(rt->dst.metrics, fi->fib_metrics,
+		       sizeof(rt->dst.metrics));
 		if (fi->fib_mtu == 0) {
-			rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu;
-			if (dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
+			rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu;
+			if (dst_metric_locked(&rt->dst, RTAX_MTU) &&
 			    rt->rt_gateway != rt->rt_dst &&
-			    rt->u.dst.dev->mtu > 576)
-				rt->u.dst.metrics[RTAX_MTU-1] = 576;
+			    rt->dst.dev->mtu > 576)
+				rt->dst.metrics[RTAX_MTU-1] = 576;
 		}
 #ifdef CONFIG_NET_CLS_ROUTE
-		rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
+		rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
 #endif
 	} else
-		rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu;
+		rt->dst.metrics[RTAX_MTU-1]= rt->dst.dev->mtu;
 
-	if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
-		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
-	if (dst_mtu(&rt->u.dst) > IP_MAX_MTU)
-		rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
-	if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0)
-		rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40,
+	if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
+		rt->dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
+	if (dst_mtu(&rt->dst) > IP_MAX_MTU)
+		rt->dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
+	if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0)
+		rt->dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->dst.dev->mtu - 40,
 				       ip_rt_min_advmss);
-	if (dst_metric(&rt->u.dst, RTAX_ADVMSS) > 65535 - 40)
-		rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;
+	if (dst_metric(&rt->dst, RTAX_ADVMSS) > 65535 - 40)
+		rt->dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;
 
 #ifdef CONFIG_NET_CLS_ROUTE
 #ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -1844,14 +1839,16 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
 	rt->rt_type = res->type;
 }
 
+/* called in rcu_read_lock() section */
 static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 				u8 tos, struct net_device *dev, int our)
 {
-	unsigned hash;
+	unsigned int hash;
 	struct rtable *rth;
 	__be32 spec_dst;
-	struct in_device *in_dev = in_dev_get(dev);
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
 	u32 itag = 0;
+	int err;
 
 	/* Primary sanity checks. */
 
@@ -1866,21 +1863,23 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		if (!ipv4_is_local_multicast(daddr))
 			goto e_inval;
 		spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
-	} else if (fib_validate_source(saddr, 0, tos, 0,
-					dev, &spec_dst, &itag, 0) < 0)
-		goto e_inval;
-
+	} else {
+		err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
+					  &itag, 0);
+		if (err < 0)
+			goto e_err;
+	}
 	rth = dst_alloc(&ipv4_dst_ops);
 	if (!rth)
 		goto e_nobufs;
 
-	rth->u.dst.output = ip_rt_bug;
-	rth->u.dst.obsolete = -1;
+	rth->dst.output = ip_rt_bug;
+	rth->dst.obsolete = -1;
 
-	atomic_set(&rth->u.dst.__refcnt, 1);
-	rth->u.dst.flags= DST_HOST;
+	atomic_set(&rth->dst.__refcnt, 1);
+	rth->dst.flags= DST_HOST;
 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->u.dst.flags |= DST_NOPOLICY;
+		rth->dst.flags |= DST_NOPOLICY;
 	rth->fl.fl4_dst	= daddr;
 	rth->rt_dst	= daddr;
 	rth->fl.fl4_tos	= tos;
@@ -1888,13 +1887,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	rth->fl.fl4_src	= saddr;
 	rth->rt_src	= saddr;
 #ifdef CONFIG_NET_CLS_ROUTE
-	rth->u.dst.tclassid = itag;
+	rth->dst.tclassid = itag;
 #endif
 	rth->rt_iif	=
 		rth->fl.iif	= dev->ifindex;
-	rth->u.dst.dev	= init_net.loopback_dev;
-	dev_hold(rth->u.dst.dev);
-	rth->idev	= in_dev_get(rth->u.dst.dev);
+	rth->dst.dev	= init_net.loopback_dev;
+	dev_hold(rth->dst.dev);
+	rth->idev	= in_dev_get(rth->dst.dev);
 	rth->fl.oif	= 0;
 	rth->rt_gateway	= daddr;
 	rth->rt_spec_dst= spec_dst;
@@ -1902,27 +1901,25 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	rth->rt_flags = RTCF_MULTICAST;
 	rth->rt_type = RTN_MULTICAST;
 	if (our) {
-		rth->u.dst.input= ip_local_deliver;
+		rth->dst.input= ip_local_deliver;
 		rth->rt_flags |= RTCF_LOCAL;
 	}
 
 #ifdef CONFIG_IP_MROUTE
 	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
-		rth->u.dst.input = ip_mr_input;
+		rth->dst.input = ip_mr_input;
 #endif
 	RT_CACHE_STAT_INC(in_slow_mc);
 
-	in_dev_put(in_dev);
 	hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
 	return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex);
 
 e_nobufs:
-	in_dev_put(in_dev);
 	return -ENOBUFS;
-
 e_inval:
-	in_dev_put(in_dev);
 	return -EINVAL;
+e_err:
+	return err;
 }
 
 
@@ -1956,22 +1953,22 @@ static void ip_handle_martian_source(struct net_device *dev,
 #endif
 }
 
+/* called in rcu_read_lock() section */
 static int __mkroute_input(struct sk_buff *skb,
 			   struct fib_result *res,
 			   struct in_device *in_dev,
 			   __be32 daddr, __be32 saddr, u32 tos,
 			   struct rtable **result)
 {
-
 	struct rtable *rth;
 	int err;
 	struct in_device *out_dev;
-	unsigned flags = 0;
+	unsigned int flags = 0;
 	__be32 spec_dst;
 	u32 itag;
 
 	/* get a working reference to the output device */
-	out_dev = in_dev_get(FIB_RES_DEV(*res));
+	out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
 	if (out_dev == NULL) {
 		if (net_ratelimit())
 			printk(KERN_CRIT "Bug in ip_route_input" \
@@ -1986,7 +1983,6 @@ static int __mkroute_input(struct sk_buff *skb,
 		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
 					 saddr);
 
-		err = -EINVAL;
 		goto cleanup;
 	}
 
@@ -2020,12 +2016,12 @@ static int __mkroute_input(struct sk_buff *skb,
 		goto cleanup;
 	}
 
-	atomic_set(&rth->u.dst.__refcnt, 1);
-	rth->u.dst.flags= DST_HOST;
+	atomic_set(&rth->dst.__refcnt, 1);
+	rth->dst.flags= DST_HOST;
 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->u.dst.flags |= DST_NOPOLICY;
+		rth->dst.flags |= DST_NOPOLICY;
 	if (IN_DEV_CONF_GET(out_dev, NOXFRM))
-		rth->u.dst.flags |= DST_NOXFRM;
+		rth->dst.flags |= DST_NOXFRM;
 	rth->fl.fl4_dst	= daddr;
 	rth->rt_dst	= daddr;
 	rth->fl.fl4_tos	= tos;
@@ -2035,16 +2031,16 @@ static int __mkroute_input(struct sk_buff *skb,
 	rth->rt_gateway	= daddr;
 	rth->rt_iif 	=
 		rth->fl.iif	= in_dev->dev->ifindex;
-	rth->u.dst.dev	= (out_dev)->dev;
-	dev_hold(rth->u.dst.dev);
-	rth->idev	= in_dev_get(rth->u.dst.dev);
+	rth->dst.dev	= (out_dev)->dev;
+	dev_hold(rth->dst.dev);
+	rth->idev	= in_dev_get(rth->dst.dev);
 	rth->fl.oif 	= 0;
 	rth->rt_spec_dst= spec_dst;
 
-	rth->u.dst.obsolete = -1;
-	rth->u.dst.input = ip_forward;
-	rth->u.dst.output = ip_output;
-	rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev));
+	rth->dst.obsolete = -1;
+	rth->dst.input = ip_forward;
+	rth->dst.output = ip_output;
+	rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
 
 	rt_set_nexthop(rth, res, itag);
 
@@ -2053,8 +2049,6 @@ static int __mkroute_input(struct sk_buff *skb,
 	*result = rth;
 	err = 0;
  cleanup:
-	/* release the working reference to the output device */
-	in_dev_put(out_dev);
 	return err;
 }
 
@@ -2080,7 +2074,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
 
 	/* put it into the cache */
 	hash = rt_hash(daddr, saddr, fl->iif,
-		       rt_genid(dev_net(rth->u.dst.dev)));
+		       rt_genid(dev_net(rth->dst.dev)));
 	return rt_intern_hash(hash, rth, NULL, skb, fl->iif);
 }
 
@@ -2098,7 +2092,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 			       u8 tos, struct net_device *dev)
 {
 	struct fib_result res;
-	struct in_device *in_dev = in_dev_get(dev);
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
 	struct flowi fl = { .nl_u = { .ip4_u =
 				      { .daddr = daddr,
 					.saddr = saddr,
@@ -2158,13 +2152,12 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		goto brd_input;
 
 	if (res.type == RTN_LOCAL) {
-		int result;
-		result = fib_validate_source(saddr, daddr, tos,
+		err = fib_validate_source(saddr, daddr, tos,
 					     net->loopback_dev->ifindex,
 					     dev, &spec_dst, &itag, skb->mark);
-		if (result < 0)
-			goto martian_source;
-		if (result)
+		if (err < 0)
+			goto martian_source_keep_err;
+		if (err)
 			flags |= RTCF_DIRECTSRC;
 		spec_dst = daddr;
 		goto local_input;
@@ -2177,7 +2170,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 
 	err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
 done:
-	in_dev_put(in_dev);
 	if (free_res)
 		fib_res_put(&res);
 out:	return err;
@@ -2192,7 +2184,7 @@ brd_input:
 		err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
 					  &itag, skb->mark);
 		if (err < 0)
-			goto martian_source;
+			goto martian_source_keep_err;
 		if (err)
 			flags |= RTCF_DIRECTSRC;
 	}
@@ -2205,14 +2197,14 @@ local_input:
 	if (!rth)
 		goto e_nobufs;
 
-	rth->u.dst.output= ip_rt_bug;
-	rth->u.dst.obsolete = -1;
+	rth->dst.output= ip_rt_bug;
+	rth->dst.obsolete = -1;
 	rth->rt_genid = rt_genid(net);
 
-	atomic_set(&rth->u.dst.__refcnt, 1);
-	rth->u.dst.flags= DST_HOST;
+	atomic_set(&rth->dst.__refcnt, 1);
+	rth->dst.flags= DST_HOST;
 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->u.dst.flags |= DST_NOPOLICY;
+		rth->dst.flags |= DST_NOPOLICY;
 	rth->fl.fl4_dst	= daddr;
 	rth->rt_dst	= daddr;
 	rth->fl.fl4_tos	= tos;
@@ -2220,20 +2212,20 @@ local_input:
 	rth->fl.fl4_src	= saddr;
 	rth->rt_src	= saddr;
 #ifdef CONFIG_NET_CLS_ROUTE
-	rth->u.dst.tclassid = itag;
+	rth->dst.tclassid = itag;
 #endif
 	rth->rt_iif	=
 		rth->fl.iif	= dev->ifindex;
-	rth->u.dst.dev	= net->loopback_dev;
-	dev_hold(rth->u.dst.dev);
-	rth->idev	= in_dev_get(rth->u.dst.dev);
+	rth->dst.dev	= net->loopback_dev;
+	dev_hold(rth->dst.dev);
+	rth->idev	= in_dev_get(rth->dst.dev);
 	rth->rt_gateway	= daddr;
 	rth->rt_spec_dst= spec_dst;
-	rth->u.dst.input= ip_local_deliver;
+	rth->dst.input= ip_local_deliver;
 	rth->rt_flags 	= flags|RTCF_LOCAL;
 	if (res.type == RTN_UNREACHABLE) {
-		rth->u.dst.input= ip_error;
-		rth->u.dst.error= -err;
+		rth->dst.input= ip_error;
+		rth->dst.error= -err;
 		rth->rt_flags 	&= ~RTCF_LOCAL;
 	}
 	rth->rt_type	= res.type;
@@ -2273,8 +2265,10 @@ e_nobufs:
 	goto done;
 
 martian_source:
+	err = -EINVAL;
+martian_source_keep_err:
 	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
-	goto e_inval;
+	goto done;
 }
 
 int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
@@ -2284,32 +2278,34 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	unsigned	hash;
 	int iif = dev->ifindex;
 	struct net *net;
+	int res;
 
 	net = dev_net(dev);
 
+	rcu_read_lock();
+
 	if (!rt_caching(net))
 		goto skip_cache;
 
 	tos &= IPTOS_RT_MASK;
 	hash = rt_hash(daddr, saddr, iif, rt_genid(net));
 
-	rcu_read_lock();
 	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
-	     rth = rcu_dereference(rth->u.dst.rt_next)) {
+	     rth = rcu_dereference(rth->dst.rt_next)) {
 		if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
 		     ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
 		     (rth->fl.iif ^ iif) |
 		     rth->fl.oif |
 		     (rth->fl.fl4_tos ^ tos)) == 0 &&
 		    rth->fl.mark == skb->mark &&
-		    net_eq(dev_net(rth->u.dst.dev), net) &&
+		    net_eq(dev_net(rth->dst.dev), net) &&
 		    !rt_is_expired(rth)) {
 			if (noref) {
-				dst_use_noref(&rth->u.dst, jiffies);
-				skb_dst_set_noref(skb, &rth->u.dst);
+				dst_use_noref(&rth->dst, jiffies);
+				skb_dst_set_noref(skb, &rth->dst);
 			} else {
-				dst_use(&rth->u.dst, jiffies);
-				skb_dst_set(skb, &rth->u.dst);
+				dst_use(&rth->dst, jiffies);
+				skb_dst_set(skb, &rth->dst);
 			}
 			RT_CACHE_STAT_INC(in_hit);
 			rcu_read_unlock();
@@ -2317,7 +2313,6 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		}
 		RT_CACHE_STAT_INC(in_hlist_search);
 	}
-	rcu_read_unlock();
 
 skip_cache:
 	/* Multicast recognition logic is moved from route cache to here.
@@ -2332,12 +2327,11 @@ skip_cache:
 	   route cache entry is created eventually.
 	 */
 	if (ipv4_is_multicast(daddr)) {
-		struct in_device *in_dev;
+		struct in_device *in_dev = __in_dev_get_rcu(dev);
 
-		rcu_read_lock();
-		if ((in_dev = __in_dev_get_rcu(dev)) != NULL) {
+		if (in_dev) {
 			int our = ip_check_mc(in_dev, daddr, saddr,
 					      ip_hdr(skb)->protocol);
 			if (our
 #ifdef CONFIG_IP_MROUTE
 				||
@@ -2345,15 +2339,18 @@ skip_cache:
 			    IN_DEV_MFORWARD(in_dev))
 #endif
 			   ) {
+				int res = ip_route_input_mc(skb, daddr, saddr,
+							    tos, dev, our);
 				rcu_read_unlock();
-				return ip_route_input_mc(skb, daddr, saddr,
-							 tos, dev, our);
+				return res;
 			}
 		}
 		rcu_read_unlock();
 		return -EINVAL;
 	}
-	return ip_route_input_slow(skb, daddr, saddr, tos, dev);
+	res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
+	rcu_read_unlock();
+	return res;
 }
 EXPORT_SYMBOL(ip_route_input_common);
 
@@ -2415,12 +2412,12 @@ static int __mkroute_output(struct rtable **result,
 		goto cleanup;
 	}
 
-	atomic_set(&rth->u.dst.__refcnt, 1);
-	rth->u.dst.flags= DST_HOST;
+	atomic_set(&rth->dst.__refcnt, 1);
+	rth->dst.flags= DST_HOST;
 	if (IN_DEV_CONF_GET(in_dev, NOXFRM))
-		rth->u.dst.flags |= DST_NOXFRM;
+		rth->dst.flags |= DST_NOXFRM;
 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->u.dst.flags |= DST_NOPOLICY;
+		rth->dst.flags |= DST_NOPOLICY;
 
 	rth->fl.fl4_dst	= oldflp->fl4_dst;
 	rth->fl.fl4_tos	= tos;
@@ -2432,35 +2429,35 @@ static int __mkroute_output(struct rtable **result,
 	rth->rt_iif	= oldflp->oif ? : dev_out->ifindex;
 	/* get references to the devices that are to be hold by the routing
 	   cache entry */
-	rth->u.dst.dev	= dev_out;
+	rth->dst.dev	= dev_out;
 	dev_hold(dev_out);
 	rth->idev	= in_dev_get(dev_out);
 	rth->rt_gateway = fl->fl4_dst;
 	rth->rt_spec_dst= fl->fl4_src;
 
-	rth->u.dst.output=ip_output;
-	rth->u.dst.obsolete = -1;
+	rth->dst.output=ip_output;
+	rth->dst.obsolete = -1;
 	rth->rt_genid = rt_genid(dev_net(dev_out));
 
 	RT_CACHE_STAT_INC(out_slow_tot);
 
 	if (flags & RTCF_LOCAL) {
-		rth->u.dst.input = ip_local_deliver;
+		rth->dst.input = ip_local_deliver;
 		rth->rt_spec_dst = fl->fl4_dst;
 	}
 	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
 		rth->rt_spec_dst = fl->fl4_src;
 		if (flags & RTCF_LOCAL &&
 		    !(dev_out->flags & IFF_LOOPBACK)) {
-			rth->u.dst.output = ip_mc_output;
+			rth->dst.output = ip_mc_output;
 			RT_CACHE_STAT_INC(out_slow_mc);
 		}
 #ifdef CONFIG_IP_MROUTE
 		if (res->type == RTN_MULTICAST) {
 			if (IN_DEV_MFORWARD(in_dev) &&
 			    !ipv4_is_local_multicast(oldflp->fl4_dst)) {
-				rth->u.dst.input = ip_mr_input;
-				rth->u.dst.output = ip_mc_output;
+				rth->dst.input = ip_mr_input;
+				rth->dst.output = ip_mc_output;
 			}
 		}
 #endif
@@ -2715,7 +2712,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
 
 	rcu_read_lock_bh();
 	for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth;
-		rth = rcu_dereference_bh(rth->u.dst.rt_next)) {
+		rth = rcu_dereference_bh(rth->dst.rt_next)) {
 		if (rth->fl.fl4_dst == flp->fl4_dst &&
 		    rth->fl.fl4_src == flp->fl4_src &&
 		    rth->fl.iif == 0 &&
@@ -2723,9 +2720,9 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
 		    rth->fl.mark == flp->mark &&
 		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
 			    (IPTOS_RT_MASK | RTO_ONLINK)) &&
-		    net_eq(dev_net(rth->u.dst.dev), net) &&
+		    net_eq(dev_net(rth->dst.dev), net) &&
 		    !rt_is_expired(rth)) {
-			dst_use(&rth->u.dst, jiffies);
+			dst_use(&rth->dst, jiffies);
 			RT_CACHE_STAT_INC(out_hit);
 			rcu_read_unlock_bh();
 			*rp = rth;
@@ -2762,15 +2759,15 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
 		dst_alloc(&ipv4_dst_blackhole_ops);
 
 	if (rt) {
-		struct dst_entry *new = &rt->u.dst;
+		struct dst_entry *new = &rt->dst;
 
 		atomic_set(&new->__refcnt, 1);
 		new->__use = 1;
 		new->input = dst_discard;
 		new->output = dst_discard;
-		memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
+		memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
 
-		new->dev = ort->u.dst.dev;
+		new->dev = ort->dst.dev;
 		if (new->dev)
 			dev_hold(new->dev);
 
@@ -2794,7 +2791,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
 		dst_free(new);
 	}
 
-	dst_release(&(*rp)->u.dst);
+	dst_release(&(*rp)->dst);
 	*rp = rt;
 	return (rt ? 0 : -ENOMEM);
 }
@@ -2864,11 +2861,11 @@ static int rt_fill_info(struct net *net,
 		r->rtm_src_len = 32;
 		NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src);
 	}
-	if (rt->u.dst.dev)
-		NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex);
+	if (rt->dst.dev)
+		NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
 #ifdef CONFIG_NET_CLS_ROUTE
-	if (rt->u.dst.tclassid)
-		NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid);
+	if (rt->dst.tclassid)
+		NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
 #endif
 	if (rt->fl.iif)
 		NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
@@ -2878,12 +2875,13 @@ static int rt_fill_info(struct net *net,
 	if (rt->rt_dst != rt->rt_gateway)
 		NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway);
 
-	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
+	if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
 		goto nla_put_failure;
 
-	error = rt->u.dst.error;
-	expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0;
+	error = rt->dst.error;
+	expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
 	if (rt->peer) {
+		inet_peer_refcheck(rt->peer);
 		id = atomic_read(&rt->peer->ip_id_count) & 0xffff;
 		if (rt->peer->tcp_ts_stamp) {
 			ts = rt->peer->tcp_ts;
@@ -2914,7 +2912,7 @@ static int rt_fill_info(struct net *net,
 			NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif);
 	}
 
-	if (rtnl_put_cacheinfo(skb, &rt->u.dst, id, ts, tsage,
+	if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage,
 			       expires, error) < 0)
 		goto nla_put_failure;
 
@@ -2979,8 +2977,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 		local_bh_enable();
 
 		rt = skb_rtable(skb);
-		if (err == 0 && rt->u.dst.error)
-			err = -rt->u.dst.error;
+		if (err == 0 && rt->dst.error)
+			err = -rt->dst.error;
 	} else {
 		struct flowi fl = {
 			.nl_u = {
@@ -2998,7 +2996,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 		if (err)
 			goto errout_free;
 
-		skb_dst_set(skb, &rt->u.dst);
+		skb_dst_set(skb, &rt->dst);
 		if (rtm->rtm_flags & RTM_F_NOTIFY)
 			rt->rt_flags |= RTCF_NOTIFY;
 
@@ -3034,12 +3032,12 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
 			continue;
 		rcu_read_lock_bh();
 		for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt;
-		     rt = rcu_dereference_bh(rt->u.dst.rt_next), idx++) {
-			if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx)
+		     rt = rcu_dereference_bh(rt->dst.rt_next), idx++) {
+			if (!net_eq(dev_net(rt->dst.dev), net) || idx < s_idx)
 				continue;
 			if (rt_is_expired(rt))
 				continue;
-			skb_dst_set_noref(skb, &rt->u.dst);
+			skb_dst_set_noref(skb, &rt->dst);
 			if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid,
 					 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
 					 1, NLM_F_MULTI) <= 0) {
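
Beyond the rename, the recurring functional change in this diff is a reference-counting cleanup: ip_rt_redirect(), ip_route_input_mc() and __mkroute_input() gain /* called in rcu_read_lock() section */ comments, ip_route_input_common() now takes rcu_read_lock() up front, and the in_dev_get()/in_dev_put() pairs inside these functions collapse to __in_dev_get_rcu() with no matching put. A minimal sketch of the two patterns, with error handling elided and illustrative function names (lookup_refcounted/lookup_rcu are not from the patch):

	/* Before: each function took its own reference on the inet device
	 * and had to drop it on every exit path. */
	static void lookup_refcounted(struct net_device *dev)
	{
		struct in_device *in_dev = in_dev_get(dev);	/* +1 refcount */

		if (!in_dev)
			return;
		/* ... use in_dev ... */
		in_dev_put(in_dev);				/* -1 on every return */
	}

	/* After: the caller already holds rcu_read_lock(), so a plain
	 * RCU-protected lookup is enough and nothing needs releasing;
	 * in_dev stays valid until the matching rcu_read_unlock(). */
	static void lookup_rcu(struct net_device *dev)
	{
		struct in_device *in_dev = __in_dev_get_rcu(dev);

		if (!in_dev)
			return;
		/* ... use in_dev ... */
	}

That is also why the error paths change shape in the hunks above: ip_route_input_mc() can return err directly through the new e_err label instead of unwinding a reference, and ip_route_input_slow() splits martian_source from martian_source_keep_err so a fib_validate_source() errno survives to the caller without any extra cleanup.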