about | summary | refs | log | tree | commit | diff | stats
path: root/net
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2012-07-31 21:43:13 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-07-31 21:43:13 -0400
commit: fd37ce34bd512f2b1a503f82abf8768da556a955 (patch)
tree: 557ff43ff5291d1704527e31293633fbc2f956d5 /net
parent: 4b24ff71108164e047cf2c95990b77651163e315 (diff)
parent: caacf05e5ad1abf0a2864863da4e33024bc68ec6 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking update from David S. Miller:

"I think Eric Dumazet and I have dealt with all of the known routing cache removal fallout. Some other minor fixes all around.

1) Fix RCU of cached routes, particularly of output routes which require liberation via call_rcu() instead of call_rcu_bh(). From Eric Dumazet.
2) Make sure we purge net device references in cached routes properly.
3) TG3 driver bug fixes from Michael Chan.
4) Fix reported 'expires' value in ipv6 routes, from Li Wei.
5) TUN driver ioctl leaks kernel bytes to userspace, from Mathias Krause."

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (22 commits)
  ipv4: Properly purge netdev references on uncached routes.
  ipv4: Cache routes in nexthop exception entries.
  ipv4: percpu nh_rth_output cache
  ipv4: Restore old dst_free() behavior.
  bridge: make port attributes const
  ipv4: remove rt_cache_rebuild_count
  net: ipv4: fix RCU races on dst refcounts
  net: TCP early demux cleanup
  tun: Fix formatting.
  net/tun: fix ioctl() based info leaks
  tg3: Update version to 3.124
  tg3: Fix race condition in tg3_get_stats64()
  tg3: Add New 5719 Read DMA workaround
  tg3: Fix Read DMA workaround for 5719 A0.
  tg3: Request APE_LOCK_PHY before PHY access
  ipv6: fix incorrect route 'expires' value passed to userspace
  mISDN: Bugfix only few bytes are transfered on a connection
  seeq: use PTR_RET at init_module of driver
  bnx2x: remove cast around the kmalloc in bnx2x_prev_mark_path
  ipv4: clean up put_child
  ...
Diffstat (limited to 'net')
-rw-r--r-- net/bridge/br_sysfs_if.c 6
-rw-r--r-- net/core/rtnetlink.c 8
-rw-r--r-- net/ipv4/fib_frontend.c 1
-rw-r--r-- net/ipv4/fib_semantics.c 42
-rw-r--r-- net/ipv4/fib_trie.c 53
-rw-r--r-- net/ipv4/ip_input.c 2
-rw-r--r-- net/ipv4/route.c 183
-rw-r--r-- net/ipv4/sysctl_net_ipv4.c 11
-rw-r--r-- net/ipv4/tcp_input.c 3
-rw-r--r-- net/ipv4/tcp_ipv4.c 12
-rw-r--r-- net/ipv4/tcp_minisocks.c 3
-rw-r--r-- net/ipv4/xfrm4_policy.c 1
-rw-r--r-- net/ipv6/ip6_input.c 2
-rw-r--r-- net/ipv6/route.c 8
14 files changed, 216 insertions, 119 deletions
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 6229b62749e8..13b36bdc76a7 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -27,7 +27,7 @@ struct brport_attribute {
27}; 27};
28 28
29#define BRPORT_ATTR(_name,_mode,_show,_store) \ 29#define BRPORT_ATTR(_name,_mode,_show,_store) \
30struct brport_attribute brport_attr_##_name = { \ 30const struct brport_attribute brport_attr_##_name = { \
31 .attr = {.name = __stringify(_name), \ 31 .attr = {.name = __stringify(_name), \
32 .mode = _mode }, \ 32 .mode = _mode }, \
33 .show = _show, \ 33 .show = _show, \
@@ -164,7 +164,7 @@ static BRPORT_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router,
164 store_multicast_router); 164 store_multicast_router);
165#endif 165#endif
166 166
167static struct brport_attribute *brport_attrs[] = { 167static const struct brport_attribute *brport_attrs[] = {
168 &brport_attr_path_cost, 168 &brport_attr_path_cost,
169 &brport_attr_priority, 169 &brport_attr_priority,
170 &brport_attr_port_id, 170 &brport_attr_port_id,
@@ -241,7 +241,7 @@ const struct sysfs_ops brport_sysfs_ops = {
241int br_sysfs_addif(struct net_bridge_port *p) 241int br_sysfs_addif(struct net_bridge_port *p)
242{ 242{
243 struct net_bridge *br = p->br; 243 struct net_bridge *br = p->br;
244 struct brport_attribute **a; 244 const struct brport_attribute **a;
245 int err; 245 int err;
246 246
247 err = sysfs_create_link(&p->kobj, &br->dev->dev.kobj, 247 err = sysfs_create_link(&p->kobj, &br->dev->dev.kobj,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index bc9e380f0abf..5ff949dc954f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -625,9 +625,13 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
625 .rta_id = id, 625 .rta_id = id,
626 }; 626 };
627 627
628 if (expires) 628 if (expires) {
629 ci.rta_expires = jiffies_to_clock_t(expires); 629 unsigned long clock;
630 630
631 clock = jiffies_to_clock_t(abs(expires));
632 clock = min_t(unsigned long, clock, INT_MAX);
633 ci.rta_expires = (expires > 0) ? clock : -clock;
634 }
631 return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci); 635 return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci);
632} 636}
633EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo); 637EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 8732cc7920ed..c43ae3fba792 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1046,6 +1046,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
1046 1046
1047 if (event == NETDEV_UNREGISTER) { 1047 if (event == NETDEV_UNREGISTER) {
1048 fib_disable_ip(dev, 2, -1); 1048 fib_disable_ip(dev, 2, -1);
1049 rt_flush_dev(dev);
1049 return NOTIFY_DONE; 1050 return NOTIFY_DONE;
1050 } 1051 }
1051 1052
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index da0cc2e6b250..da80dc14cc76 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -140,6 +140,21 @@ const struct fib_prop fib_props[RTN_MAX + 1] = {
140 }, 140 },
141}; 141};
142 142
143static void rt_fibinfo_free(struct rtable __rcu **rtp)
144{
145 struct rtable *rt = rcu_dereference_protected(*rtp, 1);
146
147 if (!rt)
148 return;
149
150 /* Not even needed : RCU_INIT_POINTER(*rtp, NULL);
151 * because we waited an RCU grace period before calling
152 * free_fib_info_rcu()
153 */
154
155 dst_free(&rt->dst);
156}
157
143static void free_nh_exceptions(struct fib_nh *nh) 158static void free_nh_exceptions(struct fib_nh *nh)
144{ 159{
145 struct fnhe_hash_bucket *hash = nh->nh_exceptions; 160 struct fnhe_hash_bucket *hash = nh->nh_exceptions;
@@ -153,6 +168,9 @@ static void free_nh_exceptions(struct fib_nh *nh)
153 struct fib_nh_exception *next; 168 struct fib_nh_exception *next;
154 169
155 next = rcu_dereference_protected(fnhe->fnhe_next, 1); 170 next = rcu_dereference_protected(fnhe->fnhe_next, 1);
171
172 rt_fibinfo_free(&fnhe->fnhe_rth);
173
156 kfree(fnhe); 174 kfree(fnhe);
157 175
158 fnhe = next; 176 fnhe = next;
@@ -161,6 +179,23 @@ static void free_nh_exceptions(struct fib_nh *nh)
161 kfree(hash); 179 kfree(hash);
162} 180}
163 181
182static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
183{
184 int cpu;
185
186 if (!rtp)
187 return;
188
189 for_each_possible_cpu(cpu) {
190 struct rtable *rt;
191
192 rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1);
193 if (rt)
194 dst_free(&rt->dst);
195 }
196 free_percpu(rtp);
197}
198
164/* Release a nexthop info record */ 199/* Release a nexthop info record */
165static void free_fib_info_rcu(struct rcu_head *head) 200static void free_fib_info_rcu(struct rcu_head *head)
166{ 201{
@@ -171,10 +206,8 @@ static void free_fib_info_rcu(struct rcu_head *head)
171 dev_put(nexthop_nh->nh_dev); 206 dev_put(nexthop_nh->nh_dev);
172 if (nexthop_nh->nh_exceptions) 207 if (nexthop_nh->nh_exceptions)
173 free_nh_exceptions(nexthop_nh); 208 free_nh_exceptions(nexthop_nh);
174 if (nexthop_nh->nh_rth_output) 209 rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output);
175 dst_free(&nexthop_nh->nh_rth_output->dst); 210 rt_fibinfo_free(&nexthop_nh->nh_rth_input);
176 if (nexthop_nh->nh_rth_input)
177 dst_free(&nexthop_nh->nh_rth_input->dst);
178 } endfor_nexthops(fi); 211 } endfor_nexthops(fi);
179 212
180 release_net(fi->fib_net); 213 release_net(fi->fib_net);
@@ -804,6 +837,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
804 fi->fib_nhs = nhs; 837 fi->fib_nhs = nhs;
805 change_nexthops(fi) { 838 change_nexthops(fi) {
806 nexthop_nh->nh_parent = fi; 839 nexthop_nh->nh_parent = fi;
840 nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *);
807 } endfor_nexthops(fi) 841 } endfor_nexthops(fi)
808 842
809 if (cfg->fc_mx) { 843 if (cfg->fc_mx) {
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 18cbc15b20d5..f0cdb30921c0 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -159,7 +159,6 @@ struct trie {
159#endif 159#endif
160}; 160};
161 161
162static void put_child(struct trie *t, struct tnode *tn, int i, struct rt_trie_node *n);
163static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, 162static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n,
164 int wasfull); 163 int wasfull);
165static struct rt_trie_node *resize(struct trie *t, struct tnode *tn); 164static struct rt_trie_node *resize(struct trie *t, struct tnode *tn);
@@ -473,7 +472,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits)
473 } 472 }
474 473
475 pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), 474 pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode),
476 sizeof(struct rt_trie_node) << bits); 475 sizeof(struct rt_trie_node *) << bits);
477 return tn; 476 return tn;
478} 477}
479 478
@@ -490,7 +489,7 @@ static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *
490 return ((struct tnode *) n)->pos == tn->pos + tn->bits; 489 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
491} 490}
492 491
493static inline void put_child(struct trie *t, struct tnode *tn, int i, 492static inline void put_child(struct tnode *tn, int i,
494 struct rt_trie_node *n) 493 struct rt_trie_node *n)
495{ 494{
496 tnode_put_child_reorg(tn, i, n, -1); 495 tnode_put_child_reorg(tn, i, n, -1);
@@ -754,8 +753,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
754 goto nomem; 753 goto nomem;
755 } 754 }
756 755
757 put_child(t, tn, 2*i, (struct rt_trie_node *) left); 756 put_child(tn, 2*i, (struct rt_trie_node *) left);
758 put_child(t, tn, 2*i+1, (struct rt_trie_node *) right); 757 put_child(tn, 2*i+1, (struct rt_trie_node *) right);
759 } 758 }
760 } 759 }
761 760
@@ -776,9 +775,9 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
776 if (tkey_extract_bits(node->key, 775 if (tkey_extract_bits(node->key,
777 oldtnode->pos + oldtnode->bits, 776 oldtnode->pos + oldtnode->bits,
778 1) == 0) 777 1) == 0)
779 put_child(t, tn, 2*i, node); 778 put_child(tn, 2*i, node);
780 else 779 else
781 put_child(t, tn, 2*i+1, node); 780 put_child(tn, 2*i+1, node);
782 continue; 781 continue;
783 } 782 }
784 783
@@ -786,8 +785,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
786 inode = (struct tnode *) node; 785 inode = (struct tnode *) node;
787 786
788 if (inode->bits == 1) { 787 if (inode->bits == 1) {
789 put_child(t, tn, 2*i, rtnl_dereference(inode->child[0])); 788 put_child(tn, 2*i, rtnl_dereference(inode->child[0]));
790 put_child(t, tn, 2*i+1, rtnl_dereference(inode->child[1])); 789 put_child(tn, 2*i+1, rtnl_dereference(inode->child[1]));
791 790
792 tnode_free_safe(inode); 791 tnode_free_safe(inode);
793 continue; 792 continue;
@@ -817,22 +816,22 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
817 */ 816 */
818 817
819 left = (struct tnode *) tnode_get_child(tn, 2*i); 818 left = (struct tnode *) tnode_get_child(tn, 2*i);
820 put_child(t, tn, 2*i, NULL); 819 put_child(tn, 2*i, NULL);
821 820
822 BUG_ON(!left); 821 BUG_ON(!left);
823 822
824 right = (struct tnode *) tnode_get_child(tn, 2*i+1); 823 right = (struct tnode *) tnode_get_child(tn, 2*i+1);
825 put_child(t, tn, 2*i+1, NULL); 824 put_child(tn, 2*i+1, NULL);
826 825
827 BUG_ON(!right); 826 BUG_ON(!right);
828 827
829 size = tnode_child_length(left); 828 size = tnode_child_length(left);
830 for (j = 0; j < size; j++) { 829 for (j = 0; j < size; j++) {
831 put_child(t, left, j, rtnl_dereference(inode->child[j])); 830 put_child(left, j, rtnl_dereference(inode->child[j]));
832 put_child(t, right, j, rtnl_dereference(inode->child[j + size])); 831 put_child(right, j, rtnl_dereference(inode->child[j + size]));
833 } 832 }
834 put_child(t, tn, 2*i, resize(t, left)); 833 put_child(tn, 2*i, resize(t, left));
835 put_child(t, tn, 2*i+1, resize(t, right)); 834 put_child(tn, 2*i+1, resize(t, right));
836 835
837 tnode_free_safe(inode); 836 tnode_free_safe(inode);
838 } 837 }
@@ -877,7 +876,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
877 if (!newn) 876 if (!newn)
878 goto nomem; 877 goto nomem;
879 878
880 put_child(t, tn, i/2, (struct rt_trie_node *)newn); 879 put_child(tn, i/2, (struct rt_trie_node *)newn);
881 } 880 }
882 881
883 } 882 }
@@ -892,21 +891,21 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
892 if (left == NULL) { 891 if (left == NULL) {
893 if (right == NULL) /* Both are empty */ 892 if (right == NULL) /* Both are empty */
894 continue; 893 continue;
895 put_child(t, tn, i/2, right); 894 put_child(tn, i/2, right);
896 continue; 895 continue;
897 } 896 }
898 897
899 if (right == NULL) { 898 if (right == NULL) {
900 put_child(t, tn, i/2, left); 899 put_child(tn, i/2, left);
901 continue; 900 continue;
902 } 901 }
903 902
904 /* Two nonempty children */ 903 /* Two nonempty children */
905 newBinNode = (struct tnode *) tnode_get_child(tn, i/2); 904 newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
906 put_child(t, tn, i/2, NULL); 905 put_child(tn, i/2, NULL);
907 put_child(t, newBinNode, 0, left); 906 put_child(newBinNode, 0, left);
908 put_child(t, newBinNode, 1, right); 907 put_child(newBinNode, 1, right);
909 put_child(t, tn, i/2, resize(t, newBinNode)); 908 put_child(tn, i/2, resize(t, newBinNode));
910 } 909 }
911 tnode_free_safe(oldtnode); 910 tnode_free_safe(oldtnode);
912 return tn; 911 return tn;
@@ -1125,7 +1124,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1125 node_set_parent((struct rt_trie_node *)l, tp); 1124 node_set_parent((struct rt_trie_node *)l, tp);
1126 1125
1127 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1126 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1128 put_child(t, tp, cindex, (struct rt_trie_node *)l); 1127 put_child(tp, cindex, (struct rt_trie_node *)l);
1129 } else { 1128 } else {
1130 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ 1129 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
1131 /* 1130 /*
@@ -1155,12 +1154,12 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1155 node_set_parent((struct rt_trie_node *)tn, tp); 1154 node_set_parent((struct rt_trie_node *)tn, tp);
1156 1155
1157 missbit = tkey_extract_bits(key, newpos, 1); 1156 missbit = tkey_extract_bits(key, newpos, 1);
1158 put_child(t, tn, missbit, (struct rt_trie_node *)l); 1157 put_child(tn, missbit, (struct rt_trie_node *)l);
1159 put_child(t, tn, 1-missbit, n); 1158 put_child(tn, 1-missbit, n);
1160 1159
1161 if (tp) { 1160 if (tp) {
1162 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1161 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1163 put_child(t, tp, cindex, (struct rt_trie_node *)tn); 1162 put_child(tp, cindex, (struct rt_trie_node *)tn);
1164 } else { 1163 } else {
1165 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); 1164 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
1166 tp = tn; 1165 tp = tn;
@@ -1619,7 +1618,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l)
1619 1618
1620 if (tp) { 1619 if (tp) {
1621 t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits); 1620 t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits);
1622 put_child(t, tp, cindex, NULL); 1621 put_child(tp, cindex, NULL);
1623 trie_rebalance(t, tp); 1622 trie_rebalance(t, tp);
1624 } else 1623 } else
1625 RCU_INIT_POINTER(t->trie, NULL); 1624 RCU_INIT_POINTER(t->trie, NULL);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 981ff1eef28c..f1395a6fb35f 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -325,14 +325,12 @@ static int ip_rcv_finish(struct sk_buff *skb)
325 const struct net_protocol *ipprot; 325 const struct net_protocol *ipprot;
326 int protocol = iph->protocol; 326 int protocol = iph->protocol;
327 327
328 rcu_read_lock();
329 ipprot = rcu_dereference(inet_protos[protocol]); 328 ipprot = rcu_dereference(inet_protos[protocol]);
330 if (ipprot && ipprot->early_demux) { 329 if (ipprot && ipprot->early_demux) {
331 ipprot->early_demux(skb); 330 ipprot->early_demux(skb);
332 /* must reload iph, skb->head might have changed */ 331 /* must reload iph, skb->head might have changed */
333 iph = ip_hdr(skb); 332 iph = ip_hdr(skb);
334 } 333 }
335 rcu_read_unlock();
336 } 334 }
337 335
338 /* 336 /*
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index fc1a81ca79a7..c035251beb07 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -147,6 +147,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
147 struct sk_buff *skb, u32 mtu); 147 struct sk_buff *skb, u32 mtu);
148static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, 148static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
149 struct sk_buff *skb); 149 struct sk_buff *skb);
150static void ipv4_dst_destroy(struct dst_entry *dst);
150 151
151static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 152static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
152 int how) 153 int how)
@@ -170,6 +171,7 @@ static struct dst_ops ipv4_dst_ops = {
170 .default_advmss = ipv4_default_advmss, 171 .default_advmss = ipv4_default_advmss,
171 .mtu = ipv4_mtu, 172 .mtu = ipv4_mtu,
172 .cow_metrics = ipv4_cow_metrics, 173 .cow_metrics = ipv4_cow_metrics,
174 .destroy = ipv4_dst_destroy,
173 .ifdown = ipv4_dst_ifdown, 175 .ifdown = ipv4_dst_ifdown,
174 .negative_advice = ipv4_negative_advice, 176 .negative_advice = ipv4_negative_advice,
175 .link_failure = ipv4_link_failure, 177 .link_failure = ipv4_link_failure,
@@ -587,11 +589,17 @@ static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
587 build_sk_flow_key(fl4, sk); 589 build_sk_flow_key(fl4, sk);
588} 590}
589 591
590static DEFINE_SEQLOCK(fnhe_seqlock); 592static inline void rt_free(struct rtable *rt)
593{
594 call_rcu(&rt->dst.rcu_head, dst_rcu_free);
595}
596
597static DEFINE_SPINLOCK(fnhe_lock);
591 598
592static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) 599static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
593{ 600{
594 struct fib_nh_exception *fnhe, *oldest; 601 struct fib_nh_exception *fnhe, *oldest;
602 struct rtable *orig;
595 603
596 oldest = rcu_dereference(hash->chain); 604 oldest = rcu_dereference(hash->chain);
597 for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; 605 for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
@@ -599,6 +607,11 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
599 if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) 607 if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
600 oldest = fnhe; 608 oldest = fnhe;
601 } 609 }
610 orig = rcu_dereference(oldest->fnhe_rth);
611 if (orig) {
612 RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
613 rt_free(orig);
614 }
602 return oldest; 615 return oldest;
603} 616}
604 617
@@ -620,7 +633,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
620 int depth; 633 int depth;
621 u32 hval = fnhe_hashfun(daddr); 634 u32 hval = fnhe_hashfun(daddr);
622 635
623 write_seqlock_bh(&fnhe_seqlock); 636 spin_lock_bh(&fnhe_lock);
624 637
625 hash = nh->nh_exceptions; 638 hash = nh->nh_exceptions;
626 if (!hash) { 639 if (!hash) {
@@ -667,7 +680,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
667 fnhe->fnhe_stamp = jiffies; 680 fnhe->fnhe_stamp = jiffies;
668 681
669out_unlock: 682out_unlock:
670 write_sequnlock_bh(&fnhe_seqlock); 683 spin_unlock_bh(&fnhe_lock);
671 return; 684 return;
672} 685}
673 686
@@ -1164,53 +1177,62 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
1164 return NULL; 1177 return NULL;
1165} 1178}
1166 1179
1167static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, 1180static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
1168 __be32 daddr) 1181 __be32 daddr)
1169{ 1182{
1170 __be32 fnhe_daddr, gw; 1183 bool ret = false;
1171 unsigned long expires; 1184
1172 unsigned int seq; 1185 spin_lock_bh(&fnhe_lock);
1173 u32 pmtu;
1174
1175restart:
1176 seq = read_seqbegin(&fnhe_seqlock);
1177 fnhe_daddr = fnhe->fnhe_daddr;
1178 gw = fnhe->fnhe_gw;
1179 pmtu = fnhe->fnhe_pmtu;
1180 expires = fnhe->fnhe_expires;
1181 if (read_seqretry(&fnhe_seqlock, seq))
1182 goto restart;
1183
1184 if (daddr != fnhe_daddr)
1185 return;
1186 1186
1187 if (pmtu) { 1187 if (daddr == fnhe->fnhe_daddr) {
1188 unsigned long diff = expires - jiffies; 1188 struct rtable *orig;
1189 1189
1190 if (time_before(jiffies, expires)) { 1190 if (fnhe->fnhe_pmtu) {
1191 rt->rt_pmtu = pmtu; 1191 unsigned long expires = fnhe->fnhe_expires;
1192 dst_set_expires(&rt->dst, diff); 1192 unsigned long diff = expires - jiffies;
1193
1194 if (time_before(jiffies, expires)) {
1195 rt->rt_pmtu = fnhe->fnhe_pmtu;
1196 dst_set_expires(&rt->dst, diff);
1197 }
1193 } 1198 }
1199 if (fnhe->fnhe_gw) {
1200 rt->rt_flags |= RTCF_REDIRECTED;
1201 rt->rt_gateway = fnhe->fnhe_gw;
1202 }
1203
1204 orig = rcu_dereference(fnhe->fnhe_rth);
1205 rcu_assign_pointer(fnhe->fnhe_rth, rt);
1206 if (orig)
1207 rt_free(orig);
1208
1209 fnhe->fnhe_stamp = jiffies;
1210 ret = true;
1211 } else {
1212 /* Routes we intend to cache in nexthop exception have
1213 * the DST_NOCACHE bit clear. However, if we are
1214 * unsuccessful at storing this route into the cache
1215 * we really need to set it.
1216 */
1217 rt->dst.flags |= DST_NOCACHE;
1194 } 1218 }
1195 if (gw) { 1219 spin_unlock_bh(&fnhe_lock);
1196 rt->rt_flags |= RTCF_REDIRECTED;
1197 rt->rt_gateway = gw;
1198 }
1199 fnhe->fnhe_stamp = jiffies;
1200}
1201 1220
1202static inline void rt_free(struct rtable *rt) 1221 return ret;
1203{
1204 call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
1205} 1222}
1206 1223
1207static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) 1224static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
1208{ 1225{
1209 struct rtable *orig, *prev, **p = &nh->nh_rth_output; 1226 struct rtable *orig, *prev, **p;
1210 1227 bool ret = true;
1211 if (rt_is_input_route(rt))
1212 p = &nh->nh_rth_input;
1213 1228
1229 if (rt_is_input_route(rt)) {
1230 p = (struct rtable **)&nh->nh_rth_input;
1231 } else {
1232 if (!nh->nh_pcpu_rth_output)
1233 goto nocache;
1234 p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
1235 }
1214 orig = *p; 1236 orig = *p;
1215 1237
1216 prev = cmpxchg(p, orig, rt); 1238 prev = cmpxchg(p, orig, rt);
@@ -1223,7 +1245,50 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
1223 * unsuccessful at storing this route into the cache 1245 * unsuccessful at storing this route into the cache
1224 * we really need to set it. 1246 * we really need to set it.
1225 */ 1247 */
1248nocache:
1226 rt->dst.flags |= DST_NOCACHE; 1249 rt->dst.flags |= DST_NOCACHE;
1250 ret = false;
1251 }
1252
1253 return ret;
1254}
1255
1256static DEFINE_SPINLOCK(rt_uncached_lock);
1257static LIST_HEAD(rt_uncached_list);
1258
1259static void rt_add_uncached_list(struct rtable *rt)
1260{
1261 spin_lock_bh(&rt_uncached_lock);
1262 list_add_tail(&rt->rt_uncached, &rt_uncached_list);
1263 spin_unlock_bh(&rt_uncached_lock);
1264}
1265
1266static void ipv4_dst_destroy(struct dst_entry *dst)
1267{
1268 struct rtable *rt = (struct rtable *) dst;
1269
1270 if (dst->flags & DST_NOCACHE) {
1271 spin_lock_bh(&rt_uncached_lock);
1272 list_del(&rt->rt_uncached);
1273 spin_unlock_bh(&rt_uncached_lock);
1274 }
1275}
1276
1277void rt_flush_dev(struct net_device *dev)
1278{
1279 if (!list_empty(&rt_uncached_list)) {
1280 struct net *net = dev_net(dev);
1281 struct rtable *rt;
1282
1283 spin_lock_bh(&rt_uncached_lock);
1284 list_for_each_entry(rt, &rt_uncached_list, rt_uncached) {
1285 if (rt->dst.dev != dev)
1286 continue;
1287 rt->dst.dev = net->loopback_dev;
1288 dev_hold(rt->dst.dev);
1289 dev_put(dev);
1290 }
1291 spin_unlock_bh(&rt_uncached_lock);
1227 } 1292 }
1228} 1293}
1229 1294
@@ -1239,20 +1304,24 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
1239 struct fib_nh_exception *fnhe, 1304 struct fib_nh_exception *fnhe,
1240 struct fib_info *fi, u16 type, u32 itag) 1305 struct fib_info *fi, u16 type, u32 itag)
1241{ 1306{
1307 bool cached = false;
1308
1242 if (fi) { 1309 if (fi) {
1243 struct fib_nh *nh = &FIB_RES_NH(*res); 1310 struct fib_nh *nh = &FIB_RES_NH(*res);
1244 1311
1245 if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) 1312 if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
1246 rt->rt_gateway = nh->nh_gw; 1313 rt->rt_gateway = nh->nh_gw;
1247 if (unlikely(fnhe))
1248 rt_bind_exception(rt, fnhe, daddr);
1249 dst_init_metrics(&rt->dst, fi->fib_metrics, true); 1314 dst_init_metrics(&rt->dst, fi->fib_metrics, true);
1250#ifdef CONFIG_IP_ROUTE_CLASSID 1315#ifdef CONFIG_IP_ROUTE_CLASSID
1251 rt->dst.tclassid = nh->nh_tclassid; 1316 rt->dst.tclassid = nh->nh_tclassid;
1252#endif 1317#endif
1253 if (!(rt->dst.flags & DST_NOCACHE)) 1318 if (unlikely(fnhe))
1254 rt_cache_route(nh, rt); 1319 cached = rt_bind_exception(rt, fnhe, daddr);
1320 else if (!(rt->dst.flags & DST_NOCACHE))
1321 cached = rt_cache_route(nh, rt);
1255 } 1322 }
1323 if (unlikely(!cached))
1324 rt_add_uncached_list(rt);
1256 1325
1257#ifdef CONFIG_IP_ROUTE_CLASSID 1326#ifdef CONFIG_IP_ROUTE_CLASSID
1258#ifdef CONFIG_IP_MULTIPLE_TABLES 1327#ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -1319,6 +1388,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1319 rth->rt_iif = 0; 1388 rth->rt_iif = 0;
1320 rth->rt_pmtu = 0; 1389 rth->rt_pmtu = 0;
1321 rth->rt_gateway = 0; 1390 rth->rt_gateway = 0;
1391 INIT_LIST_HEAD(&rth->rt_uncached);
1322 if (our) { 1392 if (our) {
1323 rth->dst.input= ip_local_deliver; 1393 rth->dst.input= ip_local_deliver;
1324 rth->rt_flags |= RTCF_LOCAL; 1394 rth->rt_flags |= RTCF_LOCAL;
@@ -1420,7 +1490,7 @@ static int __mkroute_input(struct sk_buff *skb,
1420 do_cache = false; 1490 do_cache = false;
1421 if (res->fi) { 1491 if (res->fi) {
1422 if (!itag) { 1492 if (!itag) {
1423 rth = FIB_RES_NH(*res).nh_rth_input; 1493 rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
1424 if (rt_cache_valid(rth)) { 1494 if (rt_cache_valid(rth)) {
1425 skb_dst_set_noref(skb, &rth->dst); 1495 skb_dst_set_noref(skb, &rth->dst);
1426 goto out; 1496 goto out;
@@ -1444,6 +1514,7 @@ static int __mkroute_input(struct sk_buff *skb,
1444 rth->rt_iif = 0; 1514 rth->rt_iif = 0;
1445 rth->rt_pmtu = 0; 1515 rth->rt_pmtu = 0;
1446 rth->rt_gateway = 0; 1516 rth->rt_gateway = 0;
1517 INIT_LIST_HEAD(&rth->rt_uncached);
1447 1518
1448 rth->dst.input = ip_forward; 1519 rth->dst.input = ip_forward;
1449 rth->dst.output = ip_output; 1520 rth->dst.output = ip_output;
@@ -1582,7 +1653,7 @@ local_input:
1582 do_cache = false; 1653 do_cache = false;
1583 if (res.fi) { 1654 if (res.fi) {
1584 if (!itag) { 1655 if (!itag) {
1585 rth = FIB_RES_NH(res).nh_rth_input; 1656 rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
1586 if (rt_cache_valid(rth)) { 1657 if (rt_cache_valid(rth)) {
1587 skb_dst_set_noref(skb, &rth->dst); 1658 skb_dst_set_noref(skb, &rth->dst);
1588 err = 0; 1659 err = 0;
@@ -1610,6 +1681,7 @@ local_input:
1610 rth->rt_iif = 0; 1681 rth->rt_iif = 0;
1611 rth->rt_pmtu = 0; 1682 rth->rt_pmtu = 0;
1612 rth->rt_gateway = 0; 1683 rth->rt_gateway = 0;
1684 INIT_LIST_HEAD(&rth->rt_uncached);
1613 if (res.type == RTN_UNREACHABLE) { 1685 if (res.type == RTN_UNREACHABLE) {
1614 rth->dst.input= ip_error; 1686 rth->dst.input= ip_error;
1615 rth->dst.error= -err; 1687 rth->dst.error= -err;
@@ -1748,19 +1820,23 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
1748 1820
1749 fnhe = NULL; 1821 fnhe = NULL;
1750 if (fi) { 1822 if (fi) {
1823 struct rtable __rcu **prth;
1824
1751 fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); 1825 fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
1752 if (!fnhe) { 1826 if (fnhe)
1753 rth = FIB_RES_NH(*res).nh_rth_output; 1827 prth = &fnhe->fnhe_rth;
1754 if (rt_cache_valid(rth)) { 1828 else
1755 dst_hold(&rth->dst); 1829 prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output);
1756 return rth; 1830 rth = rcu_dereference(*prth);
1757 } 1831 if (rt_cache_valid(rth)) {
1832 dst_hold(&rth->dst);
1833 return rth;
1758 } 1834 }
1759 } 1835 }
1760 rth = rt_dst_alloc(dev_out, 1836 rth = rt_dst_alloc(dev_out,
1761 IN_DEV_CONF_GET(in_dev, NOPOLICY), 1837 IN_DEV_CONF_GET(in_dev, NOPOLICY),
1762 IN_DEV_CONF_GET(in_dev, NOXFRM), 1838 IN_DEV_CONF_GET(in_dev, NOXFRM),
1763 fi && !fnhe); 1839 fi);
1764 if (!rth) 1840 if (!rth)
1765 return ERR_PTR(-ENOBUFS); 1841 return ERR_PTR(-ENOBUFS);
1766 1842
@@ -1773,6 +1849,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
1773 rth->rt_iif = orig_oif ? : 0; 1849 rth->rt_iif = orig_oif ? : 0;
1774 rth->rt_pmtu = 0; 1850 rth->rt_pmtu = 0;
1775 rth->rt_gateway = 0; 1851 rth->rt_gateway = 0;
1852 INIT_LIST_HEAD(&rth->rt_uncached);
1776 1853
1777 RT_CACHE_STAT_INC(out_slow_tot); 1854 RT_CACHE_STAT_INC(out_slow_tot);
1778 1855
@@ -2052,6 +2129,8 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
2052 rt->rt_type = ort->rt_type; 2129 rt->rt_type = ort->rt_type;
2053 rt->rt_gateway = ort->rt_gateway; 2130 rt->rt_gateway = ort->rt_gateway;
2054 2131
2132 INIT_LIST_HEAD(&rt->rt_uncached);
2133
2055 dst_free(new); 2134 dst_free(new);
2056 } 2135 }
2057 2136
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 5840c3255721..4b6487a68279 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -784,13 +784,6 @@ static struct ctl_table ipv4_net_table[] = {
784 .proc_handler = proc_dointvec 784 .proc_handler = proc_dointvec
785 }, 785 },
786 { 786 {
787 .procname = "rt_cache_rebuild_count",
788 .data = &init_net.ipv4.sysctl_rt_cache_rebuild_count,
789 .maxlen = sizeof(int),
790 .mode = 0644,
791 .proc_handler = proc_dointvec
792 },
793 {
794 .procname = "ping_group_range", 787 .procname = "ping_group_range",
795 .data = &init_net.ipv4.sysctl_ping_group_range, 788 .data = &init_net.ipv4.sysctl_ping_group_range,
796 .maxlen = sizeof(init_net.ipv4.sysctl_ping_group_range), 789 .maxlen = sizeof(init_net.ipv4.sysctl_ping_group_range),
@@ -829,8 +822,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
829 table[5].data = 822 table[5].data =
830 &net->ipv4.sysctl_icmp_ratemask; 823 &net->ipv4.sysctl_icmp_ratemask;
831 table[6].data = 824 table[6].data =
832 &net->ipv4.sysctl_rt_cache_rebuild_count;
833 table[7].data =
834 &net->ipv4.sysctl_ping_group_range; 825 &net->ipv4.sysctl_ping_group_range;
835 826
836 } 827 }
@@ -842,8 +833,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
842 net->ipv4.sysctl_ping_group_range[0] = 1; 833 net->ipv4.sysctl_ping_group_range[0] = 1;
843 net->ipv4.sysctl_ping_group_range[1] = 0; 834 net->ipv4.sysctl_ping_group_range[1] = 0;
844 835
845 net->ipv4.sysctl_rt_cache_rebuild_count = 4;
846
847 tcp_init_mem(net); 836 tcp_init_mem(net);
848 837
849 net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); 838 net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a356e1fecf9a..9be30b039ae3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5604,8 +5604,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
5604 tcp_set_state(sk, TCP_ESTABLISHED); 5604 tcp_set_state(sk, TCP_ESTABLISHED);
5605 5605
5606 if (skb != NULL) { 5606 if (skb != NULL) {
5607 sk->sk_rx_dst = dst_clone(skb_dst(skb)); 5607 inet_sk_rx_dst_set(sk, skb);
5608 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
5609 security_inet_conn_established(sk, skb); 5608 security_inet_conn_established(sk, skb);
5610 } 5609 }
5611 5610
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2fbd9921253f..7f91e5ac8277 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1617,19 +1617,19 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1617#endif 1617#endif
1618 1618
1619 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1619 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1620 struct dst_entry *dst = sk->sk_rx_dst;
1621
1620 sock_rps_save_rxhash(sk, skb); 1622 sock_rps_save_rxhash(sk, skb);
1621 if (sk->sk_rx_dst) { 1623 if (dst) {
1622 struct dst_entry *dst = sk->sk_rx_dst;
1623 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || 1624 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1624 dst->ops->check(dst, 0) == NULL) { 1625 dst->ops->check(dst, 0) == NULL) {
1625 dst_release(dst); 1626 dst_release(dst);
1626 sk->sk_rx_dst = NULL; 1627 sk->sk_rx_dst = NULL;
1627 } 1628 }
1628 } 1629 }
1629 if (unlikely(sk->sk_rx_dst == NULL)) { 1630 if (unlikely(sk->sk_rx_dst == NULL))
1630 sk->sk_rx_dst = dst_clone(skb_dst(skb)); 1631 inet_sk_rx_dst_set(sk, skb);
1631 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; 1632
1632 }
1633 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { 1633 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1634 rsk = sk; 1634 rsk = sk;
1635 goto reset; 1635 goto reset;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 3f1cc2028edd..232a90c3ec86 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -387,8 +387,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
387 struct tcp_sock *oldtp = tcp_sk(sk); 387 struct tcp_sock *oldtp = tcp_sk(sk);
388 struct tcp_cookie_values *oldcvp = oldtp->cookie_values; 388 struct tcp_cookie_values *oldcvp = oldtp->cookie_values;
389 389
390 newsk->sk_rx_dst = dst_clone(skb_dst(skb)); 390 inet_sk_rx_dst_set(newsk, skb);
391 inet_sk(newsk)->rx_dst_ifindex = skb->skb_iif;
392 391
393 /* TCP Cookie Transactions require space for the cookie pair, 392 /* TCP Cookie Transactions require space for the cookie pair,
394 * as it differs for each connection. There is no need to 393 * as it differs for each connection. There is no need to
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index c6281847f16a..681ea2f413e2 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -92,6 +92,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
92 xdst->u.rt.rt_type = rt->rt_type; 92 xdst->u.rt.rt_type = rt->rt_type;
93 xdst->u.rt.rt_gateway = rt->rt_gateway; 93 xdst->u.rt.rt_gateway = rt->rt_gateway;
94 xdst->u.rt.rt_pmtu = rt->rt_pmtu; 94 xdst->u.rt.rt_pmtu = rt->rt_pmtu;
95 INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
95 96
96 return 0; 97 return 0;
97} 98}
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 47975e363fcd..a52d864d562b 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -52,11 +52,9 @@ int ip6_rcv_finish(struct sk_buff *skb)
52 if (sysctl_ip_early_demux && !skb_dst(skb)) { 52 if (sysctl_ip_early_demux && !skb_dst(skb)) {
53 const struct inet6_protocol *ipprot; 53 const struct inet6_protocol *ipprot;
54 54
55 rcu_read_lock();
56 ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); 55 ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
57 if (ipprot && ipprot->early_demux) 56 if (ipprot && ipprot->early_demux)
58 ipprot->early_demux(skb); 57 ipprot->early_demux(skb);
59 rcu_read_unlock();
60 } 58 }
61 if (!skb_dst(skb)) 59 if (!skb_dst(skb))
62 ip6_route_input(skb); 60 ip6_route_input(skb);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index cf02cb97bbdd..8e80fd279100 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2480,12 +2480,8 @@ static int rt6_fill_node(struct net *net,
2480 goto nla_put_failure; 2480 goto nla_put_failure;
2481 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric)) 2481 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2482 goto nla_put_failure; 2482 goto nla_put_failure;
2483 if (!(rt->rt6i_flags & RTF_EXPIRES)) 2483
2484 expires = 0; 2484 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2485 else if (rt->dst.expires - jiffies < INT_MAX)
2486 expires = rt->dst.expires - jiffies;
2487 else
2488 expires = INT_MAX;
2489 2485
2490 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) 2486 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2491 goto nla_put_failure; 2487 goto nla_put_failure;