diff options
author | David S. Miller <davem@davemloft.net> | 2012-07-31 18:06:50 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-07-31 18:06:50 -0400 |
commit | caacf05e5ad1abf0a2864863da4e33024bc68ec6 (patch) | |
tree | 9a82e97a072e288f604bd3dbbe07e1796581fad0 | |
parent | c5038a8327b980a5b279fa193163c468011de009 (diff) |
ipv4: Properly purge netdev references on uncached routes.
When a device is unregistered, we have to purge all of the
references to it that may exist in the entire system.
If a route is uncached, we currently have no way of accomplishing
this.
So create a global list that is scanned when a network device goes
down. This mirrors the logic in net/core/dst.c's dst_ifdown().
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/route.h | 3 | ||||
-rw-r--r-- | net/ipv4/fib_frontend.c | 1 | ||||
-rw-r--r-- | net/ipv4/route.c | 68 | ||||
-rw-r--r-- | net/ipv4/xfrm4_policy.c | 1 |
4 files changed, 69 insertions, 4 deletions
diff --git a/include/net/route.h b/include/net/route.h
index 8c52bc6f1c90..776a27f1ab78 100644
--- a/include/net/route.h
+++ b/include/net/route.h | |||
@@ -57,6 +57,8 @@ struct rtable { | |||
57 | 57 | ||
58 | /* Miscellaneous cached information */ | 58 | /* Miscellaneous cached information */ |
59 | u32 rt_pmtu; | 59 | u32 rt_pmtu; |
60 | |||
61 | struct list_head rt_uncached; | ||
60 | }; | 62 | }; |
61 | 63 | ||
62 | static inline bool rt_is_input_route(const struct rtable *rt) | 64 | static inline bool rt_is_input_route(const struct rtable *rt) |
@@ -107,6 +109,7 @@ extern struct ip_rt_acct __percpu *ip_rt_acct; | |||
107 | struct in_device; | 109 | struct in_device; |
108 | extern int ip_rt_init(void); | 110 | extern int ip_rt_init(void); |
109 | extern void rt_cache_flush(struct net *net, int how); | 111 | extern void rt_cache_flush(struct net *net, int how); |
112 | extern void rt_flush_dev(struct net_device *dev); | ||
110 | extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp); | 113 | extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp); |
111 | extern struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, | 114 | extern struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, |
112 | struct sock *sk); | 115 | struct sock *sk); |
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 8732cc7920ed..c43ae3fba792 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c | |||
@@ -1046,6 +1046,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo | |||
1046 | 1046 | ||
1047 | if (event == NETDEV_UNREGISTER) { | 1047 | if (event == NETDEV_UNREGISTER) { |
1048 | fib_disable_ip(dev, 2, -1); | 1048 | fib_disable_ip(dev, 2, -1); |
1049 | rt_flush_dev(dev); | ||
1049 | return NOTIFY_DONE; | 1050 | return NOTIFY_DONE; |
1050 | } | 1051 | } |
1051 | 1052 | ||
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b102eeb16e34..c035251beb07 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c | |||
@@ -147,6 +147,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, | |||
147 | struct sk_buff *skb, u32 mtu); | 147 | struct sk_buff *skb, u32 mtu); |
148 | static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, | 148 | static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, |
149 | struct sk_buff *skb); | 149 | struct sk_buff *skb); |
150 | static void ipv4_dst_destroy(struct dst_entry *dst); | ||
150 | 151 | ||
151 | static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, | 152 | static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, |
152 | int how) | 153 | int how) |
@@ -170,6 +171,7 @@ static struct dst_ops ipv4_dst_ops = { | |||
170 | .default_advmss = ipv4_default_advmss, | 171 | .default_advmss = ipv4_default_advmss, |
171 | .mtu = ipv4_mtu, | 172 | .mtu = ipv4_mtu, |
172 | .cow_metrics = ipv4_cow_metrics, | 173 | .cow_metrics = ipv4_cow_metrics, |
174 | .destroy = ipv4_dst_destroy, | ||
173 | .ifdown = ipv4_dst_ifdown, | 175 | .ifdown = ipv4_dst_ifdown, |
174 | .negative_advice = ipv4_negative_advice, | 176 | .negative_advice = ipv4_negative_advice, |
175 | .link_failure = ipv4_link_failure, | 177 | .link_failure = ipv4_link_failure, |
@@ -1175,9 +1177,11 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) | |||
1175 | return NULL; | 1177 | return NULL; |
1176 | } | 1178 | } |
1177 | 1179 | ||
1178 | static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, | 1180 | static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, |
1179 | __be32 daddr) | 1181 | __be32 daddr) |
1180 | { | 1182 | { |
1183 | bool ret = false; | ||
1184 | |||
1181 | spin_lock_bh(&fnhe_lock); | 1185 | spin_lock_bh(&fnhe_lock); |
1182 | 1186 | ||
1183 | if (daddr == fnhe->fnhe_daddr) { | 1187 | if (daddr == fnhe->fnhe_daddr) { |
@@ -1203,6 +1207,7 @@ static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, | |||
1203 | rt_free(orig); | 1207 | rt_free(orig); |
1204 | 1208 | ||
1205 | fnhe->fnhe_stamp = jiffies; | 1209 | fnhe->fnhe_stamp = jiffies; |
1210 | ret = true; | ||
1206 | } else { | 1211 | } else { |
1207 | /* Routes we intend to cache in nexthop exception have | 1212 | /* Routes we intend to cache in nexthop exception have |
1208 | * the DST_NOCACHE bit clear. However, if we are | 1213 | * the DST_NOCACHE bit clear. However, if we are |
@@ -1212,11 +1217,14 @@ static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, | |||
1212 | rt->dst.flags |= DST_NOCACHE; | 1217 | rt->dst.flags |= DST_NOCACHE; |
1213 | } | 1218 | } |
1214 | spin_unlock_bh(&fnhe_lock); | 1219 | spin_unlock_bh(&fnhe_lock); |
1220 | |||
1221 | return ret; | ||
1215 | } | 1222 | } |
1216 | 1223 | ||
1217 | static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) | 1224 | static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) |
1218 | { | 1225 | { |
1219 | struct rtable *orig, *prev, **p; | 1226 | struct rtable *orig, *prev, **p; |
1227 | bool ret = true; | ||
1220 | 1228 | ||
1221 | if (rt_is_input_route(rt)) { | 1229 | if (rt_is_input_route(rt)) { |
1222 | p = (struct rtable **)&nh->nh_rth_input; | 1230 | p = (struct rtable **)&nh->nh_rth_input; |
@@ -1239,6 +1247,48 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) | |||
1239 | */ | 1247 | */ |
1240 | nocache: | 1248 | nocache: |
1241 | rt->dst.flags |= DST_NOCACHE; | 1249 | rt->dst.flags |= DST_NOCACHE; |
1250 | ret = false; | ||
1251 | } | ||
1252 | |||
1253 | return ret; | ||
1254 | } | ||
1255 | |||
1256 | static DEFINE_SPINLOCK(rt_uncached_lock); | ||
1257 | static LIST_HEAD(rt_uncached_list); | ||
1258 | |||
1259 | static void rt_add_uncached_list(struct rtable *rt) | ||
1260 | { | ||
1261 | spin_lock_bh(&rt_uncached_lock); | ||
1262 | list_add_tail(&rt->rt_uncached, &rt_uncached_list); | ||
1263 | spin_unlock_bh(&rt_uncached_lock); | ||
1264 | } | ||
1265 | |||
1266 | static void ipv4_dst_destroy(struct dst_entry *dst) | ||
1267 | { | ||
1268 | struct rtable *rt = (struct rtable *) dst; | ||
1269 | |||
1270 | if (dst->flags & DST_NOCACHE) { | ||
1271 | spin_lock_bh(&rt_uncached_lock); | ||
1272 | list_del(&rt->rt_uncached); | ||
1273 | spin_unlock_bh(&rt_uncached_lock); | ||
1274 | } | ||
1275 | } | ||
1276 | |||
1277 | void rt_flush_dev(struct net_device *dev) | ||
1278 | { | ||
1279 | if (!list_empty(&rt_uncached_list)) { | ||
1280 | struct net *net = dev_net(dev); | ||
1281 | struct rtable *rt; | ||
1282 | |||
1283 | spin_lock_bh(&rt_uncached_lock); | ||
1284 | list_for_each_entry(rt, &rt_uncached_list, rt_uncached) { | ||
1285 | if (rt->dst.dev != dev) | ||
1286 | continue; | ||
1287 | rt->dst.dev = net->loopback_dev; | ||
1288 | dev_hold(rt->dst.dev); | ||
1289 | dev_put(dev); | ||
1290 | } | ||
1291 | spin_unlock_bh(&rt_uncached_lock); | ||
1242 | } | 1292 | } |
1243 | } | 1293 | } |
1244 | 1294 | ||
@@ -1254,6 +1304,8 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, | |||
1254 | struct fib_nh_exception *fnhe, | 1304 | struct fib_nh_exception *fnhe, |
1255 | struct fib_info *fi, u16 type, u32 itag) | 1305 | struct fib_info *fi, u16 type, u32 itag) |
1256 | { | 1306 | { |
1307 | bool cached = false; | ||
1308 | |||
1257 | if (fi) { | 1309 | if (fi) { |
1258 | struct fib_nh *nh = &FIB_RES_NH(*res); | 1310 | struct fib_nh *nh = &FIB_RES_NH(*res); |
1259 | 1311 | ||
@@ -1264,10 +1316,12 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, | |||
1264 | rt->dst.tclassid = nh->nh_tclassid; | 1316 | rt->dst.tclassid = nh->nh_tclassid; |
1265 | #endif | 1317 | #endif |
1266 | if (unlikely(fnhe)) | 1318 | if (unlikely(fnhe)) |
1267 | rt_bind_exception(rt, fnhe, daddr); | 1319 | cached = rt_bind_exception(rt, fnhe, daddr); |
1268 | else if (!(rt->dst.flags & DST_NOCACHE)) | 1320 | else if (!(rt->dst.flags & DST_NOCACHE)) |
1269 | rt_cache_route(nh, rt); | 1321 | cached = rt_cache_route(nh, rt); |
1270 | } | 1322 | } |
1323 | if (unlikely(!cached)) | ||
1324 | rt_add_uncached_list(rt); | ||
1271 | 1325 | ||
1272 | #ifdef CONFIG_IP_ROUTE_CLASSID | 1326 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1273 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 1327 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
@@ -1334,6 +1388,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1334 | rth->rt_iif = 0; | 1388 | rth->rt_iif = 0; |
1335 | rth->rt_pmtu = 0; | 1389 | rth->rt_pmtu = 0; |
1336 | rth->rt_gateway = 0; | 1390 | rth->rt_gateway = 0; |
1391 | INIT_LIST_HEAD(&rth->rt_uncached); | ||
1337 | if (our) { | 1392 | if (our) { |
1338 | rth->dst.input= ip_local_deliver; | 1393 | rth->dst.input= ip_local_deliver; |
1339 | rth->rt_flags |= RTCF_LOCAL; | 1394 | rth->rt_flags |= RTCF_LOCAL; |
@@ -1459,6 +1514,7 @@ static int __mkroute_input(struct sk_buff *skb, | |||
1459 | rth->rt_iif = 0; | 1514 | rth->rt_iif = 0; |
1460 | rth->rt_pmtu = 0; | 1515 | rth->rt_pmtu = 0; |
1461 | rth->rt_gateway = 0; | 1516 | rth->rt_gateway = 0; |
1517 | INIT_LIST_HEAD(&rth->rt_uncached); | ||
1462 | 1518 | ||
1463 | rth->dst.input = ip_forward; | 1519 | rth->dst.input = ip_forward; |
1464 | rth->dst.output = ip_output; | 1520 | rth->dst.output = ip_output; |
@@ -1625,6 +1681,7 @@ local_input: | |||
1625 | rth->rt_iif = 0; | 1681 | rth->rt_iif = 0; |
1626 | rth->rt_pmtu = 0; | 1682 | rth->rt_pmtu = 0; |
1627 | rth->rt_gateway = 0; | 1683 | rth->rt_gateway = 0; |
1684 | INIT_LIST_HEAD(&rth->rt_uncached); | ||
1628 | if (res.type == RTN_UNREACHABLE) { | 1685 | if (res.type == RTN_UNREACHABLE) { |
1629 | rth->dst.input= ip_error; | 1686 | rth->dst.input= ip_error; |
1630 | rth->dst.error= -err; | 1687 | rth->dst.error= -err; |
@@ -1792,6 +1849,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
1792 | rth->rt_iif = orig_oif ? : 0; | 1849 | rth->rt_iif = orig_oif ? : 0; |
1793 | rth->rt_pmtu = 0; | 1850 | rth->rt_pmtu = 0; |
1794 | rth->rt_gateway = 0; | 1851 | rth->rt_gateway = 0; |
1852 | INIT_LIST_HEAD(&rth->rt_uncached); | ||
1795 | 1853 | ||
1796 | RT_CACHE_STAT_INC(out_slow_tot); | 1854 | RT_CACHE_STAT_INC(out_slow_tot); |
1797 | 1855 | ||
@@ -2071,6 +2129,8 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or | |||
2071 | rt->rt_type = ort->rt_type; | 2129 | rt->rt_type = ort->rt_type; |
2072 | rt->rt_gateway = ort->rt_gateway; | 2130 | rt->rt_gateway = ort->rt_gateway; |
2073 | 2131 | ||
2132 | INIT_LIST_HEAD(&rt->rt_uncached); | ||
2133 | |||
2074 | dst_free(new); | 2134 | dst_free(new); |
2075 | } | 2135 | } |
2076 | 2136 | ||
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index c6281847f16a..681ea2f413e2 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c | |||
@@ -92,6 +92,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, | |||
92 | xdst->u.rt.rt_type = rt->rt_type; | 92 | xdst->u.rt.rt_type = rt->rt_type; |
93 | xdst->u.rt.rt_gateway = rt->rt_gateway; | 93 | xdst->u.rt.rt_gateway = rt->rt_gateway; |
94 | xdst->u.rt.rt_pmtu = rt->rt_pmtu; | 94 | xdst->u.rt.rt_pmtu = rt->rt_pmtu; |
95 | INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); | ||
95 | 96 | ||
96 | return 0; | 97 | return 0; |
97 | } | 98 | } |