diff options
author | Julian Anastasov <ja@ssi.bg> | 2012-10-08 07:41:18 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-10-08 17:42:36 -0400 |
commit | 155e8336c373d14d87a7f91e356d85ef4b93b8f9 (patch) | |
tree | 5d31e9d9c197129fcf26fd8ae9d54da770503a6c | |
parent | f8a17175c63fd3e8b573719f7538816f8c96abf4 (diff) |
ipv4: introduce rt_uses_gateway
Add a new flag to remember when a route is via a gateway.
We will use it to allow rt_gateway to contain the address of a
directly connected host for the cases when DST_NOCACHE is
used or when the NH exception caches a per-destination route
without the DST_NOCACHE flag, i.e. when routes are not used for
other destinations. This way we force neighbour resolution
to work with the routed destination while still being able to
use a different address in the packet, a feature needed
for IPVS-DR, where the original packet for the virtual IP is routed
via the route to the real IP.
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/route.h | 3 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 4 | ||||
-rw-r--r-- | net/ipv4/ip_forward.c | 2 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 4 | ||||
-rw-r--r-- | net/ipv4/route.c | 48 | ||||
-rw-r--r-- | net/ipv4/xfrm4_policy.c | 1 |
6 files changed, 34 insertions, 28 deletions
diff --git a/include/net/route.h b/include/net/route.h index da22243d2760..bc40b633a5c4 100644 --- a/include/net/route.h +++ b/include/net/route.h | |||
@@ -48,7 +48,8 @@ struct rtable { | |||
48 | int rt_genid; | 48 | int rt_genid; |
49 | unsigned int rt_flags; | 49 | unsigned int rt_flags; |
50 | __u16 rt_type; | 50 | __u16 rt_type; |
51 | __u16 rt_is_input; | 51 | __u8 rt_is_input; |
52 | __u8 rt_uses_gateway; | ||
52 | 53 | ||
53 | int rt_iif; | 54 | int rt_iif; |
54 | 55 | ||
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f0c5b9c1a957..d34ce2972c8f 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -406,7 +406,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, | |||
406 | rt = ip_route_output_flow(net, fl4, sk); | 406 | rt = ip_route_output_flow(net, fl4, sk); |
407 | if (IS_ERR(rt)) | 407 | if (IS_ERR(rt)) |
408 | goto no_route; | 408 | goto no_route; |
409 | if (opt && opt->opt.is_strictroute && rt->rt_gateway) | 409 | if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) |
410 | goto route_err; | 410 | goto route_err; |
411 | return &rt->dst; | 411 | return &rt->dst; |
412 | 412 | ||
@@ -442,7 +442,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, | |||
442 | rt = ip_route_output_flow(net, fl4, sk); | 442 | rt = ip_route_output_flow(net, fl4, sk); |
443 | if (IS_ERR(rt)) | 443 | if (IS_ERR(rt)) |
444 | goto no_route; | 444 | goto no_route; |
445 | if (opt && opt->opt.is_strictroute && rt->rt_gateway) | 445 | if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) |
446 | goto route_err; | 446 | goto route_err; |
447 | rcu_read_unlock(); | 447 | rcu_read_unlock(); |
448 | return &rt->dst; | 448 | return &rt->dst; |
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 7f35ac26a71a..694de3b7aebf 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c | |||
@@ -85,7 +85,7 @@ int ip_forward(struct sk_buff *skb) | |||
85 | 85 | ||
86 | rt = skb_rtable(skb); | 86 | rt = skb_rtable(skb); |
87 | 87 | ||
88 | if (opt->is_strictroute && rt->rt_gateway) | 88 | if (opt->is_strictroute && rt->rt_uses_gateway) |
89 | goto sr_failed; | 89 | goto sr_failed; |
90 | 90 | ||
91 | if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) && | 91 | if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) && |
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 24a29a39e9a8..6537a408a4fb 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -193,7 +193,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) | |||
193 | } | 193 | } |
194 | 194 | ||
195 | rcu_read_lock_bh(); | 195 | rcu_read_lock_bh(); |
196 | nexthop = rt->rt_gateway ? rt->rt_gateway : ip_hdr(skb)->daddr; | 196 | nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr); |
197 | neigh = __ipv4_neigh_lookup_noref(dev, nexthop); | 197 | neigh = __ipv4_neigh_lookup_noref(dev, nexthop); |
198 | if (unlikely(!neigh)) | 198 | if (unlikely(!neigh)) |
199 | neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); | 199 | neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); |
@@ -371,7 +371,7 @@ int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl) | |||
371 | skb_dst_set_noref(skb, &rt->dst); | 371 | skb_dst_set_noref(skb, &rt->dst); |
372 | 372 | ||
373 | packet_routed: | 373 | packet_routed: |
374 | if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gateway) | 374 | if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway) |
375 | goto no_route; | 375 | goto no_route; |
376 | 376 | ||
377 | /* OK, we know where to send it, allocate and build IP header. */ | 377 | /* OK, we know where to send it, allocate and build IP header. */ |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 5b0180f11b20..3a116cb0991a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -1126,7 +1126,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) | |||
1126 | mtu = dst->dev->mtu; | 1126 | mtu = dst->dev->mtu; |
1127 | 1127 | ||
1128 | if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { | 1128 | if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { |
1129 | if (rt->rt_gateway && mtu > 576) | 1129 | if (rt->rt_uses_gateway && mtu > 576) |
1130 | mtu = 576; | 1130 | mtu = 576; |
1131 | } | 1131 | } |
1132 | 1132 | ||
@@ -1177,7 +1177,9 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, | |||
1177 | if (fnhe->fnhe_gw) { | 1177 | if (fnhe->fnhe_gw) { |
1178 | rt->rt_flags |= RTCF_REDIRECTED; | 1178 | rt->rt_flags |= RTCF_REDIRECTED; |
1179 | rt->rt_gateway = fnhe->fnhe_gw; | 1179 | rt->rt_gateway = fnhe->fnhe_gw; |
1180 | } | 1180 | rt->rt_uses_gateway = 1; |
1181 | } else if (!rt->rt_gateway) | ||
1182 | rt->rt_gateway = daddr; | ||
1181 | 1183 | ||
1182 | orig = rcu_dereference(fnhe->fnhe_rth); | 1184 | orig = rcu_dereference(fnhe->fnhe_rth); |
1183 | rcu_assign_pointer(fnhe->fnhe_rth, rt); | 1185 | rcu_assign_pointer(fnhe->fnhe_rth, rt); |
@@ -1186,13 +1188,6 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, | |||
1186 | 1188 | ||
1187 | fnhe->fnhe_stamp = jiffies; | 1189 | fnhe->fnhe_stamp = jiffies; |
1188 | ret = true; | 1190 | ret = true; |
1189 | } else { | ||
1190 | /* Routes we intend to cache in nexthop exception have | ||
1191 | * the DST_NOCACHE bit clear. However, if we are | ||
1192 | * unsuccessful at storing this route into the cache | ||
1193 | * we really need to set it. | ||
1194 | */ | ||
1195 | rt->dst.flags |= DST_NOCACHE; | ||
1196 | } | 1191 | } |
1197 | spin_unlock_bh(&fnhe_lock); | 1192 | spin_unlock_bh(&fnhe_lock); |
1198 | 1193 | ||
@@ -1215,15 +1210,8 @@ static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) | |||
1215 | if (prev == orig) { | 1210 | if (prev == orig) { |
1216 | if (orig) | 1211 | if (orig) |
1217 | rt_free(orig); | 1212 | rt_free(orig); |
1218 | } else { | 1213 | } else |
1219 | /* Routes we intend to cache in the FIB nexthop have | ||
1220 | * the DST_NOCACHE bit clear. However, if we are | ||
1221 | * unsuccessful at storing this route into the cache | ||
1222 | * we really need to set it. | ||
1223 | */ | ||
1224 | rt->dst.flags |= DST_NOCACHE; | ||
1225 | ret = false; | 1214 | ret = false; |
1226 | } | ||
1227 | 1215 | ||
1228 | return ret; | 1216 | return ret; |
1229 | } | 1217 | } |
@@ -1284,8 +1272,10 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, | |||
1284 | if (fi) { | 1272 | if (fi) { |
1285 | struct fib_nh *nh = &FIB_RES_NH(*res); | 1273 | struct fib_nh *nh = &FIB_RES_NH(*res); |
1286 | 1274 | ||
1287 | if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) | 1275 | if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) { |
1288 | rt->rt_gateway = nh->nh_gw; | 1276 | rt->rt_gateway = nh->nh_gw; |
1277 | rt->rt_uses_gateway = 1; | ||
1278 | } | ||
1289 | dst_init_metrics(&rt->dst, fi->fib_metrics, true); | 1279 | dst_init_metrics(&rt->dst, fi->fib_metrics, true); |
1290 | #ifdef CONFIG_IP_ROUTE_CLASSID | 1280 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1291 | rt->dst.tclassid = nh->nh_tclassid; | 1281 | rt->dst.tclassid = nh->nh_tclassid; |
@@ -1294,8 +1284,18 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, | |||
1294 | cached = rt_bind_exception(rt, fnhe, daddr); | 1284 | cached = rt_bind_exception(rt, fnhe, daddr); |
1295 | else if (!(rt->dst.flags & DST_NOCACHE)) | 1285 | else if (!(rt->dst.flags & DST_NOCACHE)) |
1296 | cached = rt_cache_route(nh, rt); | 1286 | cached = rt_cache_route(nh, rt); |
1297 | } | 1287 | if (unlikely(!cached)) { |
1298 | if (unlikely(!cached)) | 1288 | /* Routes we intend to cache in nexthop exception or |
1289 | * FIB nexthop have the DST_NOCACHE bit clear. | ||
1290 | * However, if we are unsuccessful at storing this | ||
1291 | * route into the cache we really need to set it. | ||
1292 | */ | ||
1293 | rt->dst.flags |= DST_NOCACHE; | ||
1294 | if (!rt->rt_gateway) | ||
1295 | rt->rt_gateway = daddr; | ||
1296 | rt_add_uncached_list(rt); | ||
1297 | } | ||
1298 | } else | ||
1299 | rt_add_uncached_list(rt); | 1299 | rt_add_uncached_list(rt); |
1300 | 1300 | ||
1301 | #ifdef CONFIG_IP_ROUTE_CLASSID | 1301 | #ifdef CONFIG_IP_ROUTE_CLASSID |
@@ -1363,6 +1363,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1363 | rth->rt_iif = 0; | 1363 | rth->rt_iif = 0; |
1364 | rth->rt_pmtu = 0; | 1364 | rth->rt_pmtu = 0; |
1365 | rth->rt_gateway = 0; | 1365 | rth->rt_gateway = 0; |
1366 | rth->rt_uses_gateway = 0; | ||
1366 | INIT_LIST_HEAD(&rth->rt_uncached); | 1367 | INIT_LIST_HEAD(&rth->rt_uncached); |
1367 | if (our) { | 1368 | if (our) { |
1368 | rth->dst.input= ip_local_deliver; | 1369 | rth->dst.input= ip_local_deliver; |
@@ -1432,7 +1433,6 @@ static int __mkroute_input(struct sk_buff *skb, | |||
1432 | return -EINVAL; | 1433 | return -EINVAL; |
1433 | } | 1434 | } |
1434 | 1435 | ||
1435 | |||
1436 | err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), | 1436 | err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), |
1437 | in_dev->dev, in_dev, &itag); | 1437 | in_dev->dev, in_dev, &itag); |
1438 | if (err < 0) { | 1438 | if (err < 0) { |
@@ -1488,6 +1488,7 @@ static int __mkroute_input(struct sk_buff *skb, | |||
1488 | rth->rt_iif = 0; | 1488 | rth->rt_iif = 0; |
1489 | rth->rt_pmtu = 0; | 1489 | rth->rt_pmtu = 0; |
1490 | rth->rt_gateway = 0; | 1490 | rth->rt_gateway = 0; |
1491 | rth->rt_uses_gateway = 0; | ||
1491 | INIT_LIST_HEAD(&rth->rt_uncached); | 1492 | INIT_LIST_HEAD(&rth->rt_uncached); |
1492 | 1493 | ||
1493 | rth->dst.input = ip_forward; | 1494 | rth->dst.input = ip_forward; |
@@ -1658,6 +1659,7 @@ local_input: | |||
1658 | rth->rt_iif = 0; | 1659 | rth->rt_iif = 0; |
1659 | rth->rt_pmtu = 0; | 1660 | rth->rt_pmtu = 0; |
1660 | rth->rt_gateway = 0; | 1661 | rth->rt_gateway = 0; |
1662 | rth->rt_uses_gateway = 0; | ||
1661 | INIT_LIST_HEAD(&rth->rt_uncached); | 1663 | INIT_LIST_HEAD(&rth->rt_uncached); |
1662 | if (res.type == RTN_UNREACHABLE) { | 1664 | if (res.type == RTN_UNREACHABLE) { |
1663 | rth->dst.input= ip_error; | 1665 | rth->dst.input= ip_error; |
@@ -1826,6 +1828,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
1826 | rth->rt_iif = orig_oif ? : 0; | 1828 | rth->rt_iif = orig_oif ? : 0; |
1827 | rth->rt_pmtu = 0; | 1829 | rth->rt_pmtu = 0; |
1828 | rth->rt_gateway = 0; | 1830 | rth->rt_gateway = 0; |
1831 | rth->rt_uses_gateway = 0; | ||
1829 | INIT_LIST_HEAD(&rth->rt_uncached); | 1832 | INIT_LIST_HEAD(&rth->rt_uncached); |
1830 | 1833 | ||
1831 | RT_CACHE_STAT_INC(out_slow_tot); | 1834 | RT_CACHE_STAT_INC(out_slow_tot); |
@@ -2104,6 +2107,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or | |||
2104 | rt->rt_flags = ort->rt_flags; | 2107 | rt->rt_flags = ort->rt_flags; |
2105 | rt->rt_type = ort->rt_type; | 2108 | rt->rt_type = ort->rt_type; |
2106 | rt->rt_gateway = ort->rt_gateway; | 2109 | rt->rt_gateway = ort->rt_gateway; |
2110 | rt->rt_uses_gateway = ort->rt_uses_gateway; | ||
2107 | 2111 | ||
2108 | INIT_LIST_HEAD(&rt->rt_uncached); | 2112 | INIT_LIST_HEAD(&rt->rt_uncached); |
2109 | 2113 | ||
@@ -2182,7 +2186,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, | |||
2182 | if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr)) | 2186 | if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr)) |
2183 | goto nla_put_failure; | 2187 | goto nla_put_failure; |
2184 | } | 2188 | } |
2185 | if (rt->rt_gateway && | 2189 | if (rt->rt_uses_gateway && |
2186 | nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway)) | 2190 | nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway)) |
2187 | goto nla_put_failure; | 2191 | goto nla_put_failure; |
2188 | 2192 | ||
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 681ea2f413e2..05c5ab8d983c 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
@@ -91,6 +91,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, | |||
91 | RTCF_LOCAL); | 91 | RTCF_LOCAL); |
92 | xdst->u.rt.rt_type = rt->rt_type; | 92 | xdst->u.rt.rt_type = rt->rt_type; |
93 | xdst->u.rt.rt_gateway = rt->rt_gateway; | 93 | xdst->u.rt.rt_gateway = rt->rt_gateway; |
94 | xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway; | ||
94 | xdst->u.rt.rt_pmtu = rt->rt_pmtu; | 95 | xdst->u.rt.rt_pmtu = rt->rt_pmtu; |
95 | INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); | 96 | INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); |
96 | 97 | ||