aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2012-07-17 13:48:26 -0400
committerDavid S. Miller <davem@davemloft.net>2012-07-17 13:48:26 -0400
commita6ff1a2f1e91578860b37df9fd861ef7af207de4 (patch)
tree1692579976add2fa59ab3fe008e4b0d36ec7ee30 /net/ipv6
parentbd2d0837abc0206ecdd3f6b9fc8c25b55b63c96b (diff)
parent4895c771c7f006b4b90f9d6b1d2210939ba57b38 (diff)
Merge branch 'nexthop_exceptions'
These patches implement the final mechanism necessary to really allow us to go without the route cache in ipv4. We need a place to have long-term storage of PMTU/redirect information which is independent of the routes themselves, yet does not get us back into a situation where we have to write to metrics or anything like that. For this we use an "next-hop exception" table in the FIB nexthops. The one thing I desperately want to avoid is having to create clone routes in the FIB trie for this purpose, because that is very expensive. However, I'm willing to entertain such an idea later if this current scheme proves to have downsides that the FIB trie variant would not have. In order to accomodate this any such scheme, we need to be able to produce a full flow key at PMTU/redirect time. That required an adjustment of the interface call-sites used to propagate these events. For a PMTU/redirect with a fully specified socket, we pass that socket and use it to produce the flow key. Otherwise we use a passed in SKB to formulate the key. There are two cases that need to be distinguished, ICMP message processing (in which case the IP header is at skb->data) and output packet processing (mostly tunnels, and in all such cases the IP header is at ip_hdr(skb)). We also have to make the code able to handle the case where the dst itself passed into the dst_ops->{update_pmtu,redirect} method is invalidated. This matters for calls from sockets that have cached that route. We provide a inet{,6} helper function for this purpose, and edit SCTP specially since it caches routes at the transport rather than socket level. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/inet6_connection_sock.c49
-rw-r--r--net/ipv6/ip6_tunnel.c6
-rw-r--r--net/ipv6/route.c21
-rw-r--r--net/ipv6/sit.c2
-rw-r--r--net/ipv6/tcp_ipv6.c39
-rw-r--r--net/ipv6/xfrm6_policy.c10
6 files changed, 63 insertions, 64 deletions
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index bceb14450a1..4a0c4d2d8b0 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -203,15 +203,13 @@ struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
203 return dst; 203 return dst;
204} 204}
205 205
206int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) 206static struct dst_entry *inet6_csk_route_socket(struct sock *sk)
207{ 207{
208 struct sock *sk = skb->sk;
209 struct inet_sock *inet = inet_sk(sk); 208 struct inet_sock *inet = inet_sk(sk);
210 struct ipv6_pinfo *np = inet6_sk(sk); 209 struct ipv6_pinfo *np = inet6_sk(sk);
211 struct flowi6 fl6;
212 struct dst_entry *dst;
213 struct in6_addr *final_p, final; 210 struct in6_addr *final_p, final;
214 int res; 211 struct dst_entry *dst;
212 struct flowi6 fl6;
215 213
216 memset(&fl6, 0, sizeof(fl6)); 214 memset(&fl6, 0, sizeof(fl6));
217 fl6.flowi6_proto = sk->sk_protocol; 215 fl6.flowi6_proto = sk->sk_protocol;
@@ -228,18 +226,29 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
228 final_p = fl6_update_dst(&fl6, np->opt, &final); 226 final_p = fl6_update_dst(&fl6, np->opt, &final);
229 227
230 dst = __inet6_csk_dst_check(sk, np->dst_cookie); 228 dst = __inet6_csk_dst_check(sk, np->dst_cookie);
231 229 if (!dst) {
232 if (dst == NULL) {
233 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); 230 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
234 231
235 if (IS_ERR(dst)) { 232 if (!IS_ERR(dst))
236 sk->sk_err_soft = -PTR_ERR(dst); 233 __inet6_csk_dst_store(sk, dst, NULL, NULL);
237 sk->sk_route_caps = 0; 234 }
238 kfree_skb(skb); 235 return dst;
239 return PTR_ERR(dst); 236}
240 }
241 237
242 __inet6_csk_dst_store(sk, dst, NULL, NULL); 238int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
239{
240 struct sock *sk = skb->sk;
241 struct ipv6_pinfo *np = inet6_sk(sk);
242 struct flowi6 fl6;
243 struct dst_entry *dst;
244 int res;
245
246 dst = inet6_csk_route_socket(sk);
247 if (IS_ERR(dst)) {
248 sk->sk_err_soft = -PTR_ERR(dst);
249 sk->sk_route_caps = 0;
250 kfree_skb(skb);
251 return PTR_ERR(dst);
243 } 252 }
244 253
245 rcu_read_lock(); 254 rcu_read_lock();
@@ -253,3 +262,15 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
253 return res; 262 return res;
254} 263}
255EXPORT_SYMBOL_GPL(inet6_csk_xmit); 264EXPORT_SYMBOL_GPL(inet6_csk_xmit);
265
266struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu)
267{
268 struct dst_entry *dst = inet6_csk_route_socket(sk);
269
270 if (IS_ERR(dst))
271 return NULL;
272 dst->ops->update_pmtu(dst, sk, NULL, mtu);
273
274 return inet6_csk_route_socket(sk);
275}
276EXPORT_SYMBOL_GPL(inet6_csk_update_pmtu);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 61d10659729..db328466796 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -609,10 +609,10 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
609 if (rel_info > dst_mtu(skb_dst(skb2))) 609 if (rel_info > dst_mtu(skb_dst(skb2)))
610 goto out; 610 goto out;
611 611
612 skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info); 612 skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info);
613 } 613 }
614 if (rel_type == ICMP_REDIRECT) 614 if (rel_type == ICMP_REDIRECT)
615 skb_dst(skb2)->ops->redirect(skb_dst(skb2), skb2); 615 skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2);
616 616
617 icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); 617 icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
618 618
@@ -952,7 +952,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
952 if (mtu < IPV6_MIN_MTU) 952 if (mtu < IPV6_MIN_MTU)
953 mtu = IPV6_MIN_MTU; 953 mtu = IPV6_MIN_MTU;
954 if (skb_dst(skb)) 954 if (skb_dst(skb))
955 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); 955 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
956 if (skb->len > mtu) { 956 if (skb->len > mtu) {
957 *pmtu = mtu; 957 *pmtu = mtu;
958 err = -EMSGSIZE; 958 err = -EMSGSIZE;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 412fad809a3..84f6564dd37 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -78,8 +78,10 @@ static int ip6_dst_gc(struct dst_ops *ops);
78static int ip6_pkt_discard(struct sk_buff *skb); 78static int ip6_pkt_discard(struct sk_buff *skb);
79static int ip6_pkt_discard_out(struct sk_buff *skb); 79static int ip6_pkt_discard_out(struct sk_buff *skb);
80static void ip6_link_failure(struct sk_buff *skb); 80static void ip6_link_failure(struct sk_buff *skb);
81static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); 81static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
82static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb); 82 struct sk_buff *skb, u32 mtu);
83static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
84 struct sk_buff *skb);
83 85
84#ifdef CONFIG_IPV6_ROUTE_INFO 86#ifdef CONFIG_IPV6_ROUTE_INFO
85static struct rt6_info *rt6_add_route_info(struct net *net, 87static struct rt6_info *rt6_add_route_info(struct net *net,
@@ -187,11 +189,13 @@ static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
187 return mtu ? : dst->dev->mtu; 189 return mtu ? : dst->dev->mtu;
188} 190}
189 191
190static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) 192static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
193 struct sk_buff *skb, u32 mtu)
191{ 194{
192} 195}
193 196
194static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sk_buff *skb) 197static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
198 struct sk_buff *skb)
195{ 199{
196} 200}
197 201
@@ -1071,7 +1075,8 @@ static void ip6_link_failure(struct sk_buff *skb)
1071 } 1075 }
1072} 1076}
1073 1077
1074static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) 1078static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1079 struct sk_buff *skb, u32 mtu)
1075{ 1080{
1076 struct rt6_info *rt6 = (struct rt6_info*)dst; 1081 struct rt6_info *rt6 = (struct rt6_info*)dst;
1077 1082
@@ -1108,7 +1113,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1108 1113
1109 dst = ip6_route_output(net, NULL, &fl6); 1114 dst = ip6_route_output(net, NULL, &fl6);
1110 if (!dst->error) 1115 if (!dst->error)
1111 ip6_rt_update_pmtu(dst, ntohl(mtu)); 1116 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1112 dst_release(dst); 1117 dst_release(dst);
1113} 1118}
1114EXPORT_SYMBOL_GPL(ip6_update_pmtu); 1119EXPORT_SYMBOL_GPL(ip6_update_pmtu);
@@ -1136,7 +1141,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1136 1141
1137 dst = ip6_route_output(net, NULL, &fl6); 1142 dst = ip6_route_output(net, NULL, &fl6);
1138 if (!dst->error) 1143 if (!dst->error)
1139 rt6_do_redirect(dst, skb); 1144 rt6_do_redirect(dst, NULL, skb);
1140 dst_release(dst); 1145 dst_release(dst);
1141} 1146}
1142EXPORT_SYMBOL_GPL(ip6_redirect); 1147EXPORT_SYMBOL_GPL(ip6_redirect);
@@ -1639,7 +1644,7 @@ static int ip6_route_del(struct fib6_config *cfg)
1639 return err; 1644 return err;
1640} 1645}
1641 1646
1642static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb) 1647static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1643{ 1648{
1644 struct net *net = dev_net(skb->dev); 1649 struct net *net = dev_net(skb->dev);
1645 struct netevent_redirect netevent; 1650 struct netevent_redirect netevent;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index fbf1622fdee..3bd1bfc01f8 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -807,7 +807,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
807 } 807 }
808 808
809 if (tunnel->parms.iph.daddr && skb_dst(skb)) 809 if (tunnel->parms.iph.daddr && skb_dst(skb))
810 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); 810 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
811 811
812 if (skb->len > mtu) { 812 if (skb->len > mtu) {
813 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 813 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3071f377145..c9dabdd832d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -367,7 +367,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
367 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); 367 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
368 368
369 if (dst) 369 if (dst)
370 dst->ops->redirect(dst,skb); 370 dst->ops->redirect(dst, sk, skb);
371 } 371 }
372 372
373 if (type == ICMPV6_PKT_TOOBIG) { 373 if (type == ICMPV6_PKT_TOOBIG) {
@@ -378,43 +378,14 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
378 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) 378 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
379 goto out; 379 goto out;
380 380
381 /* icmp should have updated the destination cache entry */ 381 dst = inet6_csk_update_pmtu(sk, ntohl(info));
382 dst = __sk_dst_check(sk, np->dst_cookie); 382 if (!dst)
383 383 goto out;
384 if (dst == NULL) {
385 struct inet_sock *inet = inet_sk(sk);
386 struct flowi6 fl6;
387
388 /* BUGGG_FUTURE: Again, it is not clear how
389 to handle rthdr case. Ignore this complexity
390 for now.
391 */
392 memset(&fl6, 0, sizeof(fl6));
393 fl6.flowi6_proto = IPPROTO_TCP;
394 fl6.daddr = np->daddr;
395 fl6.saddr = np->saddr;
396 fl6.flowi6_oif = sk->sk_bound_dev_if;
397 fl6.flowi6_mark = sk->sk_mark;
398 fl6.fl6_dport = inet->inet_dport;
399 fl6.fl6_sport = inet->inet_sport;
400 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
401
402 dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false);
403 if (IS_ERR(dst)) {
404 sk->sk_err_soft = -PTR_ERR(dst);
405 goto out;
406 }
407
408 } else
409 dst_hold(dst);
410
411 dst->ops->update_pmtu(dst, ntohl(info));
412 384
413 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { 385 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
414 tcp_sync_mss(sk, dst_mtu(dst)); 386 tcp_sync_mss(sk, dst_mtu(dst));
415 tcp_simple_retransmit(sk); 387 tcp_simple_retransmit(sk);
416 } /* else let the usual retransmit timer handle it */ 388 }
417 dst_release(dst);
418 goto out; 389 goto out;
419 } 390 }
420 391
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index f5a9cb8257b..ef39812107b 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -207,20 +207,22 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops)
207 return dst_entries_get_fast(ops) > ops->gc_thresh * 2; 207 return dst_entries_get_fast(ops) > ops->gc_thresh * 2;
208} 208}
209 209
210static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) 210static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
211 struct sk_buff *skb, u32 mtu)
211{ 212{
212 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 213 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
213 struct dst_entry *path = xdst->route; 214 struct dst_entry *path = xdst->route;
214 215
215 path->ops->update_pmtu(path, mtu); 216 path->ops->update_pmtu(path, sk, skb, mtu);
216} 217}
217 218
218static void xfrm6_redirect(struct dst_entry *dst, struct sk_buff *skb) 219static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk,
220 struct sk_buff *skb)
219{ 221{
220 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 222 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
221 struct dst_entry *path = xdst->route; 223 struct dst_entry *path = xdst->route;
222 224
223 path->ops->redirect(path, skb); 225 path->ops->redirect(path, sk, skb);
224} 226}
225 227
226static void xfrm6_dst_destroy(struct dst_entry *dst) 228static void xfrm6_dst_destroy(struct dst_entry *dst)