author		David S. Miller <davem@davemloft.net>	2012-07-17 13:48:26 -0400
committer	David S. Miller <davem@davemloft.net>	2012-07-17 13:48:26 -0400
commit		a6ff1a2f1e91578860b37df9fd861ef7af207de4 (patch)
tree		1692579976add2fa59ab3fe008e4b0d36ec7ee30
parent		bd2d0837abc0206ecdd3f6b9fc8c25b55b63c96b (diff)
parent		4895c771c7f006b4b90f9d6b1d2210939ba57b38 (diff)
Merge branch 'nexthop_exceptions'
These patches implement the final mechanism necessary to really allow us to go without the route cache in ipv4.

We need a place to have long-term storage of PMTU/redirect information which is independent of the routes themselves, yet does not get us back into a situation where we have to write to metrics or anything like that.

For this we use a "next-hop exception" table in the FIB nexthops.

The one thing I desperately want to avoid is having to create clone routes in the FIB trie for this purpose, because that is very expensive. However, I'm willing to entertain such an idea later if this current scheme proves to have downsides that the FIB trie variant would not have.

In order to accommodate any such scheme, we need to be able to produce a full flow key at PMTU/redirect time. That required an adjustment of the interface call-sites used to propagate these events.

For a PMTU/redirect with a fully specified socket, we pass that socket and use it to produce the flow key.

Otherwise we use a passed-in SKB to formulate the key. There are two cases that need to be distinguished: ICMP message processing (in which case the IP header is at skb->data) and output packet processing (mostly tunnels, and in all such cases the IP header is at ip_hdr(skb)).

We also have to make the code able to handle the case where the dst itself passed into the dst_ops->{update_pmtu,redirect} method is invalidated. This matters for calls from sockets that have cached that route. We provide an inet{,6} helper function for this purpose, and handle SCTP specially since it caches routes at the transport level rather than at the socket level.

Signed-off-by: David S. Miller <davem@davemloft.net>
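[Editorial illustration, not part of the merge] A minimal userspace sketch of the exception-table idea described above: a hash table keyed on the destination address that remembers learned PMTU/gateway data with an expiry, instead of cloning routes. The names mirror the patch, but RCU, locking and the per-nexthop embedding are omitted, and the hash is reduced modulo the table size as a simplification; treat it as a model of the data structure, not as the kernel code.

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define FNHE_HASH_SIZE 2048

struct fib_nh_exception {
	struct fib_nh_exception *fnhe_next;
	uint32_t fnhe_daddr;
	uint32_t fnhe_pmtu;
	uint32_t fnhe_gw;
	time_t fnhe_expires;
};

static struct fib_nh_exception *fnhe_hash[FNHE_HASH_SIZE];

/* Mix the destination address the way the patch does, then reduce it
 * modulo the table size (a simplification for this sketch). */
static unsigned int fnhe_hashfn(uint32_t daddr)
{
	uint32_t hval = daddr;

	hval ^= (hval >> 11) ^ (hval >> 22);
	return hval % FNHE_HASH_SIZE;
}

static struct fib_nh_exception *fnhe_lookup(uint32_t daddr)
{
	struct fib_nh_exception *fnhe;

	for (fnhe = fnhe_hash[fnhe_hashfn(daddr)]; fnhe; fnhe = fnhe->fnhe_next)
		if (fnhe->fnhe_daddr == daddr)
			return fnhe;
	return NULL;
}

/* Record a learned PMTU for daddr, as an ICMP "fragmentation needed"
 * handler would, creating the exception entry on first use. */
static void fnhe_record_pmtu(uint32_t daddr, uint32_t pmtu, int lifetime)
{
	struct fib_nh_exception *fnhe = fnhe_lookup(daddr);

	if (!fnhe) {
		unsigned int h = fnhe_hashfn(daddr);

		fnhe = calloc(1, sizeof(*fnhe));
		if (!fnhe)
			return;
		fnhe->fnhe_daddr = daddr;
		fnhe->fnhe_next = fnhe_hash[h];
		fnhe_hash[h] = fnhe;
	}
	fnhe->fnhe_pmtu = pmtu;
	fnhe->fnhe_expires = time(NULL) + lifetime;
}

int main(void)
{
	uint32_t daddr = 0xc0a80001;	/* 192.168.0.1, example address */
	const struct fib_nh_exception *fnhe;

	fnhe_record_pmtu(daddr, 1400, 600);	/* learned PMTU, 10 minute lifetime */
	fnhe = fnhe_lookup(daddr);
	if (fnhe)
		printf("pmtu for %" PRIx32 ": %" PRIu32 "\n", daddr, fnhe->fnhe_pmtu);
	return 0;
}

In the merge below, one such table hangs off each struct fib_nh, insertions are serialized with a spinlock and lookups use RCU, and rt_bind_exception() consults it when a route is bound to a nexthop.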
 drivers/infiniband/ulp/ipoib/ipoib_cm.c |   2
 include/net/dst_ops.h                   |   6
 include/net/inet6_connection_sock.h     |   2
 include/net/inet_connection_sock.h      |   2
 include/net/ip_fib.h                    |  18
 include/net/sctp/sctp.h                 |   4
 include/net/sctp/structs.h              |   4
 net/bridge/br_netfilter.c               |   6
 net/dccp/ipv4.c                         |  13
 net/dccp/ipv6.c                         |  37
 net/decnet/dn_route.c                   |  12
 net/ipv4/fib_semantics.c                |  23
 net/ipv4/inet_connection_sock.c         |  46
 net/ipv4/ip_gre.c                       |   2
 net/ipv4/ipip.c                         |   2
 net/ipv4/route.c                        | 267
 net/ipv4/tcp_ipv4.c                     |  13
 net/ipv4/xfrm4_policy.c                 |  10
 net/ipv6/inet6_connection_sock.c        |  49
 net/ipv6/ip6_tunnel.c                   |   6
 net/ipv6/route.c                        |  21
 net/ipv6/sit.c                          |   2
 net/ipv6/tcp_ipv6.c                     |  39
 net/ipv6/xfrm6_policy.c                 |  10
 net/netfilter/ipvs/ip_vs_xmit.c         |   4
 net/sctp/associola.c                    |   4
 net/sctp/input.c                        |   6
 net/sctp/output.c                       |   2
 net/sctp/socket.c                       |   6
 net/sctp/transport.c                    |  14
 30 files changed, 449 insertions, 183 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 014504d8e43c..1ca732201f33 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1397,7 +1397,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
 	int e = skb_queue_empty(&priv->cm.skb_queue);
 
 	if (skb_dst(skb))
-		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 
 	skb_queue_tail(&priv->cm.skb_queue, skb);
 	if (e)
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 085931fa7ce0..d079fc61c123 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -24,8 +24,10 @@ struct dst_ops {
 					struct net_device *dev, int how);
 	struct dst_entry *	(*negative_advice)(struct dst_entry *);
 	void			(*link_failure)(struct sk_buff *);
-	void			(*update_pmtu)(struct dst_entry *dst, u32 mtu);
-	void			(*redirect)(struct dst_entry *dst, struct sk_buff *skb);
+	void			(*update_pmtu)(struct dst_entry *dst, struct sock *sk,
+					       struct sk_buff *skb, u32 mtu);
+	void			(*redirect)(struct dst_entry *dst, struct sock *sk,
+					    struct sk_buff *skb);
 	int			(*local_out)(struct sk_buff *skb);
 	struct neighbour *	(*neigh_lookup)(const struct dst_entry *dst,
 						struct sk_buff *skb,
diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h
index df2a857e853d..04642c920431 100644
--- a/include/net/inet6_connection_sock.h
+++ b/include/net/inet6_connection_sock.h
@@ -43,4 +43,6 @@ extern void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
 extern void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr);
 
 extern int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl);
+
+extern struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu);
 #endif /* _INET6_CONNECTION_SOCK_H */
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 291e7cee14e7..2cf44b4ed2e6 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -337,4 +337,6 @@ extern int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname,
 				      char __user *optval, int __user *optlen);
 extern int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
 				      char __user *optval, unsigned int optlen);
+
+extern struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
 #endif /* _INET_CONNECTION_SOCK_H */
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 5697acefeba3..e9ee1ca07087 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -18,6 +18,7 @@
 
 #include <net/flow.h>
 #include <linux/seq_file.h>
+#include <linux/rcupdate.h>
 #include <net/fib_rules.h>
 #include <net/inetpeer.h>
 
@@ -46,6 +47,22 @@ struct fib_config {
 
 struct fib_info;
 
+struct fib_nh_exception {
+	struct fib_nh_exception __rcu	*fnhe_next;
+	__be32				fnhe_daddr;
+	u32				fnhe_pmtu;
+	u32				fnhe_gw;
+	unsigned long			fnhe_expires;
+	unsigned long			fnhe_stamp;
+};
+
+struct fnhe_hash_bucket {
+	struct fib_nh_exception __rcu	*chain;
+};
+
+#define FNHE_HASH_SIZE		2048
+#define FNHE_RECLAIM_DEPTH	5
+
 struct fib_nh {
 	struct net_device	*nh_dev;
 	struct hlist_node	nh_hash;
@@ -63,6 +80,7 @@ struct fib_nh {
 	__be32			nh_gw;
 	__be32			nh_saddr;
 	int			nh_saddr_genid;
+	struct fnhe_hash_bucket	*nh_exceptions;
 };
 
 /*
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 1f2735dba753..ff499640528b 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -519,10 +519,10 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu)
 	return frag;
 }
 
-static inline void sctp_assoc_pending_pmtu(struct sctp_association *asoc)
+static inline void sctp_assoc_pending_pmtu(struct sock *sk, struct sctp_association *asoc)
 {
 
-	sctp_assoc_sync_pmtu(asoc);
+	sctp_assoc_sync_pmtu(sk, asoc);
 	asoc->pmtu_pending = 0;
 }
 
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index fecdf31816f2..536e439ddf1d 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1091,7 +1091,7 @@ void sctp_transport_burst_limited(struct sctp_transport *);
 void sctp_transport_burst_reset(struct sctp_transport *);
 unsigned long sctp_transport_timeout(struct sctp_transport *);
 void sctp_transport_reset(struct sctp_transport *);
-void sctp_transport_update_pmtu(struct sctp_transport *, u32);
+void sctp_transport_update_pmtu(struct sock *, struct sctp_transport *, u32);
 void sctp_transport_immediate_rtx(struct sctp_transport *);
 
 
@@ -2003,7 +2003,7 @@ void sctp_assoc_update(struct sctp_association *old,
 
 __u32 sctp_association_get_next_tsn(struct sctp_association *);
 
-void sctp_assoc_sync_pmtu(struct sctp_association *);
+void sctp_assoc_sync_pmtu(struct sock *, struct sctp_association *);
 void sctp_assoc_rwnd_increase(struct sctp_association *, unsigned int);
 void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned int);
 void sctp_assoc_set_primary(struct sctp_association *,
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 81f76c402cf2..68e8f364bbf8 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -111,11 +111,13 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb)
 	 pppoe_proto(skb) == htons(PPP_IPV6) && \
 	 brnf_filter_pppoe_tagged)
 
-static void fake_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk,
+			     struct sk_buff *skb, u32 mtu)
 {
 }
 
-static void fake_redirect(struct dst_entry *dst, struct sk_buff *skb)
+static void fake_redirect(struct dst_entry *dst, struct sock *sk,
+			  struct sk_buff *skb)
 {
 }
 
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 129ed8f74138..ab4f44c9bb21 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -161,17 +161,10 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk,
 	if (sk->sk_state == DCCP_LISTEN)
 		return;
 
-	/* We don't check in the destentry if pmtu discovery is forbidden
-	 * on this route. We just assume that no packet_to_big packets
-	 * are send back when pmtu discovery is not active.
-	 * There is a small race when the user changes this flag in the
-	 * route, but I think that's acceptable.
-	 */
-	if ((dst = __sk_dst_check(sk, 0)) == NULL)
+	dst = inet_csk_update_pmtu(sk, mtu);
+	if (!dst)
 		return;
 
-	dst->ops->update_pmtu(dst, mtu);
-
 	/* Something is about to be wrong... Remember soft error
 	 * for the case, if this connection will not able to recover.
 	 */
@@ -200,7 +193,7 @@ static void dccp_do_redirect(struct sk_buff *skb, struct sock *sk)
 	struct dst_entry *dst = __sk_dst_check(sk, 0);
 
 	if (dst)
-		dst->ops->redirect(dst, skb);
+		dst->ops->redirect(dst, sk, skb);
 }
 
 /*
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 090c0800ce03..56840b249f3b 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -134,7 +134,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
 
 		if (dst)
-			dst->ops->redirect(dst, skb);
+			dst->ops->redirect(dst, sk, skb);
 	}
 
 	if (type == ICMPV6_PKT_TOOBIG) {
@@ -145,39 +145,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED))
 			goto out;
 
-		/* icmp should have updated the destination cache entry */
-		dst = __sk_dst_check(sk, np->dst_cookie);
-		if (dst == NULL) {
-			struct inet_sock *inet = inet_sk(sk);
-			struct flowi6 fl6;
-
-			/* BUGGG_FUTURE: Again, it is not clear how
-			   to handle rthdr case. Ignore this complexity
-			   for now.
-			 */
-			memset(&fl6, 0, sizeof(fl6));
-			fl6.flowi6_proto = IPPROTO_DCCP;
-			fl6.daddr = np->daddr;
-			fl6.saddr = np->saddr;
-			fl6.flowi6_oif = sk->sk_bound_dev_if;
-			fl6.fl6_dport = inet->inet_dport;
-			fl6.fl6_sport = inet->inet_sport;
-			security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
-
-			dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false);
-			if (IS_ERR(dst)) {
-				sk->sk_err_soft = -PTR_ERR(dst);
-				goto out;
-			}
-		} else
-			dst_hold(dst);
-
-		dst->ops->update_pmtu(dst, ntohl(info));
+		dst = inet6_csk_update_pmtu(sk, ntohl(info));
+		if (!dst)
+			goto out;
 
-		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
+		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst))
 			dccp_sync_mss(sk, dst_mtu(dst));
-		} /* else let the usual retransmit timer handle it */
-		dst_release(dst);
 		goto out;
 	}
 
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index e9c4e2e864c6..47de90d8fe94 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -117,8 +117,10 @@ static void dn_dst_destroy(struct dst_entry *);
 static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how);
 static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
 static void dn_dst_link_failure(struct sk_buff *);
-static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu);
-static void dn_dst_redirect(struct dst_entry *dst, struct sk_buff *skb);
+static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
+			       struct sk_buff *skb , u32 mtu);
+static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk,
+			    struct sk_buff *skb);
 static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
 					     struct sk_buff *skb,
 					     const void *daddr);
@@ -266,7 +268,8 @@ static int dn_dst_gc(struct dst_ops *ops)
  * We update both the mtu and the advertised mss (i.e. the segment size we
  * advertise to the other end).
  */
-static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
+			       struct sk_buff *skb, u32 mtu)
 {
 	struct dn_route *rt = (struct dn_route *) dst;
 	struct neighbour *n = rt->n;
@@ -294,7 +297,8 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu)
 	}
 }
 
-static void dn_dst_redirect(struct dst_entry *dst, struct sk_buff *skb)
+static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk,
+			    struct sk_buff *skb)
 {
 }
 
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index d71bfbdc0bf4..1e09852df512 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -140,6 +140,27 @@ const struct fib_prop fib_props[RTN_MAX + 1] = {
 	},
 };
 
+static void free_nh_exceptions(struct fib_nh *nh)
+{
+	struct fnhe_hash_bucket *hash = nh->nh_exceptions;
+	int i;
+
+	for (i = 0; i < FNHE_HASH_SIZE; i++) {
+		struct fib_nh_exception *fnhe;
+
+		fnhe = rcu_dereference(hash[i].chain);
+		while (fnhe) {
+			struct fib_nh_exception *next;
+
+			next = rcu_dereference(fnhe->fnhe_next);
+			kfree(fnhe);
+
+			fnhe = next;
+		}
+	}
+	kfree(hash);
+}
+
 /* Release a nexthop info record */
 static void free_fib_info_rcu(struct rcu_head *head)
 {
@@ -148,6 +169,8 @@ static void free_fib_info_rcu(struct rcu_head *head)
 	change_nexthops(fi) {
 		if (nexthop_nh->nh_dev)
 			dev_put(nexthop_nh->nh_dev);
+		if (nexthop_nh->nh_exceptions)
+			free_nh_exceptions(nexthop_nh);
 	} endfor_nexthops(fi);
 
 	release_net(fi->fib_net);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 76825be3b643..3ea465286a39 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -803,3 +803,49 @@ int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
 }
 EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt);
 #endif
+
+static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct ip_options_rcu *inet_opt;
+	__be32 daddr = inet->inet_daddr;
+	struct flowi4 *fl4;
+	struct rtable *rt;
+
+	rcu_read_lock();
+	inet_opt = rcu_dereference(inet->inet_opt);
+	if (inet_opt && inet_opt->opt.srr)
+		daddr = inet_opt->opt.faddr;
+	fl4 = &fl->u.ip4;
+	rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr,
+				   inet->inet_saddr, inet->inet_dport,
+				   inet->inet_sport, sk->sk_protocol,
+				   RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
+	if (IS_ERR(rt))
+		rt = NULL;
+	if (rt)
+		sk_setup_caps(sk, &rt->dst);
+	rcu_read_unlock();
+
+	return &rt->dst;
+}
+
+struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu)
+{
+	struct dst_entry *dst = __sk_dst_check(sk, 0);
+	struct inet_sock *inet = inet_sk(sk);
+
+	if (!dst) {
+		dst = inet_csk_rebuild_route(sk, &inet->cork.fl);
+		if (!dst)
+			goto out;
+	}
+	dst->ops->update_pmtu(dst, sk, NULL, mtu);
+
+	dst = __sk_dst_check(sk, 0);
+	if (!dst)
+		dst = inet_csk_rebuild_route(sk, &inet->cork.fl);
+out:
+	return dst;
+}
+EXPORT_SYMBOL_GPL(inet_csk_update_pmtu);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0c3123566d76..42c44b1403c9 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -833,7 +833,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
 
 	if (skb_dst(skb))
-		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 
 	if (skb->protocol == htons(ETH_P_IP)) {
 		df |= (old_iph->frag_off&htons(IP_DF));
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index c2d0e6d8baaf..2c2c35bace76 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -519,7 +519,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	if (skb_dst(skb))
-		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 
 	if ((old_iph->frag_off & htons(IP_DF)) &&
 	    mtu < ntohs(old_iph->tot_len)) {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index aad21819316d..a5bd0b4acc61 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -148,8 +148,10 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst);
 static void		 ipv4_dst_destroy(struct dst_entry *dst);
 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
 static void		 ipv4_link_failure(struct sk_buff *skb);
-static void		 ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
-static void		 ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb);
+static void		 ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+					   struct sk_buff *skb, u32 mtu);
+static void		 ip_do_redirect(struct dst_entry *dst, struct sock *sk,
+					struct sk_buff *skb);
 static int rt_garbage_collect(struct dst_ops *ops);
 
 static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -1273,14 +1275,130 @@ static void rt_del(unsigned int hash, struct rtable *rt)
 	spin_unlock_bh(rt_hash_lock_addr(hash));
 }
 
-static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb)
+static void __build_flow_key(struct flowi4 *fl4, struct sock *sk,
+			     const struct iphdr *iph,
+			     int oif, u8 tos,
+			     u8 prot, u32 mark, int flow_flags)
+{
+	if (sk) {
+		const struct inet_sock *inet = inet_sk(sk);
+
+		oif = sk->sk_bound_dev_if;
+		mark = sk->sk_mark;
+		tos = RT_CONN_FLAGS(sk);
+		prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
+	}
+	flowi4_init_output(fl4, oif, mark, tos,
+			   RT_SCOPE_UNIVERSE, prot,
+			   flow_flags,
+			   iph->daddr, iph->saddr, 0, 0);
+}
+
+static void build_skb_flow_key(struct flowi4 *fl4, struct sk_buff *skb, struct sock *sk)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+	int oif = skb->dev->ifindex;
+	u8 tos = RT_TOS(iph->tos);
+	u8 prot = iph->protocol;
+	u32 mark = skb->mark;
+
+	__build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0);
+}
+
+static void build_sk_flow_key(struct flowi4 *fl4, struct sock *sk)
+{
+	const struct inet_sock *inet = inet_sk(sk);
+	struct ip_options_rcu *inet_opt;
+	__be32 daddr = inet->inet_daddr;
+
+	rcu_read_lock();
+	inet_opt = rcu_dereference(inet->inet_opt);
+	if (inet_opt && inet_opt->opt.srr)
+		daddr = inet_opt->opt.faddr;
+	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
+			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+			   inet_sk_flowi_flags(sk),
+			   daddr, inet->inet_saddr, 0, 0);
+	rcu_read_unlock();
+}
+
+static void ip_rt_build_flow_key(struct flowi4 *fl4, struct sock *sk,
+				 struct sk_buff *skb)
+{
+	if (skb)
+		build_skb_flow_key(fl4, skb, sk);
+	else
+		build_sk_flow_key(fl4, sk);
+}
+
+static DEFINE_SPINLOCK(fnhe_lock);
+
+static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash, __be32 daddr)
+{
+	struct fib_nh_exception *fnhe, *oldest;
+
+	oldest = rcu_dereference(hash->chain);
+	for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
+	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
+		if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
+			oldest = fnhe;
+	}
+	return oldest;
+}
+
+static struct fib_nh_exception *find_or_create_fnhe(struct fib_nh *nh, __be32 daddr)
+{
+	struct fnhe_hash_bucket *hash = nh->nh_exceptions;
+	struct fib_nh_exception *fnhe;
+	int depth;
+	u32 hval;
+
+	if (!hash) {
+		hash = nh->nh_exceptions = kzalloc(FNHE_HASH_SIZE * sizeof(*hash),
+						   GFP_ATOMIC);
+		if (!hash)
+			return NULL;
+	}
+
+	hval = (__force u32) daddr;
+	hval ^= (hval >> 11) ^ (hval >> 22);
+	hash += hval;
+
+	depth = 0;
+	for (fnhe = rcu_dereference(hash->chain); fnhe;
+	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
+		if (fnhe->fnhe_daddr == daddr)
+			goto out;
+		depth++;
+	}
+
+	if (depth > FNHE_RECLAIM_DEPTH) {
+		fnhe = fnhe_oldest(hash + hval, daddr);
+		goto out_daddr;
+	}
+	fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
+	if (!fnhe)
+		return NULL;
+
+	fnhe->fnhe_next = hash->chain;
+	rcu_assign_pointer(hash->chain, fnhe);
+
+out_daddr:
+	fnhe->fnhe_daddr = daddr;
+out:
+	fnhe->fnhe_stamp = jiffies;
+	return fnhe;
+}
+
+static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4)
 {
 	__be32 new_gw = icmp_hdr(skb)->un.gateway;
 	__be32 old_gw = ip_hdr(skb)->saddr;
 	struct net_device *dev = skb->dev;
 	struct in_device *in_dev;
+	struct fib_result res;
 	struct neighbour *n;
-	struct rtable *rt;
 	struct net *net;
 
 	switch (icmp_hdr(skb)->code & 7) {
@@ -1294,7 +1412,6 @@ static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb)
 		return;
 	}
 
-	rt = (struct rtable *) dst;
 	if (rt->rt_gateway != old_gw)
 		return;
 
@@ -1318,11 +1435,21 @@ static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb)
 			goto reject_redirect;
 	}
 
-	n = ipv4_neigh_lookup(dst, NULL, &new_gw);
+	n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
 	if (n) {
 		if (!(n->nud_state & NUD_VALID)) {
 			neigh_event_send(n, NULL);
 		} else {
+			if (fib_lookup(net, fl4, &res) == 0) {
+				struct fib_nh *nh = &FIB_RES_NH(res);
+				struct fib_nh_exception *fnhe;
+
+				spin_lock_bh(&fnhe_lock);
+				fnhe = find_or_create_fnhe(nh, fl4->daddr);
+				if (fnhe)
+					fnhe->fnhe_gw = new_gw;
+				spin_unlock_bh(&fnhe_lock);
+			}
 			rt->rt_gateway = new_gw;
 			rt->rt_flags |= RTCF_REDIRECTED;
 			call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
@@ -1347,6 +1474,17 @@ reject_redirect:
 		;
 }
 
+static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
+{
+	struct rtable *rt;
+	struct flowi4 fl4;
+
+	rt = (struct rtable *) dst;
+
+	ip_rt_build_flow_key(&fl4, sk, skb);
+	__ip_do_redirect(rt, skb, &fl4);
+}
+
 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
 {
 	struct rtable *rt = (struct rtable *)dst;
@@ -1506,32 +1644,51 @@ out: kfree_skb(skb);
 	return 0;
 }
 
-static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
 {
-	struct rtable *rt = (struct rtable *) dst;
-
-	dst_confirm(dst);
+	struct fib_result res;
 
 	if (mtu < ip_rt_min_pmtu)
 		mtu = ip_rt_min_pmtu;
 
+	if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) {
+		struct fib_nh *nh = &FIB_RES_NH(res);
+		struct fib_nh_exception *fnhe;
+
+		spin_lock_bh(&fnhe_lock);
+		fnhe = find_or_create_fnhe(nh, fl4->daddr);
+		if (fnhe) {
+			fnhe->fnhe_pmtu = mtu;
+			fnhe->fnhe_expires = jiffies + ip_rt_mtu_expires;
+		}
+		spin_unlock_bh(&fnhe_lock);
+	}
 	rt->rt_pmtu = mtu;
 	dst_set_expires(&rt->dst, ip_rt_mtu_expires);
 }
 
+static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+			      struct sk_buff *skb, u32 mtu)
+{
+	struct rtable *rt = (struct rtable *) dst;
+	struct flowi4 fl4;
+
+	ip_rt_build_flow_key(&fl4, sk, skb);
+	__ip_rt_update_pmtu(rt, &fl4, mtu);
+}
+
 void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
 		      int oif, u32 mark, u8 protocol, int flow_flags)
 {
-	const struct iphdr *iph = (const struct iphdr *)skb->data;
+	const struct iphdr *iph = (const struct iphdr *) skb->data;
 	struct flowi4 fl4;
 	struct rtable *rt;
 
-	flowi4_init_output(&fl4, oif, mark, RT_TOS(iph->tos), RT_SCOPE_UNIVERSE,
-			   protocol, flow_flags,
-			   iph->daddr, iph->saddr, 0, 0);
+	__build_flow_key(&fl4, NULL, iph, oif,
+			 RT_TOS(iph->tos), protocol, mark, flow_flags);
 	rt = __ip_route_output_key(net, &fl4);
 	if (!IS_ERR(rt)) {
-		ip_rt_update_pmtu(&rt->dst, mtu);
+		__ip_rt_update_pmtu(rt, &fl4, mtu);
 		ip_rt_put(rt);
 	}
 }
@@ -1539,27 +1696,31 @@ EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
 
 void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
 {
-	const struct inet_sock *inet = inet_sk(sk);
+	const struct iphdr *iph = (const struct iphdr *) skb->data;
+	struct flowi4 fl4;
+	struct rtable *rt;
 
-	return ipv4_update_pmtu(skb, sock_net(sk), mtu,
-				sk->sk_bound_dev_if, sk->sk_mark,
-				inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
-				inet_sk_flowi_flags(sk));
+	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
+	rt = __ip_route_output_key(sock_net(sk), &fl4);
+	if (!IS_ERR(rt)) {
+		__ip_rt_update_pmtu(rt, &fl4, mtu);
+		ip_rt_put(rt);
+	}
 }
 EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
 
 void ipv4_redirect(struct sk_buff *skb, struct net *net,
 		   int oif, u32 mark, u8 protocol, int flow_flags)
 {
-	const struct iphdr *iph = (const struct iphdr *)skb->data;
+	const struct iphdr *iph = (const struct iphdr *) skb->data;
 	struct flowi4 fl4;
 	struct rtable *rt;
 
-	flowi4_init_output(&fl4, oif, mark, RT_TOS(iph->tos), RT_SCOPE_UNIVERSE,
-			   protocol, flow_flags, iph->daddr, iph->saddr, 0, 0);
+	__build_flow_key(&fl4, NULL, iph, oif,
+			 RT_TOS(iph->tos), protocol, mark, flow_flags);
 	rt = __ip_route_output_key(net, &fl4);
 	if (!IS_ERR(rt)) {
-		ip_do_redirect(&rt->dst, skb);
+		__ip_do_redirect(rt, skb, &fl4);
 		ip_rt_put(rt);
 	}
 }
@@ -1567,12 +1728,16 @@ EXPORT_SYMBOL_GPL(ipv4_redirect);
 
 void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
 {
-	const struct inet_sock *inet = inet_sk(sk);
+	const struct iphdr *iph = (const struct iphdr *) skb->data;
+	struct flowi4 fl4;
+	struct rtable *rt;
 
-	return ipv4_redirect(skb, sock_net(sk), sk->sk_bound_dev_if,
-			     sk->sk_mark,
-			     inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
-			     inet_sk_flowi_flags(sk));
+	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
+	rt = __ip_route_output_key(sock_net(sk), &fl4);
+	if (!IS_ERR(rt)) {
+		__ip_do_redirect(rt, skb, &fl4);
+		ip_rt_put(rt);
+	}
 }
 EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
 
@@ -1719,14 +1884,46 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
 		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
 }
 
+static void rt_bind_exception(struct rtable *rt, struct fib_nh *nh, __be32 daddr)
+{
+	struct fnhe_hash_bucket *hash = nh->nh_exceptions;
+	struct fib_nh_exception *fnhe;
+	u32 hval;
+
+	hval = (__force u32) daddr;
+	hval ^= (hval >> 11) ^ (hval >> 22);
+
+	for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
+	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
+		if (fnhe->fnhe_daddr == daddr) {
+			if (fnhe->fnhe_pmtu) {
+				unsigned long expires = fnhe->fnhe_expires;
+				unsigned long diff = jiffies - expires;
+
+				if (time_before(jiffies, expires)) {
+					rt->rt_pmtu = fnhe->fnhe_pmtu;
+					dst_set_expires(&rt->dst, diff);
+				}
+			}
+			if (fnhe->fnhe_gw)
+				rt->rt_gateway = fnhe->fnhe_gw;
+			fnhe->fnhe_stamp = jiffies;
+			break;
+		}
+	}
+}
+
 static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4,
 			   const struct fib_result *res,
 			   struct fib_info *fi, u16 type, u32 itag)
 {
 	if (fi) {
-		if (FIB_RES_GW(*res) &&
-		    FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
-			rt->rt_gateway = FIB_RES_GW(*res);
+		struct fib_nh *nh = &FIB_RES_NH(*res);
+
+		if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
+			rt->rt_gateway = nh->nh_gw;
+		if (unlikely(nh->nh_exceptions))
+			rt_bind_exception(rt, nh, fl4->daddr);
 		rt_init_metrics(rt, fl4, fi);
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
@@ -2587,11 +2784,13 @@ static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
 	return mtu ? : dst->dev->mtu;
 }
 
-static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
+					  struct sk_buff *skb, u32 mtu)
 {
 }
 
-static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sk_buff *skb)
+static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
+				       struct sk_buff *skb)
 {
 }
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7a0062cb4ed0..d9caf5c07aae 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -289,17 +289,10 @@ static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
 	if (sk->sk_state == TCP_LISTEN)
 		return;
 
-	/* We don't check in the destentry if pmtu discovery is forbidden
-	 * on this route. We just assume that no packet_to_big packets
-	 * are send back when pmtu discovery is not active.
-	 * There is a small race when the user changes this flag in the
-	 * route, but I think that's acceptable.
-	 */
-	if ((dst = __sk_dst_check(sk, 0)) == NULL)
+	dst = inet_csk_update_pmtu(sk, mtu);
+	if (!dst)
 		return;
 
-	dst->ops->update_pmtu(dst, mtu);
-
 	/* Something is about to be wrong... Remember soft error
 	 * for the case, if this connection will not able to recover.
 	 */
@@ -326,7 +319,7 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk)
 	struct dst_entry *dst = __sk_dst_check(sk, 0);
 
 	if (dst)
-		dst->ops->redirect(dst, skb);
+		dst->ops->redirect(dst, sk, skb);
 }
 
 /*
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 737131cef375..fcf7678bc009 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -194,20 +194,22 @@ static inline int xfrm4_garbage_collect(struct dst_ops *ops)
 	return (dst_entries_get_slow(ops) > ops->gc_thresh * 2);
 }
 
-static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk,
+			      struct sk_buff *skb, u32 mtu)
 {
 	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
 	struct dst_entry *path = xdst->route;
 
-	path->ops->update_pmtu(path, mtu);
+	path->ops->update_pmtu(path, sk, skb, mtu);
 }
 
-static void xfrm4_redirect(struct dst_entry *dst, struct sk_buff *skb)
+static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk,
+			   struct sk_buff *skb)
 {
 	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
 	struct dst_entry *path = xdst->route;
 
-	path->ops->redirect(path, skb);
+	path->ops->redirect(path, sk, skb);
 }
 
 static void xfrm4_dst_destroy(struct dst_entry *dst)
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index bceb14450a1d..4a0c4d2d8b05 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -203,15 +203,13 @@ struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
 	return dst;
 }
 
-int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
+static struct dst_entry *inet6_csk_route_socket(struct sock *sk)
 {
-	struct sock *sk = skb->sk;
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct flowi6 fl6;
-	struct dst_entry *dst;
 	struct in6_addr *final_p, final;
-	int res;
+	struct dst_entry *dst;
+	struct flowi6 fl6;
 
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_proto = sk->sk_protocol;
@@ -228,18 +226,29 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
 	final_p = fl6_update_dst(&fl6, np->opt, &final);
 
 	dst = __inet6_csk_dst_check(sk, np->dst_cookie);
-
-	if (dst == NULL) {
+	if (!dst) {
 		dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
 
-		if (IS_ERR(dst)) {
-			sk->sk_err_soft = -PTR_ERR(dst);
-			sk->sk_route_caps = 0;
-			kfree_skb(skb);
-			return PTR_ERR(dst);
-		}
+		if (!IS_ERR(dst))
+			__inet6_csk_dst_store(sk, dst, NULL, NULL);
+	}
+	return dst;
+}
 
-		__inet6_csk_dst_store(sk, dst, NULL, NULL);
+int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
+{
+	struct sock *sk = skb->sk;
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct flowi6 fl6;
+	struct dst_entry *dst;
+	int res;
+
+	dst = inet6_csk_route_socket(sk);
+	if (IS_ERR(dst)) {
+		sk->sk_err_soft = -PTR_ERR(dst);
+		sk->sk_route_caps = 0;
+		kfree_skb(skb);
+		return PTR_ERR(dst);
 	}
 
 	rcu_read_lock();
@@ -253,3 +262,15 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
 	return res;
 }
 EXPORT_SYMBOL_GPL(inet6_csk_xmit);
+
+struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu)
+{
+	struct dst_entry *dst = inet6_csk_route_socket(sk);
+
+	if (IS_ERR(dst))
+		return NULL;
+	dst->ops->update_pmtu(dst, sk, NULL, mtu);
+
+	return inet6_csk_route_socket(sk);
+}
+EXPORT_SYMBOL_GPL(inet6_csk_update_pmtu);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 61d106597296..db3284667968 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -609,10 +609,10 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		if (rel_info > dst_mtu(skb_dst(skb2)))
 			goto out;
 
-		skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info);
+		skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info);
 	}
 	if (rel_type == ICMP_REDIRECT)
-		skb_dst(skb2)->ops->redirect(skb_dst(skb2), skb2);
+		skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2);
 
 	icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
 
@@ -952,7 +952,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 	if (mtu < IPV6_MIN_MTU)
 		mtu = IPV6_MIN_MTU;
 	if (skb_dst(skb))
-		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 	if (skb->len > mtu) {
 		*pmtu = mtu;
 		err = -EMSGSIZE;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 412fad809a3b..84f6564dd372 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -78,8 +78,10 @@ static int ip6_dst_gc(struct dst_ops *ops);
 static int		ip6_pkt_discard(struct sk_buff *skb);
 static int		ip6_pkt_discard_out(struct sk_buff *skb);
 static void		ip6_link_failure(struct sk_buff *skb);
-static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
-static void		rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb);
+static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+					   struct sk_buff *skb, u32 mtu);
+static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
+					struct sk_buff *skb);
 
 #ifdef CONFIG_IPV6_ROUTE_INFO
 static struct rt6_info *rt6_add_route_info(struct net *net,
@@ -187,11 +189,13 @@ static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
 	return mtu ? : dst->dev->mtu;
 }
 
-static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
+					 struct sk_buff *skb, u32 mtu)
 {
 }
 
-static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sk_buff *skb)
+static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
+				      struct sk_buff *skb)
 {
 }
 
@@ -1071,7 +1075,8 @@ static void ip6_link_failure(struct sk_buff *skb)
 	}
 }
 
-static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+			       struct sk_buff *skb, u32 mtu)
 {
 	struct rt6_info *rt6 = (struct rt6_info*)dst;
 
@@ -1108,7 +1113,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
 
 	dst = ip6_route_output(net, NULL, &fl6);
 	if (!dst->error)
-		ip6_rt_update_pmtu(dst, ntohl(mtu));
+		ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
 	dst_release(dst);
 }
 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
@@ -1136,7 +1141,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
 
 	dst = ip6_route_output(net, NULL, &fl6);
 	if (!dst->error)
-		rt6_do_redirect(dst, skb);
+		rt6_do_redirect(dst, NULL, skb);
 	dst_release(dst);
 }
 EXPORT_SYMBOL_GPL(ip6_redirect);
@@ -1639,7 +1644,7 @@ static int ip6_route_del(struct fib6_config *cfg)
 	return err;
 }
 
-static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb)
+static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
 {
 	struct net *net = dev_net(skb->dev);
 	struct netevent_redirect netevent;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index fbf1622fdeef..3bd1bfc01f85 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -807,7 +807,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 	}
 
 	if (tunnel->parms.iph.daddr && skb_dst(skb))
-		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 
 	if (skb->len > mtu) {
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3071f377145c..c9dabdd832d7 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -367,7 +367,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
 
 		if (dst)
-			dst->ops->redirect(dst,skb);
+			dst->ops->redirect(dst, sk, skb);
 	}
 
 	if (type == ICMPV6_PKT_TOOBIG) {
@@ -378,43 +378,14 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
 			goto out;
 
-		/* icmp should have updated the destination cache entry */
-		dst = __sk_dst_check(sk, np->dst_cookie);
-
-		if (dst == NULL) {
-			struct inet_sock *inet = inet_sk(sk);
-			struct flowi6 fl6;
-
-			/* BUGGG_FUTURE: Again, it is not clear how
-			   to handle rthdr case. Ignore this complexity
-			   for now.
-			 */
-			memset(&fl6, 0, sizeof(fl6));
-			fl6.flowi6_proto = IPPROTO_TCP;
-			fl6.daddr = np->daddr;
-			fl6.saddr = np->saddr;
-			fl6.flowi6_oif = sk->sk_bound_dev_if;
-			fl6.flowi6_mark = sk->sk_mark;
-			fl6.fl6_dport = inet->inet_dport;
-			fl6.fl6_sport = inet->inet_sport;
-			security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
-
-			dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false);
-			if (IS_ERR(dst)) {
-				sk->sk_err_soft = -PTR_ERR(dst);
-				goto out;
-			}
-
-		} else
-			dst_hold(dst);
-
-		dst->ops->update_pmtu(dst, ntohl(info));
+		dst = inet6_csk_update_pmtu(sk, ntohl(info));
+		if (!dst)
+			goto out;
 
 		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
 			tcp_sync_mss(sk, dst_mtu(dst));
 			tcp_simple_retransmit(sk);
-		} /* else let the usual retransmit timer handle it */
-		dst_release(dst);
+		}
 		goto out;
 	}
 
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index f5a9cb8257b9..ef39812107b1 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -207,20 +207,22 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops)
 	return dst_entries_get_fast(ops) > ops->gc_thresh * 2;
 }
 
-static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
+			      struct sk_buff *skb, u32 mtu)
 {
 	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
 	struct dst_entry *path = xdst->route;
 
-	path->ops->update_pmtu(path, mtu);
+	path->ops->update_pmtu(path, sk, skb, mtu);
 }
 
-static void xfrm6_redirect(struct dst_entry *dst, struct sk_buff *skb)
+static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk,
+			   struct sk_buff *skb)
 {
 	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
 	struct dst_entry *path = xdst->route;
 
-	path->ops->redirect(path, skb);
+	path->ops->redirect(path, sk, skb);
 }
 
 static void xfrm6_dst_destroy(struct dst_entry *dst)
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 71d6ecb65926..65b616ae1716 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -797,7 +797,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_put;
 	}
 	if (skb_dst(skb))
-		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 
 	df |= (old_iph->frag_off & htons(IP_DF));
 
@@ -913,7 +913,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_put;
 	}
 	if (skb_dst(skb))
-		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 
 	if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) &&
 	    !skb_is_gso(skb)) {
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index b16517ee1aaf..8cf348e62e74 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1360,7 +1360,7 @@ struct sctp_transport *sctp_assoc_choose_alter_transport(
 /* Update the association's pmtu and frag_point by going through all the
  * transports. This routine is called when a transport's PMTU has changed.
  */
-void sctp_assoc_sync_pmtu(struct sctp_association *asoc)
+void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc)
 {
 	struct sctp_transport *t;
 	__u32 pmtu = 0;
@@ -1372,7 +1372,7 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc)
 	list_for_each_entry(t, &asoc->peer.transport_addr_list,
 			transports) {
 		if (t->pmtu_pending && t->dst) {
-			sctp_transport_update_pmtu(t, dst_mtu(t->dst));
+			sctp_transport_update_pmtu(sk, t, dst_mtu(t->dst));
 			t->pmtu_pending = 0;
 		}
 		if (!pmtu || (t->pathmtu < pmtu))
diff --git a/net/sctp/input.c b/net/sctp/input.c
index f050d45faa98..c201b26879a1 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -408,10 +408,10 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
 
 	if (t->param_flags & SPP_PMTUD_ENABLE) {
 		/* Update transports view of the MTU */
-		sctp_transport_update_pmtu(t, pmtu);
+		sctp_transport_update_pmtu(sk, t, pmtu);
 
 		/* Update association pmtu. */
-		sctp_assoc_sync_pmtu(asoc);
+		sctp_assoc_sync_pmtu(sk, asoc);
 	}
 
 	/* Retransmit with the new pmtu setting.
@@ -432,7 +432,7 @@ void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t,
 		return;
 	dst = sctp_transport_dst_check(t);
 	if (dst)
-		dst->ops->redirect(dst, skb);
+		dst->ops->redirect(dst, sk, skb);
 }
 
 /*
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 539f35d07f4e..838e18b4d7ea 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -410,7 +410,7 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 	if (!sctp_transport_dst_check(tp)) {
 		sctp_transport_route(tp, NULL, sctp_sk(sk));
 		if (asoc && (asoc->param_flags & SPP_PMTUD_ENABLE)) {
-			sctp_assoc_sync_pmtu(asoc);
+			sctp_assoc_sync_pmtu(sk, asoc);
 		}
 	}
 	dst = dst_clone(tp->dst);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index b3b8a8d813eb..74bd3c47350a 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1853,7 +1853,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
 	}
 
 	if (asoc->pmtu_pending)
-		sctp_assoc_pending_pmtu(asoc);
+		sctp_assoc_pending_pmtu(sk, asoc);
 
 	/* If fragmentation is disabled and the message length exceeds the
 	 * association fragmentation point, return EMSGSIZE. The I-D
@@ -2365,7 +2365,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
 	if ((params->spp_flags & SPP_PMTUD_DISABLE) && params->spp_pathmtu) {
 		if (trans) {
 			trans->pathmtu = params->spp_pathmtu;
-			sctp_assoc_sync_pmtu(asoc);
+			sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc);
 		} else if (asoc) {
 			asoc->pathmtu = params->spp_pathmtu;
 			sctp_frag_point(asoc, params->spp_pathmtu);
@@ -2382,7 +2382,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
 			(trans->param_flags & ~SPP_PMTUD) | pmtud_change;
 		if (update) {
 			sctp_transport_pmtu(trans, sctp_opt2sk(sp));
-			sctp_assoc_sync_pmtu(asoc);
+			sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc);
 		}
 	} else if (asoc) {
 		asoc->param_flags =
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 1dcceb6e0ce6..a6b7ee9ce28a 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -228,7 +228,7 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
 		transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
 }
 
-void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
+void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 pmtu)
 {
 	struct dst_entry *dst;
 
@@ -245,8 +245,16 @@ void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
 	}
 
 	dst = sctp_transport_dst_check(t);
-	if (dst)
-		dst->ops->update_pmtu(dst, pmtu);
+	if (!dst)
+		t->af_specific->get_dst(t, &t->saddr, &t->fl, sk);
+
+	if (dst) {
+		dst->ops->update_pmtu(dst, sk, NULL, pmtu);
+
+		dst = sctp_transport_dst_check(t);
+		if (!dst)
+			t->af_specific->get_dst(t, &t->saddr, &t->fl, sk);
+	}
 }
 
 /* Caches the dst entry and source address for a transport's destination