aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c46
-rw-r--r--net/ipv4/ah4.c1
-rw-r--r--net/ipv4/arp.c3
-rw-r--r--net/ipv4/devinet.c5
-rw-r--r--net/ipv4/esp4.c1
-rw-r--r--net/ipv4/fib_frontend.c38
-rw-r--r--net/ipv4/fib_semantics.c7
-rw-r--r--net/ipv4/fib_trie.c16
-rw-r--r--net/ipv4/icmp.c24
-rw-r--r--net/ipv4/inet_connection_sock.c8
-rw-r--r--net/ipv4/inet_diag.c125
-rw-r--r--net/ipv4/inet_fragment.c2
-rw-r--r--net/ipv4/inetpeer.c95
-rw-r--r--net/ipv4/ip_fragment.c6
-rw-r--r--net/ipv4/ip_gre.c14
-rw-r--r--net/ipv4/ip_input.c28
-rw-r--r--net/ipv4/ip_options.c22
-rw-r--r--net/ipv4/ip_output.c30
-rw-r--r--net/ipv4/ip_sockglue.c7
-rw-r--r--net/ipv4/ipcomp.c1
-rw-r--r--net/ipv4/ipip.c15
-rw-r--r--net/ipv4/ipmr.c28
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c15
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c172
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c52
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_amanda.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c8
-rw-r--r--net/ipv4/netfilter/nf_nat_helper.c13
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_tftp.c4
-rw-r--r--net/ipv4/ping.c1
-rw-r--r--net/ipv4/protocol.c8
-rw-r--r--net/ipv4/raw.c3
-rw-r--r--net/ipv4/route.c355
-rw-r--r--net/ipv4/sysctl_net_ipv4.c7
-rw-r--r--net/ipv4/tcp_input.c16
-rw-r--r--net/ipv4/tcp_ipv4.c97
-rw-r--r--net/ipv4/tcp_minisocks.c34
-rw-r--r--net/ipv4/tcp_output.c20
-rw-r--r--net/ipv4/udp.c6
-rw-r--r--net/ipv4/xfrm4_policy.c11
44 files changed, 804 insertions, 564 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c8f7aee587d1..07a02f6e9696 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -157,6 +157,7 @@ void inet_sock_destruct(struct sock *sk)
157 157
158 kfree(rcu_dereference_protected(inet->inet_opt, 1)); 158 kfree(rcu_dereference_protected(inet->inet_opt, 1));
159 dst_release(rcu_dereference_check(sk->sk_dst_cache, 1)); 159 dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
160 dst_release(sk->sk_rx_dst);
160 sk_refcnt_debug_dec(sk); 161 sk_refcnt_debug_dec(sk);
161} 162}
162EXPORT_SYMBOL(inet_sock_destruct); 163EXPORT_SYMBOL(inet_sock_destruct);
@@ -242,20 +243,18 @@ void build_ehash_secret(void)
242} 243}
243EXPORT_SYMBOL(build_ehash_secret); 244EXPORT_SYMBOL(build_ehash_secret);
244 245
245static inline int inet_netns_ok(struct net *net, int protocol) 246static inline int inet_netns_ok(struct net *net, __u8 protocol)
246{ 247{
247 int hash;
248 const struct net_protocol *ipprot; 248 const struct net_protocol *ipprot;
249 249
250 if (net_eq(net, &init_net)) 250 if (net_eq(net, &init_net))
251 return 1; 251 return 1;
252 252
253 hash = protocol & (MAX_INET_PROTOS - 1); 253 ipprot = rcu_dereference(inet_protos[protocol]);
254 ipprot = rcu_dereference(inet_protos[hash]); 254 if (ipprot == NULL) {
255
256 if (ipprot == NULL)
257 /* raw IP is OK */ 255 /* raw IP is OK */
258 return 1; 256 return 1;
257 }
259 return ipprot->netns_ok; 258 return ipprot->netns_ok;
260} 259}
261 260
@@ -553,7 +552,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
553 552
554 if (!inet_sk(sk)->inet_num && inet_autobind(sk)) 553 if (!inet_sk(sk)->inet_num && inet_autobind(sk))
555 return -EAGAIN; 554 return -EAGAIN;
556 return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len); 555 return sk->sk_prot->connect(sk, uaddr, addr_len);
557} 556}
558EXPORT_SYMBOL(inet_dgram_connect); 557EXPORT_SYMBOL(inet_dgram_connect);
559 558
@@ -1216,8 +1215,8 @@ EXPORT_SYMBOL(inet_sk_rebuild_header);
1216 1215
1217static int inet_gso_send_check(struct sk_buff *skb) 1216static int inet_gso_send_check(struct sk_buff *skb)
1218{ 1217{
1219 const struct iphdr *iph;
1220 const struct net_protocol *ops; 1218 const struct net_protocol *ops;
1219 const struct iphdr *iph;
1221 int proto; 1220 int proto;
1222 int ihl; 1221 int ihl;
1223 int err = -EINVAL; 1222 int err = -EINVAL;
@@ -1236,7 +1235,7 @@ static int inet_gso_send_check(struct sk_buff *skb)
1236 __skb_pull(skb, ihl); 1235 __skb_pull(skb, ihl);
1237 skb_reset_transport_header(skb); 1236 skb_reset_transport_header(skb);
1238 iph = ip_hdr(skb); 1237 iph = ip_hdr(skb);
1239 proto = iph->protocol & (MAX_INET_PROTOS - 1); 1238 proto = iph->protocol;
1240 err = -EPROTONOSUPPORT; 1239 err = -EPROTONOSUPPORT;
1241 1240
1242 rcu_read_lock(); 1241 rcu_read_lock();
@@ -1253,8 +1252,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
1253 netdev_features_t features) 1252 netdev_features_t features)
1254{ 1253{
1255 struct sk_buff *segs = ERR_PTR(-EINVAL); 1254 struct sk_buff *segs = ERR_PTR(-EINVAL);
1256 struct iphdr *iph;
1257 const struct net_protocol *ops; 1255 const struct net_protocol *ops;
1256 struct iphdr *iph;
1258 int proto; 1257 int proto;
1259 int ihl; 1258 int ihl;
1260 int id; 1259 int id;
@@ -1286,7 +1285,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
1286 skb_reset_transport_header(skb); 1285 skb_reset_transport_header(skb);
1287 iph = ip_hdr(skb); 1286 iph = ip_hdr(skb);
1288 id = ntohs(iph->id); 1287 id = ntohs(iph->id);
1289 proto = iph->protocol & (MAX_INET_PROTOS - 1); 1288 proto = iph->protocol;
1290 segs = ERR_PTR(-EPROTONOSUPPORT); 1289 segs = ERR_PTR(-EPROTONOSUPPORT);
1291 1290
1292 rcu_read_lock(); 1291 rcu_read_lock();
@@ -1340,7 +1339,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
1340 goto out; 1339 goto out;
1341 } 1340 }
1342 1341
1343 proto = iph->protocol & (MAX_INET_PROTOS - 1); 1342 proto = iph->protocol;
1344 1343
1345 rcu_read_lock(); 1344 rcu_read_lock();
1346 ops = rcu_dereference(inet_protos[proto]); 1345 ops = rcu_dereference(inet_protos[proto]);
@@ -1398,11 +1397,11 @@ out:
1398 1397
1399static int inet_gro_complete(struct sk_buff *skb) 1398static int inet_gro_complete(struct sk_buff *skb)
1400{ 1399{
1401 const struct net_protocol *ops; 1400 __be16 newlen = htons(skb->len - skb_network_offset(skb));
1402 struct iphdr *iph = ip_hdr(skb); 1401 struct iphdr *iph = ip_hdr(skb);
1403 int proto = iph->protocol & (MAX_INET_PROTOS - 1); 1402 const struct net_protocol *ops;
1403 int proto = iph->protocol;
1404 int err = -ENOSYS; 1404 int err = -ENOSYS;
1405 __be16 newlen = htons(skb->len - skb_network_offset(skb));
1406 1405
1407 csum_replace2(&iph->check, iph->tot_len, newlen); 1406 csum_replace2(&iph->check, iph->tot_len, newlen);
1408 iph->tot_len = newlen; 1407 iph->tot_len = newlen;
@@ -1520,14 +1519,15 @@ static const struct net_protocol igmp_protocol = {
1520#endif 1519#endif
1521 1520
1522static const struct net_protocol tcp_protocol = { 1521static const struct net_protocol tcp_protocol = {
1523 .handler = tcp_v4_rcv, 1522 .early_demux = tcp_v4_early_demux,
1524 .err_handler = tcp_v4_err, 1523 .handler = tcp_v4_rcv,
1525 .gso_send_check = tcp_v4_gso_send_check, 1524 .err_handler = tcp_v4_err,
1526 .gso_segment = tcp_tso_segment, 1525 .gso_send_check = tcp_v4_gso_send_check,
1527 .gro_receive = tcp4_gro_receive, 1526 .gso_segment = tcp_tso_segment,
1528 .gro_complete = tcp4_gro_complete, 1527 .gro_receive = tcp4_gro_receive,
1529 .no_policy = 1, 1528 .gro_complete = tcp4_gro_complete,
1530 .netns_ok = 1, 1529 .no_policy = 1,
1530 .netns_ok = 1,
1531}; 1531};
1532 1532
1533static const struct net_protocol udp_protocol = { 1533static const struct net_protocol udp_protocol = {
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index e8f2617ecd47..916d5ecaf6c6 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -408,6 +408,7 @@ static void ah4_err(struct sk_buff *skb, u32 info)
408 return; 408 return;
409 pr_debug("pmtu discovery on SA AH/%08x/%08x\n", 409 pr_debug("pmtu discovery on SA AH/%08x/%08x\n",
410 ntohl(ah->spi), ntohl(iph->daddr)); 410 ntohl(ah->spi), ntohl(iph->daddr));
411 ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0);
411 xfrm_state_put(x); 412 xfrm_state_put(x);
412} 413}
413 414
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index cda37be02f8d..2e560f0c757d 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -790,7 +790,8 @@ static int arp_process(struct sk_buff *skb)
790 * Check for bad requests for 127.x.x.x and requests for multicast 790 * Check for bad requests for 127.x.x.x and requests for multicast
791 * addresses. If this is one such, delete it. 791 * addresses. If this is one such, delete it.
792 */ 792 */
793 if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) 793 if (ipv4_is_multicast(tip) ||
794 (!IN_DEV_ROUTE_LOCALNET(in_dev) && ipv4_is_loopback(tip)))
794 goto out; 795 goto out;
795 796
796/* 797/*
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 10e15a144e95..44bf82e3aef7 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1500,7 +1500,8 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
1500 1500
1501 if (cnf == net->ipv4.devconf_dflt) 1501 if (cnf == net->ipv4.devconf_dflt)
1502 devinet_copy_dflt_conf(net, i); 1502 devinet_copy_dflt_conf(net, i);
1503 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1) 1503 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1504 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1504 if ((new_value == 0) && (old_value != 0)) 1505 if ((new_value == 0) && (old_value != 0))
1505 rt_cache_flush(net, 0); 1506 rt_cache_flush(net, 0);
1506 } 1507 }
@@ -1617,6 +1618,8 @@ static struct devinet_sysctl_table {
1617 "force_igmp_version"), 1618 "force_igmp_version"),
1618 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES, 1619 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1619 "promote_secondaries"), 1620 "promote_secondaries"),
1621 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
1622 "route_localnet"),
1620 }, 1623 },
1621}; 1624};
1622 1625
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index cb982a61536f..7b95b49a36ce 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -494,6 +494,7 @@ static void esp4_err(struct sk_buff *skb, u32 info)
494 return; 494 return;
495 NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", 495 NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n",
496 ntohl(esph->spi), ntohl(iph->daddr)); 496 ntohl(esph->spi), ntohl(iph->daddr));
497 ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0);
497 xfrm_state_put(x); 498 xfrm_state_put(x);
498} 499}
499 500
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 3854411fa37c..63b11ca54d95 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -180,6 +180,35 @@ unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
180} 180}
181EXPORT_SYMBOL(inet_dev_addr_type); 181EXPORT_SYMBOL(inet_dev_addr_type);
182 182
183__be32 fib_compute_spec_dst(struct sk_buff *skb)
184{
185 struct net_device *dev = skb->dev;
186 struct in_device *in_dev;
187 struct fib_result res;
188 struct flowi4 fl4;
189 struct net *net;
190
191 if (skb->pkt_type != PACKET_BROADCAST &&
192 skb->pkt_type != PACKET_MULTICAST)
193 return ip_hdr(skb)->daddr;
194
195 in_dev = __in_dev_get_rcu(dev);
196 BUG_ON(!in_dev);
197 fl4.flowi4_oif = 0;
198 fl4.flowi4_iif = 0;
199 fl4.daddr = ip_hdr(skb)->saddr;
200 fl4.saddr = ip_hdr(skb)->daddr;
201 fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
202 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
203 fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0;
204
205 net = dev_net(dev);
206 if (!fib_lookup(net, &fl4, &res))
207 return FIB_RES_PREFSRC(net, res);
208 else
209 return inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
210}
211
183/* Given (packet source, input interface) and optional (dst, oif, tos): 212/* Given (packet source, input interface) and optional (dst, oif, tos):
184 * - (main) check, that source is valid i.e. not broadcast or our local 213 * - (main) check, that source is valid i.e. not broadcast or our local
185 * address. 214 * address.
@@ -189,8 +218,7 @@ EXPORT_SYMBOL(inet_dev_addr_type);
189 * called with rcu_read_lock() 218 * called with rcu_read_lock()
190 */ 219 */
191int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, 220int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos,
192 int oif, struct net_device *dev, __be32 *spec_dst, 221 int oif, struct net_device *dev, u32 *itag)
193 u32 *itag)
194{ 222{
195 struct in_device *in_dev; 223 struct in_device *in_dev;
196 struct flowi4 fl4; 224 struct flowi4 fl4;
@@ -229,7 +257,6 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos,
229 if (res.type != RTN_LOCAL || !accept_local) 257 if (res.type != RTN_LOCAL || !accept_local)
230 goto e_inval; 258 goto e_inval;
231 } 259 }
232 *spec_dst = FIB_RES_PREFSRC(net, res);
233 fib_combine_itag(itag, &res); 260 fib_combine_itag(itag, &res);
234 dev_match = false; 261 dev_match = false;
235 262
@@ -258,17 +285,14 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos,
258 285
259 ret = 0; 286 ret = 0;
260 if (fib_lookup(net, &fl4, &res) == 0) { 287 if (fib_lookup(net, &fl4, &res) == 0) {
261 if (res.type == RTN_UNICAST) { 288 if (res.type == RTN_UNICAST)
262 *spec_dst = FIB_RES_PREFSRC(net, res);
263 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 289 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
264 }
265 } 290 }
266 return ret; 291 return ret;
267 292
268last_resort: 293last_resort:
269 if (rpf) 294 if (rpf)
270 goto e_rpf; 295 goto e_rpf;
271 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
272 *itag = 0; 296 *itag = 0;
273 return 0; 297 return 0;
274 298
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index e5b7182fa099..415f8230fc88 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -779,9 +779,14 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
779 int type = nla_type(nla); 779 int type = nla_type(nla);
780 780
781 if (type) { 781 if (type) {
782 u32 val;
783
782 if (type > RTAX_MAX) 784 if (type > RTAX_MAX)
783 goto err_inval; 785 goto err_inval;
784 fi->fib_metrics[type - 1] = nla_get_u32(nla); 786 val = nla_get_u32(nla);
787 if (type == RTAX_ADVMSS && val > 65535 - 40)
788 val = 65535 - 40;
789 fi->fib_metrics[type - 1] = val;
785 } 790 }
786 } 791 }
787 } 792 }
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 30b88d7b4bd6..9b0f25930fbc 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1007,9 +1007,9 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
1007 while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) { 1007 while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) {
1008 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1008 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1009 wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); 1009 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
1010 tn = (struct tnode *) resize(t, (struct tnode *)tn); 1010 tn = (struct tnode *)resize(t, tn);
1011 1011
1012 tnode_put_child_reorg((struct tnode *)tp, cindex, 1012 tnode_put_child_reorg(tp, cindex,
1013 (struct rt_trie_node *)tn, wasfull); 1013 (struct rt_trie_node *)tn, wasfull);
1014 1014
1015 tp = node_parent((struct rt_trie_node *) tn); 1015 tp = node_parent((struct rt_trie_node *) tn);
@@ -1024,7 +1024,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
1024 1024
1025 /* Handle last (top) tnode */ 1025 /* Handle last (top) tnode */
1026 if (IS_TNODE(tn)) 1026 if (IS_TNODE(tn))
1027 tn = (struct tnode *)resize(t, (struct tnode *)tn); 1027 tn = (struct tnode *)resize(t, tn);
1028 1028
1029 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); 1029 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
1030 tnode_free_flush(); 1030 tnode_free_flush();
@@ -1125,7 +1125,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1125 node_set_parent((struct rt_trie_node *)l, tp); 1125 node_set_parent((struct rt_trie_node *)l, tp);
1126 1126
1127 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1127 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1128 put_child(t, (struct tnode *)tp, cindex, (struct rt_trie_node *)l); 1128 put_child(t, tp, cindex, (struct rt_trie_node *)l);
1129 } else { 1129 } else {
1130 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ 1130 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
1131 /* 1131 /*
@@ -1160,8 +1160,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1160 1160
1161 if (tp) { 1161 if (tp) {
1162 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1162 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1163 put_child(t, (struct tnode *)tp, cindex, 1163 put_child(t, tp, cindex, (struct rt_trie_node *)tn);
1164 (struct rt_trie_node *)tn);
1165 } else { 1164 } else {
1166 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); 1165 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
1167 tp = tn; 1166 tp = tn;
@@ -1620,7 +1619,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l)
1620 1619
1621 if (tp) { 1620 if (tp) {
1622 t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits); 1621 t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits);
1623 put_child(t, (struct tnode *)tp, cindex, NULL); 1622 put_child(t, tp, cindex, NULL);
1624 trie_rebalance(t, tp); 1623 trie_rebalance(t, tp);
1625 } else 1624 } else
1626 RCU_INIT_POINTER(t->trie, NULL); 1625 RCU_INIT_POINTER(t->trie, NULL);
@@ -1844,6 +1843,8 @@ int fib_table_flush(struct fib_table *tb)
1844 if (ll && hlist_empty(&ll->list)) 1843 if (ll && hlist_empty(&ll->list))
1845 trie_leaf_remove(t, ll); 1844 trie_leaf_remove(t, ll);
1846 1845
1846 inetpeer_invalidate_tree(&tb->tb_peers);
1847
1847 pr_debug("trie_flush found=%d\n", found); 1848 pr_debug("trie_flush found=%d\n", found);
1848 return found; 1849 return found;
1849} 1850}
@@ -1992,6 +1993,7 @@ struct fib_table *fib_trie_table(u32 id)
1992 tb->tb_id = id; 1993 tb->tb_id = id;
1993 tb->tb_default = -1; 1994 tb->tb_default = -1;
1994 tb->tb_num_default = 0; 1995 tb->tb_num_default = 0;
1996 inet_peer_base_init(&tb->tb_peers);
1995 1997
1996 t = (struct trie *) tb->tb_data; 1998 t = (struct trie *) tb->tb_data;
1997 memset(t, 0, sizeof(*t)); 1999 memset(t, 0, sizeof(*t));
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index c75efbdc71cb..4bce5a2830aa 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -95,6 +95,7 @@
95#include <net/checksum.h> 95#include <net/checksum.h>
96#include <net/xfrm.h> 96#include <net/xfrm.h>
97#include <net/inet_common.h> 97#include <net/inet_common.h>
98#include <net/ip_fib.h>
98 99
99/* 100/*
100 * Build xmit assembly blocks 101 * Build xmit assembly blocks
@@ -253,9 +254,8 @@ static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
253 254
254 /* Limit if icmp type is enabled in ratemask. */ 255 /* Limit if icmp type is enabled in ratemask. */
255 if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) { 256 if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) {
256 if (!rt->peer) 257 struct inet_peer *peer = rt_get_peer_create(rt, fl4->daddr);
257 rt_bind_peer(rt, fl4->daddr, 1); 258 rc = inet_peer_xrlim_allow(peer,
258 rc = inet_peer_xrlim_allow(rt->peer,
259 net->ipv4.sysctl_icmp_ratelimit); 259 net->ipv4.sysctl_icmp_ratelimit);
260 } 260 }
261out: 261out:
@@ -334,7 +334,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
334 struct flowi4 fl4; 334 struct flowi4 fl4;
335 struct sock *sk; 335 struct sock *sk;
336 struct inet_sock *inet; 336 struct inet_sock *inet;
337 __be32 daddr; 337 __be32 daddr, saddr;
338 338
339 if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) 339 if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
340 return; 340 return;
@@ -348,6 +348,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
348 348
349 inet->tos = ip_hdr(skb)->tos; 349 inet->tos = ip_hdr(skb)->tos;
350 daddr = ipc.addr = ip_hdr(skb)->saddr; 350 daddr = ipc.addr = ip_hdr(skb)->saddr;
351 saddr = fib_compute_spec_dst(skb);
351 ipc.opt = NULL; 352 ipc.opt = NULL;
352 ipc.tx_flags = 0; 353 ipc.tx_flags = 0;
353 if (icmp_param->replyopts.opt.opt.optlen) { 354 if (icmp_param->replyopts.opt.opt.optlen) {
@@ -357,7 +358,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
357 } 358 }
358 memset(&fl4, 0, sizeof(fl4)); 359 memset(&fl4, 0, sizeof(fl4));
359 fl4.daddr = daddr; 360 fl4.daddr = daddr;
360 fl4.saddr = rt->rt_spec_dst; 361 fl4.saddr = saddr;
361 fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); 362 fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
362 fl4.flowi4_proto = IPPROTO_ICMP; 363 fl4.flowi4_proto = IPPROTO_ICMP;
363 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); 364 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
@@ -638,12 +639,12 @@ EXPORT_SYMBOL(icmp_send);
638 639
639static void icmp_unreach(struct sk_buff *skb) 640static void icmp_unreach(struct sk_buff *skb)
640{ 641{
642 const struct net_protocol *ipprot;
641 const struct iphdr *iph; 643 const struct iphdr *iph;
642 struct icmphdr *icmph; 644 struct icmphdr *icmph;
643 int hash, protocol;
644 const struct net_protocol *ipprot;
645 u32 info = 0;
646 struct net *net; 645 struct net *net;
646 u32 info = 0;
647 int protocol;
647 648
648 net = dev_net(skb_dst(skb)->dev); 649 net = dev_net(skb_dst(skb)->dev);
649 650
@@ -674,9 +675,7 @@ static void icmp_unreach(struct sk_buff *skb)
674 LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: fragmentation needed and DF set\n"), 675 LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: fragmentation needed and DF set\n"),
675 &iph->daddr); 676 &iph->daddr);
676 } else { 677 } else {
677 info = ip_rt_frag_needed(net, iph, 678 info = ntohs(icmph->un.frag.mtu);
678 ntohs(icmph->un.frag.mtu),
679 skb->dev);
680 if (!info) 679 if (!info)
681 goto out; 680 goto out;
682 } 681 }
@@ -734,9 +733,8 @@ static void icmp_unreach(struct sk_buff *skb)
734 */ 733 */
735 raw_icmp_error(skb, protocol, info); 734 raw_icmp_error(skb, protocol, info);
736 735
737 hash = protocol & (MAX_INET_PROTOS - 1);
738 rcu_read_lock(); 736 rcu_read_lock();
739 ipprot = rcu_dereference(inet_protos[hash]); 737 ipprot = rcu_dereference(inet_protos[protocol]);
740 if (ipprot && ipprot->err_handler) 738 if (ipprot && ipprot->err_handler)
741 ipprot->err_handler(skb, info); 739 ipprot->err_handler(skb, info);
742 rcu_read_unlock(); 740 rcu_read_unlock();
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index f9ee7417f6a0..034ddbe42adf 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -368,17 +368,21 @@ EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);
368 368
369struct dst_entry *inet_csk_route_req(struct sock *sk, 369struct dst_entry *inet_csk_route_req(struct sock *sk,
370 struct flowi4 *fl4, 370 struct flowi4 *fl4,
371 const struct request_sock *req) 371 const struct request_sock *req,
372 bool nocache)
372{ 373{
373 struct rtable *rt; 374 struct rtable *rt;
374 const struct inet_request_sock *ireq = inet_rsk(req); 375 const struct inet_request_sock *ireq = inet_rsk(req);
375 struct ip_options_rcu *opt = inet_rsk(req)->opt; 376 struct ip_options_rcu *opt = inet_rsk(req)->opt;
376 struct net *net = sock_net(sk); 377 struct net *net = sock_net(sk);
378 int flags = inet_sk_flowi_flags(sk) & ~FLOWI_FLAG_PRECOW_METRICS;
377 379
380 if (nocache)
381 flags |= FLOWI_FLAG_RT_NOCACHE;
378 flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, 382 flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
379 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, 383 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
380 sk->sk_protocol, 384 sk->sk_protocol,
381 inet_sk_flowi_flags(sk) & ~FLOWI_FLAG_PRECOW_METRICS, 385 flags,
382 (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, 386 (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
383 ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); 387 ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
384 security_req_classify_flow(req, flowi4_to_flowi(fl4)); 388 security_req_classify_flow(req, flowi4_to_flowi(fl4));
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 46d1e7199a8c..38064a285cca 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -46,9 +46,6 @@ struct inet_diag_entry {
46 u16 userlocks; 46 u16 userlocks;
47}; 47};
48 48
49#define INET_DIAG_PUT(skb, attrtype, attrlen) \
50 RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
51
52static DEFINE_MUTEX(inet_diag_table_mutex); 49static DEFINE_MUTEX(inet_diag_table_mutex);
53 50
54static const struct inet_diag_handler *inet_diag_lock_handler(int proto) 51static const struct inet_diag_handler *inet_diag_lock_handler(int proto)
@@ -78,24 +75,22 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
78 const struct inet_sock *inet = inet_sk(sk); 75 const struct inet_sock *inet = inet_sk(sk);
79 struct inet_diag_msg *r; 76 struct inet_diag_msg *r;
80 struct nlmsghdr *nlh; 77 struct nlmsghdr *nlh;
78 struct nlattr *attr;
81 void *info = NULL; 79 void *info = NULL;
82 struct inet_diag_meminfo *minfo = NULL;
83 unsigned char *b = skb_tail_pointer(skb);
84 const struct inet_diag_handler *handler; 80 const struct inet_diag_handler *handler;
85 int ext = req->idiag_ext; 81 int ext = req->idiag_ext;
86 82
87 handler = inet_diag_table[req->sdiag_protocol]; 83 handler = inet_diag_table[req->sdiag_protocol];
88 BUG_ON(handler == NULL); 84 BUG_ON(handler == NULL);
89 85
90 nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r)); 86 nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r),
91 nlh->nlmsg_flags = nlmsg_flags; 87 nlmsg_flags);
88 if (!nlh)
89 return -EMSGSIZE;
92 90
93 r = NLMSG_DATA(nlh); 91 r = nlmsg_data(nlh);
94 BUG_ON(sk->sk_state == TCP_TIME_WAIT); 92 BUG_ON(sk->sk_state == TCP_TIME_WAIT);
95 93
96 if (ext & (1 << (INET_DIAG_MEMINFO - 1)))
97 minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, sizeof(*minfo));
98
99 r->idiag_family = sk->sk_family; 94 r->idiag_family = sk->sk_family;
100 r->idiag_state = sk->sk_state; 95 r->idiag_state = sk->sk_state;
101 r->idiag_timer = 0; 96 r->idiag_timer = 0;
@@ -113,7 +108,8 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
113 * hence this needs to be included regardless of socket family. 108 * hence this needs to be included regardless of socket family.
114 */ 109 */
115 if (ext & (1 << (INET_DIAG_TOS - 1))) 110 if (ext & (1 << (INET_DIAG_TOS - 1)))
116 RTA_PUT_U8(skb, INET_DIAG_TOS, inet->tos); 111 if (nla_put_u8(skb, INET_DIAG_TOS, inet->tos) < 0)
112 goto errout;
117 113
118#if IS_ENABLED(CONFIG_IPV6) 114#if IS_ENABLED(CONFIG_IPV6)
119 if (r->idiag_family == AF_INET6) { 115 if (r->idiag_family == AF_INET6) {
@@ -121,24 +117,31 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
121 117
122 *(struct in6_addr *)r->id.idiag_src = np->rcv_saddr; 118 *(struct in6_addr *)r->id.idiag_src = np->rcv_saddr;
123 *(struct in6_addr *)r->id.idiag_dst = np->daddr; 119 *(struct in6_addr *)r->id.idiag_dst = np->daddr;
120
124 if (ext & (1 << (INET_DIAG_TCLASS - 1))) 121 if (ext & (1 << (INET_DIAG_TCLASS - 1)))
125 RTA_PUT_U8(skb, INET_DIAG_TCLASS, np->tclass); 122 if (nla_put_u8(skb, INET_DIAG_TCLASS, np->tclass) < 0)
123 goto errout;
126 } 124 }
127#endif 125#endif
128 126
129 r->idiag_uid = sock_i_uid(sk); 127 r->idiag_uid = sock_i_uid(sk);
130 r->idiag_inode = sock_i_ino(sk); 128 r->idiag_inode = sock_i_ino(sk);
131 129
132 if (minfo) { 130 if (ext & (1 << (INET_DIAG_MEMINFO - 1))) {
133 minfo->idiag_rmem = sk_rmem_alloc_get(sk); 131 struct inet_diag_meminfo minfo = {
134 minfo->idiag_wmem = sk->sk_wmem_queued; 132 .idiag_rmem = sk_rmem_alloc_get(sk),
135 minfo->idiag_fmem = sk->sk_forward_alloc; 133 .idiag_wmem = sk->sk_wmem_queued,
136 minfo->idiag_tmem = sk_wmem_alloc_get(sk); 134 .idiag_fmem = sk->sk_forward_alloc,
135 .idiag_tmem = sk_wmem_alloc_get(sk),
136 };
137
138 if (nla_put(skb, INET_DIAG_MEMINFO, sizeof(minfo), &minfo) < 0)
139 goto errout;
137 } 140 }
138 141
139 if (ext & (1 << (INET_DIAG_SKMEMINFO - 1))) 142 if (ext & (1 << (INET_DIAG_SKMEMINFO - 1)))
140 if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO)) 143 if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO))
141 goto rtattr_failure; 144 goto errout;
142 145
143 if (icsk == NULL) { 146 if (icsk == NULL) {
144 handler->idiag_get_info(sk, r, NULL); 147 handler->idiag_get_info(sk, r, NULL);
@@ -165,16 +168,20 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
165 } 168 }
166#undef EXPIRES_IN_MS 169#undef EXPIRES_IN_MS
167 170
168 if (ext & (1 << (INET_DIAG_INFO - 1))) 171 if (ext & (1 << (INET_DIAG_INFO - 1))) {
169 info = INET_DIAG_PUT(skb, INET_DIAG_INFO, sizeof(struct tcp_info)); 172 attr = nla_reserve(skb, INET_DIAG_INFO,
173 sizeof(struct tcp_info));
174 if (!attr)
175 goto errout;
170 176
171 if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) { 177 info = nla_data(attr);
172 const size_t len = strlen(icsk->icsk_ca_ops->name);
173
174 strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1),
175 icsk->icsk_ca_ops->name);
176 } 178 }
177 179
180 if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops)
181 if (nla_put_string(skb, INET_DIAG_CONG,
182 icsk->icsk_ca_ops->name) < 0)
183 goto errout;
184
178 handler->idiag_get_info(sk, r, info); 185 handler->idiag_get_info(sk, r, info);
179 186
180 if (sk->sk_state < TCP_TIME_WAIT && 187 if (sk->sk_state < TCP_TIME_WAIT &&
@@ -182,12 +189,10 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
182 icsk->icsk_ca_ops->get_info(sk, ext, skb); 189 icsk->icsk_ca_ops->get_info(sk, ext, skb);
183 190
184out: 191out:
185 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 192 return nlmsg_end(skb, nlh);
186 return skb->len;
187 193
188rtattr_failure: 194errout:
189nlmsg_failure: 195 nlmsg_cancel(skb, nlh);
190 nlmsg_trim(skb, b);
191 return -EMSGSIZE; 196 return -EMSGSIZE;
192} 197}
193EXPORT_SYMBOL_GPL(inet_sk_diag_fill); 198EXPORT_SYMBOL_GPL(inet_sk_diag_fill);
@@ -208,14 +213,15 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
208{ 213{
209 long tmo; 214 long tmo;
210 struct inet_diag_msg *r; 215 struct inet_diag_msg *r;
211 const unsigned char *previous_tail = skb_tail_pointer(skb); 216 struct nlmsghdr *nlh;
212 struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq,
213 unlh->nlmsg_type, sizeof(*r));
214 217
215 r = NLMSG_DATA(nlh); 218 nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r),
216 BUG_ON(tw->tw_state != TCP_TIME_WAIT); 219 nlmsg_flags);
220 if (!nlh)
221 return -EMSGSIZE;
217 222
218 nlh->nlmsg_flags = nlmsg_flags; 223 r = nlmsg_data(nlh);
224 BUG_ON(tw->tw_state != TCP_TIME_WAIT);
219 225
220 tmo = tw->tw_ttd - jiffies; 226 tmo = tw->tw_ttd - jiffies;
221 if (tmo < 0) 227 if (tmo < 0)
@@ -245,11 +251,8 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
245 *(struct in6_addr *)r->id.idiag_dst = tw6->tw_v6_daddr; 251 *(struct in6_addr *)r->id.idiag_dst = tw6->tw_v6_daddr;
246 } 252 }
247#endif 253#endif
248 nlh->nlmsg_len = skb_tail_pointer(skb) - previous_tail; 254
249 return skb->len; 255 return nlmsg_end(skb, nlh);
250nlmsg_failure:
251 nlmsg_trim(skb, previous_tail);
252 return -EMSGSIZE;
253} 256}
254 257
255static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, 258static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
@@ -298,20 +301,20 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s
298 if (err) 301 if (err)
299 goto out; 302 goto out;
300 303
301 err = -ENOMEM; 304 rep = nlmsg_new(sizeof(struct inet_diag_msg) +
302 rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) + 305 sizeof(struct inet_diag_meminfo) +
303 sizeof(struct inet_diag_meminfo) + 306 sizeof(struct tcp_info) + 64, GFP_KERNEL);
304 sizeof(struct tcp_info) + 64)), 307 if (!rep) {
305 GFP_KERNEL); 308 err = -ENOMEM;
306 if (!rep)
307 goto out; 309 goto out;
310 }
308 311
309 err = sk_diag_fill(sk, rep, req, 312 err = sk_diag_fill(sk, rep, req,
310 NETLINK_CB(in_skb).pid, 313 NETLINK_CB(in_skb).pid,
311 nlh->nlmsg_seq, 0, nlh); 314 nlh->nlmsg_seq, 0, nlh);
312 if (err < 0) { 315 if (err < 0) {
313 WARN_ON(err == -EMSGSIZE); 316 WARN_ON(err == -EMSGSIZE);
314 kfree_skb(rep); 317 nlmsg_free(rep);
315 goto out; 318 goto out;
316 } 319 }
317 err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid, 320 err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid,
@@ -592,15 +595,16 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
592{ 595{
593 const struct inet_request_sock *ireq = inet_rsk(req); 596 const struct inet_request_sock *ireq = inet_rsk(req);
594 struct inet_sock *inet = inet_sk(sk); 597 struct inet_sock *inet = inet_sk(sk);
595 unsigned char *b = skb_tail_pointer(skb);
596 struct inet_diag_msg *r; 598 struct inet_diag_msg *r;
597 struct nlmsghdr *nlh; 599 struct nlmsghdr *nlh;
598 long tmo; 600 long tmo;
599 601
600 nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r)); 602 nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r),
601 nlh->nlmsg_flags = NLM_F_MULTI; 603 NLM_F_MULTI);
602 r = NLMSG_DATA(nlh); 604 if (!nlh)
605 return -EMSGSIZE;
603 606
607 r = nlmsg_data(nlh);
604 r->idiag_family = sk->sk_family; 608 r->idiag_family = sk->sk_family;
605 r->idiag_state = TCP_SYN_RECV; 609 r->idiag_state = TCP_SYN_RECV;
606 r->idiag_timer = 1; 610 r->idiag_timer = 1;
@@ -628,13 +632,8 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
628 *(struct in6_addr *)r->id.idiag_dst = inet6_rsk(req)->rmt_addr; 632 *(struct in6_addr *)r->id.idiag_dst = inet6_rsk(req)->rmt_addr;
629 } 633 }
630#endif 634#endif
631 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
632
633 return skb->len;
634 635
635nlmsg_failure: 636 return nlmsg_end(skb, nlh);
636 nlmsg_trim(skb, b);
637 return -1;
638} 637}
639 638
640static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, 639static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
@@ -892,7 +891,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
892 if (nlmsg_attrlen(cb->nlh, hdrlen)) 891 if (nlmsg_attrlen(cb->nlh, hdrlen))
893 bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE); 892 bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE);
894 893
895 return __inet_diag_dump(skb, cb, (struct inet_diag_req_v2 *)NLMSG_DATA(cb->nlh), bc); 894 return __inet_diag_dump(skb, cb, nlmsg_data(cb->nlh), bc);
896} 895}
897 896
898static inline int inet_diag_type2proto(int type) 897static inline int inet_diag_type2proto(int type)
@@ -909,7 +908,7 @@ static inline int inet_diag_type2proto(int type)
909 908
910static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *cb) 909static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *cb)
911{ 910{
912 struct inet_diag_req *rc = NLMSG_DATA(cb->nlh); 911 struct inet_diag_req *rc = nlmsg_data(cb->nlh);
913 struct inet_diag_req_v2 req; 912 struct inet_diag_req_v2 req;
914 struct nlattr *bc = NULL; 913 struct nlattr *bc = NULL;
915 int hdrlen = sizeof(struct inet_diag_req); 914 int hdrlen = sizeof(struct inet_diag_req);
@@ -929,7 +928,7 @@ static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *c
929static int inet_diag_get_exact_compat(struct sk_buff *in_skb, 928static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
930 const struct nlmsghdr *nlh) 929 const struct nlmsghdr *nlh)
931{ 930{
932 struct inet_diag_req *rc = NLMSG_DATA(nlh); 931 struct inet_diag_req *rc = nlmsg_data(nlh);
933 struct inet_diag_req_v2 req; 932 struct inet_diag_req_v2 req;
934 933
935 req.sdiag_family = rc->idiag_family; 934 req.sdiag_family = rc->idiag_family;
@@ -996,7 +995,7 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
996 } 995 }
997 } 996 }
998 997
999 return inet_diag_get_exact(skb, h, (struct inet_diag_req_v2 *)NLMSG_DATA(h)); 998 return inet_diag_get_exact(skb, h, nlmsg_data(h));
1000} 999}
1001 1000
1002static const struct sock_diag_handler inet_diag_handler = { 1001static const struct sock_diag_handler inet_diag_handler = {
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 5ff2a51b6d0c..85190e69297b 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -243,12 +243,12 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
243 if (q == NULL) 243 if (q == NULL)
244 return NULL; 244 return NULL;
245 245
246 q->net = nf;
246 f->constructor(q, arg); 247 f->constructor(q, arg);
247 atomic_add(f->qsize, &nf->mem); 248 atomic_add(f->qsize, &nf->mem);
248 setup_timer(&q->timer, f->frag_expire, (unsigned long)q); 249 setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
249 spin_lock_init(&q->lock); 250 spin_lock_init(&q->lock);
250 atomic_set(&q->refcnt, 1); 251 atomic_set(&q->refcnt, 1);
251 q->net = nf;
252 252
253 return q; 253 return q;
254} 254}
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index dfba343b2509..da90a8cab614 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -82,23 +82,39 @@ static const struct inet_peer peer_fake_node = {
82 .avl_height = 0 82 .avl_height = 0
83}; 83};
84 84
85struct inet_peer_base { 85void inet_peer_base_init(struct inet_peer_base *bp)
86 struct inet_peer __rcu *root; 86{
87 seqlock_t lock; 87 bp->root = peer_avl_empty_rcu;
88 int total; 88 seqlock_init(&bp->lock);
89}; 89 bp->flush_seq = ~0U;
90 bp->total = 0;
91}
92EXPORT_SYMBOL_GPL(inet_peer_base_init);
90 93
91static struct inet_peer_base v4_peers = { 94static atomic_t v4_seq = ATOMIC_INIT(0);
92 .root = peer_avl_empty_rcu, 95static atomic_t v6_seq = ATOMIC_INIT(0);
93 .lock = __SEQLOCK_UNLOCKED(v4_peers.lock),
94 .total = 0,
95};
96 96
97static struct inet_peer_base v6_peers = { 97static atomic_t *inetpeer_seq_ptr(int family)
98 .root = peer_avl_empty_rcu, 98{
99 .lock = __SEQLOCK_UNLOCKED(v6_peers.lock), 99 return (family == AF_INET ? &v4_seq : &v6_seq);
100 .total = 0, 100}
101}; 101
102static inline void flush_check(struct inet_peer_base *base, int family)
103{
104 atomic_t *fp = inetpeer_seq_ptr(family);
105
106 if (unlikely(base->flush_seq != atomic_read(fp))) {
107 inetpeer_invalidate_tree(base);
108 base->flush_seq = atomic_read(fp);
109 }
110}
111
112void inetpeer_invalidate_family(int family)
113{
114 atomic_t *fp = inetpeer_seq_ptr(family);
115
116 atomic_inc(fp);
117}
102 118
103#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ 119#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
104 120
@@ -110,7 +126,7 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min
110 126
111static void inetpeer_gc_worker(struct work_struct *work) 127static void inetpeer_gc_worker(struct work_struct *work)
112{ 128{
113 struct inet_peer *p, *n; 129 struct inet_peer *p, *n, *c;
114 LIST_HEAD(list); 130 LIST_HEAD(list);
115 131
116 spin_lock_bh(&gc_lock); 132 spin_lock_bh(&gc_lock);
@@ -122,17 +138,19 @@ static void inetpeer_gc_worker(struct work_struct *work)
122 138
123 list_for_each_entry_safe(p, n, &list, gc_list) { 139 list_for_each_entry_safe(p, n, &list, gc_list) {
124 140
125 if(need_resched()) 141 if (need_resched())
126 cond_resched(); 142 cond_resched();
127 143
128 if (p->avl_left != peer_avl_empty) { 144 c = rcu_dereference_protected(p->avl_left, 1);
129 list_add_tail(&p->avl_left->gc_list, &list); 145 if (c != peer_avl_empty) {
130 p->avl_left = peer_avl_empty; 146 list_add_tail(&c->gc_list, &list);
147 p->avl_left = peer_avl_empty_rcu;
131 } 148 }
132 149
133 if (p->avl_right != peer_avl_empty) { 150 c = rcu_dereference_protected(p->avl_right, 1);
134 list_add_tail(&p->avl_right->gc_list, &list); 151 if (c != peer_avl_empty) {
135 p->avl_right = peer_avl_empty; 152 list_add_tail(&c->gc_list, &list);
153 p->avl_right = peer_avl_empty_rcu;
136 } 154 }
137 155
138 n = list_entry(p->gc_list.next, struct inet_peer, gc_list); 156 n = list_entry(p->gc_list.next, struct inet_peer, gc_list);
@@ -401,11 +419,6 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base,
401 call_rcu(&p->rcu, inetpeer_free_rcu); 419 call_rcu(&p->rcu, inetpeer_free_rcu);
402} 420}
403 421
404static struct inet_peer_base *family_to_base(int family)
405{
406 return family == AF_INET ? &v4_peers : &v6_peers;
407}
408
409/* perform garbage collect on all items stacked during a lookup */ 422/* perform garbage collect on all items stacked during a lookup */
410static int inet_peer_gc(struct inet_peer_base *base, 423static int inet_peer_gc(struct inet_peer_base *base,
411 struct inet_peer __rcu **stack[PEER_MAXDEPTH], 424 struct inet_peer __rcu **stack[PEER_MAXDEPTH],
@@ -443,14 +456,17 @@ static int inet_peer_gc(struct inet_peer_base *base,
443 return cnt; 456 return cnt;
444} 457}
445 458
446struct inet_peer *inet_getpeer(const struct inetpeer_addr *daddr, int create) 459struct inet_peer *inet_getpeer(struct inet_peer_base *base,
460 const struct inetpeer_addr *daddr,
461 int create)
447{ 462{
448 struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; 463 struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
449 struct inet_peer_base *base = family_to_base(daddr->family);
450 struct inet_peer *p; 464 struct inet_peer *p;
451 unsigned int sequence; 465 unsigned int sequence;
452 int invalidated, gccnt = 0; 466 int invalidated, gccnt = 0;
453 467
468 flush_check(base, daddr->family);
469
454 /* Attempt a lockless lookup first. 470 /* Attempt a lockless lookup first.
455 * Because of a concurrent writer, we might not find an existing entry. 471 * Because of a concurrent writer, we might not find an existing entry.
456 */ 472 */
@@ -571,26 +587,19 @@ static void inetpeer_inval_rcu(struct rcu_head *head)
571 schedule_delayed_work(&gc_work, gc_delay); 587 schedule_delayed_work(&gc_work, gc_delay);
572} 588}
573 589
574void inetpeer_invalidate_tree(int family) 590void inetpeer_invalidate_tree(struct inet_peer_base *base)
575{ 591{
576 struct inet_peer *old, *new, *prev; 592 struct inet_peer *root;
577 struct inet_peer_base *base = family_to_base(family);
578 593
579 write_seqlock_bh(&base->lock); 594 write_seqlock_bh(&base->lock);
580 595
581 old = base->root; 596 root = rcu_deref_locked(base->root, base);
582 if (old == peer_avl_empty_rcu) 597 if (root != peer_avl_empty) {
583 goto out; 598 base->root = peer_avl_empty_rcu;
584
585 new = peer_avl_empty_rcu;
586
587 prev = cmpxchg(&base->root, old, new);
588 if (prev == old) {
589 base->total = 0; 599 base->total = 0;
590 call_rcu(&prev->gc_rcu, inetpeer_inval_rcu); 600 call_rcu(&root->gc_rcu, inetpeer_inval_rcu);
591 } 601 }
592 602
593out:
594 write_sequnlock_bh(&base->lock); 603 write_sequnlock_bh(&base->lock);
595} 604}
596EXPORT_SYMBOL(inetpeer_invalidate_tree); 605EXPORT_SYMBOL(inetpeer_invalidate_tree);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 9dbd3dd6022d..8d07c973409c 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -171,6 +171,10 @@ static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb)
171static void ip4_frag_init(struct inet_frag_queue *q, void *a) 171static void ip4_frag_init(struct inet_frag_queue *q, void *a)
172{ 172{
173 struct ipq *qp = container_of(q, struct ipq, q); 173 struct ipq *qp = container_of(q, struct ipq, q);
174 struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4,
175 frags);
176 struct net *net = container_of(ipv4, struct net, ipv4);
177
174 struct ip4_create_arg *arg = a; 178 struct ip4_create_arg *arg = a;
175 179
176 qp->protocol = arg->iph->protocol; 180 qp->protocol = arg->iph->protocol;
@@ -180,7 +184,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a)
180 qp->daddr = arg->iph->daddr; 184 qp->daddr = arg->iph->daddr;
181 qp->user = arg->user; 185 qp->user = arg->user;
182 qp->peer = sysctl_ipfrag_max_dist ? 186 qp->peer = sysctl_ipfrag_max_dist ?
183 inet_getpeer_v4(arg->iph->saddr, 1) : NULL; 187 inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, 1) : NULL;
184} 188}
185 189
186static __inline__ void ip4_frag_free(struct inet_frag_queue *q) 190static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index f49047b79609..594cec35ac4d 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -516,9 +516,6 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
516 case ICMP_PORT_UNREACH: 516 case ICMP_PORT_UNREACH:
517 /* Impossible event. */ 517 /* Impossible event. */
518 return; 518 return;
519 case ICMP_FRAG_NEEDED:
520 /* Soft state for pmtu is maintained by IP core. */
521 return;
522 default: 519 default:
523 /* All others are translated to HOST_UNREACH. 520 /* All others are translated to HOST_UNREACH.
524 rfc2003 contains "deep thoughts" about NET_UNREACH, 521 rfc2003 contains "deep thoughts" about NET_UNREACH,
@@ -538,7 +535,16 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
538 flags & GRE_KEY ? 535 flags & GRE_KEY ?
539 *(((__be32 *)p) + (grehlen / 4) - 1) : 0, 536 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
540 p[1]); 537 p[1]);
541 if (t == NULL || t->parms.iph.daddr == 0 || 538 if (t == NULL)
539 goto out;
540
541 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
542 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
543 t->parms.link, 0, IPPROTO_GRE, 0);
544 goto out;
545 }
546
547 if (t->parms.iph.daddr == 0 ||
542 ipv4_is_multicast(t->parms.iph.daddr)) 548 ipv4_is_multicast(t->parms.iph.daddr))
543 goto out; 549 goto out;
544 550
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 8590144ca330..b27d4440f523 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -198,14 +198,13 @@ static int ip_local_deliver_finish(struct sk_buff *skb)
198 rcu_read_lock(); 198 rcu_read_lock();
199 { 199 {
200 int protocol = ip_hdr(skb)->protocol; 200 int protocol = ip_hdr(skb)->protocol;
201 int hash, raw;
202 const struct net_protocol *ipprot; 201 const struct net_protocol *ipprot;
202 int raw;
203 203
204 resubmit: 204 resubmit:
205 raw = raw_local_deliver(skb, protocol); 205 raw = raw_local_deliver(skb, protocol);
206 206
207 hash = protocol & (MAX_INET_PROTOS - 1); 207 ipprot = rcu_dereference(inet_protos[protocol]);
208 ipprot = rcu_dereference(inet_protos[hash]);
209 if (ipprot != NULL) { 208 if (ipprot != NULL) {
210 int ret; 209 int ret;
211 210
@@ -314,26 +313,33 @@ drop:
314 return true; 313 return true;
315} 314}
316 315
316int sysctl_ip_early_demux __read_mostly = 1;
317
317static int ip_rcv_finish(struct sk_buff *skb) 318static int ip_rcv_finish(struct sk_buff *skb)
318{ 319{
319 const struct iphdr *iph = ip_hdr(skb); 320 const struct iphdr *iph = ip_hdr(skb);
320 struct rtable *rt; 321 struct rtable *rt;
321 322
323 if (sysctl_ip_early_demux && !skb_dst(skb)) {
324 const struct net_protocol *ipprot;
325 int protocol = iph->protocol;
326
327 rcu_read_lock();
328 ipprot = rcu_dereference(inet_protos[protocol]);
329 if (ipprot && ipprot->early_demux)
330 ipprot->early_demux(skb);
331 rcu_read_unlock();
332 }
333
322 /* 334 /*
323 * Initialise the virtual path cache for the packet. It describes 335 * Initialise the virtual path cache for the packet. It describes
324 * how the packet travels inside Linux networking. 336 * how the packet travels inside Linux networking.
325 */ 337 */
326 if (skb_dst(skb) == NULL) { 338 if (!skb_dst(skb)) {
327 int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, 339 int err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
328 iph->tos, skb->dev); 340 iph->tos, skb->dev);
329 if (unlikely(err)) { 341 if (unlikely(err)) {
330 if (err == -EHOSTUNREACH) 342 if (err == -EXDEV)
331 IP_INC_STATS_BH(dev_net(skb->dev),
332 IPSTATS_MIB_INADDRERRORS);
333 else if (err == -ENETUNREACH)
334 IP_INC_STATS_BH(dev_net(skb->dev),
335 IPSTATS_MIB_INNOROUTES);
336 else if (err == -EXDEV)
337 NET_INC_STATS_BH(dev_net(skb->dev), 343 NET_INC_STATS_BH(dev_net(skb->dev),
338 LINUX_MIB_IPRPFILTER); 344 LINUX_MIB_IPRPFILTER);
339 goto drop; 345 goto drop;
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 708b99494e23..766dfe56885a 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -27,6 +27,7 @@
27#include <net/icmp.h> 27#include <net/icmp.h>
28#include <net/route.h> 28#include <net/route.h>
29#include <net/cipso_ipv4.h> 29#include <net/cipso_ipv4.h>
30#include <net/ip_fib.h>
30 31
31/* 32/*
32 * Write options to IP header, record destination address to 33 * Write options to IP header, record destination address to
@@ -104,7 +105,7 @@ int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb)
104 sptr = skb_network_header(skb); 105 sptr = skb_network_header(skb);
105 dptr = dopt->__data; 106 dptr = dopt->__data;
106 107
107 daddr = skb_rtable(skb)->rt_spec_dst; 108 daddr = fib_compute_spec_dst(skb);
108 109
109 if (sopt->rr) { 110 if (sopt->rr) {
110 optlen = sptr[sopt->rr+1]; 111 optlen = sptr[sopt->rr+1];
@@ -250,15 +251,14 @@ void ip_options_fragment(struct sk_buff *skb)
250int ip_options_compile(struct net *net, 251int ip_options_compile(struct net *net,
251 struct ip_options *opt, struct sk_buff *skb) 252 struct ip_options *opt, struct sk_buff *skb)
252{ 253{
253 int l; 254 __be32 spec_dst = (__force __be32) 0;
254 unsigned char *iph;
255 unsigned char *optptr;
256 int optlen;
257 unsigned char *pp_ptr = NULL; 255 unsigned char *pp_ptr = NULL;
258 struct rtable *rt = NULL; 256 unsigned char *optptr;
257 unsigned char *iph;
258 int optlen, l;
259 259
260 if (skb != NULL) { 260 if (skb != NULL) {
261 rt = skb_rtable(skb); 261 spec_dst = fib_compute_spec_dst(skb);
262 optptr = (unsigned char *)&(ip_hdr(skb)[1]); 262 optptr = (unsigned char *)&(ip_hdr(skb)[1]);
263 } else 263 } else
264 optptr = opt->__data; 264 optptr = opt->__data;
@@ -330,8 +330,8 @@ int ip_options_compile(struct net *net,
330 pp_ptr = optptr + 2; 330 pp_ptr = optptr + 2;
331 goto error; 331 goto error;
332 } 332 }
333 if (rt) { 333 if (skb) {
334 memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); 334 memcpy(&optptr[optptr[2]-1], &spec_dst, 4);
335 opt->is_changed = 1; 335 opt->is_changed = 1;
336 } 336 }
337 optptr[2] += 4; 337 optptr[2] += 4;
@@ -372,8 +372,8 @@ int ip_options_compile(struct net *net,
372 goto error; 372 goto error;
373 } 373 }
374 opt->ts = optptr - iph; 374 opt->ts = optptr - iph;
375 if (rt) { 375 if (skb) {
376 memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); 376 memcpy(&optptr[optptr[2]-1], &spec_dst, 4);
377 timeptr = &optptr[optptr[2]+3]; 377 timeptr = &optptr[optptr[2]+3];
378 } 378 }
379 opt->ts_needaddr = 1; 379 opt->ts_needaddr = 1;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 451f97c42eb4..2630900e480a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -113,19 +113,6 @@ int ip_local_out(struct sk_buff *skb)
113} 113}
114EXPORT_SYMBOL_GPL(ip_local_out); 114EXPORT_SYMBOL_GPL(ip_local_out);
115 115
116/* dev_loopback_xmit for use with netfilter. */
117static int ip_dev_loopback_xmit(struct sk_buff *newskb)
118{
119 skb_reset_mac_header(newskb);
120 __skb_pull(newskb, skb_network_offset(newskb));
121 newskb->pkt_type = PACKET_LOOPBACK;
122 newskb->ip_summed = CHECKSUM_UNNECESSARY;
123 WARN_ON(!skb_dst(newskb));
124 skb_dst_force(newskb);
125 netif_rx_ni(newskb);
126 return 0;
127}
128
129static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) 116static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
130{ 117{
131 int ttl = inet->uc_ttl; 118 int ttl = inet->uc_ttl;
@@ -200,7 +187,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
200 } 187 }
201 if (skb->sk) 188 if (skb->sk)
202 skb_set_owner_w(skb2, skb->sk); 189 skb_set_owner_w(skb2, skb->sk);
203 kfree_skb(skb); 190 consume_skb(skb);
204 skb = skb2; 191 skb = skb2;
205 } 192 }
206 193
@@ -281,7 +268,7 @@ int ip_mc_output(struct sk_buff *skb)
281 if (newskb) 268 if (newskb)
282 NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, 269 NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
283 newskb, NULL, newskb->dev, 270 newskb, NULL, newskb->dev,
284 ip_dev_loopback_xmit); 271 dev_loopback_xmit);
285 } 272 }
286 273
287 /* Multicasts with ttl 0 must not go beyond the host */ 274 /* Multicasts with ttl 0 must not go beyond the host */
@@ -296,7 +283,7 @@ int ip_mc_output(struct sk_buff *skb)
296 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); 283 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
297 if (newskb) 284 if (newskb)
298 NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb, 285 NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb,
299 NULL, newskb->dev, ip_dev_loopback_xmit); 286 NULL, newskb->dev, dev_loopback_xmit);
300 } 287 }
301 288
302 return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, 289 return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL,
@@ -709,7 +696,7 @@ slow_path:
709 696
710 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES); 697 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
711 } 698 }
712 kfree_skb(skb); 699 consume_skb(skb);
713 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS); 700 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
714 return err; 701 return err;
715 702
@@ -1472,13 +1459,14 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset,
1472 1459
1473/* 1460/*
1474 * Generic function to send a packet as reply to another packet. 1461 * Generic function to send a packet as reply to another packet.
1475 * Used to send TCP resets so far. ICMP should use this function too. 1462 * Used to send TCP resets so far.
1476 * 1463 *
1477 * Should run single threaded per socket because it uses the sock 1464 * Should run single threaded per socket because it uses the sock
1478 * structure to pass arguments. 1465 * structure to pass arguments.
1479 */ 1466 */
1480void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, 1467void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
1481 const struct ip_reply_arg *arg, unsigned int len) 1468 __be32 saddr, const struct ip_reply_arg *arg,
1469 unsigned int len)
1482{ 1470{
1483 struct inet_sock *inet = inet_sk(sk); 1471 struct inet_sock *inet = inet_sk(sk);
1484 struct ip_options_data replyopts; 1472 struct ip_options_data replyopts;
@@ -1504,7 +1492,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
1504 RT_TOS(arg->tos), 1492 RT_TOS(arg->tos),
1505 RT_SCOPE_UNIVERSE, sk->sk_protocol, 1493 RT_SCOPE_UNIVERSE, sk->sk_protocol,
1506 ip_reply_arg_flowi_flags(arg), 1494 ip_reply_arg_flowi_flags(arg),
1507 daddr, rt->rt_spec_dst, 1495 daddr, saddr,
1508 tcp_hdr(skb)->source, tcp_hdr(skb)->dest); 1496 tcp_hdr(skb)->source, tcp_hdr(skb)->dest);
1509 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); 1497 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
1510 rt = ip_route_output_key(sock_net(sk), &fl4); 1498 rt = ip_route_output_key(sock_net(sk), &fl4);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 0d11f234d615..de29f46f68b0 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -40,6 +40,7 @@
40#if IS_ENABLED(CONFIG_IPV6) 40#if IS_ENABLED(CONFIG_IPV6)
41#include <net/transp_v6.h> 41#include <net/transp_v6.h>
42#endif 42#endif
43#include <net/ip_fib.h>
43 44
44#include <linux/errqueue.h> 45#include <linux/errqueue.h>
45#include <asm/uaccess.h> 46#include <asm/uaccess.h>
@@ -1019,8 +1020,8 @@ e_inval:
1019 * @sk: socket 1020 * @sk: socket
1020 * @skb: buffer 1021 * @skb: buffer
1021 * 1022 *
1022 * To support IP_CMSG_PKTINFO option, we store rt_iif and rt_spec_dst 1023 * To support IP_CMSG_PKTINFO option, we store rt_iif and specific
1023 * in skb->cb[] before dst drop. 1024 * destination in skb->cb[] before dst drop.
1024 * This way, receiver doesnt make cache line misses to read rtable. 1025 * This way, receiver doesnt make cache line misses to read rtable.
1025 */ 1026 */
1026void ipv4_pktinfo_prepare(struct sk_buff *skb) 1027void ipv4_pktinfo_prepare(struct sk_buff *skb)
@@ -1030,7 +1031,7 @@ void ipv4_pktinfo_prepare(struct sk_buff *skb)
1030 1031
1031 if (rt) { 1032 if (rt) {
1032 pktinfo->ipi_ifindex = rt->rt_iif; 1033 pktinfo->ipi_ifindex = rt->rt_iif;
1033 pktinfo->ipi_spec_dst.s_addr = rt->rt_spec_dst; 1034 pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
1034 } else { 1035 } else {
1035 pktinfo->ipi_ifindex = 0; 1036 pktinfo->ipi_ifindex = 0;
1036 pktinfo->ipi_spec_dst.s_addr = 0; 1037 pktinfo->ipi_spec_dst.s_addr = 0;
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 63b64c45a826..b91375482d84 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -42,6 +42,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
42 return; 42 return;
43 NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%pI4\n", 43 NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%pI4\n",
44 spi, &iph->daddr); 44 spi, &iph->daddr);
45 ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0);
45 xfrm_state_put(x); 46 xfrm_state_put(x);
46} 47}
47 48
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 2d0f99bf61b3..715338a1b205 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -348,9 +348,6 @@ static int ipip_err(struct sk_buff *skb, u32 info)
348 case ICMP_PORT_UNREACH: 348 case ICMP_PORT_UNREACH:
349 /* Impossible event. */ 349 /* Impossible event. */
350 return 0; 350 return 0;
351 case ICMP_FRAG_NEEDED:
352 /* Soft state for pmtu is maintained by IP core. */
353 return 0;
354 default: 351 default:
355 /* All others are translated to HOST_UNREACH. 352 /* All others are translated to HOST_UNREACH.
356 rfc2003 contains "deep thoughts" about NET_UNREACH, 353 rfc2003 contains "deep thoughts" about NET_UNREACH,
@@ -369,7 +366,17 @@ static int ipip_err(struct sk_buff *skb, u32 info)
369 366
370 rcu_read_lock(); 367 rcu_read_lock();
371 t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); 368 t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
372 if (t == NULL || t->parms.iph.daddr == 0) 369 if (t == NULL)
370 goto out;
371
372 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
373 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
374 t->dev->ifindex, 0, IPPROTO_IPIP, 0);
375 err = 0;
376 goto out;
377 }
378
379 if (t->parms.iph.daddr == 0)
373 goto out; 380 goto out;
374 381
375 err = 0; 382 err = 0;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index c94bbc6f2ba3..b4ac39f11d19 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2006,37 +2006,37 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2006{ 2006{
2007 int ct; 2007 int ct;
2008 struct rtnexthop *nhp; 2008 struct rtnexthop *nhp;
2009 u8 *b = skb_tail_pointer(skb); 2009 struct nlattr *mp_attr;
2010 struct rtattr *mp_head;
2011 2010
2012 /* If cache is unresolved, don't try to parse IIF and OIF */ 2011 /* If cache is unresolved, don't try to parse IIF and OIF */
2013 if (c->mfc_parent >= MAXVIFS) 2012 if (c->mfc_parent >= MAXVIFS)
2014 return -ENOENT; 2013 return -ENOENT;
2015 2014
2016 if (VIF_EXISTS(mrt, c->mfc_parent)) 2015 if (VIF_EXISTS(mrt, c->mfc_parent) &&
2017 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex); 2016 nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
2017 return -EMSGSIZE;
2018 2018
2019 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); 2019 if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH)))
2020 return -EMSGSIZE;
2020 2021
2021 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { 2022 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2022 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { 2023 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2023 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) 2024 if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) {
2024 goto rtattr_failure; 2025 nla_nest_cancel(skb, mp_attr);
2025 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); 2026 return -EMSGSIZE;
2027 }
2028
2026 nhp->rtnh_flags = 0; 2029 nhp->rtnh_flags = 0;
2027 nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; 2030 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2028 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex; 2031 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
2029 nhp->rtnh_len = sizeof(*nhp); 2032 nhp->rtnh_len = sizeof(*nhp);
2030 } 2033 }
2031 } 2034 }
2032 mp_head->rta_type = RTA_MULTIPATH; 2035
2033 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head; 2036 nla_nest_end(skb, mp_attr);
2037
2034 rtm->rtm_type = RTN_MULTICAST; 2038 rtm->rtm_type = RTN_MULTICAST;
2035 return 1; 2039 return 1;
2036
2037rtattr_failure:
2038 nlmsg_trim(skb, b);
2039 return -EMSGSIZE;
2040} 2040}
2041 2041
2042int ipmr_get_route(struct net *net, struct sk_buff *skb, 2042int ipmr_get_route(struct net *net, struct sk_buff *skb,
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index ba5756d20165..99b3f53f16a7 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -196,12 +196,15 @@ static void ipt_ulog_packet(unsigned int hooknum,
196 196
197 pr_debug("qlen %d, qthreshold %Zu\n", ub->qlen, loginfo->qthreshold); 197 pr_debug("qlen %d, qthreshold %Zu\n", ub->qlen, loginfo->qthreshold);
198 198
199 /* NLMSG_PUT contains a hidden goto nlmsg_failure !!! */ 199 nlh = nlmsg_put(ub->skb, 0, ub->qlen, ULOG_NL_EVENT,
200 nlh = NLMSG_PUT(ub->skb, 0, ub->qlen, ULOG_NL_EVENT, 200 sizeof(*pm)+copy_len, 0);
201 sizeof(*pm)+copy_len); 201 if (!nlh) {
202 pr_debug("error during nlmsg_put\n");
203 goto out_unlock;
204 }
202 ub->qlen++; 205 ub->qlen++;
203 206
204 pm = NLMSG_DATA(nlh); 207 pm = nlmsg_data(nlh);
205 208
206 /* We might not have a timestamp, get one */ 209 /* We might not have a timestamp, get one */
207 if (skb->tstamp.tv64 == 0) 210 if (skb->tstamp.tv64 == 0)
@@ -261,13 +264,11 @@ static void ipt_ulog_packet(unsigned int hooknum,
261 nlh->nlmsg_type = NLMSG_DONE; 264 nlh->nlmsg_type = NLMSG_DONE;
262 ulog_send(groupnum); 265 ulog_send(groupnum);
263 } 266 }
264 267out_unlock:
265 spin_unlock_bh(&ulog_lock); 268 spin_unlock_bh(&ulog_lock);
266 269
267 return; 270 return;
268 271
269nlmsg_failure:
270 pr_debug("error during NLMSG_PUT\n");
271alloc_failure: 272alloc_failure:
272 pr_debug("Error building netlink message\n"); 273 pr_debug("Error building netlink message\n");
273 spin_unlock_bh(&ulog_lock); 274 spin_unlock_bh(&ulog_lock);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 91747d4ebc26..e7ff2dcab6ce 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -95,11 +95,11 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
95 return NF_ACCEPT; 95 return NF_ACCEPT;
96} 96}
97 97
98static unsigned int ipv4_confirm(unsigned int hooknum, 98static unsigned int ipv4_helper(unsigned int hooknum,
99 struct sk_buff *skb, 99 struct sk_buff *skb,
100 const struct net_device *in, 100 const struct net_device *in,
101 const struct net_device *out, 101 const struct net_device *out,
102 int (*okfn)(struct sk_buff *)) 102 int (*okfn)(struct sk_buff *))
103{ 103{
104 struct nf_conn *ct; 104 struct nf_conn *ct;
105 enum ip_conntrack_info ctinfo; 105 enum ip_conntrack_info ctinfo;
@@ -110,24 +110,38 @@ static unsigned int ipv4_confirm(unsigned int hooknum,
110 /* This is where we call the helper: as the packet goes out. */ 110 /* This is where we call the helper: as the packet goes out. */
111 ct = nf_ct_get(skb, &ctinfo); 111 ct = nf_ct_get(skb, &ctinfo);
112 if (!ct || ctinfo == IP_CT_RELATED_REPLY) 112 if (!ct || ctinfo == IP_CT_RELATED_REPLY)
113 goto out; 113 return NF_ACCEPT;
114 114
115 help = nfct_help(ct); 115 help = nfct_help(ct);
116 if (!help) 116 if (!help)
117 goto out; 117 return NF_ACCEPT;
118 118
119 /* rcu_read_lock()ed by nf_hook_slow */ 119 /* rcu_read_lock()ed by nf_hook_slow */
120 helper = rcu_dereference(help->helper); 120 helper = rcu_dereference(help->helper);
121 if (!helper) 121 if (!helper)
122 goto out; 122 return NF_ACCEPT;
123 123
124 ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), 124 ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
125 ct, ctinfo); 125 ct, ctinfo);
126 if (ret != NF_ACCEPT) { 126 if (ret != NF_ACCEPT && (ret & NF_VERDICT_MASK) != NF_QUEUE) {
127 nf_log_packet(NFPROTO_IPV4, hooknum, skb, in, out, NULL, 127 nf_log_packet(NFPROTO_IPV4, hooknum, skb, in, out, NULL,
128 "nf_ct_%s: dropping packet", helper->name); 128 "nf_ct_%s: dropping packet", helper->name);
129 return ret;
130 } 129 }
130 return ret;
131}
132
133static unsigned int ipv4_confirm(unsigned int hooknum,
134 struct sk_buff *skb,
135 const struct net_device *in,
136 const struct net_device *out,
137 int (*okfn)(struct sk_buff *))
138{
139 struct nf_conn *ct;
140 enum ip_conntrack_info ctinfo;
141
142 ct = nf_ct_get(skb, &ctinfo);
143 if (!ct || ctinfo == IP_CT_RELATED_REPLY)
144 goto out;
131 145
132 /* adjust seqs for loopback traffic only in outgoing direction */ 146 /* adjust seqs for loopback traffic only in outgoing direction */
133 if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && 147 if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
@@ -185,6 +199,13 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
185 .priority = NF_IP_PRI_CONNTRACK, 199 .priority = NF_IP_PRI_CONNTRACK,
186 }, 200 },
187 { 201 {
202 .hook = ipv4_helper,
203 .owner = THIS_MODULE,
204 .pf = NFPROTO_IPV4,
205 .hooknum = NF_INET_POST_ROUTING,
206 .priority = NF_IP_PRI_CONNTRACK_HELPER,
207 },
208 {
188 .hook = ipv4_confirm, 209 .hook = ipv4_confirm,
189 .owner = THIS_MODULE, 210 .owner = THIS_MODULE,
190 .pf = NFPROTO_IPV4, 211 .pf = NFPROTO_IPV4,
@@ -192,6 +213,13 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
192 .priority = NF_IP_PRI_CONNTRACK_CONFIRM, 213 .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
193 }, 214 },
194 { 215 {
216 .hook = ipv4_helper,
217 .owner = THIS_MODULE,
218 .pf = NFPROTO_IPV4,
219 .hooknum = NF_INET_LOCAL_IN,
220 .priority = NF_IP_PRI_CONNTRACK_HELPER,
221 },
222 {
195 .hook = ipv4_confirm, 223 .hook = ipv4_confirm,
196 .owner = THIS_MODULE, 224 .owner = THIS_MODULE,
197 .pf = NFPROTO_IPV4, 225 .pf = NFPROTO_IPV4,
@@ -207,35 +235,30 @@ static int log_invalid_proto_max = 255;
207static ctl_table ip_ct_sysctl_table[] = { 235static ctl_table ip_ct_sysctl_table[] = {
208 { 236 {
209 .procname = "ip_conntrack_max", 237 .procname = "ip_conntrack_max",
210 .data = &nf_conntrack_max,
211 .maxlen = sizeof(int), 238 .maxlen = sizeof(int),
212 .mode = 0644, 239 .mode = 0644,
213 .proc_handler = proc_dointvec, 240 .proc_handler = proc_dointvec,
214 }, 241 },
215 { 242 {
216 .procname = "ip_conntrack_count", 243 .procname = "ip_conntrack_count",
217 .data = &init_net.ct.count,
218 .maxlen = sizeof(int), 244 .maxlen = sizeof(int),
219 .mode = 0444, 245 .mode = 0444,
220 .proc_handler = proc_dointvec, 246 .proc_handler = proc_dointvec,
221 }, 247 },
222 { 248 {
223 .procname = "ip_conntrack_buckets", 249 .procname = "ip_conntrack_buckets",
224 .data = &init_net.ct.htable_size,
225 .maxlen = sizeof(unsigned int), 250 .maxlen = sizeof(unsigned int),
226 .mode = 0444, 251 .mode = 0444,
227 .proc_handler = proc_dointvec, 252 .proc_handler = proc_dointvec,
228 }, 253 },
229 { 254 {
230 .procname = "ip_conntrack_checksum", 255 .procname = "ip_conntrack_checksum",
231 .data = &init_net.ct.sysctl_checksum,
232 .maxlen = sizeof(int), 256 .maxlen = sizeof(int),
233 .mode = 0644, 257 .mode = 0644,
234 .proc_handler = proc_dointvec, 258 .proc_handler = proc_dointvec,
235 }, 259 },
236 { 260 {
237 .procname = "ip_conntrack_log_invalid", 261 .procname = "ip_conntrack_log_invalid",
238 .data = &init_net.ct.sysctl_log_invalid,
239 .maxlen = sizeof(unsigned int), 262 .maxlen = sizeof(unsigned int),
240 .mode = 0644, 263 .mode = 0644,
241 .proc_handler = proc_dointvec_minmax, 264 .proc_handler = proc_dointvec_minmax,
@@ -351,6 +374,25 @@ static struct nf_sockopt_ops so_getorigdst = {
351 .owner = THIS_MODULE, 374 .owner = THIS_MODULE,
352}; 375};
353 376
377static int ipv4_init_net(struct net *net)
378{
379#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
380 struct nf_ip_net *in = &net->ct.nf_ct_proto;
381 in->ctl_table = kmemdup(ip_ct_sysctl_table,
382 sizeof(ip_ct_sysctl_table),
383 GFP_KERNEL);
384 if (!in->ctl_table)
385 return -ENOMEM;
386
387 in->ctl_table[0].data = &nf_conntrack_max;
388 in->ctl_table[1].data = &net->ct.count;
389 in->ctl_table[2].data = &net->ct.htable_size;
390 in->ctl_table[3].data = &net->ct.sysctl_checksum;
391 in->ctl_table[4].data = &net->ct.sysctl_log_invalid;
392#endif
393 return 0;
394}
395
354struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { 396struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
355 .l3proto = PF_INET, 397 .l3proto = PF_INET,
356 .name = "ipv4", 398 .name = "ipv4",
@@ -366,8 +408,8 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
366#endif 408#endif
367#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) 409#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
368 .ctl_table_path = "net/ipv4/netfilter", 410 .ctl_table_path = "net/ipv4/netfilter",
369 .ctl_table = ip_ct_sysctl_table,
370#endif 411#endif
412 .init_net = ipv4_init_net,
371 .me = THIS_MODULE, 413 .me = THIS_MODULE,
372}; 414};
373 415
@@ -378,6 +420,65 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
378MODULE_ALIAS("ip_conntrack"); 420MODULE_ALIAS("ip_conntrack");
379MODULE_LICENSE("GPL"); 421MODULE_LICENSE("GPL");
380 422
423static int ipv4_net_init(struct net *net)
424{
425 int ret = 0;
426
427 ret = nf_conntrack_l4proto_register(net,
428 &nf_conntrack_l4proto_tcp4);
429 if (ret < 0) {
430 pr_err("nf_conntrack_l4proto_tcp4 :protocol register failed\n");
431 goto out_tcp;
432 }
433 ret = nf_conntrack_l4proto_register(net,
434 &nf_conntrack_l4proto_udp4);
435 if (ret < 0) {
436 pr_err("nf_conntrack_l4proto_udp4 :protocol register failed\n");
437 goto out_udp;
438 }
439 ret = nf_conntrack_l4proto_register(net,
440 &nf_conntrack_l4proto_icmp);
441 if (ret < 0) {
442 pr_err("nf_conntrack_l4proto_icmp4 :protocol register failed\n");
443 goto out_icmp;
444 }
445 ret = nf_conntrack_l3proto_register(net,
446 &nf_conntrack_l3proto_ipv4);
447 if (ret < 0) {
448 pr_err("nf_conntrack_l3proto_ipv4 :protocol register failed\n");
449 goto out_ipv4;
450 }
451 return 0;
452out_ipv4:
453 nf_conntrack_l4proto_unregister(net,
454 &nf_conntrack_l4proto_icmp);
455out_icmp:
456 nf_conntrack_l4proto_unregister(net,
457 &nf_conntrack_l4proto_udp4);
458out_udp:
459 nf_conntrack_l4proto_unregister(net,
460 &nf_conntrack_l4proto_tcp4);
461out_tcp:
462 return ret;
463}
464
465static void ipv4_net_exit(struct net *net)
466{
467 nf_conntrack_l3proto_unregister(net,
468 &nf_conntrack_l3proto_ipv4);
469 nf_conntrack_l4proto_unregister(net,
470 &nf_conntrack_l4proto_icmp);
471 nf_conntrack_l4proto_unregister(net,
472 &nf_conntrack_l4proto_udp4);
473 nf_conntrack_l4proto_unregister(net,
474 &nf_conntrack_l4proto_tcp4);
475}
476
477static struct pernet_operations ipv4_net_ops = {
478 .init = ipv4_net_init,
479 .exit = ipv4_net_exit,
480};
481
381static int __init nf_conntrack_l3proto_ipv4_init(void) 482static int __init nf_conntrack_l3proto_ipv4_init(void)
382{ 483{
383 int ret = 0; 484 int ret = 0;
@@ -391,35 +492,17 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
391 return ret; 492 return ret;
392 } 493 }
393 494
394 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp4); 495 ret = register_pernet_subsys(&ipv4_net_ops);
395 if (ret < 0) { 496 if (ret < 0) {
396 pr_err("nf_conntrack_ipv4: can't register tcp.\n"); 497 pr_err("nf_conntrack_ipv4: can't register pernet ops\n");
397 goto cleanup_sockopt; 498 goto cleanup_sockopt;
398 } 499 }
399 500
400 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp4);
401 if (ret < 0) {
402 pr_err("nf_conntrack_ipv4: can't register udp.\n");
403 goto cleanup_tcp;
404 }
405
406 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmp);
407 if (ret < 0) {
408 pr_err("nf_conntrack_ipv4: can't register icmp.\n");
409 goto cleanup_udp;
410 }
411
412 ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4);
413 if (ret < 0) {
414 pr_err("nf_conntrack_ipv4: can't register ipv4\n");
415 goto cleanup_icmp;
416 }
417
418 ret = nf_register_hooks(ipv4_conntrack_ops, 501 ret = nf_register_hooks(ipv4_conntrack_ops,
419 ARRAY_SIZE(ipv4_conntrack_ops)); 502 ARRAY_SIZE(ipv4_conntrack_ops));
420 if (ret < 0) { 503 if (ret < 0) {
421 pr_err("nf_conntrack_ipv4: can't register hooks.\n"); 504 pr_err("nf_conntrack_ipv4: can't register hooks.\n");
422 goto cleanup_ipv4; 505 goto cleanup_pernet;
423 } 506 }
424#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) 507#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
425 ret = nf_conntrack_ipv4_compat_init(); 508 ret = nf_conntrack_ipv4_compat_init();
@@ -431,14 +514,8 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
431 cleanup_hooks: 514 cleanup_hooks:
432 nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); 515 nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
433#endif 516#endif
434 cleanup_ipv4: 517 cleanup_pernet:
435 nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4); 518 unregister_pernet_subsys(&ipv4_net_ops);
436 cleanup_icmp:
437 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp);
438 cleanup_udp:
439 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4);
440 cleanup_tcp:
441 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
442 cleanup_sockopt: 519 cleanup_sockopt:
443 nf_unregister_sockopt(&so_getorigdst); 520 nf_unregister_sockopt(&so_getorigdst);
444 return ret; 521 return ret;
@@ -451,10 +528,7 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void)
451 nf_conntrack_ipv4_compat_fini(); 528 nf_conntrack_ipv4_compat_fini();
452#endif 529#endif
453 nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); 530 nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
454 nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4); 531 unregister_pernet_subsys(&ipv4_net_ops);
455 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp);
456 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4);
457 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
458 nf_unregister_sockopt(&so_getorigdst); 532 nf_unregister_sockopt(&so_getorigdst);
459} 533}
460 534
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 0847e373d33c..041923cb67ad 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -23,6 +23,11 @@
23 23
24static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ; 24static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ;
25 25
26static inline struct nf_icmp_net *icmp_pernet(struct net *net)
27{
28 return &net->ct.nf_ct_proto.icmp;
29}
30
26static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, 31static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
27 struct nf_conntrack_tuple *tuple) 32 struct nf_conntrack_tuple *tuple)
28{ 33{
@@ -77,7 +82,7 @@ static int icmp_print_tuple(struct seq_file *s,
77 82
78static unsigned int *icmp_get_timeouts(struct net *net) 83static unsigned int *icmp_get_timeouts(struct net *net)
79{ 84{
80 return &nf_ct_icmp_timeout; 85 return &icmp_pernet(net)->timeout;
81} 86}
82 87
83/* Returns verdict for packet, or -1 for invalid. */ 88/* Returns verdict for packet, or -1 for invalid. */
@@ -274,16 +279,18 @@ static int icmp_nlattr_tuple_size(void)
274#include <linux/netfilter/nfnetlink.h> 279#include <linux/netfilter/nfnetlink.h>
275#include <linux/netfilter/nfnetlink_cttimeout.h> 280#include <linux/netfilter/nfnetlink_cttimeout.h>
276 281
277static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) 282static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[],
283 struct net *net, void *data)
278{ 284{
279 unsigned int *timeout = data; 285 unsigned int *timeout = data;
286 struct nf_icmp_net *in = icmp_pernet(net);
280 287
281 if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) { 288 if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) {
282 *timeout = 289 *timeout =
283 ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ; 290 ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ;
284 } else { 291 } else {
285 /* Set default ICMP timeout. */ 292 /* Set default ICMP timeout. */
286 *timeout = nf_ct_icmp_timeout; 293 *timeout = in->timeout;
287 } 294 }
288 return 0; 295 return 0;
289} 296}
@@ -308,11 +315,9 @@ icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = {
308#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 315#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
309 316
310#ifdef CONFIG_SYSCTL 317#ifdef CONFIG_SYSCTL
311static struct ctl_table_header *icmp_sysctl_header;
312static struct ctl_table icmp_sysctl_table[] = { 318static struct ctl_table icmp_sysctl_table[] = {
313 { 319 {
314 .procname = "nf_conntrack_icmp_timeout", 320 .procname = "nf_conntrack_icmp_timeout",
315 .data = &nf_ct_icmp_timeout,
316 .maxlen = sizeof(unsigned int), 321 .maxlen = sizeof(unsigned int),
317 .mode = 0644, 322 .mode = 0644,
318 .proc_handler = proc_dointvec_jiffies, 323 .proc_handler = proc_dointvec_jiffies,
@@ -323,7 +328,6 @@ static struct ctl_table icmp_sysctl_table[] = {
323static struct ctl_table icmp_compat_sysctl_table[] = { 328static struct ctl_table icmp_compat_sysctl_table[] = {
324 { 329 {
325 .procname = "ip_conntrack_icmp_timeout", 330 .procname = "ip_conntrack_icmp_timeout",
326 .data = &nf_ct_icmp_timeout,
327 .maxlen = sizeof(unsigned int), 331 .maxlen = sizeof(unsigned int),
328 .mode = 0644, 332 .mode = 0644,
329 .proc_handler = proc_dointvec_jiffies, 333 .proc_handler = proc_dointvec_jiffies,
@@ -333,6 +337,34 @@ static struct ctl_table icmp_compat_sysctl_table[] = {
333#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ 337#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
334#endif /* CONFIG_SYSCTL */ 338#endif /* CONFIG_SYSCTL */
335 339
340static int icmp_init_net(struct net *net)
341{
342 struct nf_icmp_net *in = icmp_pernet(net);
343 struct nf_proto_net *pn = (struct nf_proto_net *)in;
344 in->timeout = nf_ct_icmp_timeout;
345
346#ifdef CONFIG_SYSCTL
347 pn->ctl_table = kmemdup(icmp_sysctl_table,
348 sizeof(icmp_sysctl_table),
349 GFP_KERNEL);
350 if (!pn->ctl_table)
351 return -ENOMEM;
352 pn->ctl_table[0].data = &in->timeout;
353#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
354 pn->ctl_compat_table = kmemdup(icmp_compat_sysctl_table,
355 sizeof(icmp_compat_sysctl_table),
356 GFP_KERNEL);
357 if (!pn->ctl_compat_table) {
358 kfree(pn->ctl_table);
359 pn->ctl_table = NULL;
360 return -ENOMEM;
361 }
362 pn->ctl_compat_table[0].data = &in->timeout;
363#endif
364#endif
365 return 0;
366}
367
336struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = 368struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
337{ 369{
338 .l3proto = PF_INET, 370 .l3proto = PF_INET,
@@ -362,11 +394,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
362 .nla_policy = icmp_timeout_nla_policy, 394 .nla_policy = icmp_timeout_nla_policy,
363 }, 395 },
364#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 396#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
365#ifdef CONFIG_SYSCTL 397 .init_net = icmp_init_net,
366 .ctl_table_header = &icmp_sysctl_header,
367 .ctl_table = icmp_sysctl_table,
368#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
369 .ctl_compat_table = icmp_compat_sysctl_table,
370#endif
371#endif
372}; 398};
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 9bb1b8a37a22..742815518b0f 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -94,14 +94,14 @@ static struct nf_hook_ops ipv4_defrag_ops[] = {
94 { 94 {
95 .hook = ipv4_conntrack_defrag, 95 .hook = ipv4_conntrack_defrag,
96 .owner = THIS_MODULE, 96 .owner = THIS_MODULE,
97 .pf = PF_INET, 97 .pf = NFPROTO_IPV4,
98 .hooknum = NF_INET_PRE_ROUTING, 98 .hooknum = NF_INET_PRE_ROUTING,
99 .priority = NF_IP_PRI_CONNTRACK_DEFRAG, 99 .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
100 }, 100 },
101 { 101 {
102 .hook = ipv4_conntrack_defrag, 102 .hook = ipv4_conntrack_defrag,
103 .owner = THIS_MODULE, 103 .owner = THIS_MODULE,
104 .pf = PF_INET, 104 .pf = NFPROTO_IPV4,
105 .hooknum = NF_INET_LOCAL_OUT, 105 .hooknum = NF_INET_LOCAL_OUT,
106 .priority = NF_IP_PRI_CONNTRACK_DEFRAG, 106 .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
107 }, 107 },
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
index 7b22382ff0e9..3c04d24e2976 100644
--- a/net/ipv4/netfilter/nf_nat_amanda.c
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -13,10 +13,10 @@
13#include <linux/skbuff.h> 13#include <linux/skbuff.h>
14#include <linux/udp.h> 14#include <linux/udp.h>
15 15
16#include <net/netfilter/nf_nat_helper.h>
17#include <net/netfilter/nf_nat_rule.h>
18#include <net/netfilter/nf_conntrack_helper.h> 16#include <net/netfilter/nf_conntrack_helper.h>
19#include <net/netfilter/nf_conntrack_expect.h> 17#include <net/netfilter/nf_conntrack_expect.h>
18#include <net/netfilter/nf_nat_helper.h>
19#include <net/netfilter/nf_nat_rule.h>
20#include <linux/netfilter/nf_conntrack_amanda.h> 20#include <linux/netfilter/nf_conntrack_amanda.h>
21 21
22MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>"); 22MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index abb52adf5acd..44b082fd48ab 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -691,6 +691,10 @@ static struct nf_ct_helper_expectfn follow_master_nat = {
691 .expectfn = nf_nat_follow_master, 691 .expectfn = nf_nat_follow_master,
692}; 692};
693 693
694static struct nfq_ct_nat_hook nfq_ct_nat = {
695 .seq_adjust = nf_nat_tcp_seq_adjust,
696};
697
694static int __init nf_nat_init(void) 698static int __init nf_nat_init(void)
695{ 699{
696 size_t i; 700 size_t i;
@@ -731,6 +735,7 @@ static int __init nf_nat_init(void)
731 nfnetlink_parse_nat_setup); 735 nfnetlink_parse_nat_setup);
732 BUG_ON(nf_ct_nat_offset != NULL); 736 BUG_ON(nf_ct_nat_offset != NULL);
733 RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset); 737 RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset);
738 RCU_INIT_POINTER(nfq_ct_nat_hook, &nfq_ct_nat);
734 return 0; 739 return 0;
735 740
736 cleanup_extend: 741 cleanup_extend:
@@ -747,6 +752,7 @@ static void __exit nf_nat_cleanup(void)
747 RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL); 752 RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL);
748 RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL); 753 RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL);
749 RCU_INIT_POINTER(nf_ct_nat_offset, NULL); 754 RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
755 RCU_INIT_POINTER(nfq_ct_nat_hook, NULL);
750 synchronize_net(); 756 synchronize_net();
751} 757}
752 758
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index cad29c121318..c6784a18c1c4 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -95,7 +95,7 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct,
95 unsigned char **data, 95 unsigned char **data,
96 TransportAddress *taddr, int count) 96 TransportAddress *taddr, int count)
97{ 97{
98 const struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; 98 const struct nf_ct_h323_master *info = nfct_help_data(ct);
99 int dir = CTINFO2DIR(ctinfo); 99 int dir = CTINFO2DIR(ctinfo);
100 int i; 100 int i;
101 __be16 port; 101 __be16 port;
@@ -178,7 +178,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
178 struct nf_conntrack_expect *rtp_exp, 178 struct nf_conntrack_expect *rtp_exp,
179 struct nf_conntrack_expect *rtcp_exp) 179 struct nf_conntrack_expect *rtcp_exp)
180{ 180{
181 struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; 181 struct nf_ct_h323_master *info = nfct_help_data(ct);
182 int dir = CTINFO2DIR(ctinfo); 182 int dir = CTINFO2DIR(ctinfo);
183 int i; 183 int i;
184 u_int16_t nated_port; 184 u_int16_t nated_port;
@@ -330,7 +330,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
330 TransportAddress *taddr, __be16 port, 330 TransportAddress *taddr, __be16 port,
331 struct nf_conntrack_expect *exp) 331 struct nf_conntrack_expect *exp)
332{ 332{
333 struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; 333 struct nf_ct_h323_master *info = nfct_help_data(ct);
334 int dir = CTINFO2DIR(ctinfo); 334 int dir = CTINFO2DIR(ctinfo);
335 u_int16_t nated_port = ntohs(port); 335 u_int16_t nated_port = ntohs(port);
336 336
@@ -419,7 +419,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
419 unsigned char **data, TransportAddress *taddr, int idx, 419 unsigned char **data, TransportAddress *taddr, int idx,
420 __be16 port, struct nf_conntrack_expect *exp) 420 __be16 port, struct nf_conntrack_expect *exp)
421{ 421{
422 struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; 422 struct nf_ct_h323_master *info = nfct_help_data(ct);
423 int dir = CTINFO2DIR(ctinfo); 423 int dir = CTINFO2DIR(ctinfo);
424 u_int16_t nated_port = ntohs(port); 424 u_int16_t nated_port = ntohs(port);
425 union nf_inet_addr addr; 425 union nf_inet_addr addr;
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index af65958f6308..2e59ad0b90ca 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -153,6 +153,19 @@ void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
153} 153}
154EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); 154EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust);
155 155
156void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
157 u32 ctinfo, int off)
158{
159 const struct tcphdr *th;
160
161 if (nf_ct_protonum(ct) != IPPROTO_TCP)
162 return;
163
164 th = (struct tcphdr *)(skb_network_header(skb)+ ip_hdrlen(skb));
165 nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off);
166}
167EXPORT_SYMBOL_GPL(nf_nat_tcp_seq_adjust);
168
156static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data, 169static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data,
157 int datalen, __sum16 *check, int oldlen) 170 int datalen, __sum16 *check, int oldlen)
158{ 171{
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index c273d58980ae..388140881ebe 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -49,7 +49,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
49 const struct nf_nat_pptp *nat_pptp_info; 49 const struct nf_nat_pptp *nat_pptp_info;
50 struct nf_nat_ipv4_range range; 50 struct nf_nat_ipv4_range range;
51 51
52 ct_pptp_info = &nfct_help(master)->help.ct_pptp_info; 52 ct_pptp_info = nfct_help_data(master);
53 nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info; 53 nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info;
54 54
55 /* And here goes the grand finale of corrosion... */ 55 /* And here goes the grand finale of corrosion... */
@@ -123,7 +123,7 @@ pptp_outbound_pkt(struct sk_buff *skb,
123 __be16 new_callid; 123 __be16 new_callid;
124 unsigned int cid_off; 124 unsigned int cid_off;
125 125
126 ct_pptp_info = &nfct_help(ct)->help.ct_pptp_info; 126 ct_pptp_info = nfct_help_data(ct);
127 nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; 127 nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info;
128 128
129 new_callid = ct_pptp_info->pns_call_id; 129 new_callid = ct_pptp_info->pns_call_id;
@@ -192,7 +192,7 @@ pptp_exp_gre(struct nf_conntrack_expect *expect_orig,
192 struct nf_ct_pptp_master *ct_pptp_info; 192 struct nf_ct_pptp_master *ct_pptp_info;
193 struct nf_nat_pptp *nat_pptp_info; 193 struct nf_nat_pptp *nat_pptp_info;
194 194
195 ct_pptp_info = &nfct_help(ct)->help.ct_pptp_info; 195 ct_pptp_info = nfct_help_data(ct);
196 nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; 196 nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info;
197 197
198 /* save original PAC call ID in nat_info */ 198 /* save original PAC call ID in nat_info */
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 746edec8b86e..bac712293fd6 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -405,7 +405,7 @@ static unsigned char asn1_octets_decode(struct asn1_ctx *ctx,
405 405
406 ptr = *octets; 406 ptr = *octets;
407 while (ctx->pointer < eoc) { 407 while (ctx->pointer < eoc) {
408 if (!asn1_octet_decode(ctx, (unsigned char *)ptr++)) { 408 if (!asn1_octet_decode(ctx, ptr++)) {
409 kfree(*octets); 409 kfree(*octets);
410 *octets = NULL; 410 *octets = NULL;
411 return 0; 411 return 0;
@@ -759,7 +759,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
759 } 759 }
760 break; 760 break;
761 case SNMP_OBJECTID: 761 case SNMP_OBJECTID:
762 if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) { 762 if (!asn1_oid_decode(ctx, end, &lp, &len)) {
763 kfree(id); 763 kfree(id);
764 return 0; 764 return 0;
765 } 765 }
diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c
index a2901bf829c0..9dbb8d284f99 100644
--- a/net/ipv4/netfilter/nf_nat_tftp.c
+++ b/net/ipv4/netfilter/nf_nat_tftp.c
@@ -8,10 +8,10 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/udp.h> 9#include <linux/udp.h>
10 10
11#include <net/netfilter/nf_nat_helper.h>
12#include <net/netfilter/nf_nat_rule.h>
13#include <net/netfilter/nf_conntrack_helper.h> 11#include <net/netfilter/nf_conntrack_helper.h>
14#include <net/netfilter/nf_conntrack_expect.h> 12#include <net/netfilter/nf_conntrack_expect.h>
13#include <net/netfilter/nf_nat_helper.h>
14#include <net/netfilter/nf_nat_rule.h>
15#include <linux/netfilter/nf_conntrack_tftp.h> 15#include <linux/netfilter/nf_conntrack_tftp.h>
16 16
17MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>"); 17MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 2c00e8bf684d..340fcf29a966 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -371,6 +371,7 @@ void ping_err(struct sk_buff *skb, u32 info)
371 break; 371 break;
372 case ICMP_DEST_UNREACH: 372 case ICMP_DEST_UNREACH:
373 if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ 373 if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
374 ipv4_sk_update_pmtu(skb, sk, info);
374 if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) { 375 if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) {
375 err = EMSGSIZE; 376 err = EMSGSIZE;
376 harderr = 1; 377 harderr = 1;
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 9ae5c01cd0b2..8918eff1426d 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -36,9 +36,7 @@ const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
36 36
37int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) 37int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
38{ 38{
39 int hash = protocol & (MAX_INET_PROTOS - 1); 39 return !cmpxchg((const struct net_protocol **)&inet_protos[protocol],
40
41 return !cmpxchg((const struct net_protocol **)&inet_protos[hash],
42 NULL, prot) ? 0 : -1; 40 NULL, prot) ? 0 : -1;
43} 41}
44EXPORT_SYMBOL(inet_add_protocol); 42EXPORT_SYMBOL(inet_add_protocol);
@@ -49,9 +47,9 @@ EXPORT_SYMBOL(inet_add_protocol);
49 47
50int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol) 48int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol)
51{ 49{
52 int ret, hash = protocol & (MAX_INET_PROTOS - 1); 50 int ret;
53 51
54 ret = (cmpxchg((const struct net_protocol **)&inet_protos[hash], 52 ret = (cmpxchg((const struct net_protocol **)&inet_protos[protocol],
55 prot, NULL) == prot) ? 0 : -1; 53 prot, NULL) == prot) ? 0 : -1;
56 54
57 synchronize_net(); 55 synchronize_net();
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 4032b818f3e4..659ddfb10947 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -216,6 +216,9 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
216 int err = 0; 216 int err = 0;
217 int harderr = 0; 217 int harderr = 0;
218 218
219 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
220 ipv4_sk_update_pmtu(skb, sk, info);
221
219 /* Report error on raw socket, if: 222 /* Report error on raw socket, if:
220 1. User requested ip_recverr. 223 1. User requested ip_recverr.
221 2. Socket is connected (otherwise the error indication 224 2. Socket is connected (otherwise the error indication
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 98b30d08efe9..83d56a016625 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -162,10 +162,7 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
162 struct inet_peer *peer; 162 struct inet_peer *peer;
163 u32 *p = NULL; 163 u32 *p = NULL;
164 164
165 if (!rt->peer) 165 peer = rt_get_peer_create(rt, rt->rt_dst);
166 rt_bind_peer(rt, rt->rt_dst, 1);
167
168 peer = rt->peer;
169 if (peer) { 166 if (peer) {
170 u32 *old_p = __DST_METRICS_PTR(old); 167 u32 *old_p = __DST_METRICS_PTR(old);
171 unsigned long prev, new; 168 unsigned long prev, new;
@@ -443,7 +440,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
443 r->rt_key_tos, 440 r->rt_key_tos,
444 -1, 441 -1,
445 HHUptod, 442 HHUptod,
446 r->rt_spec_dst, &len); 443 0, &len);
447 444
448 seq_printf(seq, "%*s\n", 127 - len, ""); 445 seq_printf(seq, "%*s\n", 127 - len, "");
449 } 446 }
@@ -680,7 +677,7 @@ static inline int rt_fast_clean(struct rtable *rth)
680static inline int rt_valuable(struct rtable *rth) 677static inline int rt_valuable(struct rtable *rth)
681{ 678{
682 return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || 679 return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
683 (rth->peer && rth->peer->pmtu_expires); 680 (rt_has_peer(rth) && rt_peer_ptr(rth)->pmtu_expires);
684} 681}
685 682
686static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) 683static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
@@ -873,34 +870,22 @@ static void rt_check_expire(void)
873 while ((rth = rcu_dereference_protected(*rthp, 870 while ((rth = rcu_dereference_protected(*rthp,
874 lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) { 871 lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
875 prefetch(rth->dst.rt_next); 872 prefetch(rth->dst.rt_next);
876 if (rt_is_expired(rth)) { 873 if (rt_is_expired(rth) ||
874 rt_may_expire(rth, tmo, ip_rt_gc_timeout)) {
877 *rthp = rth->dst.rt_next; 875 *rthp = rth->dst.rt_next;
878 rt_free(rth); 876 rt_free(rth);
879 continue; 877 continue;
880 } 878 }
881 if (rth->dst.expires) {
882 /* Entry is expired even if it is in use */
883 if (time_before_eq(jiffies, rth->dst.expires)) {
884nofree:
885 tmo >>= 1;
886 rthp = &rth->dst.rt_next;
887 /*
888 * We only count entries on
889 * a chain with equal hash inputs once
890 * so that entries for different QOS
891 * levels, and other non-hash input
892 * attributes don't unfairly skew
893 * the length computation
894 */
895 length += has_noalias(rt_hash_table[i].chain, rth);
896 continue;
897 }
898 } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
899 goto nofree;
900 879
901 /* Cleanup aged off entries. */ 880 /* We only count entries on a chain with equal
902 *rthp = rth->dst.rt_next; 881 * hash inputs once so that entries for
903 rt_free(rth); 882 * different QOS levels, and other non-hash
883 * input attributes don't unfairly skew the
884 * length computation
885 */
886 tmo >>= 1;
887 rthp = &rth->dst.rt_next;
888 length += has_noalias(rt_hash_table[i].chain, rth);
904 } 889 }
905 spin_unlock_bh(rt_hash_lock_addr(i)); 890 spin_unlock_bh(rt_hash_lock_addr(i));
906 sum += length; 891 sum += length;
@@ -938,7 +923,7 @@ static void rt_cache_invalidate(struct net *net)
938 923
939 get_random_bytes(&shuffle, sizeof(shuffle)); 924 get_random_bytes(&shuffle, sizeof(shuffle));
940 atomic_add(shuffle + 1U, &net->ipv4.rt_genid); 925 atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
941 inetpeer_invalidate_tree(AF_INET); 926 inetpeer_invalidate_family(AF_INET);
942} 927}
943 928
944/* 929/*
@@ -1159,7 +1144,7 @@ restart:
1159 candp = NULL; 1144 candp = NULL;
1160 now = jiffies; 1145 now = jiffies;
1161 1146
1162 if (!rt_caching(dev_net(rt->dst.dev))) { 1147 if (!rt_caching(dev_net(rt->dst.dev)) || (rt->dst.flags & DST_NOCACHE)) {
1163 /* 1148 /*
1164 * If we're not caching, just tell the caller we 1149 * If we're not caching, just tell the caller we
1165 * were successful and don't touch the route. The 1150 * were successful and don't touch the route. The
@@ -1328,14 +1313,20 @@ static u32 rt_peer_genid(void)
1328 1313
1329void rt_bind_peer(struct rtable *rt, __be32 daddr, int create) 1314void rt_bind_peer(struct rtable *rt, __be32 daddr, int create)
1330{ 1315{
1316 struct inet_peer_base *base;
1331 struct inet_peer *peer; 1317 struct inet_peer *peer;
1332 1318
1333 peer = inet_getpeer_v4(daddr, create); 1319 base = inetpeer_base_ptr(rt->_peer);
1320 if (!base)
1321 return;
1334 1322
1335 if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) 1323 peer = inet_getpeer_v4(base, daddr, create);
1336 inet_putpeer(peer); 1324 if (peer) {
1337 else 1325 if (!rt_set_peer(rt, peer))
1338 rt->rt_peer_genid = rt_peer_genid(); 1326 inet_putpeer(peer);
1327 else
1328 rt->rt_peer_genid = rt_peer_genid();
1329 }
1339} 1330}
1340 1331
1341/* 1332/*
@@ -1363,14 +1354,13 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
1363 struct rtable *rt = (struct rtable *) dst; 1354 struct rtable *rt = (struct rtable *) dst;
1364 1355
1365 if (rt && !(rt->dst.flags & DST_NOPEER)) { 1356 if (rt && !(rt->dst.flags & DST_NOPEER)) {
1366 if (rt->peer == NULL) 1357 struct inet_peer *peer = rt_get_peer_create(rt, rt->rt_dst);
1367 rt_bind_peer(rt, rt->rt_dst, 1);
1368 1358
1369 /* If peer is attached to destination, it is never detached, 1359 /* If peer is attached to destination, it is never detached,
1370 so that we need not to grab a lock to dereference it. 1360 so that we need not to grab a lock to dereference it.
1371 */ 1361 */
1372 if (rt->peer) { 1362 if (peer) {
1373 iph->id = htons(inet_getid(rt->peer, more)); 1363 iph->id = htons(inet_getid(peer, more));
1374 return; 1364 return;
1375 } 1365 }
1376 } else if (!rt) 1366 } else if (!rt)
@@ -1480,10 +1470,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1480 rt->rt_gateway != old_gw) 1470 rt->rt_gateway != old_gw)
1481 continue; 1471 continue;
1482 1472
1483 if (!rt->peer) 1473 peer = rt_get_peer_create(rt, rt->rt_dst);
1484 rt_bind_peer(rt, rt->rt_dst, 1);
1485
1486 peer = rt->peer;
1487 if (peer) { 1474 if (peer) {
1488 if (peer->redirect_learned.a4 != new_gw) { 1475 if (peer->redirect_learned.a4 != new_gw) {
1489 peer->redirect_learned.a4 = new_gw; 1476 peer->redirect_learned.a4 = new_gw;
@@ -1539,8 +1526,10 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1539 rt_genid(dev_net(dst->dev))); 1526 rt_genid(dev_net(dst->dev)));
1540 rt_del(hash, rt); 1527 rt_del(hash, rt);
1541 ret = NULL; 1528 ret = NULL;
1542 } else if (rt->peer && peer_pmtu_expired(rt->peer)) { 1529 } else if (rt_has_peer(rt)) {
1543 dst_metric_set(dst, RTAX_MTU, rt->peer->pmtu_orig); 1530 struct inet_peer *peer = rt_peer_ptr(rt);
1531 if (peer_pmtu_expired(peer))
1532 dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig);
1544 } 1533 }
1545 } 1534 }
1546 return ret; 1535 return ret;
@@ -1578,9 +1567,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1578 log_martians = IN_DEV_LOG_MARTIANS(in_dev); 1567 log_martians = IN_DEV_LOG_MARTIANS(in_dev);
1579 rcu_read_unlock(); 1568 rcu_read_unlock();
1580 1569
1581 if (!rt->peer) 1570 peer = rt_get_peer_create(rt, rt->rt_dst);
1582 rt_bind_peer(rt, rt->rt_dst, 1);
1583 peer = rt->peer;
1584 if (!peer) { 1571 if (!peer) {
1585 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); 1572 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
1586 return; 1573 return;
@@ -1622,12 +1609,28 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1622 1609
1623static int ip_error(struct sk_buff *skb) 1610static int ip_error(struct sk_buff *skb)
1624{ 1611{
1612 struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
1625 struct rtable *rt = skb_rtable(skb); 1613 struct rtable *rt = skb_rtable(skb);
1626 struct inet_peer *peer; 1614 struct inet_peer *peer;
1627 unsigned long now; 1615 unsigned long now;
1616 struct net *net;
1628 bool send; 1617 bool send;
1629 int code; 1618 int code;
1630 1619
1620 net = dev_net(rt->dst.dev);
1621 if (!IN_DEV_FORWARD(in_dev)) {
1622 switch (rt->dst.error) {
1623 case EHOSTUNREACH:
1624 IP_INC_STATS_BH(net, IPSTATS_MIB_INADDRERRORS);
1625 break;
1626
1627 case ENETUNREACH:
1628 IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
1629 break;
1630 }
1631 goto out;
1632 }
1633
1631 switch (rt->dst.error) { 1634 switch (rt->dst.error) {
1632 case EINVAL: 1635 case EINVAL:
1633 default: 1636 default:
@@ -1637,17 +1640,14 @@ static int ip_error(struct sk_buff *skb)
1637 break; 1640 break;
1638 case ENETUNREACH: 1641 case ENETUNREACH:
1639 code = ICMP_NET_UNREACH; 1642 code = ICMP_NET_UNREACH;
1640 IP_INC_STATS_BH(dev_net(rt->dst.dev), 1643 IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
1641 IPSTATS_MIB_INNOROUTES);
1642 break; 1644 break;
1643 case EACCES: 1645 case EACCES:
1644 code = ICMP_PKT_FILTERED; 1646 code = ICMP_PKT_FILTERED;
1645 break; 1647 break;
1646 } 1648 }
1647 1649
1648 if (!rt->peer) 1650 peer = rt_get_peer_create(rt, rt->rt_dst);
1649 rt_bind_peer(rt, rt->rt_dst, 1);
1650 peer = rt->peer;
1651 1651
1652 send = true; 1652 send = true;
1653 if (peer) { 1653 if (peer) {
@@ -1668,67 +1668,6 @@ out: kfree_skb(skb);
1668 return 0; 1668 return 0;
1669} 1669}
1670 1670
1671/*
1672 * The last two values are not from the RFC but
1673 * are needed for AMPRnet AX.25 paths.
1674 */
1675
1676static const unsigned short mtu_plateau[] =
1677{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 };
1678
1679static inline unsigned short guess_mtu(unsigned short old_mtu)
1680{
1681 int i;
1682
1683 for (i = 0; i < ARRAY_SIZE(mtu_plateau); i++)
1684 if (old_mtu > mtu_plateau[i])
1685 return mtu_plateau[i];
1686 return 68;
1687}
1688
1689unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph,
1690 unsigned short new_mtu,
1691 struct net_device *dev)
1692{
1693 unsigned short old_mtu = ntohs(iph->tot_len);
1694 unsigned short est_mtu = 0;
1695 struct inet_peer *peer;
1696
1697 peer = inet_getpeer_v4(iph->daddr, 1);
1698 if (peer) {
1699 unsigned short mtu = new_mtu;
1700
1701 if (new_mtu < 68 || new_mtu >= old_mtu) {
1702 /* BSD 4.2 derived systems incorrectly adjust
1703 * tot_len by the IP header length, and report
1704 * a zero MTU in the ICMP message.
1705 */
1706 if (mtu == 0 &&
1707 old_mtu >= 68 + (iph->ihl << 2))
1708 old_mtu -= iph->ihl << 2;
1709 mtu = guess_mtu(old_mtu);
1710 }
1711
1712 if (mtu < ip_rt_min_pmtu)
1713 mtu = ip_rt_min_pmtu;
1714 if (!peer->pmtu_expires || mtu < peer->pmtu_learned) {
1715 unsigned long pmtu_expires;
1716
1717 pmtu_expires = jiffies + ip_rt_mtu_expires;
1718 if (!pmtu_expires)
1719 pmtu_expires = 1UL;
1720
1721 est_mtu = mtu;
1722 peer->pmtu_learned = mtu;
1723 peer->pmtu_expires = pmtu_expires;
1724 atomic_inc(&__rt_peer_genid);
1725 }
1726
1727 inet_putpeer(peer);
1728 }
1729 return est_mtu ? : new_mtu;
1730}
1731
1732static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer) 1671static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer)
1733{ 1672{
1734 unsigned long expires = ACCESS_ONCE(peer->pmtu_expires); 1673 unsigned long expires = ACCESS_ONCE(peer->pmtu_expires);
@@ -1753,9 +1692,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1753 1692
1754 dst_confirm(dst); 1693 dst_confirm(dst);
1755 1694
1756 if (!rt->peer) 1695 peer = rt_get_peer_create(rt, rt->rt_dst);
1757 rt_bind_peer(rt, rt->rt_dst, 1);
1758 peer = rt->peer;
1759 if (peer) { 1696 if (peer) {
1760 unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires); 1697 unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires);
1761 1698
@@ -1777,16 +1714,40 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1777 } 1714 }
1778} 1715}
1779 1716
1717void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
1718 int oif, u32 mark, u8 protocol, int flow_flags)
1719{
1720 const struct iphdr *iph = (const struct iphdr *)skb->data;
1721 struct flowi4 fl4;
1722 struct rtable *rt;
1723
1724 flowi4_init_output(&fl4, oif, mark, RT_TOS(iph->tos), RT_SCOPE_UNIVERSE,
1725 protocol, flow_flags | FLOWI_FLAG_PRECOW_METRICS,
1726 iph->daddr, iph->saddr, 0, 0);
1727 rt = __ip_route_output_key(net, &fl4);
1728 if (!IS_ERR(rt)) {
1729 ip_rt_update_pmtu(&rt->dst, mtu);
1730 ip_rt_put(rt);
1731 }
1732}
1733EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
1734
1735void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
1736{
1737 const struct inet_sock *inet = inet_sk(sk);
1738
1739 return ipv4_update_pmtu(skb, sock_net(sk), mtu,
1740 sk->sk_bound_dev_if, sk->sk_mark,
1741 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
1742 inet_sk_flowi_flags(sk));
1743}
1744EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
1780 1745
1781static void ipv4_validate_peer(struct rtable *rt) 1746static void ipv4_validate_peer(struct rtable *rt)
1782{ 1747{
1783 if (rt->rt_peer_genid != rt_peer_genid()) { 1748 if (rt->rt_peer_genid != rt_peer_genid()) {
1784 struct inet_peer *peer; 1749 struct inet_peer *peer = rt_get_peer(rt, rt->rt_dst);
1785 1750
1786 if (!rt->peer)
1787 rt_bind_peer(rt, rt->rt_dst, 0);
1788
1789 peer = rt->peer;
1790 if (peer) { 1751 if (peer) {
1791 check_peer_pmtu(&rt->dst, peer); 1752 check_peer_pmtu(&rt->dst, peer);
1792 1753
@@ -1812,14 +1773,13 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1812static void ipv4_dst_destroy(struct dst_entry *dst) 1773static void ipv4_dst_destroy(struct dst_entry *dst)
1813{ 1774{
1814 struct rtable *rt = (struct rtable *) dst; 1775 struct rtable *rt = (struct rtable *) dst;
1815 struct inet_peer *peer = rt->peer;
1816 1776
1817 if (rt->fi) { 1777 if (rt->fi) {
1818 fib_info_put(rt->fi); 1778 fib_info_put(rt->fi);
1819 rt->fi = NULL; 1779 rt->fi = NULL;
1820 } 1780 }
1821 if (peer) { 1781 if (rt_has_peer(rt)) {
1822 rt->peer = NULL; 1782 struct inet_peer *peer = rt_peer_ptr(rt);
1823 inet_putpeer(peer); 1783 inet_putpeer(peer);
1824 } 1784 }
1825} 1785}
@@ -1832,8 +1792,11 @@ static void ipv4_link_failure(struct sk_buff *skb)
1832 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); 1792 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
1833 1793
1834 rt = skb_rtable(skb); 1794 rt = skb_rtable(skb);
1835 if (rt && rt->peer && peer_pmtu_cleaned(rt->peer)) 1795 if (rt && rt_has_peer(rt)) {
1836 dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig); 1796 struct inet_peer *peer = rt_peer_ptr(rt);
1797 if (peer_pmtu_cleaned(peer))
1798 dst_metric_set(&rt->dst, RTAX_MTU, peer->pmtu_orig);
1799 }
1837} 1800}
1838 1801
1839static int ip_rt_bug(struct sk_buff *skb) 1802static int ip_rt_bug(struct sk_buff *skb)
@@ -1935,6 +1898,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
1935static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, 1898static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
1936 struct fib_info *fi) 1899 struct fib_info *fi)
1937{ 1900{
1901 struct inet_peer_base *base;
1938 struct inet_peer *peer; 1902 struct inet_peer *peer;
1939 int create = 0; 1903 int create = 0;
1940 1904
@@ -1944,8 +1908,12 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
1944 if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) 1908 if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS))
1945 create = 1; 1909 create = 1;
1946 1910
1947 rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create); 1911 base = inetpeer_base_ptr(rt->_peer);
1912 BUG_ON(!base);
1913
1914 peer = inet_getpeer_v4(base, rt->rt_dst, create);
1948 if (peer) { 1915 if (peer) {
1916 __rt_set_peer(rt, peer);
1949 rt->rt_peer_genid = rt_peer_genid(); 1917 rt->rt_peer_genid = rt_peer_genid();
1950 if (inet_metrics_new(peer)) 1918 if (inet_metrics_new(peer))
1951 memcpy(peer->metrics, fi->fib_metrics, 1919 memcpy(peer->metrics, fi->fib_metrics,
@@ -1986,8 +1954,6 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4,
1986 1954
1987 if (dst_mtu(dst) > IP_MAX_MTU) 1955 if (dst_mtu(dst) > IP_MAX_MTU)
1988 dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU); 1956 dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU);
1989 if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40)
1990 dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40);
1991 1957
1992#ifdef CONFIG_IP_ROUTE_CLASSID 1958#ifdef CONFIG_IP_ROUTE_CLASSID
1993#ifdef CONFIG_IP_MULTIPLE_TABLES 1959#ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -2012,7 +1978,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2012{ 1978{
2013 unsigned int hash; 1979 unsigned int hash;
2014 struct rtable *rth; 1980 struct rtable *rth;
2015 __be32 spec_dst;
2016 struct in_device *in_dev = __in_dev_get_rcu(dev); 1981 struct in_device *in_dev = __in_dev_get_rcu(dev);
2017 u32 itag = 0; 1982 u32 itag = 0;
2018 int err; 1983 int err;
@@ -2023,16 +1988,18 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2023 return -EINVAL; 1988 return -EINVAL;
2024 1989
2025 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || 1990 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
2026 ipv4_is_loopback(saddr) || skb->protocol != htons(ETH_P_IP)) 1991 skb->protocol != htons(ETH_P_IP))
2027 goto e_inval; 1992 goto e_inval;
2028 1993
1994 if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
1995 if (ipv4_is_loopback(saddr))
1996 goto e_inval;
1997
2029 if (ipv4_is_zeronet(saddr)) { 1998 if (ipv4_is_zeronet(saddr)) {
2030 if (!ipv4_is_local_multicast(daddr)) 1999 if (!ipv4_is_local_multicast(daddr))
2031 goto e_inval; 2000 goto e_inval;
2032 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
2033 } else { 2001 } else {
2034 err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst, 2002 err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &itag);
2035 &itag);
2036 if (err < 0) 2003 if (err < 0)
2037 goto e_err; 2004 goto e_err;
2038 } 2005 }
@@ -2059,9 +2026,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2059 rth->rt_oif = 0; 2026 rth->rt_oif = 0;
2060 rth->rt_mark = skb->mark; 2027 rth->rt_mark = skb->mark;
2061 rth->rt_gateway = daddr; 2028 rth->rt_gateway = daddr;
2062 rth->rt_spec_dst= spec_dst;
2063 rth->rt_peer_genid = 0; 2029 rth->rt_peer_genid = 0;
2064 rth->peer = NULL; 2030 rt_init_peer(rth, dev_net(dev)->ipv4.peers);
2065 rth->fi = NULL; 2031 rth->fi = NULL;
2066 if (our) { 2032 if (our) {
2067 rth->dst.input= ip_local_deliver; 2033 rth->dst.input= ip_local_deliver;
@@ -2123,7 +2089,6 @@ static int __mkroute_input(struct sk_buff *skb,
2123 int err; 2089 int err;
2124 struct in_device *out_dev; 2090 struct in_device *out_dev;
2125 unsigned int flags = 0; 2091 unsigned int flags = 0;
2126 __be32 spec_dst;
2127 u32 itag; 2092 u32 itag;
2128 2093
2129 /* get a working reference to the output device */ 2094 /* get a working reference to the output device */
@@ -2135,7 +2100,7 @@ static int __mkroute_input(struct sk_buff *skb,
2135 2100
2136 2101
2137 err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), 2102 err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
2138 in_dev->dev, &spec_dst, &itag); 2103 in_dev->dev, &itag);
2139 if (err < 0) { 2104 if (err < 0) {
2140 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, 2105 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
2141 saddr); 2106 saddr);
@@ -2187,9 +2152,8 @@ static int __mkroute_input(struct sk_buff *skb,
2187 rth->rt_oif = 0; 2152 rth->rt_oif = 0;
2188 rth->rt_mark = skb->mark; 2153 rth->rt_mark = skb->mark;
2189 rth->rt_gateway = daddr; 2154 rth->rt_gateway = daddr;
2190 rth->rt_spec_dst= spec_dst;
2191 rth->rt_peer_genid = 0; 2155 rth->rt_peer_genid = 0;
2192 rth->peer = NULL; 2156 rt_init_peer(rth, &res->table->tb_peers);
2193 rth->fi = NULL; 2157 rth->fi = NULL;
2194 2158
2195 rth->dst.input = ip_forward; 2159 rth->dst.input = ip_forward;
@@ -2253,7 +2217,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2253 u32 itag = 0; 2217 u32 itag = 0;
2254 struct rtable *rth; 2218 struct rtable *rth;
2255 unsigned int hash; 2219 unsigned int hash;
2256 __be32 spec_dst;
2257 int err = -EINVAL; 2220 int err = -EINVAL;
2258 struct net *net = dev_net(dev); 2221 struct net *net = dev_net(dev);
2259 2222
@@ -2266,8 +2229,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2266 by fib_lookup. 2229 by fib_lookup.
2267 */ 2230 */
2268 2231
2269 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || 2232 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
2270 ipv4_is_loopback(saddr))
2271 goto martian_source; 2233 goto martian_source;
2272 2234
2273 if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0)) 2235 if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
@@ -2279,9 +2241,17 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2279 if (ipv4_is_zeronet(saddr)) 2241 if (ipv4_is_zeronet(saddr))
2280 goto martian_source; 2242 goto martian_source;
2281 2243
2282 if (ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr)) 2244 if (ipv4_is_zeronet(daddr))
2283 goto martian_destination; 2245 goto martian_destination;
2284 2246
2247 if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) {
2248 if (ipv4_is_loopback(daddr))
2249 goto martian_destination;
2250
2251 if (ipv4_is_loopback(saddr))
2252 goto martian_source;
2253 }
2254
2285 /* 2255 /*
2286 * Now we are ready to route packet. 2256 * Now we are ready to route packet.
2287 */ 2257 */
@@ -2293,11 +2263,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2293 fl4.daddr = daddr; 2263 fl4.daddr = daddr;
2294 fl4.saddr = saddr; 2264 fl4.saddr = saddr;
2295 err = fib_lookup(net, &fl4, &res); 2265 err = fib_lookup(net, &fl4, &res);
2296 if (err != 0) { 2266 if (err != 0)
2297 if (!IN_DEV_FORWARD(in_dev))
2298 goto e_hostunreach;
2299 goto no_route; 2267 goto no_route;
2300 }
2301 2268
2302 RT_CACHE_STAT_INC(in_slow_tot); 2269 RT_CACHE_STAT_INC(in_slow_tot);
2303 2270
@@ -2307,17 +2274,16 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2307 if (res.type == RTN_LOCAL) { 2274 if (res.type == RTN_LOCAL) {
2308 err = fib_validate_source(skb, saddr, daddr, tos, 2275 err = fib_validate_source(skb, saddr, daddr, tos,
2309 net->loopback_dev->ifindex, 2276 net->loopback_dev->ifindex,
2310 dev, &spec_dst, &itag); 2277 dev, &itag);
2311 if (err < 0) 2278 if (err < 0)
2312 goto martian_source_keep_err; 2279 goto martian_source_keep_err;
2313 if (err) 2280 if (err)
2314 flags |= RTCF_DIRECTSRC; 2281 flags |= RTCF_DIRECTSRC;
2315 spec_dst = daddr;
2316 goto local_input; 2282 goto local_input;
2317 } 2283 }
2318 2284
2319 if (!IN_DEV_FORWARD(in_dev)) 2285 if (!IN_DEV_FORWARD(in_dev))
2320 goto e_hostunreach; 2286 goto no_route;
2321 if (res.type != RTN_UNICAST) 2287 if (res.type != RTN_UNICAST)
2322 goto martian_destination; 2288 goto martian_destination;
2323 2289
@@ -2328,11 +2294,8 @@ brd_input:
2328 if (skb->protocol != htons(ETH_P_IP)) 2294 if (skb->protocol != htons(ETH_P_IP))
2329 goto e_inval; 2295 goto e_inval;
2330 2296
2331 if (ipv4_is_zeronet(saddr)) 2297 if (!ipv4_is_zeronet(saddr)) {
2332 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); 2298 err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &itag);
2333 else {
2334 err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst,
2335 &itag);
2336 if (err < 0) 2299 if (err < 0)
2337 goto martian_source_keep_err; 2300 goto martian_source_keep_err;
2338 if (err) 2301 if (err)
@@ -2370,9 +2333,8 @@ local_input:
2370 rth->rt_oif = 0; 2333 rth->rt_oif = 0;
2371 rth->rt_mark = skb->mark; 2334 rth->rt_mark = skb->mark;
2372 rth->rt_gateway = daddr; 2335 rth->rt_gateway = daddr;
2373 rth->rt_spec_dst= spec_dst;
2374 rth->rt_peer_genid = 0; 2336 rth->rt_peer_genid = 0;
2375 rth->peer = NULL; 2337 rt_init_peer(rth, net->ipv4.peers);
2376 rth->fi = NULL; 2338 rth->fi = NULL;
2377 if (res.type == RTN_UNREACHABLE) { 2339 if (res.type == RTN_UNREACHABLE) {
2378 rth->dst.input= ip_error; 2340 rth->dst.input= ip_error;
@@ -2388,7 +2350,6 @@ local_input:
2388 2350
2389no_route: 2351no_route:
2390 RT_CACHE_STAT_INC(in_no_route); 2352 RT_CACHE_STAT_INC(in_no_route);
2391 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
2392 res.type = RTN_UNREACHABLE; 2353 res.type = RTN_UNREACHABLE;
2393 if (err == -ESRCH) 2354 if (err == -ESRCH)
2394 err = -ENETUNREACH; 2355 err = -ENETUNREACH;
@@ -2405,10 +2366,6 @@ martian_destination:
2405 &daddr, &saddr, dev->name); 2366 &daddr, &saddr, dev->name);
2406#endif 2367#endif
2407 2368
2408e_hostunreach:
2409 err = -EHOSTUNREACH;
2410 goto out;
2411
2412e_inval: 2369e_inval:
2413 err = -EINVAL; 2370 err = -EINVAL;
2414 goto out; 2371 goto out;
@@ -2520,9 +2477,14 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
2520 u16 type = res->type; 2477 u16 type = res->type;
2521 struct rtable *rth; 2478 struct rtable *rth;
2522 2479
2523 if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK)) 2480 in_dev = __in_dev_get_rcu(dev_out);
2481 if (!in_dev)
2524 return ERR_PTR(-EINVAL); 2482 return ERR_PTR(-EINVAL);
2525 2483
2484 if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
2485 if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
2486 return ERR_PTR(-EINVAL);
2487
2526 if (ipv4_is_lbcast(fl4->daddr)) 2488 if (ipv4_is_lbcast(fl4->daddr))
2527 type = RTN_BROADCAST; 2489 type = RTN_BROADCAST;
2528 else if (ipv4_is_multicast(fl4->daddr)) 2490 else if (ipv4_is_multicast(fl4->daddr))
@@ -2533,10 +2495,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
2533 if (dev_out->flags & IFF_LOOPBACK) 2495 if (dev_out->flags & IFF_LOOPBACK)
2534 flags |= RTCF_LOCAL; 2496 flags |= RTCF_LOCAL;
2535 2497
2536 in_dev = __in_dev_get_rcu(dev_out);
2537 if (!in_dev)
2538 return ERR_PTR(-EINVAL);
2539
2540 if (type == RTN_BROADCAST) { 2498 if (type == RTN_BROADCAST) {
2541 flags |= RTCF_BROADCAST | RTCF_LOCAL; 2499 flags |= RTCF_BROADCAST | RTCF_LOCAL;
2542 fi = NULL; 2500 fi = NULL;
@@ -2574,19 +2532,17 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
2574 rth->rt_oif = orig_oif; 2532 rth->rt_oif = orig_oif;
2575 rth->rt_mark = fl4->flowi4_mark; 2533 rth->rt_mark = fl4->flowi4_mark;
2576 rth->rt_gateway = fl4->daddr; 2534 rth->rt_gateway = fl4->daddr;
2577 rth->rt_spec_dst= fl4->saddr;
2578 rth->rt_peer_genid = 0; 2535 rth->rt_peer_genid = 0;
2579 rth->peer = NULL; 2536 rt_init_peer(rth, (res->table ?
2537 &res->table->tb_peers :
2538 dev_net(dev_out)->ipv4.peers));
2580 rth->fi = NULL; 2539 rth->fi = NULL;
2581 2540
2582 RT_CACHE_STAT_INC(out_slow_tot); 2541 RT_CACHE_STAT_INC(out_slow_tot);
2583 2542
2584 if (flags & RTCF_LOCAL) { 2543 if (flags & RTCF_LOCAL)
2585 rth->dst.input = ip_local_deliver; 2544 rth->dst.input = ip_local_deliver;
2586 rth->rt_spec_dst = fl4->daddr;
2587 }
2588 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { 2545 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
2589 rth->rt_spec_dst = fl4->saddr;
2590 if (flags & RTCF_LOCAL && 2546 if (flags & RTCF_LOCAL &&
2591 !(dev_out->flags & IFF_LOOPBACK)) { 2547 !(dev_out->flags & IFF_LOOPBACK)) {
2592 rth->dst.output = ip_mc_output; 2548 rth->dst.output = ip_mc_output;
@@ -2605,6 +2561,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
2605 2561
2606 rt_set_nexthop(rth, fl4, res, fi, type, 0); 2562 rt_set_nexthop(rth, fl4, res, fi, type, 0);
2607 2563
2564 if (fl4->flowi4_flags & FLOWI_FLAG_RT_NOCACHE)
2565 rth->dst.flags |= DST_NOCACHE;
2566
2608 return rth; 2567 return rth;
2609} 2568}
2610 2569
@@ -2625,6 +2584,7 @@ static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4)
2625 int orig_oif; 2584 int orig_oif;
2626 2585
2627 res.fi = NULL; 2586 res.fi = NULL;
2587 res.table = NULL;
2628#ifdef CONFIG_IP_MULTIPLE_TABLES 2588#ifdef CONFIG_IP_MULTIPLE_TABLES
2629 res.r = NULL; 2589 res.r = NULL;
2630#endif 2590#endif
@@ -2730,6 +2690,7 @@ static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4)
2730 2690
2731 if (fib_lookup(net, fl4, &res)) { 2691 if (fib_lookup(net, fl4, &res)) {
2732 res.fi = NULL; 2692 res.fi = NULL;
2693 res.table = NULL;
2733 if (fl4->flowi4_oif) { 2694 if (fl4->flowi4_oif) {
2734 /* Apparently, routing tables are wrong. Assume, 2695 /* Apparently, routing tables are wrong. Assume,
2735 that the destination is on link. 2696 that the destination is on link.
@@ -2912,10 +2873,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
2912 rt->rt_dst = ort->rt_dst; 2873 rt->rt_dst = ort->rt_dst;
2913 rt->rt_src = ort->rt_src; 2874 rt->rt_src = ort->rt_src;
2914 rt->rt_gateway = ort->rt_gateway; 2875 rt->rt_gateway = ort->rt_gateway;
2915 rt->rt_spec_dst = ort->rt_spec_dst; 2876 rt_transfer_peer(rt, ort);
2916 rt->peer = ort->peer;
2917 if (rt->peer)
2918 atomic_inc(&rt->peer->refcnt);
2919 rt->fi = ort->fi; 2877 rt->fi = ort->fi;
2920 if (rt->fi) 2878 if (rt->fi)
2921 atomic_inc(&rt->fi->fib_clntref); 2879 atomic_inc(&rt->fi->fib_clntref);
@@ -2953,7 +2911,6 @@ static int rt_fill_info(struct net *net,
2953 struct rtmsg *r; 2911 struct rtmsg *r;
2954 struct nlmsghdr *nlh; 2912 struct nlmsghdr *nlh;
2955 unsigned long expires = 0; 2913 unsigned long expires = 0;
2956 const struct inet_peer *peer = rt->peer;
2957 u32 id = 0, ts = 0, tsage = 0, error; 2914 u32 id = 0, ts = 0, tsage = 0, error;
2958 2915
2959 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); 2916 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
@@ -2990,10 +2947,8 @@ static int rt_fill_info(struct net *net,
2990 nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid)) 2947 nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
2991 goto nla_put_failure; 2948 goto nla_put_failure;
2992#endif 2949#endif
2993 if (rt_is_input_route(rt)) { 2950 if (!rt_is_input_route(rt) &&
2994 if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_spec_dst)) 2951 rt->rt_src != rt->rt_key_src) {
2995 goto nla_put_failure;
2996 } else if (rt->rt_src != rt->rt_key_src) {
2997 if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_src)) 2952 if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_src))
2998 goto nla_put_failure; 2953 goto nla_put_failure;
2999 } 2954 }
@@ -3009,8 +2964,9 @@ static int rt_fill_info(struct net *net,
3009 goto nla_put_failure; 2964 goto nla_put_failure;
3010 2965
3011 error = rt->dst.error; 2966 error = rt->dst.error;
3012 if (peer) { 2967 if (rt_has_peer(rt)) {
3013 inet_peer_refcheck(rt->peer); 2968 const struct inet_peer *peer = rt_peer_ptr(rt);
2969 inet_peer_refcheck(peer);
3014 id = atomic_read(&peer->ip_id_count) & 0xffff; 2970 id = atomic_read(&peer->ip_id_count) & 0xffff;
3015 if (peer->tcp_ts_stamp) { 2971 if (peer->tcp_ts_stamp) {
3016 ts = peer->tcp_ts; 2972 ts = peer->tcp_ts;
@@ -3400,6 +3356,30 @@ static __net_initdata struct pernet_operations rt_genid_ops = {
3400 .init = rt_genid_init, 3356 .init = rt_genid_init,
3401}; 3357};
3402 3358
3359static int __net_init ipv4_inetpeer_init(struct net *net)
3360{
3361 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3362
3363 if (!bp)
3364 return -ENOMEM;
3365 inet_peer_base_init(bp);
3366 net->ipv4.peers = bp;
3367 return 0;
3368}
3369
3370static void __net_exit ipv4_inetpeer_exit(struct net *net)
3371{
3372 struct inet_peer_base *bp = net->ipv4.peers;
3373
3374 net->ipv4.peers = NULL;
3375 inetpeer_invalidate_tree(bp);
3376 kfree(bp);
3377}
3378
3379static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
3380 .init = ipv4_inetpeer_init,
3381 .exit = ipv4_inetpeer_exit,
3382};
3403 3383
3404#ifdef CONFIG_IP_ROUTE_CLASSID 3384#ifdef CONFIG_IP_ROUTE_CLASSID
3405struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; 3385struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
@@ -3480,6 +3460,7 @@ int __init ip_rt_init(void)
3480 register_pernet_subsys(&sysctl_route_ops); 3460 register_pernet_subsys(&sysctl_route_ops);
3481#endif 3461#endif
3482 register_pernet_subsys(&rt_genid_ops); 3462 register_pernet_subsys(&rt_genid_ops);
3463 register_pernet_subsys(&ipv4_inetpeer_ops);
3483 return rc; 3464 return rc;
3484} 3465}
3485 3466
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index ef32956ed655..12aa0c5867c4 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -301,6 +301,13 @@ static struct ctl_table ipv4_table[] = {
301 .proc_handler = proc_dointvec 301 .proc_handler = proc_dointvec
302 }, 302 },
303 { 303 {
304 .procname = "ip_early_demux",
305 .data = &sysctl_ip_early_demux,
306 .maxlen = sizeof(int),
307 .mode = 0644,
308 .proc_handler = proc_dointvec
309 },
310 {
304 .procname = "ip_dynaddr", 311 .procname = "ip_dynaddr",
305 .data = &sysctl_ip_dynaddr, 312 .data = &sysctl_ip_dynaddr,
306 .maxlen = sizeof(int), 313 .maxlen = sizeof(int),
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b224eb8bce8b..8416f8a68e65 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5518,6 +5518,18 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5518 struct tcp_sock *tp = tcp_sk(sk); 5518 struct tcp_sock *tp = tcp_sk(sk);
5519 int res; 5519 int res;
5520 5520
5521 if (sk->sk_rx_dst) {
5522 struct dst_entry *dst = sk->sk_rx_dst;
5523 if (unlikely(dst->obsolete)) {
5524 if (dst->ops->check(dst, 0) == NULL) {
5525 dst_release(dst);
5526 sk->sk_rx_dst = NULL;
5527 }
5528 }
5529 }
5530 if (unlikely(sk->sk_rx_dst == NULL))
5531 sk->sk_rx_dst = dst_clone(skb_dst(skb));
5532
5521 /* 5533 /*
5522 * Header prediction. 5534 * Header prediction.
5523 * The code loosely follows the one in the famous 5535 * The code loosely follows the one in the famous
@@ -5729,8 +5741,10 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
5729 5741
5730 tcp_set_state(sk, TCP_ESTABLISHED); 5742 tcp_set_state(sk, TCP_ESTABLISHED);
5731 5743
5732 if (skb != NULL) 5744 if (skb != NULL) {
5745 sk->sk_rx_dst = dst_clone(skb_dst(skb));
5733 security_inet_conn_established(sk, skb); 5746 security_inet_conn_established(sk, skb);
5747 }
5734 5748
5735 /* Make sure socket is routed, for correct metrics. */ 5749 /* Make sure socket is routed, for correct metrics. */
5736 icsk->icsk_af_ops->rebuild_header(sk); 5750 icsk->icsk_af_ops->rebuild_header(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c8d28c433b2b..64568fa21d05 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -698,8 +698,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
698 698
699 net = dev_net(skb_dst(skb)->dev); 699 net = dev_net(skb_dst(skb)->dev);
700 arg.tos = ip_hdr(skb)->tos; 700 arg.tos = ip_hdr(skb)->tos;
701 ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, 701 ip_send_unicast_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
702 &arg, arg.iov[0].iov_len); 702 ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
703 703
704 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); 704 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
705 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); 705 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
@@ -781,8 +781,8 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
781 if (oif) 781 if (oif)
782 arg.bound_dev_if = oif; 782 arg.bound_dev_if = oif;
783 arg.tos = tos; 783 arg.tos = tos;
784 ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, 784 ip_send_unicast_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
785 &arg, arg.iov[0].iov_len); 785 ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
786 786
787 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); 787 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
788} 788}
@@ -825,7 +825,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
825static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, 825static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
826 struct request_sock *req, 826 struct request_sock *req,
827 struct request_values *rvp, 827 struct request_values *rvp,
828 u16 queue_mapping) 828 u16 queue_mapping,
829 bool nocache)
829{ 830{
830 const struct inet_request_sock *ireq = inet_rsk(req); 831 const struct inet_request_sock *ireq = inet_rsk(req);
831 struct flowi4 fl4; 832 struct flowi4 fl4;
@@ -833,7 +834,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
833 struct sk_buff * skb; 834 struct sk_buff * skb;
834 835
835 /* First, grab a route. */ 836 /* First, grab a route. */
836 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) 837 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req, nocache)) == NULL)
837 return -1; 838 return -1;
838 839
839 skb = tcp_make_synack(sk, dst, req, rvp); 840 skb = tcp_make_synack(sk, dst, req, rvp);
@@ -848,7 +849,6 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
848 err = net_xmit_eval(err); 849 err = net_xmit_eval(err);
849 } 850 }
850 851
851 dst_release(dst);
852 return err; 852 return err;
853} 853}
854 854
@@ -856,7 +856,7 @@ static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
856 struct request_values *rvp) 856 struct request_values *rvp)
857{ 857{
858 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); 858 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
859 return tcp_v4_send_synack(sk, NULL, req, rvp, 0); 859 return tcp_v4_send_synack(sk, NULL, req, rvp, 0, false);
860} 860}
861 861
862/* 862/*
@@ -1389,7 +1389,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1389 */ 1389 */
1390 if (tmp_opt.saw_tstamp && 1390 if (tmp_opt.saw_tstamp &&
1391 tcp_death_row.sysctl_tw_recycle && 1391 tcp_death_row.sysctl_tw_recycle &&
1392 (dst = inet_csk_route_req(sk, &fl4, req)) != NULL && 1392 (dst = inet_csk_route_req(sk, &fl4, req, want_cookie)) != NULL &&
1393 fl4.daddr == saddr && 1393 fl4.daddr == saddr &&
1394 (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) { 1394 (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
1395 inet_peer_refcheck(peer); 1395 inet_peer_refcheck(peer);
@@ -1425,7 +1425,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1425 1425
1426 if (tcp_v4_send_synack(sk, dst, req, 1426 if (tcp_v4_send_synack(sk, dst, req,
1427 (struct request_values *)&tmp_ext, 1427 (struct request_values *)&tmp_ext,
1428 skb_get_queue_mapping(skb)) || 1428 skb_get_queue_mapping(skb),
1429 want_cookie) ||
1429 want_cookie) 1430 want_cookie)
1430 goto drop_and_free; 1431 goto drop_and_free;
1431 1432
@@ -1672,6 +1673,51 @@ csum_err:
1672} 1673}
1673EXPORT_SYMBOL(tcp_v4_do_rcv); 1674EXPORT_SYMBOL(tcp_v4_do_rcv);
1674 1675
1676void tcp_v4_early_demux(struct sk_buff *skb)
1677{
1678 struct net *net = dev_net(skb->dev);
1679 const struct iphdr *iph;
1680 const struct tcphdr *th;
1681 struct net_device *dev;
1682 struct sock *sk;
1683
1684 if (skb->pkt_type != PACKET_HOST)
1685 return;
1686
1687 if (!pskb_may_pull(skb, ip_hdrlen(skb) + sizeof(struct tcphdr)))
1688 return;
1689
1690 iph = ip_hdr(skb);
1691 th = (struct tcphdr *) ((char *)iph + ip_hdrlen(skb));
1692
1693 if (th->doff < sizeof(struct tcphdr) / 4)
1694 return;
1695
1696 if (!pskb_may_pull(skb, ip_hdrlen(skb) + th->doff * 4))
1697 return;
1698
1699 dev = skb->dev;
1700 sk = __inet_lookup_established(net, &tcp_hashinfo,
1701 iph->saddr, th->source,
1702 iph->daddr, ntohs(th->dest),
1703 dev->ifindex);
1704 if (sk) {
1705 skb->sk = sk;
1706 skb->destructor = sock_edemux;
1707 if (sk->sk_state != TCP_TIME_WAIT) {
1708 struct dst_entry *dst = sk->sk_rx_dst;
1709 if (dst)
1710 dst = dst_check(dst, 0);
1711 if (dst) {
1712 struct rtable *rt = (struct rtable *) dst;
1713
1714 if (rt->rt_iif == dev->ifindex)
1715 skb_dst_set_noref(skb, dst);
1716 }
1717 }
1718 }
1719}
1720
1675/* 1721/*
1676 * From tcp_input.c 1722 * From tcp_input.c
1677 */ 1723 */
@@ -1821,40 +1867,25 @@ do_time_wait:
1821 goto discard_it; 1867 goto discard_it;
1822} 1868}
1823 1869
1824struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it) 1870struct inet_peer *tcp_v4_get_peer(struct sock *sk)
1825{ 1871{
1826 struct rtable *rt = (struct rtable *) __sk_dst_get(sk); 1872 struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
1827 struct inet_sock *inet = inet_sk(sk); 1873 struct inet_sock *inet = inet_sk(sk);
1828 struct inet_peer *peer;
1829 1874
1830 if (!rt || 1875 /* If we don't have a valid cached route, or we're doing IP
1831 inet->cork.fl.u.ip4.daddr != inet->inet_daddr) { 1876 * options which make the IPv4 header destination address
1832 peer = inet_getpeer_v4(inet->inet_daddr, 1); 1877 * different from our peer's, do not bother with this.
1833 *release_it = true; 1878 */
1834 } else { 1879 if (!rt || inet->cork.fl.u.ip4.daddr != inet->inet_daddr)
1835 if (!rt->peer) 1880 return NULL;
1836 rt_bind_peer(rt, inet->inet_daddr, 1); 1881 return rt_get_peer_create(rt, inet->inet_daddr);
1837 peer = rt->peer;
1838 *release_it = false;
1839 }
1840
1841 return peer;
1842} 1882}
1843EXPORT_SYMBOL(tcp_v4_get_peer); 1883EXPORT_SYMBOL(tcp_v4_get_peer);
1844 1884
1845void *tcp_v4_tw_get_peer(struct sock *sk)
1846{
1847 const struct inet_timewait_sock *tw = inet_twsk(sk);
1848
1849 return inet_getpeer_v4(tw->tw_daddr, 1);
1850}
1851EXPORT_SYMBOL(tcp_v4_tw_get_peer);
1852
1853static struct timewait_sock_ops tcp_timewait_sock_ops = { 1885static struct timewait_sock_ops tcp_timewait_sock_ops = {
1854 .twsk_obj_size = sizeof(struct tcp_timewait_sock), 1886 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1855 .twsk_unique = tcp_twsk_unique, 1887 .twsk_unique = tcp_twsk_unique,
1856 .twsk_destructor= tcp_twsk_destructor, 1888 .twsk_destructor= tcp_twsk_destructor,
1857 .twsk_getpeer = tcp_v4_tw_get_peer,
1858}; 1889};
1859 1890
1860const struct inet_connection_sock_af_ops ipv4_specific = { 1891const struct inet_connection_sock_af_ops ipv4_specific = {
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b85d9fe7d663..72b7c63b1a39 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -60,9 +60,8 @@ static bool tcp_remember_stamp(struct sock *sk)
60 const struct inet_connection_sock *icsk = inet_csk(sk); 60 const struct inet_connection_sock *icsk = inet_csk(sk);
61 struct tcp_sock *tp = tcp_sk(sk); 61 struct tcp_sock *tp = tcp_sk(sk);
62 struct inet_peer *peer; 62 struct inet_peer *peer;
63 bool release_it;
64 63
65 peer = icsk->icsk_af_ops->get_peer(sk, &release_it); 64 peer = icsk->icsk_af_ops->get_peer(sk);
66 if (peer) { 65 if (peer) {
67 if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || 66 if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
68 ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && 67 ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
@@ -70,8 +69,6 @@ static bool tcp_remember_stamp(struct sock *sk)
70 peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; 69 peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
71 peer->tcp_ts = tp->rx_opt.ts_recent; 70 peer->tcp_ts = tp->rx_opt.ts_recent;
72 } 71 }
73 if (release_it)
74 inet_putpeer(peer);
75 return true; 72 return true;
76 } 73 }
77 74
@@ -80,20 +77,19 @@ static bool tcp_remember_stamp(struct sock *sk)
80 77
81static bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw) 78static bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw)
82{ 79{
80 const struct tcp_timewait_sock *tcptw;
83 struct sock *sk = (struct sock *) tw; 81 struct sock *sk = (struct sock *) tw;
84 struct inet_peer *peer; 82 struct inet_peer *peer;
85 83
86 peer = twsk_getpeer(sk); 84 tcptw = tcp_twsk(sk);
85 peer = tcptw->tw_peer;
87 if (peer) { 86 if (peer) {
88 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
89
90 if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || 87 if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
91 ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && 88 ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
92 peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { 89 peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
93 peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; 90 peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
94 peer->tcp_ts = tcptw->tw_ts_recent; 91 peer->tcp_ts = tcptw->tw_ts_recent;
95 } 92 }
96 inet_putpeer(peer);
97 return true; 93 return true;
98 } 94 }
99 return false; 95 return false;
@@ -317,9 +313,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
317 const struct inet_connection_sock *icsk = inet_csk(sk); 313 const struct inet_connection_sock *icsk = inet_csk(sk);
318 const struct tcp_sock *tp = tcp_sk(sk); 314 const struct tcp_sock *tp = tcp_sk(sk);
319 bool recycle_ok = false; 315 bool recycle_ok = false;
316 bool recycle_on = false;
320 317
321 if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) 318 if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) {
322 recycle_ok = tcp_remember_stamp(sk); 319 recycle_ok = tcp_remember_stamp(sk);
320 recycle_on = true;
321 }
323 322
324 if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) 323 if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets)
325 tw = inet_twsk_alloc(sk, state); 324 tw = inet_twsk_alloc(sk, state);
@@ -327,8 +326,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
327 if (tw != NULL) { 326 if (tw != NULL) {
328 struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); 327 struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
329 const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); 328 const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
329 struct inet_sock *inet = inet_sk(sk);
330 struct inet_peer *peer = NULL;
330 331
331 tw->tw_transparent = inet_sk(sk)->transparent; 332 tw->tw_transparent = inet->transparent;
332 tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; 333 tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale;
333 tcptw->tw_rcv_nxt = tp->rcv_nxt; 334 tcptw->tw_rcv_nxt = tp->rcv_nxt;
334 tcptw->tw_snd_nxt = tp->snd_nxt; 335 tcptw->tw_snd_nxt = tp->snd_nxt;
@@ -350,6 +351,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
350 } 351 }
351#endif 352#endif
352 353
354 if (recycle_on)
355 peer = icsk->icsk_af_ops->get_peer(sk);
356 tcptw->tw_peer = peer;
357 if (peer)
358 atomic_inc(&peer->refcnt);
359
353#ifdef CONFIG_TCP_MD5SIG 360#ifdef CONFIG_TCP_MD5SIG
354 /* 361 /*
355 * The timewait bucket does not have the key DB from the 362 * The timewait bucket does not have the key DB from the
@@ -401,8 +408,11 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
401 408
402void tcp_twsk_destructor(struct sock *sk) 409void tcp_twsk_destructor(struct sock *sk)
403{ 410{
404#ifdef CONFIG_TCP_MD5SIG
405 struct tcp_timewait_sock *twsk = tcp_twsk(sk); 411 struct tcp_timewait_sock *twsk = tcp_twsk(sk);
412
413 if (twsk->tw_peer)
414 inet_putpeer(twsk->tw_peer);
415#ifdef CONFIG_TCP_MD5SIG
406 if (twsk->tw_md5_key) { 416 if (twsk->tw_md5_key) {
407 tcp_free_md5sig_pool(); 417 tcp_free_md5sig_pool();
408 kfree_rcu(twsk->tw_md5_key, rcu); 418 kfree_rcu(twsk->tw_md5_key, rcu);
@@ -435,6 +445,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
435 struct tcp_sock *oldtp = tcp_sk(sk); 445 struct tcp_sock *oldtp = tcp_sk(sk);
436 struct tcp_cookie_values *oldcvp = oldtp->cookie_values; 446 struct tcp_cookie_values *oldcvp = oldtp->cookie_values;
437 447
448 newsk->sk_rx_dst = dst_clone(skb_dst(skb));
449
438 /* TCP Cookie Transactions require space for the cookie pair, 450 /* TCP Cookie Transactions require space for the cookie pair,
439 * as it differs for each connection. There is no need to 451 * as it differs for each connection. There is no need to
440 * copy any s_data_payload stored at the original socket. 452 * copy any s_data_payload stored at the original socket.
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 803cbfe82fbc..c465d3e51e28 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2442,7 +2442,16 @@ int tcp_send_synack(struct sock *sk)
2442 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); 2442 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2443} 2443}
2444 2444
2445/* Prepare a SYN-ACK. */ 2445/**
2446 * tcp_make_synack - Prepare a SYN-ACK.
2447 * sk: listener socket
2448 * dst: dst entry attached to the SYNACK
2449 * req: request_sock pointer
2450 * rvp: request_values pointer
2451 *
2452 * Allocate one skb and build a SYNACK packet.
2453 * @dst is consumed : Caller should not use it again.
2454 */
2446struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, 2455struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2447 struct request_sock *req, 2456 struct request_sock *req,
2448 struct request_values *rvp) 2457 struct request_values *rvp)
@@ -2461,14 +2470,15 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2461 2470
2462 if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) 2471 if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired)
2463 s_data_desired = cvp->s_data_desired; 2472 s_data_desired = cvp->s_data_desired;
2464 skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15 + s_data_desired, 1, GFP_ATOMIC); 2473 skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, GFP_ATOMIC);
2465 if (skb == NULL) 2474 if (unlikely(!skb)) {
2475 dst_release(dst);
2466 return NULL; 2476 return NULL;
2467 2477 }
2468 /* Reserve space for headers. */ 2478 /* Reserve space for headers. */
2469 skb_reserve(skb, MAX_TCP_HEADER); 2479 skb_reserve(skb, MAX_TCP_HEADER);
2470 2480
2471 skb_dst_set(skb, dst_clone(dst)); 2481 skb_dst_set(skb, dst);
2472 2482
2473 mss = dst_metric_advmss(dst); 2483 mss = dst_metric_advmss(dst);
2474 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) 2484 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index eaca73644e79..ee37d47d472e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -108,6 +108,7 @@
108#include <net/xfrm.h> 108#include <net/xfrm.h>
109#include <trace/events/udp.h> 109#include <trace/events/udp.h>
110#include <linux/static_key.h> 110#include <linux/static_key.h>
111#include <trace/events/skb.h>
111#include "udp_impl.h" 112#include "udp_impl.h"
112 113
113struct udp_table udp_table __read_mostly; 114struct udp_table udp_table __read_mostly;
@@ -615,6 +616,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
615 break; 616 break;
616 case ICMP_DEST_UNREACH: 617 case ICMP_DEST_UNREACH:
617 if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ 618 if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
619 ipv4_sk_update_pmtu(skb, sk, info);
618 if (inet->pmtudisc != IP_PMTUDISC_DONT) { 620 if (inet->pmtudisc != IP_PMTUDISC_DONT) {
619 err = EMSGSIZE; 621 err = EMSGSIZE;
620 harderr = 1; 622 harderr = 1;
@@ -1219,8 +1221,10 @@ try_again:
1219 goto csum_copy_err; 1221 goto csum_copy_err;
1220 } 1222 }
1221 1223
1222 if (err) 1224 if (unlikely(err)) {
1225 trace_kfree_skb(skb, udp_recvmsg);
1223 goto out_free; 1226 goto out_free;
1227 }
1224 1228
1225 if (!peeked) 1229 if (!peeked)
1226 UDP_INC_STATS_USER(sock_net(sk), 1230 UDP_INC_STATS_USER(sock_net(sk),
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 0d3426cb5c4f..9815ea0bca7f 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -90,9 +90,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
90 xdst->u.dst.dev = dev; 90 xdst->u.dst.dev = dev;
91 dev_hold(dev); 91 dev_hold(dev);
92 92
93 xdst->u.rt.peer = rt->peer; 93 rt_transfer_peer(&xdst->u.rt, rt);
94 if (rt->peer)
95 atomic_inc(&rt->peer->refcnt);
96 94
97 /* Sheit... I remember I did this right. Apparently, 95 /* Sheit... I remember I did this right. Apparently,
98 * it was magically lost, so this code needs audit */ 96 * it was magically lost, so this code needs audit */
@@ -102,7 +100,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
102 xdst->u.rt.rt_src = rt->rt_src; 100 xdst->u.rt.rt_src = rt->rt_src;
103 xdst->u.rt.rt_dst = rt->rt_dst; 101 xdst->u.rt.rt_dst = rt->rt_dst;
104 xdst->u.rt.rt_gateway = rt->rt_gateway; 102 xdst->u.rt.rt_gateway = rt->rt_gateway;
105 xdst->u.rt.rt_spec_dst = rt->rt_spec_dst;
106 103
107 return 0; 104 return 0;
108} 105}
@@ -212,8 +209,10 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
212 209
213 dst_destroy_metrics_generic(dst); 210 dst_destroy_metrics_generic(dst);
214 211
215 if (likely(xdst->u.rt.peer)) 212 if (rt_has_peer(&xdst->u.rt)) {
216 inet_putpeer(xdst->u.rt.peer); 213 struct inet_peer *peer = rt_peer_ptr(&xdst->u.rt);
214 inet_putpeer(peer);
215 }
217 216
218 xfrm_dst_destroy(xdst); 217 xfrm_dst_destroy(xdst);
219} 218}