diff options
Diffstat (limited to 'net/ipv4')
44 files changed, 804 insertions, 564 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c8f7aee587d1..07a02f6e9696 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
@@ -157,6 +157,7 @@ void inet_sock_destruct(struct sock *sk) | |||
157 | 157 | ||
158 | kfree(rcu_dereference_protected(inet->inet_opt, 1)); | 158 | kfree(rcu_dereference_protected(inet->inet_opt, 1)); |
159 | dst_release(rcu_dereference_check(sk->sk_dst_cache, 1)); | 159 | dst_release(rcu_dereference_check(sk->sk_dst_cache, 1)); |
160 | dst_release(sk->sk_rx_dst); | ||
160 | sk_refcnt_debug_dec(sk); | 161 | sk_refcnt_debug_dec(sk); |
161 | } | 162 | } |
162 | EXPORT_SYMBOL(inet_sock_destruct); | 163 | EXPORT_SYMBOL(inet_sock_destruct); |
@@ -242,20 +243,18 @@ void build_ehash_secret(void) | |||
242 | } | 243 | } |
243 | EXPORT_SYMBOL(build_ehash_secret); | 244 | EXPORT_SYMBOL(build_ehash_secret); |
244 | 245 | ||
245 | static inline int inet_netns_ok(struct net *net, int protocol) | 246 | static inline int inet_netns_ok(struct net *net, __u8 protocol) |
246 | { | 247 | { |
247 | int hash; | ||
248 | const struct net_protocol *ipprot; | 248 | const struct net_protocol *ipprot; |
249 | 249 | ||
250 | if (net_eq(net, &init_net)) | 250 | if (net_eq(net, &init_net)) |
251 | return 1; | 251 | return 1; |
252 | 252 | ||
253 | hash = protocol & (MAX_INET_PROTOS - 1); | 253 | ipprot = rcu_dereference(inet_protos[protocol]); |
254 | ipprot = rcu_dereference(inet_protos[hash]); | 254 | if (ipprot == NULL) { |
255 | |||
256 | if (ipprot == NULL) | ||
257 | /* raw IP is OK */ | 255 | /* raw IP is OK */ |
258 | return 1; | 256 | return 1; |
257 | } | ||
259 | return ipprot->netns_ok; | 258 | return ipprot->netns_ok; |
260 | } | 259 | } |
261 | 260 | ||
@@ -553,7 +552,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, | |||
553 | 552 | ||
554 | if (!inet_sk(sk)->inet_num && inet_autobind(sk)) | 553 | if (!inet_sk(sk)->inet_num && inet_autobind(sk)) |
555 | return -EAGAIN; | 554 | return -EAGAIN; |
556 | return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len); | 555 | return sk->sk_prot->connect(sk, uaddr, addr_len); |
557 | } | 556 | } |
558 | EXPORT_SYMBOL(inet_dgram_connect); | 557 | EXPORT_SYMBOL(inet_dgram_connect); |
559 | 558 | ||
@@ -1216,8 +1215,8 @@ EXPORT_SYMBOL(inet_sk_rebuild_header); | |||
1216 | 1215 | ||
1217 | static int inet_gso_send_check(struct sk_buff *skb) | 1216 | static int inet_gso_send_check(struct sk_buff *skb) |
1218 | { | 1217 | { |
1219 | const struct iphdr *iph; | ||
1220 | const struct net_protocol *ops; | 1218 | const struct net_protocol *ops; |
1219 | const struct iphdr *iph; | ||
1221 | int proto; | 1220 | int proto; |
1222 | int ihl; | 1221 | int ihl; |
1223 | int err = -EINVAL; | 1222 | int err = -EINVAL; |
@@ -1236,7 +1235,7 @@ static int inet_gso_send_check(struct sk_buff *skb) | |||
1236 | __skb_pull(skb, ihl); | 1235 | __skb_pull(skb, ihl); |
1237 | skb_reset_transport_header(skb); | 1236 | skb_reset_transport_header(skb); |
1238 | iph = ip_hdr(skb); | 1237 | iph = ip_hdr(skb); |
1239 | proto = iph->protocol & (MAX_INET_PROTOS - 1); | 1238 | proto = iph->protocol; |
1240 | err = -EPROTONOSUPPORT; | 1239 | err = -EPROTONOSUPPORT; |
1241 | 1240 | ||
1242 | rcu_read_lock(); | 1241 | rcu_read_lock(); |
@@ -1253,8 +1252,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, | |||
1253 | netdev_features_t features) | 1252 | netdev_features_t features) |
1254 | { | 1253 | { |
1255 | struct sk_buff *segs = ERR_PTR(-EINVAL); | 1254 | struct sk_buff *segs = ERR_PTR(-EINVAL); |
1256 | struct iphdr *iph; | ||
1257 | const struct net_protocol *ops; | 1255 | const struct net_protocol *ops; |
1256 | struct iphdr *iph; | ||
1258 | int proto; | 1257 | int proto; |
1259 | int ihl; | 1258 | int ihl; |
1260 | int id; | 1259 | int id; |
@@ -1286,7 +1285,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, | |||
1286 | skb_reset_transport_header(skb); | 1285 | skb_reset_transport_header(skb); |
1287 | iph = ip_hdr(skb); | 1286 | iph = ip_hdr(skb); |
1288 | id = ntohs(iph->id); | 1287 | id = ntohs(iph->id); |
1289 | proto = iph->protocol & (MAX_INET_PROTOS - 1); | 1288 | proto = iph->protocol; |
1290 | segs = ERR_PTR(-EPROTONOSUPPORT); | 1289 | segs = ERR_PTR(-EPROTONOSUPPORT); |
1291 | 1290 | ||
1292 | rcu_read_lock(); | 1291 | rcu_read_lock(); |
@@ -1340,7 +1339,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, | |||
1340 | goto out; | 1339 | goto out; |
1341 | } | 1340 | } |
1342 | 1341 | ||
1343 | proto = iph->protocol & (MAX_INET_PROTOS - 1); | 1342 | proto = iph->protocol; |
1344 | 1343 | ||
1345 | rcu_read_lock(); | 1344 | rcu_read_lock(); |
1346 | ops = rcu_dereference(inet_protos[proto]); | 1345 | ops = rcu_dereference(inet_protos[proto]); |
@@ -1398,11 +1397,11 @@ out: | |||
1398 | 1397 | ||
1399 | static int inet_gro_complete(struct sk_buff *skb) | 1398 | static int inet_gro_complete(struct sk_buff *skb) |
1400 | { | 1399 | { |
1401 | const struct net_protocol *ops; | 1400 | __be16 newlen = htons(skb->len - skb_network_offset(skb)); |
1402 | struct iphdr *iph = ip_hdr(skb); | 1401 | struct iphdr *iph = ip_hdr(skb); |
1403 | int proto = iph->protocol & (MAX_INET_PROTOS - 1); | 1402 | const struct net_protocol *ops; |
1403 | int proto = iph->protocol; | ||
1404 | int err = -ENOSYS; | 1404 | int err = -ENOSYS; |
1405 | __be16 newlen = htons(skb->len - skb_network_offset(skb)); | ||
1406 | 1405 | ||
1407 | csum_replace2(&iph->check, iph->tot_len, newlen); | 1406 | csum_replace2(&iph->check, iph->tot_len, newlen); |
1408 | iph->tot_len = newlen; | 1407 | iph->tot_len = newlen; |
@@ -1520,14 +1519,15 @@ static const struct net_protocol igmp_protocol = { | |||
1520 | #endif | 1519 | #endif |
1521 | 1520 | ||
1522 | static const struct net_protocol tcp_protocol = { | 1521 | static const struct net_protocol tcp_protocol = { |
1523 | .handler = tcp_v4_rcv, | 1522 | .early_demux = tcp_v4_early_demux, |
1524 | .err_handler = tcp_v4_err, | 1523 | .handler = tcp_v4_rcv, |
1525 | .gso_send_check = tcp_v4_gso_send_check, | 1524 | .err_handler = tcp_v4_err, |
1526 | .gso_segment = tcp_tso_segment, | 1525 | .gso_send_check = tcp_v4_gso_send_check, |
1527 | .gro_receive = tcp4_gro_receive, | 1526 | .gso_segment = tcp_tso_segment, |
1528 | .gro_complete = tcp4_gro_complete, | 1527 | .gro_receive = tcp4_gro_receive, |
1529 | .no_policy = 1, | 1528 | .gro_complete = tcp4_gro_complete, |
1530 | .netns_ok = 1, | 1529 | .no_policy = 1, |
1530 | .netns_ok = 1, | ||
1531 | }; | 1531 | }; |
1532 | 1532 | ||
1533 | static const struct net_protocol udp_protocol = { | 1533 | static const struct net_protocol udp_protocol = { |
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index e8f2617ecd47..916d5ecaf6c6 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c | |||
@@ -408,6 +408,7 @@ static void ah4_err(struct sk_buff *skb, u32 info) | |||
408 | return; | 408 | return; |
409 | pr_debug("pmtu discovery on SA AH/%08x/%08x\n", | 409 | pr_debug("pmtu discovery on SA AH/%08x/%08x\n", |
410 | ntohl(ah->spi), ntohl(iph->daddr)); | 410 | ntohl(ah->spi), ntohl(iph->daddr)); |
411 | ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0); | ||
411 | xfrm_state_put(x); | 412 | xfrm_state_put(x); |
412 | } | 413 | } |
413 | 414 | ||
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index cda37be02f8d..2e560f0c757d 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c | |||
@@ -790,7 +790,8 @@ static int arp_process(struct sk_buff *skb) | |||
790 | * Check for bad requests for 127.x.x.x and requests for multicast | 790 | * Check for bad requests for 127.x.x.x and requests for multicast |
791 | * addresses. If this is one such, delete it. | 791 | * addresses. If this is one such, delete it. |
792 | */ | 792 | */ |
793 | if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) | 793 | if (ipv4_is_multicast(tip) || |
794 | (!IN_DEV_ROUTE_LOCALNET(in_dev) && ipv4_is_loopback(tip))) | ||
794 | goto out; | 795 | goto out; |
795 | 796 | ||
796 | /* | 797 | /* |
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 10e15a144e95..44bf82e3aef7 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c | |||
@@ -1500,7 +1500,8 @@ static int devinet_conf_proc(ctl_table *ctl, int write, | |||
1500 | 1500 | ||
1501 | if (cnf == net->ipv4.devconf_dflt) | 1501 | if (cnf == net->ipv4.devconf_dflt) |
1502 | devinet_copy_dflt_conf(net, i); | 1502 | devinet_copy_dflt_conf(net, i); |
1503 | if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1) | 1503 | if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 || |
1504 | i == IPV4_DEVCONF_ROUTE_LOCALNET - 1) | ||
1504 | if ((new_value == 0) && (old_value != 0)) | 1505 | if ((new_value == 0) && (old_value != 0)) |
1505 | rt_cache_flush(net, 0); | 1506 | rt_cache_flush(net, 0); |
1506 | } | 1507 | } |
@@ -1617,6 +1618,8 @@ static struct devinet_sysctl_table { | |||
1617 | "force_igmp_version"), | 1618 | "force_igmp_version"), |
1618 | DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES, | 1619 | DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES, |
1619 | "promote_secondaries"), | 1620 | "promote_secondaries"), |
1621 | DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET, | ||
1622 | "route_localnet"), | ||
1620 | }, | 1623 | }, |
1621 | }; | 1624 | }; |
1622 | 1625 | ||
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index cb982a61536f..7b95b49a36ce 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c | |||
@@ -494,6 +494,7 @@ static void esp4_err(struct sk_buff *skb, u32 info) | |||
494 | return; | 494 | return; |
495 | NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", | 495 | NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", |
496 | ntohl(esph->spi), ntohl(iph->daddr)); | 496 | ntohl(esph->spi), ntohl(iph->daddr)); |
497 | ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0); | ||
497 | xfrm_state_put(x); | 498 | xfrm_state_put(x); |
498 | } | 499 | } |
499 | 500 | ||
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 3854411fa37c..63b11ca54d95 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c | |||
@@ -180,6 +180,35 @@ unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, | |||
180 | } | 180 | } |
181 | EXPORT_SYMBOL(inet_dev_addr_type); | 181 | EXPORT_SYMBOL(inet_dev_addr_type); |
182 | 182 | ||
183 | __be32 fib_compute_spec_dst(struct sk_buff *skb) | ||
184 | { | ||
185 | struct net_device *dev = skb->dev; | ||
186 | struct in_device *in_dev; | ||
187 | struct fib_result res; | ||
188 | struct flowi4 fl4; | ||
189 | struct net *net; | ||
190 | |||
191 | if (skb->pkt_type != PACKET_BROADCAST && | ||
192 | skb->pkt_type != PACKET_MULTICAST) | ||
193 | return ip_hdr(skb)->daddr; | ||
194 | |||
195 | in_dev = __in_dev_get_rcu(dev); | ||
196 | BUG_ON(!in_dev); | ||
197 | fl4.flowi4_oif = 0; | ||
198 | fl4.flowi4_iif = 0; | ||
199 | fl4.daddr = ip_hdr(skb)->saddr; | ||
200 | fl4.saddr = ip_hdr(skb)->daddr; | ||
201 | fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); | ||
202 | fl4.flowi4_scope = RT_SCOPE_UNIVERSE; | ||
203 | fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0; | ||
204 | |||
205 | net = dev_net(dev); | ||
206 | if (!fib_lookup(net, &fl4, &res)) | ||
207 | return FIB_RES_PREFSRC(net, res); | ||
208 | else | ||
209 | return inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); | ||
210 | } | ||
211 | |||
183 | /* Given (packet source, input interface) and optional (dst, oif, tos): | 212 | /* Given (packet source, input interface) and optional (dst, oif, tos): |
184 | * - (main) check, that source is valid i.e. not broadcast or our local | 213 | * - (main) check, that source is valid i.e. not broadcast or our local |
185 | * address. | 214 | * address. |
@@ -189,8 +218,7 @@ EXPORT_SYMBOL(inet_dev_addr_type); | |||
189 | * called with rcu_read_lock() | 218 | * called with rcu_read_lock() |
190 | */ | 219 | */ |
191 | int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, | 220 | int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, |
192 | int oif, struct net_device *dev, __be32 *spec_dst, | 221 | int oif, struct net_device *dev, u32 *itag) |
193 | u32 *itag) | ||
194 | { | 222 | { |
195 | struct in_device *in_dev; | 223 | struct in_device *in_dev; |
196 | struct flowi4 fl4; | 224 | struct flowi4 fl4; |
@@ -229,7 +257,6 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, | |||
229 | if (res.type != RTN_LOCAL || !accept_local) | 257 | if (res.type != RTN_LOCAL || !accept_local) |
230 | goto e_inval; | 258 | goto e_inval; |
231 | } | 259 | } |
232 | *spec_dst = FIB_RES_PREFSRC(net, res); | ||
233 | fib_combine_itag(itag, &res); | 260 | fib_combine_itag(itag, &res); |
234 | dev_match = false; | 261 | dev_match = false; |
235 | 262 | ||
@@ -258,17 +285,14 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, | |||
258 | 285 | ||
259 | ret = 0; | 286 | ret = 0; |
260 | if (fib_lookup(net, &fl4, &res) == 0) { | 287 | if (fib_lookup(net, &fl4, &res) == 0) { |
261 | if (res.type == RTN_UNICAST) { | 288 | if (res.type == RTN_UNICAST) |
262 | *spec_dst = FIB_RES_PREFSRC(net, res); | ||
263 | ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; | 289 | ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; |
264 | } | ||
265 | } | 290 | } |
266 | return ret; | 291 | return ret; |
267 | 292 | ||
268 | last_resort: | 293 | last_resort: |
269 | if (rpf) | 294 | if (rpf) |
270 | goto e_rpf; | 295 | goto e_rpf; |
271 | *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); | ||
272 | *itag = 0; | 296 | *itag = 0; |
273 | return 0; | 297 | return 0; |
274 | 298 | ||
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e5b7182fa099..415f8230fc88 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
@@ -779,9 +779,14 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
779 | int type = nla_type(nla); | 779 | int type = nla_type(nla); |
780 | 780 | ||
781 | if (type) { | 781 | if (type) { |
782 | u32 val; | ||
783 | |||
782 | if (type > RTAX_MAX) | 784 | if (type > RTAX_MAX) |
783 | goto err_inval; | 785 | goto err_inval; |
784 | fi->fib_metrics[type - 1] = nla_get_u32(nla); | 786 | val = nla_get_u32(nla); |
787 | if (type == RTAX_ADVMSS && val > 65535 - 40) | ||
788 | val = 65535 - 40; | ||
789 | fi->fib_metrics[type - 1] = val; | ||
785 | } | 790 | } |
786 | } | 791 | } |
787 | } | 792 | } |
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 30b88d7b4bd6..9b0f25930fbc 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -1007,9 +1007,9 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) | |||
1007 | while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) { | 1007 | while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) { |
1008 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 1008 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
1009 | wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); | 1009 | wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); |
1010 | tn = (struct tnode *) resize(t, (struct tnode *)tn); | 1010 | tn = (struct tnode *)resize(t, tn); |
1011 | 1011 | ||
1012 | tnode_put_child_reorg((struct tnode *)tp, cindex, | 1012 | tnode_put_child_reorg(tp, cindex, |
1013 | (struct rt_trie_node *)tn, wasfull); | 1013 | (struct rt_trie_node *)tn, wasfull); |
1014 | 1014 | ||
1015 | tp = node_parent((struct rt_trie_node *) tn); | 1015 | tp = node_parent((struct rt_trie_node *) tn); |
@@ -1024,7 +1024,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) | |||
1024 | 1024 | ||
1025 | /* Handle last (top) tnode */ | 1025 | /* Handle last (top) tnode */ |
1026 | if (IS_TNODE(tn)) | 1026 | if (IS_TNODE(tn)) |
1027 | tn = (struct tnode *)resize(t, (struct tnode *)tn); | 1027 | tn = (struct tnode *)resize(t, tn); |
1028 | 1028 | ||
1029 | rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); | 1029 | rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); |
1030 | tnode_free_flush(); | 1030 | tnode_free_flush(); |
@@ -1125,7 +1125,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) | |||
1125 | node_set_parent((struct rt_trie_node *)l, tp); | 1125 | node_set_parent((struct rt_trie_node *)l, tp); |
1126 | 1126 | ||
1127 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 1127 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
1128 | put_child(t, (struct tnode *)tp, cindex, (struct rt_trie_node *)l); | 1128 | put_child(t, tp, cindex, (struct rt_trie_node *)l); |
1129 | } else { | 1129 | } else { |
1130 | /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ | 1130 | /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ |
1131 | /* | 1131 | /* |
@@ -1160,8 +1160,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) | |||
1160 | 1160 | ||
1161 | if (tp) { | 1161 | if (tp) { |
1162 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 1162 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
1163 | put_child(t, (struct tnode *)tp, cindex, | 1163 | put_child(t, tp, cindex, (struct rt_trie_node *)tn); |
1164 | (struct rt_trie_node *)tn); | ||
1165 | } else { | 1164 | } else { |
1166 | rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); | 1165 | rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); |
1167 | tp = tn; | 1166 | tp = tn; |
@@ -1620,7 +1619,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l) | |||
1620 | 1619 | ||
1621 | if (tp) { | 1620 | if (tp) { |
1622 | t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits); | 1621 | t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits); |
1623 | put_child(t, (struct tnode *)tp, cindex, NULL); | 1622 | put_child(t, tp, cindex, NULL); |
1624 | trie_rebalance(t, tp); | 1623 | trie_rebalance(t, tp); |
1625 | } else | 1624 | } else |
1626 | RCU_INIT_POINTER(t->trie, NULL); | 1625 | RCU_INIT_POINTER(t->trie, NULL); |
@@ -1844,6 +1843,8 @@ int fib_table_flush(struct fib_table *tb) | |||
1844 | if (ll && hlist_empty(&ll->list)) | 1843 | if (ll && hlist_empty(&ll->list)) |
1845 | trie_leaf_remove(t, ll); | 1844 | trie_leaf_remove(t, ll); |
1846 | 1845 | ||
1846 | inetpeer_invalidate_tree(&tb->tb_peers); | ||
1847 | |||
1847 | pr_debug("trie_flush found=%d\n", found); | 1848 | pr_debug("trie_flush found=%d\n", found); |
1848 | return found; | 1849 | return found; |
1849 | } | 1850 | } |
@@ -1992,6 +1993,7 @@ struct fib_table *fib_trie_table(u32 id) | |||
1992 | tb->tb_id = id; | 1993 | tb->tb_id = id; |
1993 | tb->tb_default = -1; | 1994 | tb->tb_default = -1; |
1994 | tb->tb_num_default = 0; | 1995 | tb->tb_num_default = 0; |
1996 | inet_peer_base_init(&tb->tb_peers); | ||
1995 | 1997 | ||
1996 | t = (struct trie *) tb->tb_data; | 1998 | t = (struct trie *) tb->tb_data; |
1997 | memset(t, 0, sizeof(*t)); | 1999 | memset(t, 0, sizeof(*t)); |
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index c75efbdc71cb..4bce5a2830aa 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c | |||
@@ -95,6 +95,7 @@ | |||
95 | #include <net/checksum.h> | 95 | #include <net/checksum.h> |
96 | #include <net/xfrm.h> | 96 | #include <net/xfrm.h> |
97 | #include <net/inet_common.h> | 97 | #include <net/inet_common.h> |
98 | #include <net/ip_fib.h> | ||
98 | 99 | ||
99 | /* | 100 | /* |
100 | * Build xmit assembly blocks | 101 | * Build xmit assembly blocks |
@@ -253,9 +254,8 @@ static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, | |||
253 | 254 | ||
254 | /* Limit if icmp type is enabled in ratemask. */ | 255 | /* Limit if icmp type is enabled in ratemask. */ |
255 | if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) { | 256 | if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) { |
256 | if (!rt->peer) | 257 | struct inet_peer *peer = rt_get_peer_create(rt, fl4->daddr); |
257 | rt_bind_peer(rt, fl4->daddr, 1); | 258 | rc = inet_peer_xrlim_allow(peer, |
258 | rc = inet_peer_xrlim_allow(rt->peer, | ||
259 | net->ipv4.sysctl_icmp_ratelimit); | 259 | net->ipv4.sysctl_icmp_ratelimit); |
260 | } | 260 | } |
261 | out: | 261 | out: |
@@ -334,7 +334,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | |||
334 | struct flowi4 fl4; | 334 | struct flowi4 fl4; |
335 | struct sock *sk; | 335 | struct sock *sk; |
336 | struct inet_sock *inet; | 336 | struct inet_sock *inet; |
337 | __be32 daddr; | 337 | __be32 daddr, saddr; |
338 | 338 | ||
339 | if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) | 339 | if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) |
340 | return; | 340 | return; |
@@ -348,6 +348,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | |||
348 | 348 | ||
349 | inet->tos = ip_hdr(skb)->tos; | 349 | inet->tos = ip_hdr(skb)->tos; |
350 | daddr = ipc.addr = ip_hdr(skb)->saddr; | 350 | daddr = ipc.addr = ip_hdr(skb)->saddr; |
351 | saddr = fib_compute_spec_dst(skb); | ||
351 | ipc.opt = NULL; | 352 | ipc.opt = NULL; |
352 | ipc.tx_flags = 0; | 353 | ipc.tx_flags = 0; |
353 | if (icmp_param->replyopts.opt.opt.optlen) { | 354 | if (icmp_param->replyopts.opt.opt.optlen) { |
@@ -357,7 +358,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | |||
357 | } | 358 | } |
358 | memset(&fl4, 0, sizeof(fl4)); | 359 | memset(&fl4, 0, sizeof(fl4)); |
359 | fl4.daddr = daddr; | 360 | fl4.daddr = daddr; |
360 | fl4.saddr = rt->rt_spec_dst; | 361 | fl4.saddr = saddr; |
361 | fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); | 362 | fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); |
362 | fl4.flowi4_proto = IPPROTO_ICMP; | 363 | fl4.flowi4_proto = IPPROTO_ICMP; |
363 | security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); | 364 | security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); |
@@ -638,12 +639,12 @@ EXPORT_SYMBOL(icmp_send); | |||
638 | 639 | ||
639 | static void icmp_unreach(struct sk_buff *skb) | 640 | static void icmp_unreach(struct sk_buff *skb) |
640 | { | 641 | { |
642 | const struct net_protocol *ipprot; | ||
641 | const struct iphdr *iph; | 643 | const struct iphdr *iph; |
642 | struct icmphdr *icmph; | 644 | struct icmphdr *icmph; |
643 | int hash, protocol; | ||
644 | const struct net_protocol *ipprot; | ||
645 | u32 info = 0; | ||
646 | struct net *net; | 645 | struct net *net; |
646 | u32 info = 0; | ||
647 | int protocol; | ||
647 | 648 | ||
648 | net = dev_net(skb_dst(skb)->dev); | 649 | net = dev_net(skb_dst(skb)->dev); |
649 | 650 | ||
@@ -674,9 +675,7 @@ static void icmp_unreach(struct sk_buff *skb) | |||
674 | LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: fragmentation needed and DF set\n"), | 675 | LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: fragmentation needed and DF set\n"), |
675 | &iph->daddr); | 676 | &iph->daddr); |
676 | } else { | 677 | } else { |
677 | info = ip_rt_frag_needed(net, iph, | 678 | info = ntohs(icmph->un.frag.mtu); |
678 | ntohs(icmph->un.frag.mtu), | ||
679 | skb->dev); | ||
680 | if (!info) | 679 | if (!info) |
681 | goto out; | 680 | goto out; |
682 | } | 681 | } |
@@ -734,9 +733,8 @@ static void icmp_unreach(struct sk_buff *skb) | |||
734 | */ | 733 | */ |
735 | raw_icmp_error(skb, protocol, info); | 734 | raw_icmp_error(skb, protocol, info); |
736 | 735 | ||
737 | hash = protocol & (MAX_INET_PROTOS - 1); | ||
738 | rcu_read_lock(); | 736 | rcu_read_lock(); |
739 | ipprot = rcu_dereference(inet_protos[hash]); | 737 | ipprot = rcu_dereference(inet_protos[protocol]); |
740 | if (ipprot && ipprot->err_handler) | 738 | if (ipprot && ipprot->err_handler) |
741 | ipprot->err_handler(skb, info); | 739 | ipprot->err_handler(skb, info); |
742 | rcu_read_unlock(); | 740 | rcu_read_unlock(); |
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f9ee7417f6a0..034ddbe42adf 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -368,17 +368,21 @@ EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); | |||
368 | 368 | ||
369 | struct dst_entry *inet_csk_route_req(struct sock *sk, | 369 | struct dst_entry *inet_csk_route_req(struct sock *sk, |
370 | struct flowi4 *fl4, | 370 | struct flowi4 *fl4, |
371 | const struct request_sock *req) | 371 | const struct request_sock *req, |
372 | bool nocache) | ||
372 | { | 373 | { |
373 | struct rtable *rt; | 374 | struct rtable *rt; |
374 | const struct inet_request_sock *ireq = inet_rsk(req); | 375 | const struct inet_request_sock *ireq = inet_rsk(req); |
375 | struct ip_options_rcu *opt = inet_rsk(req)->opt; | 376 | struct ip_options_rcu *opt = inet_rsk(req)->opt; |
376 | struct net *net = sock_net(sk); | 377 | struct net *net = sock_net(sk); |
378 | int flags = inet_sk_flowi_flags(sk) & ~FLOWI_FLAG_PRECOW_METRICS; | ||
377 | 379 | ||
380 | if (nocache) | ||
381 | flags |= FLOWI_FLAG_RT_NOCACHE; | ||
378 | flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, | 382 | flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, |
379 | RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, | 383 | RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, |
380 | sk->sk_protocol, | 384 | sk->sk_protocol, |
381 | inet_sk_flowi_flags(sk) & ~FLOWI_FLAG_PRECOW_METRICS, | 385 | flags, |
382 | (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, | 386 | (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, |
383 | ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); | 387 | ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); |
384 | security_req_classify_flow(req, flowi4_to_flowi(fl4)); | 388 | security_req_classify_flow(req, flowi4_to_flowi(fl4)); |
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 46d1e7199a8c..38064a285cca 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c | |||
@@ -46,9 +46,6 @@ struct inet_diag_entry { | |||
46 | u16 userlocks; | 46 | u16 userlocks; |
47 | }; | 47 | }; |
48 | 48 | ||
49 | #define INET_DIAG_PUT(skb, attrtype, attrlen) \ | ||
50 | RTA_DATA(__RTA_PUT(skb, attrtype, attrlen)) | ||
51 | |||
52 | static DEFINE_MUTEX(inet_diag_table_mutex); | 49 | static DEFINE_MUTEX(inet_diag_table_mutex); |
53 | 50 | ||
54 | static const struct inet_diag_handler *inet_diag_lock_handler(int proto) | 51 | static const struct inet_diag_handler *inet_diag_lock_handler(int proto) |
@@ -78,24 +75,22 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, | |||
78 | const struct inet_sock *inet = inet_sk(sk); | 75 | const struct inet_sock *inet = inet_sk(sk); |
79 | struct inet_diag_msg *r; | 76 | struct inet_diag_msg *r; |
80 | struct nlmsghdr *nlh; | 77 | struct nlmsghdr *nlh; |
78 | struct nlattr *attr; | ||
81 | void *info = NULL; | 79 | void *info = NULL; |
82 | struct inet_diag_meminfo *minfo = NULL; | ||
83 | unsigned char *b = skb_tail_pointer(skb); | ||
84 | const struct inet_diag_handler *handler; | 80 | const struct inet_diag_handler *handler; |
85 | int ext = req->idiag_ext; | 81 | int ext = req->idiag_ext; |
86 | 82 | ||
87 | handler = inet_diag_table[req->sdiag_protocol]; | 83 | handler = inet_diag_table[req->sdiag_protocol]; |
88 | BUG_ON(handler == NULL); | 84 | BUG_ON(handler == NULL); |
89 | 85 | ||
90 | nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r)); | 86 | nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r), |
91 | nlh->nlmsg_flags = nlmsg_flags; | 87 | nlmsg_flags); |
88 | if (!nlh) | ||
89 | return -EMSGSIZE; | ||
92 | 90 | ||
93 | r = NLMSG_DATA(nlh); | 91 | r = nlmsg_data(nlh); |
94 | BUG_ON(sk->sk_state == TCP_TIME_WAIT); | 92 | BUG_ON(sk->sk_state == TCP_TIME_WAIT); |
95 | 93 | ||
96 | if (ext & (1 << (INET_DIAG_MEMINFO - 1))) | ||
97 | minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, sizeof(*minfo)); | ||
98 | |||
99 | r->idiag_family = sk->sk_family; | 94 | r->idiag_family = sk->sk_family; |
100 | r->idiag_state = sk->sk_state; | 95 | r->idiag_state = sk->sk_state; |
101 | r->idiag_timer = 0; | 96 | r->idiag_timer = 0; |
@@ -113,7 +108,8 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, | |||
113 | * hence this needs to be included regardless of socket family. | 108 | * hence this needs to be included regardless of socket family. |
114 | */ | 109 | */ |
115 | if (ext & (1 << (INET_DIAG_TOS - 1))) | 110 | if (ext & (1 << (INET_DIAG_TOS - 1))) |
116 | RTA_PUT_U8(skb, INET_DIAG_TOS, inet->tos); | 111 | if (nla_put_u8(skb, INET_DIAG_TOS, inet->tos) < 0) |
112 | goto errout; | ||
117 | 113 | ||
118 | #if IS_ENABLED(CONFIG_IPV6) | 114 | #if IS_ENABLED(CONFIG_IPV6) |
119 | if (r->idiag_family == AF_INET6) { | 115 | if (r->idiag_family == AF_INET6) { |
@@ -121,24 +117,31 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, | |||
121 | 117 | ||
122 | *(struct in6_addr *)r->id.idiag_src = np->rcv_saddr; | 118 | *(struct in6_addr *)r->id.idiag_src = np->rcv_saddr; |
123 | *(struct in6_addr *)r->id.idiag_dst = np->daddr; | 119 | *(struct in6_addr *)r->id.idiag_dst = np->daddr; |
120 | |||
124 | if (ext & (1 << (INET_DIAG_TCLASS - 1))) | 121 | if (ext & (1 << (INET_DIAG_TCLASS - 1))) |
125 | RTA_PUT_U8(skb, INET_DIAG_TCLASS, np->tclass); | 122 | if (nla_put_u8(skb, INET_DIAG_TCLASS, np->tclass) < 0) |
123 | goto errout; | ||
126 | } | 124 | } |
127 | #endif | 125 | #endif |
128 | 126 | ||
129 | r->idiag_uid = sock_i_uid(sk); | 127 | r->idiag_uid = sock_i_uid(sk); |
130 | r->idiag_inode = sock_i_ino(sk); | 128 | r->idiag_inode = sock_i_ino(sk); |
131 | 129 | ||
132 | if (minfo) { | 130 | if (ext & (1 << (INET_DIAG_MEMINFO - 1))) { |
133 | minfo->idiag_rmem = sk_rmem_alloc_get(sk); | 131 | struct inet_diag_meminfo minfo = { |
134 | minfo->idiag_wmem = sk->sk_wmem_queued; | 132 | .idiag_rmem = sk_rmem_alloc_get(sk), |
135 | minfo->idiag_fmem = sk->sk_forward_alloc; | 133 | .idiag_wmem = sk->sk_wmem_queued, |
136 | minfo->idiag_tmem = sk_wmem_alloc_get(sk); | 134 | .idiag_fmem = sk->sk_forward_alloc, |
135 | .idiag_tmem = sk_wmem_alloc_get(sk), | ||
136 | }; | ||
137 | |||
138 | if (nla_put(skb, INET_DIAG_MEMINFO, sizeof(minfo), &minfo) < 0) | ||
139 | goto errout; | ||
137 | } | 140 | } |
138 | 141 | ||
139 | if (ext & (1 << (INET_DIAG_SKMEMINFO - 1))) | 142 | if (ext & (1 << (INET_DIAG_SKMEMINFO - 1))) |
140 | if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO)) | 143 | if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO)) |
141 | goto rtattr_failure; | 144 | goto errout; |
142 | 145 | ||
143 | if (icsk == NULL) { | 146 | if (icsk == NULL) { |
144 | handler->idiag_get_info(sk, r, NULL); | 147 | handler->idiag_get_info(sk, r, NULL); |
@@ -165,16 +168,20 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, | |||
165 | } | 168 | } |
166 | #undef EXPIRES_IN_MS | 169 | #undef EXPIRES_IN_MS |
167 | 170 | ||
168 | if (ext & (1 << (INET_DIAG_INFO - 1))) | 171 | if (ext & (1 << (INET_DIAG_INFO - 1))) { |
169 | info = INET_DIAG_PUT(skb, INET_DIAG_INFO, sizeof(struct tcp_info)); | 172 | attr = nla_reserve(skb, INET_DIAG_INFO, |
173 | sizeof(struct tcp_info)); | ||
174 | if (!attr) | ||
175 | goto errout; | ||
170 | 176 | ||
171 | if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) { | 177 | info = nla_data(attr); |
172 | const size_t len = strlen(icsk->icsk_ca_ops->name); | ||
173 | |||
174 | strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1), | ||
175 | icsk->icsk_ca_ops->name); | ||
176 | } | 178 | } |
177 | 179 | ||
180 | if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) | ||
181 | if (nla_put_string(skb, INET_DIAG_CONG, | ||
182 | icsk->icsk_ca_ops->name) < 0) | ||
183 | goto errout; | ||
184 | |||
178 | handler->idiag_get_info(sk, r, info); | 185 | handler->idiag_get_info(sk, r, info); |
179 | 186 | ||
180 | if (sk->sk_state < TCP_TIME_WAIT && | 187 | if (sk->sk_state < TCP_TIME_WAIT && |
@@ -182,12 +189,10 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, | |||
182 | icsk->icsk_ca_ops->get_info(sk, ext, skb); | 189 | icsk->icsk_ca_ops->get_info(sk, ext, skb); |
183 | 190 | ||
184 | out: | 191 | out: |
185 | nlh->nlmsg_len = skb_tail_pointer(skb) - b; | 192 | return nlmsg_end(skb, nlh); |
186 | return skb->len; | ||
187 | 193 | ||
188 | rtattr_failure: | 194 | errout: |
189 | nlmsg_failure: | 195 | nlmsg_cancel(skb, nlh); |
190 | nlmsg_trim(skb, b); | ||
191 | return -EMSGSIZE; | 196 | return -EMSGSIZE; |
192 | } | 197 | } |
193 | EXPORT_SYMBOL_GPL(inet_sk_diag_fill); | 198 | EXPORT_SYMBOL_GPL(inet_sk_diag_fill); |
@@ -208,14 +213,15 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, | |||
208 | { | 213 | { |
209 | long tmo; | 214 | long tmo; |
210 | struct inet_diag_msg *r; | 215 | struct inet_diag_msg *r; |
211 | const unsigned char *previous_tail = skb_tail_pointer(skb); | 216 | struct nlmsghdr *nlh; |
212 | struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq, | ||
213 | unlh->nlmsg_type, sizeof(*r)); | ||
214 | 217 | ||
215 | r = NLMSG_DATA(nlh); | 218 | nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r), |
216 | BUG_ON(tw->tw_state != TCP_TIME_WAIT); | 219 | nlmsg_flags); |
220 | if (!nlh) | ||
221 | return -EMSGSIZE; | ||
217 | 222 | ||
218 | nlh->nlmsg_flags = nlmsg_flags; | 223 | r = nlmsg_data(nlh); |
224 | BUG_ON(tw->tw_state != TCP_TIME_WAIT); | ||
219 | 225 | ||
220 | tmo = tw->tw_ttd - jiffies; | 226 | tmo = tw->tw_ttd - jiffies; |
221 | if (tmo < 0) | 227 | if (tmo < 0) |
@@ -245,11 +251,8 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, | |||
245 | *(struct in6_addr *)r->id.idiag_dst = tw6->tw_v6_daddr; | 251 | *(struct in6_addr *)r->id.idiag_dst = tw6->tw_v6_daddr; |
246 | } | 252 | } |
247 | #endif | 253 | #endif |
248 | nlh->nlmsg_len = skb_tail_pointer(skb) - previous_tail; | 254 | |
249 | return skb->len; | 255 | return nlmsg_end(skb, nlh); |
250 | nlmsg_failure: | ||
251 | nlmsg_trim(skb, previous_tail); | ||
252 | return -EMSGSIZE; | ||
253 | } | 256 | } |
254 | 257 | ||
255 | static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, | 258 | static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, |
@@ -298,20 +301,20 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s | |||
298 | if (err) | 301 | if (err) |
299 | goto out; | 302 | goto out; |
300 | 303 | ||
301 | err = -ENOMEM; | 304 | rep = nlmsg_new(sizeof(struct inet_diag_msg) + |
302 | rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) + | 305 | sizeof(struct inet_diag_meminfo) + |
303 | sizeof(struct inet_diag_meminfo) + | 306 | sizeof(struct tcp_info) + 64, GFP_KERNEL); |
304 | sizeof(struct tcp_info) + 64)), | 307 | if (!rep) { |
305 | GFP_KERNEL); | 308 | err = -ENOMEM; |
306 | if (!rep) | ||
307 | goto out; | 309 | goto out; |
310 | } | ||
308 | 311 | ||
309 | err = sk_diag_fill(sk, rep, req, | 312 | err = sk_diag_fill(sk, rep, req, |
310 | NETLINK_CB(in_skb).pid, | 313 | NETLINK_CB(in_skb).pid, |
311 | nlh->nlmsg_seq, 0, nlh); | 314 | nlh->nlmsg_seq, 0, nlh); |
312 | if (err < 0) { | 315 | if (err < 0) { |
313 | WARN_ON(err == -EMSGSIZE); | 316 | WARN_ON(err == -EMSGSIZE); |
314 | kfree_skb(rep); | 317 | nlmsg_free(rep); |
315 | goto out; | 318 | goto out; |
316 | } | 319 | } |
317 | err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid, | 320 | err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid, |
@@ -592,15 +595,16 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, | |||
592 | { | 595 | { |
593 | const struct inet_request_sock *ireq = inet_rsk(req); | 596 | const struct inet_request_sock *ireq = inet_rsk(req); |
594 | struct inet_sock *inet = inet_sk(sk); | 597 | struct inet_sock *inet = inet_sk(sk); |
595 | unsigned char *b = skb_tail_pointer(skb); | ||
596 | struct inet_diag_msg *r; | 598 | struct inet_diag_msg *r; |
597 | struct nlmsghdr *nlh; | 599 | struct nlmsghdr *nlh; |
598 | long tmo; | 600 | long tmo; |
599 | 601 | ||
600 | nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r)); | 602 | nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r), |
601 | nlh->nlmsg_flags = NLM_F_MULTI; | 603 | NLM_F_MULTI); |
602 | r = NLMSG_DATA(nlh); | 604 | if (!nlh) |
605 | return -EMSGSIZE; | ||
603 | 606 | ||
607 | r = nlmsg_data(nlh); | ||
604 | r->idiag_family = sk->sk_family; | 608 | r->idiag_family = sk->sk_family; |
605 | r->idiag_state = TCP_SYN_RECV; | 609 | r->idiag_state = TCP_SYN_RECV; |
606 | r->idiag_timer = 1; | 610 | r->idiag_timer = 1; |
@@ -628,13 +632,8 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, | |||
628 | *(struct in6_addr *)r->id.idiag_dst = inet6_rsk(req)->rmt_addr; | 632 | *(struct in6_addr *)r->id.idiag_dst = inet6_rsk(req)->rmt_addr; |
629 | } | 633 | } |
630 | #endif | 634 | #endif |
631 | nlh->nlmsg_len = skb_tail_pointer(skb) - b; | ||
632 | |||
633 | return skb->len; | ||
634 | 635 | ||
635 | nlmsg_failure: | 636 | return nlmsg_end(skb, nlh); |
636 | nlmsg_trim(skb, b); | ||
637 | return -1; | ||
638 | } | 637 | } |
639 | 638 | ||
640 | static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, | 639 | static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, |
@@ -892,7 +891,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
892 | if (nlmsg_attrlen(cb->nlh, hdrlen)) | 891 | if (nlmsg_attrlen(cb->nlh, hdrlen)) |
893 | bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE); | 892 | bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE); |
894 | 893 | ||
895 | return __inet_diag_dump(skb, cb, (struct inet_diag_req_v2 *)NLMSG_DATA(cb->nlh), bc); | 894 | return __inet_diag_dump(skb, cb, nlmsg_data(cb->nlh), bc); |
896 | } | 895 | } |
897 | 896 | ||
898 | static inline int inet_diag_type2proto(int type) | 897 | static inline int inet_diag_type2proto(int type) |
@@ -909,7 +908,7 @@ static inline int inet_diag_type2proto(int type) | |||
909 | 908 | ||
910 | static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *cb) | 909 | static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *cb) |
911 | { | 910 | { |
912 | struct inet_diag_req *rc = NLMSG_DATA(cb->nlh); | 911 | struct inet_diag_req *rc = nlmsg_data(cb->nlh); |
913 | struct inet_diag_req_v2 req; | 912 | struct inet_diag_req_v2 req; |
914 | struct nlattr *bc = NULL; | 913 | struct nlattr *bc = NULL; |
915 | int hdrlen = sizeof(struct inet_diag_req); | 914 | int hdrlen = sizeof(struct inet_diag_req); |
@@ -929,7 +928,7 @@ static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *c | |||
929 | static int inet_diag_get_exact_compat(struct sk_buff *in_skb, | 928 | static int inet_diag_get_exact_compat(struct sk_buff *in_skb, |
930 | const struct nlmsghdr *nlh) | 929 | const struct nlmsghdr *nlh) |
931 | { | 930 | { |
932 | struct inet_diag_req *rc = NLMSG_DATA(nlh); | 931 | struct inet_diag_req *rc = nlmsg_data(nlh); |
933 | struct inet_diag_req_v2 req; | 932 | struct inet_diag_req_v2 req; |
934 | 933 | ||
935 | req.sdiag_family = rc->idiag_family; | 934 | req.sdiag_family = rc->idiag_family; |
@@ -996,7 +995,7 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) | |||
996 | } | 995 | } |
997 | } | 996 | } |
998 | 997 | ||
999 | return inet_diag_get_exact(skb, h, (struct inet_diag_req_v2 *)NLMSG_DATA(h)); | 998 | return inet_diag_get_exact(skb, h, nlmsg_data(h)); |
1000 | } | 999 | } |
1001 | 1000 | ||
1002 | static const struct sock_diag_handler inet_diag_handler = { | 1001 | static const struct sock_diag_handler inet_diag_handler = { |
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 5ff2a51b6d0c..85190e69297b 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c | |||
@@ -243,12 +243,12 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, | |||
243 | if (q == NULL) | 243 | if (q == NULL) |
244 | return NULL; | 244 | return NULL; |
245 | 245 | ||
246 | q->net = nf; | ||
246 | f->constructor(q, arg); | 247 | f->constructor(q, arg); |
247 | atomic_add(f->qsize, &nf->mem); | 248 | atomic_add(f->qsize, &nf->mem); |
248 | setup_timer(&q->timer, f->frag_expire, (unsigned long)q); | 249 | setup_timer(&q->timer, f->frag_expire, (unsigned long)q); |
249 | spin_lock_init(&q->lock); | 250 | spin_lock_init(&q->lock); |
250 | atomic_set(&q->refcnt, 1); | 251 | atomic_set(&q->refcnt, 1); |
251 | q->net = nf; | ||
252 | 252 | ||
253 | return q; | 253 | return q; |
254 | } | 254 | } |
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index dfba343b2509..da90a8cab614 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c | |||
@@ -82,23 +82,39 @@ static const struct inet_peer peer_fake_node = { | |||
82 | .avl_height = 0 | 82 | .avl_height = 0 |
83 | }; | 83 | }; |
84 | 84 | ||
85 | struct inet_peer_base { | 85 | void inet_peer_base_init(struct inet_peer_base *bp) |
86 | struct inet_peer __rcu *root; | 86 | { |
87 | seqlock_t lock; | 87 | bp->root = peer_avl_empty_rcu; |
88 | int total; | 88 | seqlock_init(&bp->lock); |
89 | }; | 89 | bp->flush_seq = ~0U; |
90 | bp->total = 0; | ||
91 | } | ||
92 | EXPORT_SYMBOL_GPL(inet_peer_base_init); | ||
90 | 93 | ||
91 | static struct inet_peer_base v4_peers = { | 94 | static atomic_t v4_seq = ATOMIC_INIT(0); |
92 | .root = peer_avl_empty_rcu, | 95 | static atomic_t v6_seq = ATOMIC_INIT(0); |
93 | .lock = __SEQLOCK_UNLOCKED(v4_peers.lock), | ||
94 | .total = 0, | ||
95 | }; | ||
96 | 96 | ||
97 | static struct inet_peer_base v6_peers = { | 97 | static atomic_t *inetpeer_seq_ptr(int family) |
98 | .root = peer_avl_empty_rcu, | 98 | { |
99 | .lock = __SEQLOCK_UNLOCKED(v6_peers.lock), | 99 | return (family == AF_INET ? &v4_seq : &v6_seq); |
100 | .total = 0, | 100 | } |
101 | }; | 101 | |
102 | static inline void flush_check(struct inet_peer_base *base, int family) | ||
103 | { | ||
104 | atomic_t *fp = inetpeer_seq_ptr(family); | ||
105 | |||
106 | if (unlikely(base->flush_seq != atomic_read(fp))) { | ||
107 | inetpeer_invalidate_tree(base); | ||
108 | base->flush_seq = atomic_read(fp); | ||
109 | } | ||
110 | } | ||
111 | |||
112 | void inetpeer_invalidate_family(int family) | ||
113 | { | ||
114 | atomic_t *fp = inetpeer_seq_ptr(family); | ||
115 | |||
116 | atomic_inc(fp); | ||
117 | } | ||
102 | 118 | ||
103 | #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ | 119 | #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ |
104 | 120 | ||
@@ -110,7 +126,7 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min | |||
110 | 126 | ||
111 | static void inetpeer_gc_worker(struct work_struct *work) | 127 | static void inetpeer_gc_worker(struct work_struct *work) |
112 | { | 128 | { |
113 | struct inet_peer *p, *n; | 129 | struct inet_peer *p, *n, *c; |
114 | LIST_HEAD(list); | 130 | LIST_HEAD(list); |
115 | 131 | ||
116 | spin_lock_bh(&gc_lock); | 132 | spin_lock_bh(&gc_lock); |
@@ -122,17 +138,19 @@ static void inetpeer_gc_worker(struct work_struct *work) | |||
122 | 138 | ||
123 | list_for_each_entry_safe(p, n, &list, gc_list) { | 139 | list_for_each_entry_safe(p, n, &list, gc_list) { |
124 | 140 | ||
125 | if(need_resched()) | 141 | if (need_resched()) |
126 | cond_resched(); | 142 | cond_resched(); |
127 | 143 | ||
128 | if (p->avl_left != peer_avl_empty) { | 144 | c = rcu_dereference_protected(p->avl_left, 1); |
129 | list_add_tail(&p->avl_left->gc_list, &list); | 145 | if (c != peer_avl_empty) { |
130 | p->avl_left = peer_avl_empty; | 146 | list_add_tail(&c->gc_list, &list); |
147 | p->avl_left = peer_avl_empty_rcu; | ||
131 | } | 148 | } |
132 | 149 | ||
133 | if (p->avl_right != peer_avl_empty) { | 150 | c = rcu_dereference_protected(p->avl_right, 1); |
134 | list_add_tail(&p->avl_right->gc_list, &list); | 151 | if (c != peer_avl_empty) { |
135 | p->avl_right = peer_avl_empty; | 152 | list_add_tail(&c->gc_list, &list); |
153 | p->avl_right = peer_avl_empty_rcu; | ||
136 | } | 154 | } |
137 | 155 | ||
138 | n = list_entry(p->gc_list.next, struct inet_peer, gc_list); | 156 | n = list_entry(p->gc_list.next, struct inet_peer, gc_list); |
@@ -401,11 +419,6 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base, | |||
401 | call_rcu(&p->rcu, inetpeer_free_rcu); | 419 | call_rcu(&p->rcu, inetpeer_free_rcu); |
402 | } | 420 | } |
403 | 421 | ||
404 | static struct inet_peer_base *family_to_base(int family) | ||
405 | { | ||
406 | return family == AF_INET ? &v4_peers : &v6_peers; | ||
407 | } | ||
408 | |||
409 | /* perform garbage collect on all items stacked during a lookup */ | 422 | /* perform garbage collect on all items stacked during a lookup */ |
410 | static int inet_peer_gc(struct inet_peer_base *base, | 423 | static int inet_peer_gc(struct inet_peer_base *base, |
411 | struct inet_peer __rcu **stack[PEER_MAXDEPTH], | 424 | struct inet_peer __rcu **stack[PEER_MAXDEPTH], |
@@ -443,14 +456,17 @@ static int inet_peer_gc(struct inet_peer_base *base, | |||
443 | return cnt; | 456 | return cnt; |
444 | } | 457 | } |
445 | 458 | ||
446 | struct inet_peer *inet_getpeer(const struct inetpeer_addr *daddr, int create) | 459 | struct inet_peer *inet_getpeer(struct inet_peer_base *base, |
460 | const struct inetpeer_addr *daddr, | ||
461 | int create) | ||
447 | { | 462 | { |
448 | struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; | 463 | struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; |
449 | struct inet_peer_base *base = family_to_base(daddr->family); | ||
450 | struct inet_peer *p; | 464 | struct inet_peer *p; |
451 | unsigned int sequence; | 465 | unsigned int sequence; |
452 | int invalidated, gccnt = 0; | 466 | int invalidated, gccnt = 0; |
453 | 467 | ||
468 | flush_check(base, daddr->family); | ||
469 | |||
454 | /* Attempt a lockless lookup first. | 470 | /* Attempt a lockless lookup first. |
455 | * Because of a concurrent writer, we might not find an existing entry. | 471 | * Because of a concurrent writer, we might not find an existing entry. |
456 | */ | 472 | */ |
@@ -571,26 +587,19 @@ static void inetpeer_inval_rcu(struct rcu_head *head) | |||
571 | schedule_delayed_work(&gc_work, gc_delay); | 587 | schedule_delayed_work(&gc_work, gc_delay); |
572 | } | 588 | } |
573 | 589 | ||
574 | void inetpeer_invalidate_tree(int family) | 590 | void inetpeer_invalidate_tree(struct inet_peer_base *base) |
575 | { | 591 | { |
576 | struct inet_peer *old, *new, *prev; | 592 | struct inet_peer *root; |
577 | struct inet_peer_base *base = family_to_base(family); | ||
578 | 593 | ||
579 | write_seqlock_bh(&base->lock); | 594 | write_seqlock_bh(&base->lock); |
580 | 595 | ||
581 | old = base->root; | 596 | root = rcu_deref_locked(base->root, base); |
582 | if (old == peer_avl_empty_rcu) | 597 | if (root != peer_avl_empty) { |
583 | goto out; | 598 | base->root = peer_avl_empty_rcu; |
584 | |||
585 | new = peer_avl_empty_rcu; | ||
586 | |||
587 | prev = cmpxchg(&base->root, old, new); | ||
588 | if (prev == old) { | ||
589 | base->total = 0; | 599 | base->total = 0; |
590 | call_rcu(&prev->gc_rcu, inetpeer_inval_rcu); | 600 | call_rcu(&root->gc_rcu, inetpeer_inval_rcu); |
591 | } | 601 | } |
592 | 602 | ||
593 | out: | ||
594 | write_sequnlock_bh(&base->lock); | 603 | write_sequnlock_bh(&base->lock); |
595 | } | 604 | } |
596 | EXPORT_SYMBOL(inetpeer_invalidate_tree); | 605 | EXPORT_SYMBOL(inetpeer_invalidate_tree); |
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 9dbd3dd6022d..8d07c973409c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c | |||
@@ -171,6 +171,10 @@ static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb) | |||
171 | static void ip4_frag_init(struct inet_frag_queue *q, void *a) | 171 | static void ip4_frag_init(struct inet_frag_queue *q, void *a) |
172 | { | 172 | { |
173 | struct ipq *qp = container_of(q, struct ipq, q); | 173 | struct ipq *qp = container_of(q, struct ipq, q); |
174 | struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4, | ||
175 | frags); | ||
176 | struct net *net = container_of(ipv4, struct net, ipv4); | ||
177 | |||
174 | struct ip4_create_arg *arg = a; | 178 | struct ip4_create_arg *arg = a; |
175 | 179 | ||
176 | qp->protocol = arg->iph->protocol; | 180 | qp->protocol = arg->iph->protocol; |
@@ -180,7 +184,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a) | |||
180 | qp->daddr = arg->iph->daddr; | 184 | qp->daddr = arg->iph->daddr; |
181 | qp->user = arg->user; | 185 | qp->user = arg->user; |
182 | qp->peer = sysctl_ipfrag_max_dist ? | 186 | qp->peer = sysctl_ipfrag_max_dist ? |
183 | inet_getpeer_v4(arg->iph->saddr, 1) : NULL; | 187 | inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, 1) : NULL; |
184 | } | 188 | } |
185 | 189 | ||
186 | static __inline__ void ip4_frag_free(struct inet_frag_queue *q) | 190 | static __inline__ void ip4_frag_free(struct inet_frag_queue *q) |
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f49047b79609..594cec35ac4d 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c | |||
@@ -516,9 +516,6 @@ static void ipgre_err(struct sk_buff *skb, u32 info) | |||
516 | case ICMP_PORT_UNREACH: | 516 | case ICMP_PORT_UNREACH: |
517 | /* Impossible event. */ | 517 | /* Impossible event. */ |
518 | return; | 518 | return; |
519 | case ICMP_FRAG_NEEDED: | ||
520 | /* Soft state for pmtu is maintained by IP core. */ | ||
521 | return; | ||
522 | default: | 519 | default: |
523 | /* All others are translated to HOST_UNREACH. | 520 | /* All others are translated to HOST_UNREACH. |
524 | rfc2003 contains "deep thoughts" about NET_UNREACH, | 521 | rfc2003 contains "deep thoughts" about NET_UNREACH, |
@@ -538,7 +535,16 @@ static void ipgre_err(struct sk_buff *skb, u32 info) | |||
538 | flags & GRE_KEY ? | 535 | flags & GRE_KEY ? |
539 | *(((__be32 *)p) + (grehlen / 4) - 1) : 0, | 536 | *(((__be32 *)p) + (grehlen / 4) - 1) : 0, |
540 | p[1]); | 537 | p[1]); |
541 | if (t == NULL || t->parms.iph.daddr == 0 || | 538 | if (t == NULL) |
539 | goto out; | ||
540 | |||
541 | if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { | ||
542 | ipv4_update_pmtu(skb, dev_net(skb->dev), info, | ||
543 | t->parms.link, 0, IPPROTO_GRE, 0); | ||
544 | goto out; | ||
545 | } | ||
546 | |||
547 | if (t->parms.iph.daddr == 0 || | ||
542 | ipv4_is_multicast(t->parms.iph.daddr)) | 548 | ipv4_is_multicast(t->parms.iph.daddr)) |
543 | goto out; | 549 | goto out; |
544 | 550 | ||
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 8590144ca330..b27d4440f523 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c | |||
@@ -198,14 +198,13 @@ static int ip_local_deliver_finish(struct sk_buff *skb) | |||
198 | rcu_read_lock(); | 198 | rcu_read_lock(); |
199 | { | 199 | { |
200 | int protocol = ip_hdr(skb)->protocol; | 200 | int protocol = ip_hdr(skb)->protocol; |
201 | int hash, raw; | ||
202 | const struct net_protocol *ipprot; | 201 | const struct net_protocol *ipprot; |
202 | int raw; | ||
203 | 203 | ||
204 | resubmit: | 204 | resubmit: |
205 | raw = raw_local_deliver(skb, protocol); | 205 | raw = raw_local_deliver(skb, protocol); |
206 | 206 | ||
207 | hash = protocol & (MAX_INET_PROTOS - 1); | 207 | ipprot = rcu_dereference(inet_protos[protocol]); |
208 | ipprot = rcu_dereference(inet_protos[hash]); | ||
209 | if (ipprot != NULL) { | 208 | if (ipprot != NULL) { |
210 | int ret; | 209 | int ret; |
211 | 210 | ||
@@ -314,26 +313,33 @@ drop: | |||
314 | return true; | 313 | return true; |
315 | } | 314 | } |
316 | 315 | ||
316 | int sysctl_ip_early_demux __read_mostly = 1; | ||
317 | |||
317 | static int ip_rcv_finish(struct sk_buff *skb) | 318 | static int ip_rcv_finish(struct sk_buff *skb) |
318 | { | 319 | { |
319 | const struct iphdr *iph = ip_hdr(skb); | 320 | const struct iphdr *iph = ip_hdr(skb); |
320 | struct rtable *rt; | 321 | struct rtable *rt; |
321 | 322 | ||
323 | if (sysctl_ip_early_demux && !skb_dst(skb)) { | ||
324 | const struct net_protocol *ipprot; | ||
325 | int protocol = iph->protocol; | ||
326 | |||
327 | rcu_read_lock(); | ||
328 | ipprot = rcu_dereference(inet_protos[protocol]); | ||
329 | if (ipprot && ipprot->early_demux) | ||
330 | ipprot->early_demux(skb); | ||
331 | rcu_read_unlock(); | ||
332 | } | ||
333 | |||
322 | /* | 334 | /* |
323 | * Initialise the virtual path cache for the packet. It describes | 335 | * Initialise the virtual path cache for the packet. It describes |
324 | * how the packet travels inside Linux networking. | 336 | * how the packet travels inside Linux networking. |
325 | */ | 337 | */ |
326 | if (skb_dst(skb) == NULL) { | 338 | if (!skb_dst(skb)) { |
327 | int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, | 339 | int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, |
328 | iph->tos, skb->dev); | 340 | iph->tos, skb->dev); |
329 | if (unlikely(err)) { | 341 | if (unlikely(err)) { |
330 | if (err == -EHOSTUNREACH) | 342 | if (err == -EXDEV) |
331 | IP_INC_STATS_BH(dev_net(skb->dev), | ||
332 | IPSTATS_MIB_INADDRERRORS); | ||
333 | else if (err == -ENETUNREACH) | ||
334 | IP_INC_STATS_BH(dev_net(skb->dev), | ||
335 | IPSTATS_MIB_INNOROUTES); | ||
336 | else if (err == -EXDEV) | ||
337 | NET_INC_STATS_BH(dev_net(skb->dev), | 343 | NET_INC_STATS_BH(dev_net(skb->dev), |
338 | LINUX_MIB_IPRPFILTER); | 344 | LINUX_MIB_IPRPFILTER); |
339 | goto drop; | 345 | goto drop; |
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 708b99494e23..766dfe56885a 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <net/icmp.h> | 27 | #include <net/icmp.h> |
28 | #include <net/route.h> | 28 | #include <net/route.h> |
29 | #include <net/cipso_ipv4.h> | 29 | #include <net/cipso_ipv4.h> |
30 | #include <net/ip_fib.h> | ||
30 | 31 | ||
31 | /* | 32 | /* |
32 | * Write options to IP header, record destination address to | 33 | * Write options to IP header, record destination address to |
@@ -104,7 +105,7 @@ int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb) | |||
104 | sptr = skb_network_header(skb); | 105 | sptr = skb_network_header(skb); |
105 | dptr = dopt->__data; | 106 | dptr = dopt->__data; |
106 | 107 | ||
107 | daddr = skb_rtable(skb)->rt_spec_dst; | 108 | daddr = fib_compute_spec_dst(skb); |
108 | 109 | ||
109 | if (sopt->rr) { | 110 | if (sopt->rr) { |
110 | optlen = sptr[sopt->rr+1]; | 111 | optlen = sptr[sopt->rr+1]; |
@@ -250,15 +251,14 @@ void ip_options_fragment(struct sk_buff *skb) | |||
250 | int ip_options_compile(struct net *net, | 251 | int ip_options_compile(struct net *net, |
251 | struct ip_options *opt, struct sk_buff *skb) | 252 | struct ip_options *opt, struct sk_buff *skb) |
252 | { | 253 | { |
253 | int l; | 254 | __be32 spec_dst = (__force __be32) 0; |
254 | unsigned char *iph; | ||
255 | unsigned char *optptr; | ||
256 | int optlen; | ||
257 | unsigned char *pp_ptr = NULL; | 255 | unsigned char *pp_ptr = NULL; |
258 | struct rtable *rt = NULL; | 256 | unsigned char *optptr; |
257 | unsigned char *iph; | ||
258 | int optlen, l; | ||
259 | 259 | ||
260 | if (skb != NULL) { | 260 | if (skb != NULL) { |
261 | rt = skb_rtable(skb); | 261 | spec_dst = fib_compute_spec_dst(skb); |
262 | optptr = (unsigned char *)&(ip_hdr(skb)[1]); | 262 | optptr = (unsigned char *)&(ip_hdr(skb)[1]); |
263 | } else | 263 | } else |
264 | optptr = opt->__data; | 264 | optptr = opt->__data; |
@@ -330,8 +330,8 @@ int ip_options_compile(struct net *net, | |||
330 | pp_ptr = optptr + 2; | 330 | pp_ptr = optptr + 2; |
331 | goto error; | 331 | goto error; |
332 | } | 332 | } |
333 | if (rt) { | 333 | if (skb) { |
334 | memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); | 334 | memcpy(&optptr[optptr[2]-1], &spec_dst, 4); |
335 | opt->is_changed = 1; | 335 | opt->is_changed = 1; |
336 | } | 336 | } |
337 | optptr[2] += 4; | 337 | optptr[2] += 4; |
@@ -372,8 +372,8 @@ int ip_options_compile(struct net *net, | |||
372 | goto error; | 372 | goto error; |
373 | } | 373 | } |
374 | opt->ts = optptr - iph; | 374 | opt->ts = optptr - iph; |
375 | if (rt) { | 375 | if (skb) { |
376 | memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); | 376 | memcpy(&optptr[optptr[2]-1], &spec_dst, 4); |
377 | timeptr = &optptr[optptr[2]+3]; | 377 | timeptr = &optptr[optptr[2]+3]; |
378 | } | 378 | } |
379 | opt->ts_needaddr = 1; | 379 | opt->ts_needaddr = 1; |
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 451f97c42eb4..2630900e480a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -113,19 +113,6 @@ int ip_local_out(struct sk_buff *skb) | |||
113 | } | 113 | } |
114 | EXPORT_SYMBOL_GPL(ip_local_out); | 114 | EXPORT_SYMBOL_GPL(ip_local_out); |
115 | 115 | ||
116 | /* dev_loopback_xmit for use with netfilter. */ | ||
117 | static int ip_dev_loopback_xmit(struct sk_buff *newskb) | ||
118 | { | ||
119 | skb_reset_mac_header(newskb); | ||
120 | __skb_pull(newskb, skb_network_offset(newskb)); | ||
121 | newskb->pkt_type = PACKET_LOOPBACK; | ||
122 | newskb->ip_summed = CHECKSUM_UNNECESSARY; | ||
123 | WARN_ON(!skb_dst(newskb)); | ||
124 | skb_dst_force(newskb); | ||
125 | netif_rx_ni(newskb); | ||
126 | return 0; | ||
127 | } | ||
128 | |||
129 | static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) | 116 | static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) |
130 | { | 117 | { |
131 | int ttl = inet->uc_ttl; | 118 | int ttl = inet->uc_ttl; |
@@ -200,7 +187,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) | |||
200 | } | 187 | } |
201 | if (skb->sk) | 188 | if (skb->sk) |
202 | skb_set_owner_w(skb2, skb->sk); | 189 | skb_set_owner_w(skb2, skb->sk); |
203 | kfree_skb(skb); | 190 | consume_skb(skb); |
204 | skb = skb2; | 191 | skb = skb2; |
205 | } | 192 | } |
206 | 193 | ||
@@ -281,7 +268,7 @@ int ip_mc_output(struct sk_buff *skb) | |||
281 | if (newskb) | 268 | if (newskb) |
282 | NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, | 269 | NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, |
283 | newskb, NULL, newskb->dev, | 270 | newskb, NULL, newskb->dev, |
284 | ip_dev_loopback_xmit); | 271 | dev_loopback_xmit); |
285 | } | 272 | } |
286 | 273 | ||
287 | /* Multicasts with ttl 0 must not go beyond the host */ | 274 | /* Multicasts with ttl 0 must not go beyond the host */ |
@@ -296,7 +283,7 @@ int ip_mc_output(struct sk_buff *skb) | |||
296 | struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); | 283 | struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); |
297 | if (newskb) | 284 | if (newskb) |
298 | NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb, | 285 | NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb, |
299 | NULL, newskb->dev, ip_dev_loopback_xmit); | 286 | NULL, newskb->dev, dev_loopback_xmit); |
300 | } | 287 | } |
301 | 288 | ||
302 | return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, | 289 | return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, |
@@ -709,7 +696,7 @@ slow_path: | |||
709 | 696 | ||
710 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES); | 697 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES); |
711 | } | 698 | } |
712 | kfree_skb(skb); | 699 | consume_skb(skb); |
713 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS); | 700 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS); |
714 | return err; | 701 | return err; |
715 | 702 | ||
@@ -1472,13 +1459,14 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset, | |||
1472 | 1459 | ||
1473 | /* | 1460 | /* |
1474 | * Generic function to send a packet as reply to another packet. | 1461 | * Generic function to send a packet as reply to another packet. |
1475 | * Used to send TCP resets so far. ICMP should use this function too. | 1462 | * Used to send TCP resets so far. |
1476 | * | 1463 | * |
1477 | * Should run single threaded per socket because it uses the sock | 1464 | * Should run single threaded per socket because it uses the sock |
1478 | * structure to pass arguments. | 1465 | * structure to pass arguments. |
1479 | */ | 1466 | */ |
1480 | void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, | 1467 | void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, |
1481 | const struct ip_reply_arg *arg, unsigned int len) | 1468 | __be32 saddr, const struct ip_reply_arg *arg, |
1469 | unsigned int len) | ||
1482 | { | 1470 | { |
1483 | struct inet_sock *inet = inet_sk(sk); | 1471 | struct inet_sock *inet = inet_sk(sk); |
1484 | struct ip_options_data replyopts; | 1472 | struct ip_options_data replyopts; |
@@ -1504,7 +1492,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, | |||
1504 | RT_TOS(arg->tos), | 1492 | RT_TOS(arg->tos), |
1505 | RT_SCOPE_UNIVERSE, sk->sk_protocol, | 1493 | RT_SCOPE_UNIVERSE, sk->sk_protocol, |
1506 | ip_reply_arg_flowi_flags(arg), | 1494 | ip_reply_arg_flowi_flags(arg), |
1507 | daddr, rt->rt_spec_dst, | 1495 | daddr, saddr, |
1508 | tcp_hdr(skb)->source, tcp_hdr(skb)->dest); | 1496 | tcp_hdr(skb)->source, tcp_hdr(skb)->dest); |
1509 | security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); | 1497 | security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); |
1510 | rt = ip_route_output_key(sock_net(sk), &fl4); | 1498 | rt = ip_route_output_key(sock_net(sk), &fl4); |
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 0d11f234d615..de29f46f68b0 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #if IS_ENABLED(CONFIG_IPV6) | 40 | #if IS_ENABLED(CONFIG_IPV6) |
41 | #include <net/transp_v6.h> | 41 | #include <net/transp_v6.h> |
42 | #endif | 42 | #endif |
43 | #include <net/ip_fib.h> | ||
43 | 44 | ||
44 | #include <linux/errqueue.h> | 45 | #include <linux/errqueue.h> |
45 | #include <asm/uaccess.h> | 46 | #include <asm/uaccess.h> |
@@ -1019,8 +1020,8 @@ e_inval: | |||
1019 | * @sk: socket | 1020 | * @sk: socket |
1020 | * @skb: buffer | 1021 | * @skb: buffer |
1021 | * | 1022 | * |
1022 | * To support IP_CMSG_PKTINFO option, we store rt_iif and rt_spec_dst | 1023 | * To support IP_CMSG_PKTINFO option, we store rt_iif and specific |
1023 | * in skb->cb[] before dst drop. | 1024 | * destination in skb->cb[] before dst drop. |
1024 | * This way, receiver doesnt make cache line misses to read rtable. | 1025 | * This way, receiver doesnt make cache line misses to read rtable. |
1025 | */ | 1026 | */ |
1026 | void ipv4_pktinfo_prepare(struct sk_buff *skb) | 1027 | void ipv4_pktinfo_prepare(struct sk_buff *skb) |
@@ -1030,7 +1031,7 @@ void ipv4_pktinfo_prepare(struct sk_buff *skb) | |||
1030 | 1031 | ||
1031 | if (rt) { | 1032 | if (rt) { |
1032 | pktinfo->ipi_ifindex = rt->rt_iif; | 1033 | pktinfo->ipi_ifindex = rt->rt_iif; |
1033 | pktinfo->ipi_spec_dst.s_addr = rt->rt_spec_dst; | 1034 | pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); |
1034 | } else { | 1035 | } else { |
1035 | pktinfo->ipi_ifindex = 0; | 1036 | pktinfo->ipi_ifindex = 0; |
1036 | pktinfo->ipi_spec_dst.s_addr = 0; | 1037 | pktinfo->ipi_spec_dst.s_addr = 0; |
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 63b64c45a826..b91375482d84 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c | |||
@@ -42,6 +42,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) | |||
42 | return; | 42 | return; |
43 | NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%pI4\n", | 43 | NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%pI4\n", |
44 | spi, &iph->daddr); | 44 | spi, &iph->daddr); |
45 | ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0); | ||
45 | xfrm_state_put(x); | 46 | xfrm_state_put(x); |
46 | } | 47 | } |
47 | 48 | ||
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 2d0f99bf61b3..715338a1b205 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c | |||
@@ -348,9 +348,6 @@ static int ipip_err(struct sk_buff *skb, u32 info) | |||
348 | case ICMP_PORT_UNREACH: | 348 | case ICMP_PORT_UNREACH: |
349 | /* Impossible event. */ | 349 | /* Impossible event. */ |
350 | return 0; | 350 | return 0; |
351 | case ICMP_FRAG_NEEDED: | ||
352 | /* Soft state for pmtu is maintained by IP core. */ | ||
353 | return 0; | ||
354 | default: | 351 | default: |
355 | /* All others are translated to HOST_UNREACH. | 352 | /* All others are translated to HOST_UNREACH. |
356 | rfc2003 contains "deep thoughts" about NET_UNREACH, | 353 | rfc2003 contains "deep thoughts" about NET_UNREACH, |
@@ -369,7 +366,17 @@ static int ipip_err(struct sk_buff *skb, u32 info) | |||
369 | 366 | ||
370 | rcu_read_lock(); | 367 | rcu_read_lock(); |
371 | t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); | 368 | t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); |
372 | if (t == NULL || t->parms.iph.daddr == 0) | 369 | if (t == NULL) |
370 | goto out; | ||
371 | |||
372 | if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { | ||
373 | ipv4_update_pmtu(skb, dev_net(skb->dev), info, | ||
374 | t->dev->ifindex, 0, IPPROTO_IPIP, 0); | ||
375 | err = 0; | ||
376 | goto out; | ||
377 | } | ||
378 | |||
379 | if (t->parms.iph.daddr == 0) | ||
373 | goto out; | 380 | goto out; |
374 | 381 | ||
375 | err = 0; | 382 | err = 0; |
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c94bbc6f2ba3..b4ac39f11d19 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c | |||
@@ -2006,37 +2006,37 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, | |||
2006 | { | 2006 | { |
2007 | int ct; | 2007 | int ct; |
2008 | struct rtnexthop *nhp; | 2008 | struct rtnexthop *nhp; |
2009 | u8 *b = skb_tail_pointer(skb); | 2009 | struct nlattr *mp_attr; |
2010 | struct rtattr *mp_head; | ||
2011 | 2010 | ||
2012 | /* If cache is unresolved, don't try to parse IIF and OIF */ | 2011 | /* If cache is unresolved, don't try to parse IIF and OIF */ |
2013 | if (c->mfc_parent >= MAXVIFS) | 2012 | if (c->mfc_parent >= MAXVIFS) |
2014 | return -ENOENT; | 2013 | return -ENOENT; |
2015 | 2014 | ||
2016 | if (VIF_EXISTS(mrt, c->mfc_parent)) | 2015 | if (VIF_EXISTS(mrt, c->mfc_parent) && |
2017 | RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex); | 2016 | nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0) |
2017 | return -EMSGSIZE; | ||
2018 | 2018 | ||
2019 | mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); | 2019 | if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH))) |
2020 | return -EMSGSIZE; | ||
2020 | 2021 | ||
2021 | for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { | 2022 | for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { |
2022 | if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { | 2023 | if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { |
2023 | if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) | 2024 | if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) { |
2024 | goto rtattr_failure; | 2025 | nla_nest_cancel(skb, mp_attr); |
2025 | nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); | 2026 | return -EMSGSIZE; |
2027 | } | ||
2028 | |||
2026 | nhp->rtnh_flags = 0; | 2029 | nhp->rtnh_flags = 0; |
2027 | nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; | 2030 | nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; |
2028 | nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex; | 2031 | nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex; |
2029 | nhp->rtnh_len = sizeof(*nhp); | 2032 | nhp->rtnh_len = sizeof(*nhp); |
2030 | } | 2033 | } |
2031 | } | 2034 | } |
2032 | mp_head->rta_type = RTA_MULTIPATH; | 2035 | |
2033 | mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head; | 2036 | nla_nest_end(skb, mp_attr); |
2037 | |||
2034 | rtm->rtm_type = RTN_MULTICAST; | 2038 | rtm->rtm_type = RTN_MULTICAST; |
2035 | return 1; | 2039 | return 1; |
2036 | |||
2037 | rtattr_failure: | ||
2038 | nlmsg_trim(skb, b); | ||
2039 | return -EMSGSIZE; | ||
2040 | } | 2040 | } |
2041 | 2041 | ||
2042 | int ipmr_get_route(struct net *net, struct sk_buff *skb, | 2042 | int ipmr_get_route(struct net *net, struct sk_buff *skb, |
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index ba5756d20165..99b3f53f16a7 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c | |||
@@ -196,12 +196,15 @@ static void ipt_ulog_packet(unsigned int hooknum, | |||
196 | 196 | ||
197 | pr_debug("qlen %d, qthreshold %Zu\n", ub->qlen, loginfo->qthreshold); | 197 | pr_debug("qlen %d, qthreshold %Zu\n", ub->qlen, loginfo->qthreshold); |
198 | 198 | ||
199 | /* NLMSG_PUT contains a hidden goto nlmsg_failure !!! */ | 199 | nlh = nlmsg_put(ub->skb, 0, ub->qlen, ULOG_NL_EVENT, |
200 | nlh = NLMSG_PUT(ub->skb, 0, ub->qlen, ULOG_NL_EVENT, | 200 | sizeof(*pm)+copy_len, 0); |
201 | sizeof(*pm)+copy_len); | 201 | if (!nlh) { |
202 | pr_debug("error during nlmsg_put\n"); | ||
203 | goto out_unlock; | ||
204 | } | ||
202 | ub->qlen++; | 205 | ub->qlen++; |
203 | 206 | ||
204 | pm = NLMSG_DATA(nlh); | 207 | pm = nlmsg_data(nlh); |
205 | 208 | ||
206 | /* We might not have a timestamp, get one */ | 209 | /* We might not have a timestamp, get one */ |
207 | if (skb->tstamp.tv64 == 0) | 210 | if (skb->tstamp.tv64 == 0) |
@@ -261,13 +264,11 @@ static void ipt_ulog_packet(unsigned int hooknum, | |||
261 | nlh->nlmsg_type = NLMSG_DONE; | 264 | nlh->nlmsg_type = NLMSG_DONE; |
262 | ulog_send(groupnum); | 265 | ulog_send(groupnum); |
263 | } | 266 | } |
264 | 267 | out_unlock: | |
265 | spin_unlock_bh(&ulog_lock); | 268 | spin_unlock_bh(&ulog_lock); |
266 | 269 | ||
267 | return; | 270 | return; |
268 | 271 | ||
269 | nlmsg_failure: | ||
270 | pr_debug("error during NLMSG_PUT\n"); | ||
271 | alloc_failure: | 272 | alloc_failure: |
272 | pr_debug("Error building netlink message\n"); | 273 | pr_debug("Error building netlink message\n"); |
273 | spin_unlock_bh(&ulog_lock); | 274 | spin_unlock_bh(&ulog_lock); |
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 91747d4ebc26..e7ff2dcab6ce 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | |||
@@ -95,11 +95,11 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, | |||
95 | return NF_ACCEPT; | 95 | return NF_ACCEPT; |
96 | } | 96 | } |
97 | 97 | ||
98 | static unsigned int ipv4_confirm(unsigned int hooknum, | 98 | static unsigned int ipv4_helper(unsigned int hooknum, |
99 | struct sk_buff *skb, | 99 | struct sk_buff *skb, |
100 | const struct net_device *in, | 100 | const struct net_device *in, |
101 | const struct net_device *out, | 101 | const struct net_device *out, |
102 | int (*okfn)(struct sk_buff *)) | 102 | int (*okfn)(struct sk_buff *)) |
103 | { | 103 | { |
104 | struct nf_conn *ct; | 104 | struct nf_conn *ct; |
105 | enum ip_conntrack_info ctinfo; | 105 | enum ip_conntrack_info ctinfo; |
@@ -110,24 +110,38 @@ static unsigned int ipv4_confirm(unsigned int hooknum, | |||
110 | /* This is where we call the helper: as the packet goes out. */ | 110 | /* This is where we call the helper: as the packet goes out. */ |
111 | ct = nf_ct_get(skb, &ctinfo); | 111 | ct = nf_ct_get(skb, &ctinfo); |
112 | if (!ct || ctinfo == IP_CT_RELATED_REPLY) | 112 | if (!ct || ctinfo == IP_CT_RELATED_REPLY) |
113 | goto out; | 113 | return NF_ACCEPT; |
114 | 114 | ||
115 | help = nfct_help(ct); | 115 | help = nfct_help(ct); |
116 | if (!help) | 116 | if (!help) |
117 | goto out; | 117 | return NF_ACCEPT; |
118 | 118 | ||
119 | /* rcu_read_lock()ed by nf_hook_slow */ | 119 | /* rcu_read_lock()ed by nf_hook_slow */ |
120 | helper = rcu_dereference(help->helper); | 120 | helper = rcu_dereference(help->helper); |
121 | if (!helper) | 121 | if (!helper) |
122 | goto out; | 122 | return NF_ACCEPT; |
123 | 123 | ||
124 | ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), | 124 | ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), |
125 | ct, ctinfo); | 125 | ct, ctinfo); |
126 | if (ret != NF_ACCEPT) { | 126 | if (ret != NF_ACCEPT && (ret & NF_VERDICT_MASK) != NF_QUEUE) { |
127 | nf_log_packet(NFPROTO_IPV4, hooknum, skb, in, out, NULL, | 127 | nf_log_packet(NFPROTO_IPV4, hooknum, skb, in, out, NULL, |
128 | "nf_ct_%s: dropping packet", helper->name); | 128 | "nf_ct_%s: dropping packet", helper->name); |
129 | return ret; | ||
130 | } | 129 | } |
130 | return ret; | ||
131 | } | ||
132 | |||
133 | static unsigned int ipv4_confirm(unsigned int hooknum, | ||
134 | struct sk_buff *skb, | ||
135 | const struct net_device *in, | ||
136 | const struct net_device *out, | ||
137 | int (*okfn)(struct sk_buff *)) | ||
138 | { | ||
139 | struct nf_conn *ct; | ||
140 | enum ip_conntrack_info ctinfo; | ||
141 | |||
142 | ct = nf_ct_get(skb, &ctinfo); | ||
143 | if (!ct || ctinfo == IP_CT_RELATED_REPLY) | ||
144 | goto out; | ||
131 | 145 | ||
132 | /* adjust seqs for loopback traffic only in outgoing direction */ | 146 | /* adjust seqs for loopback traffic only in outgoing direction */ |
133 | if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && | 147 | if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && |
@@ -185,6 +199,13 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { | |||
185 | .priority = NF_IP_PRI_CONNTRACK, | 199 | .priority = NF_IP_PRI_CONNTRACK, |
186 | }, | 200 | }, |
187 | { | 201 | { |
202 | .hook = ipv4_helper, | ||
203 | .owner = THIS_MODULE, | ||
204 | .pf = NFPROTO_IPV4, | ||
205 | .hooknum = NF_INET_POST_ROUTING, | ||
206 | .priority = NF_IP_PRI_CONNTRACK_HELPER, | ||
207 | }, | ||
208 | { | ||
188 | .hook = ipv4_confirm, | 209 | .hook = ipv4_confirm, |
189 | .owner = THIS_MODULE, | 210 | .owner = THIS_MODULE, |
190 | .pf = NFPROTO_IPV4, | 211 | .pf = NFPROTO_IPV4, |
@@ -192,6 +213,13 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { | |||
192 | .priority = NF_IP_PRI_CONNTRACK_CONFIRM, | 213 | .priority = NF_IP_PRI_CONNTRACK_CONFIRM, |
193 | }, | 214 | }, |
194 | { | 215 | { |
216 | .hook = ipv4_helper, | ||
217 | .owner = THIS_MODULE, | ||
218 | .pf = NFPROTO_IPV4, | ||
219 | .hooknum = NF_INET_LOCAL_IN, | ||
220 | .priority = NF_IP_PRI_CONNTRACK_HELPER, | ||
221 | }, | ||
222 | { | ||
195 | .hook = ipv4_confirm, | 223 | .hook = ipv4_confirm, |
196 | .owner = THIS_MODULE, | 224 | .owner = THIS_MODULE, |
197 | .pf = NFPROTO_IPV4, | 225 | .pf = NFPROTO_IPV4, |
@@ -207,35 +235,30 @@ static int log_invalid_proto_max = 255; | |||
207 | static ctl_table ip_ct_sysctl_table[] = { | 235 | static ctl_table ip_ct_sysctl_table[] = { |
208 | { | 236 | { |
209 | .procname = "ip_conntrack_max", | 237 | .procname = "ip_conntrack_max", |
210 | .data = &nf_conntrack_max, | ||
211 | .maxlen = sizeof(int), | 238 | .maxlen = sizeof(int), |
212 | .mode = 0644, | 239 | .mode = 0644, |
213 | .proc_handler = proc_dointvec, | 240 | .proc_handler = proc_dointvec, |
214 | }, | 241 | }, |
215 | { | 242 | { |
216 | .procname = "ip_conntrack_count", | 243 | .procname = "ip_conntrack_count", |
217 | .data = &init_net.ct.count, | ||
218 | .maxlen = sizeof(int), | 244 | .maxlen = sizeof(int), |
219 | .mode = 0444, | 245 | .mode = 0444, |
220 | .proc_handler = proc_dointvec, | 246 | .proc_handler = proc_dointvec, |
221 | }, | 247 | }, |
222 | { | 248 | { |
223 | .procname = "ip_conntrack_buckets", | 249 | .procname = "ip_conntrack_buckets", |
224 | .data = &init_net.ct.htable_size, | ||
225 | .maxlen = sizeof(unsigned int), | 250 | .maxlen = sizeof(unsigned int), |
226 | .mode = 0444, | 251 | .mode = 0444, |
227 | .proc_handler = proc_dointvec, | 252 | .proc_handler = proc_dointvec, |
228 | }, | 253 | }, |
229 | { | 254 | { |
230 | .procname = "ip_conntrack_checksum", | 255 | .procname = "ip_conntrack_checksum", |
231 | .data = &init_net.ct.sysctl_checksum, | ||
232 | .maxlen = sizeof(int), | 256 | .maxlen = sizeof(int), |
233 | .mode = 0644, | 257 | .mode = 0644, |
234 | .proc_handler = proc_dointvec, | 258 | .proc_handler = proc_dointvec, |
235 | }, | 259 | }, |
236 | { | 260 | { |
237 | .procname = "ip_conntrack_log_invalid", | 261 | .procname = "ip_conntrack_log_invalid", |
238 | .data = &init_net.ct.sysctl_log_invalid, | ||
239 | .maxlen = sizeof(unsigned int), | 262 | .maxlen = sizeof(unsigned int), |
240 | .mode = 0644, | 263 | .mode = 0644, |
241 | .proc_handler = proc_dointvec_minmax, | 264 | .proc_handler = proc_dointvec_minmax, |
@@ -351,6 +374,25 @@ static struct nf_sockopt_ops so_getorigdst = { | |||
351 | .owner = THIS_MODULE, | 374 | .owner = THIS_MODULE, |
352 | }; | 375 | }; |
353 | 376 | ||
377 | static int ipv4_init_net(struct net *net) | ||
378 | { | ||
379 | #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) | ||
380 | struct nf_ip_net *in = &net->ct.nf_ct_proto; | ||
381 | in->ctl_table = kmemdup(ip_ct_sysctl_table, | ||
382 | sizeof(ip_ct_sysctl_table), | ||
383 | GFP_KERNEL); | ||
384 | if (!in->ctl_table) | ||
385 | return -ENOMEM; | ||
386 | |||
387 | in->ctl_table[0].data = &nf_conntrack_max; | ||
388 | in->ctl_table[1].data = &net->ct.count; | ||
389 | in->ctl_table[2].data = &net->ct.htable_size; | ||
390 | in->ctl_table[3].data = &net->ct.sysctl_checksum; | ||
391 | in->ctl_table[4].data = &net->ct.sysctl_log_invalid; | ||
392 | #endif | ||
393 | return 0; | ||
394 | } | ||
395 | |||
354 | struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { | 396 | struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { |
355 | .l3proto = PF_INET, | 397 | .l3proto = PF_INET, |
356 | .name = "ipv4", | 398 | .name = "ipv4", |
@@ -366,8 +408,8 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { | |||
366 | #endif | 408 | #endif |
367 | #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) | 409 | #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) |
368 | .ctl_table_path = "net/ipv4/netfilter", | 410 | .ctl_table_path = "net/ipv4/netfilter", |
369 | .ctl_table = ip_ct_sysctl_table, | ||
370 | #endif | 411 | #endif |
412 | .init_net = ipv4_init_net, | ||
371 | .me = THIS_MODULE, | 413 | .me = THIS_MODULE, |
372 | }; | 414 | }; |
373 | 415 | ||
@@ -378,6 +420,65 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET)); | |||
378 | MODULE_ALIAS("ip_conntrack"); | 420 | MODULE_ALIAS("ip_conntrack"); |
379 | MODULE_LICENSE("GPL"); | 421 | MODULE_LICENSE("GPL"); |
380 | 422 | ||
423 | static int ipv4_net_init(struct net *net) | ||
424 | { | ||
425 | int ret = 0; | ||
426 | |||
427 | ret = nf_conntrack_l4proto_register(net, | ||
428 | &nf_conntrack_l4proto_tcp4); | ||
429 | if (ret < 0) { | ||
430 | pr_err("nf_conntrack_l4proto_tcp4 :protocol register failed\n"); | ||
431 | goto out_tcp; | ||
432 | } | ||
433 | ret = nf_conntrack_l4proto_register(net, | ||
434 | &nf_conntrack_l4proto_udp4); | ||
435 | if (ret < 0) { | ||
436 | pr_err("nf_conntrack_l4proto_udp4 :protocol register failed\n"); | ||
437 | goto out_udp; | ||
438 | } | ||
439 | ret = nf_conntrack_l4proto_register(net, | ||
440 | &nf_conntrack_l4proto_icmp); | ||
441 | if (ret < 0) { | ||
442 | pr_err("nf_conntrack_l4proto_icmp4 :protocol register failed\n"); | ||
443 | goto out_icmp; | ||
444 | } | ||
445 | ret = nf_conntrack_l3proto_register(net, | ||
446 | &nf_conntrack_l3proto_ipv4); | ||
447 | if (ret < 0) { | ||
448 | pr_err("nf_conntrack_l3proto_ipv4 :protocol register failed\n"); | ||
449 | goto out_ipv4; | ||
450 | } | ||
451 | return 0; | ||
452 | out_ipv4: | ||
453 | nf_conntrack_l4proto_unregister(net, | ||
454 | &nf_conntrack_l4proto_icmp); | ||
455 | out_icmp: | ||
456 | nf_conntrack_l4proto_unregister(net, | ||
457 | &nf_conntrack_l4proto_udp4); | ||
458 | out_udp: | ||
459 | nf_conntrack_l4proto_unregister(net, | ||
460 | &nf_conntrack_l4proto_tcp4); | ||
461 | out_tcp: | ||
462 | return ret; | ||
463 | } | ||
464 | |||
465 | static void ipv4_net_exit(struct net *net) | ||
466 | { | ||
467 | nf_conntrack_l3proto_unregister(net, | ||
468 | &nf_conntrack_l3proto_ipv4); | ||
469 | nf_conntrack_l4proto_unregister(net, | ||
470 | &nf_conntrack_l4proto_icmp); | ||
471 | nf_conntrack_l4proto_unregister(net, | ||
472 | &nf_conntrack_l4proto_udp4); | ||
473 | nf_conntrack_l4proto_unregister(net, | ||
474 | &nf_conntrack_l4proto_tcp4); | ||
475 | } | ||
476 | |||
477 | static struct pernet_operations ipv4_net_ops = { | ||
478 | .init = ipv4_net_init, | ||
479 | .exit = ipv4_net_exit, | ||
480 | }; | ||
481 | |||
381 | static int __init nf_conntrack_l3proto_ipv4_init(void) | 482 | static int __init nf_conntrack_l3proto_ipv4_init(void) |
382 | { | 483 | { |
383 | int ret = 0; | 484 | int ret = 0; |
@@ -391,35 +492,17 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) | |||
391 | return ret; | 492 | return ret; |
392 | } | 493 | } |
393 | 494 | ||
394 | ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp4); | 495 | ret = register_pernet_subsys(&ipv4_net_ops); |
395 | if (ret < 0) { | 496 | if (ret < 0) { |
396 | pr_err("nf_conntrack_ipv4: can't register tcp.\n"); | 497 | pr_err("nf_conntrack_ipv4: can't register pernet ops\n"); |
397 | goto cleanup_sockopt; | 498 | goto cleanup_sockopt; |
398 | } | 499 | } |
399 | 500 | ||
400 | ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp4); | ||
401 | if (ret < 0) { | ||
402 | pr_err("nf_conntrack_ipv4: can't register udp.\n"); | ||
403 | goto cleanup_tcp; | ||
404 | } | ||
405 | |||
406 | ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmp); | ||
407 | if (ret < 0) { | ||
408 | pr_err("nf_conntrack_ipv4: can't register icmp.\n"); | ||
409 | goto cleanup_udp; | ||
410 | } | ||
411 | |||
412 | ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4); | ||
413 | if (ret < 0) { | ||
414 | pr_err("nf_conntrack_ipv4: can't register ipv4\n"); | ||
415 | goto cleanup_icmp; | ||
416 | } | ||
417 | |||
418 | ret = nf_register_hooks(ipv4_conntrack_ops, | 501 | ret = nf_register_hooks(ipv4_conntrack_ops, |
419 | ARRAY_SIZE(ipv4_conntrack_ops)); | 502 | ARRAY_SIZE(ipv4_conntrack_ops)); |
420 | if (ret < 0) { | 503 | if (ret < 0) { |
421 | pr_err("nf_conntrack_ipv4: can't register hooks.\n"); | 504 | pr_err("nf_conntrack_ipv4: can't register hooks.\n"); |
422 | goto cleanup_ipv4; | 505 | goto cleanup_pernet; |
423 | } | 506 | } |
424 | #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) | 507 | #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) |
425 | ret = nf_conntrack_ipv4_compat_init(); | 508 | ret = nf_conntrack_ipv4_compat_init(); |
@@ -431,14 +514,8 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) | |||
431 | cleanup_hooks: | 514 | cleanup_hooks: |
432 | nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); | 515 | nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); |
433 | #endif | 516 | #endif |
434 | cleanup_ipv4: | 517 | cleanup_pernet: |
435 | nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4); | 518 | unregister_pernet_subsys(&ipv4_net_ops); |
436 | cleanup_icmp: | ||
437 | nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp); | ||
438 | cleanup_udp: | ||
439 | nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4); | ||
440 | cleanup_tcp: | ||
441 | nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4); | ||
442 | cleanup_sockopt: | 519 | cleanup_sockopt: |
443 | nf_unregister_sockopt(&so_getorigdst); | 520 | nf_unregister_sockopt(&so_getorigdst); |
444 | return ret; | 521 | return ret; |
@@ -451,10 +528,7 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void) | |||
451 | nf_conntrack_ipv4_compat_fini(); | 528 | nf_conntrack_ipv4_compat_fini(); |
452 | #endif | 529 | #endif |
453 | nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); | 530 | nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); |
454 | nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4); | 531 | unregister_pernet_subsys(&ipv4_net_ops); |
455 | nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp); | ||
456 | nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4); | ||
457 | nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4); | ||
458 | nf_unregister_sockopt(&so_getorigdst); | 532 | nf_unregister_sockopt(&so_getorigdst); |
459 | } | 533 | } |
460 | 534 | ||
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 0847e373d33c..041923cb67ad 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c | |||
@@ -23,6 +23,11 @@ | |||
23 | 23 | ||
24 | static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ; | 24 | static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ; |
25 | 25 | ||
26 | static inline struct nf_icmp_net *icmp_pernet(struct net *net) | ||
27 | { | ||
28 | return &net->ct.nf_ct_proto.icmp; | ||
29 | } | ||
30 | |||
26 | static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, | 31 | static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, |
27 | struct nf_conntrack_tuple *tuple) | 32 | struct nf_conntrack_tuple *tuple) |
28 | { | 33 | { |
@@ -77,7 +82,7 @@ static int icmp_print_tuple(struct seq_file *s, | |||
77 | 82 | ||
78 | static unsigned int *icmp_get_timeouts(struct net *net) | 83 | static unsigned int *icmp_get_timeouts(struct net *net) |
79 | { | 84 | { |
80 | return &nf_ct_icmp_timeout; | 85 | return &icmp_pernet(net)->timeout; |
81 | } | 86 | } |
82 | 87 | ||
83 | /* Returns verdict for packet, or -1 for invalid. */ | 88 | /* Returns verdict for packet, or -1 for invalid. */ |
@@ -274,16 +279,18 @@ static int icmp_nlattr_tuple_size(void) | |||
274 | #include <linux/netfilter/nfnetlink.h> | 279 | #include <linux/netfilter/nfnetlink.h> |
275 | #include <linux/netfilter/nfnetlink_cttimeout.h> | 280 | #include <linux/netfilter/nfnetlink_cttimeout.h> |
276 | 281 | ||
277 | static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) | 282 | static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[], |
283 | struct net *net, void *data) | ||
278 | { | 284 | { |
279 | unsigned int *timeout = data; | 285 | unsigned int *timeout = data; |
286 | struct nf_icmp_net *in = icmp_pernet(net); | ||
280 | 287 | ||
281 | if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) { | 288 | if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) { |
282 | *timeout = | 289 | *timeout = |
283 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ; | 290 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ; |
284 | } else { | 291 | } else { |
285 | /* Set default ICMP timeout. */ | 292 | /* Set default ICMP timeout. */ |
286 | *timeout = nf_ct_icmp_timeout; | 293 | *timeout = in->timeout; |
287 | } | 294 | } |
288 | return 0; | 295 | return 0; |
289 | } | 296 | } |
@@ -308,11 +315,9 @@ icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = { | |||
308 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ | 315 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ |
309 | 316 | ||
310 | #ifdef CONFIG_SYSCTL | 317 | #ifdef CONFIG_SYSCTL |
311 | static struct ctl_table_header *icmp_sysctl_header; | ||
312 | static struct ctl_table icmp_sysctl_table[] = { | 318 | static struct ctl_table icmp_sysctl_table[] = { |
313 | { | 319 | { |
314 | .procname = "nf_conntrack_icmp_timeout", | 320 | .procname = "nf_conntrack_icmp_timeout", |
315 | .data = &nf_ct_icmp_timeout, | ||
316 | .maxlen = sizeof(unsigned int), | 321 | .maxlen = sizeof(unsigned int), |
317 | .mode = 0644, | 322 | .mode = 0644, |
318 | .proc_handler = proc_dointvec_jiffies, | 323 | .proc_handler = proc_dointvec_jiffies, |
@@ -323,7 +328,6 @@ static struct ctl_table icmp_sysctl_table[] = { | |||
323 | static struct ctl_table icmp_compat_sysctl_table[] = { | 328 | static struct ctl_table icmp_compat_sysctl_table[] = { |
324 | { | 329 | { |
325 | .procname = "ip_conntrack_icmp_timeout", | 330 | .procname = "ip_conntrack_icmp_timeout", |
326 | .data = &nf_ct_icmp_timeout, | ||
327 | .maxlen = sizeof(unsigned int), | 331 | .maxlen = sizeof(unsigned int), |
328 | .mode = 0644, | 332 | .mode = 0644, |
329 | .proc_handler = proc_dointvec_jiffies, | 333 | .proc_handler = proc_dointvec_jiffies, |
@@ -333,6 +337,34 @@ static struct ctl_table icmp_compat_sysctl_table[] = { | |||
333 | #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ | 337 | #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ |
334 | #endif /* CONFIG_SYSCTL */ | 338 | #endif /* CONFIG_SYSCTL */ |
335 | 339 | ||
340 | static int icmp_init_net(struct net *net) | ||
341 | { | ||
342 | struct nf_icmp_net *in = icmp_pernet(net); | ||
343 | struct nf_proto_net *pn = (struct nf_proto_net *)in; | ||
344 | in->timeout = nf_ct_icmp_timeout; | ||
345 | |||
346 | #ifdef CONFIG_SYSCTL | ||
347 | pn->ctl_table = kmemdup(icmp_sysctl_table, | ||
348 | sizeof(icmp_sysctl_table), | ||
349 | GFP_KERNEL); | ||
350 | if (!pn->ctl_table) | ||
351 | return -ENOMEM; | ||
352 | pn->ctl_table[0].data = &in->timeout; | ||
353 | #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT | ||
354 | pn->ctl_compat_table = kmemdup(icmp_compat_sysctl_table, | ||
355 | sizeof(icmp_compat_sysctl_table), | ||
356 | GFP_KERNEL); | ||
357 | if (!pn->ctl_compat_table) { | ||
358 | kfree(pn->ctl_table); | ||
359 | pn->ctl_table = NULL; | ||
360 | return -ENOMEM; | ||
361 | } | ||
362 | pn->ctl_compat_table[0].data = &in->timeout; | ||
363 | #endif | ||
364 | #endif | ||
365 | return 0; | ||
366 | } | ||
367 | |||
336 | struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = | 368 | struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = |
337 | { | 369 | { |
338 | .l3proto = PF_INET, | 370 | .l3proto = PF_INET, |
@@ -362,11 +394,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = | |||
362 | .nla_policy = icmp_timeout_nla_policy, | 394 | .nla_policy = icmp_timeout_nla_policy, |
363 | }, | 395 | }, |
364 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ | 396 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ |
365 | #ifdef CONFIG_SYSCTL | 397 | .init_net = icmp_init_net, |
366 | .ctl_table_header = &icmp_sysctl_header, | ||
367 | .ctl_table = icmp_sysctl_table, | ||
368 | #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT | ||
369 | .ctl_compat_table = icmp_compat_sysctl_table, | ||
370 | #endif | ||
371 | #endif | ||
372 | }; | 398 | }; |
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 9bb1b8a37a22..742815518b0f 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c | |||
@@ -94,14 +94,14 @@ static struct nf_hook_ops ipv4_defrag_ops[] = { | |||
94 | { | 94 | { |
95 | .hook = ipv4_conntrack_defrag, | 95 | .hook = ipv4_conntrack_defrag, |
96 | .owner = THIS_MODULE, | 96 | .owner = THIS_MODULE, |
97 | .pf = PF_INET, | 97 | .pf = NFPROTO_IPV4, |
98 | .hooknum = NF_INET_PRE_ROUTING, | 98 | .hooknum = NF_INET_PRE_ROUTING, |
99 | .priority = NF_IP_PRI_CONNTRACK_DEFRAG, | 99 | .priority = NF_IP_PRI_CONNTRACK_DEFRAG, |
100 | }, | 100 | }, |
101 | { | 101 | { |
102 | .hook = ipv4_conntrack_defrag, | 102 | .hook = ipv4_conntrack_defrag, |
103 | .owner = THIS_MODULE, | 103 | .owner = THIS_MODULE, |
104 | .pf = PF_INET, | 104 | .pf = NFPROTO_IPV4, |
105 | .hooknum = NF_INET_LOCAL_OUT, | 105 | .hooknum = NF_INET_LOCAL_OUT, |
106 | .priority = NF_IP_PRI_CONNTRACK_DEFRAG, | 106 | .priority = NF_IP_PRI_CONNTRACK_DEFRAG, |
107 | }, | 107 | }, |
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c index 7b22382ff0e9..3c04d24e2976 100644 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ b/net/ipv4/netfilter/nf_nat_amanda.c | |||
@@ -13,10 +13,10 @@ | |||
13 | #include <linux/skbuff.h> | 13 | #include <linux/skbuff.h> |
14 | #include <linux/udp.h> | 14 | #include <linux/udp.h> |
15 | 15 | ||
16 | #include <net/netfilter/nf_nat_helper.h> | ||
17 | #include <net/netfilter/nf_nat_rule.h> | ||
18 | #include <net/netfilter/nf_conntrack_helper.h> | 16 | #include <net/netfilter/nf_conntrack_helper.h> |
19 | #include <net/netfilter/nf_conntrack_expect.h> | 17 | #include <net/netfilter/nf_conntrack_expect.h> |
18 | #include <net/netfilter/nf_nat_helper.h> | ||
19 | #include <net/netfilter/nf_nat_rule.h> | ||
20 | #include <linux/netfilter/nf_conntrack_amanda.h> | 20 | #include <linux/netfilter/nf_conntrack_amanda.h> |
21 | 21 | ||
22 | MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>"); | 22 | MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>"); |
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index abb52adf5acd..44b082fd48ab 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c | |||
@@ -691,6 +691,10 @@ static struct nf_ct_helper_expectfn follow_master_nat = { | |||
691 | .expectfn = nf_nat_follow_master, | 691 | .expectfn = nf_nat_follow_master, |
692 | }; | 692 | }; |
693 | 693 | ||
694 | static struct nfq_ct_nat_hook nfq_ct_nat = { | ||
695 | .seq_adjust = nf_nat_tcp_seq_adjust, | ||
696 | }; | ||
697 | |||
694 | static int __init nf_nat_init(void) | 698 | static int __init nf_nat_init(void) |
695 | { | 699 | { |
696 | size_t i; | 700 | size_t i; |
@@ -731,6 +735,7 @@ static int __init nf_nat_init(void) | |||
731 | nfnetlink_parse_nat_setup); | 735 | nfnetlink_parse_nat_setup); |
732 | BUG_ON(nf_ct_nat_offset != NULL); | 736 | BUG_ON(nf_ct_nat_offset != NULL); |
733 | RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset); | 737 | RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset); |
738 | RCU_INIT_POINTER(nfq_ct_nat_hook, &nfq_ct_nat); | ||
734 | return 0; | 739 | return 0; |
735 | 740 | ||
736 | cleanup_extend: | 741 | cleanup_extend: |
@@ -747,6 +752,7 @@ static void __exit nf_nat_cleanup(void) | |||
747 | RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL); | 752 | RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL); |
748 | RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL); | 753 | RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL); |
749 | RCU_INIT_POINTER(nf_ct_nat_offset, NULL); | 754 | RCU_INIT_POINTER(nf_ct_nat_offset, NULL); |
755 | RCU_INIT_POINTER(nfq_ct_nat_hook, NULL); | ||
750 | synchronize_net(); | 756 | synchronize_net(); |
751 | } | 757 | } |
752 | 758 | ||
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index cad29c121318..c6784a18c1c4 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c | |||
@@ -95,7 +95,7 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, | |||
95 | unsigned char **data, | 95 | unsigned char **data, |
96 | TransportAddress *taddr, int count) | 96 | TransportAddress *taddr, int count) |
97 | { | 97 | { |
98 | const struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; | 98 | const struct nf_ct_h323_master *info = nfct_help_data(ct); |
99 | int dir = CTINFO2DIR(ctinfo); | 99 | int dir = CTINFO2DIR(ctinfo); |
100 | int i; | 100 | int i; |
101 | __be16 port; | 101 | __be16 port; |
@@ -178,7 +178,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, | |||
178 | struct nf_conntrack_expect *rtp_exp, | 178 | struct nf_conntrack_expect *rtp_exp, |
179 | struct nf_conntrack_expect *rtcp_exp) | 179 | struct nf_conntrack_expect *rtcp_exp) |
180 | { | 180 | { |
181 | struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; | 181 | struct nf_ct_h323_master *info = nfct_help_data(ct); |
182 | int dir = CTINFO2DIR(ctinfo); | 182 | int dir = CTINFO2DIR(ctinfo); |
183 | int i; | 183 | int i; |
184 | u_int16_t nated_port; | 184 | u_int16_t nated_port; |
@@ -330,7 +330,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, | |||
330 | TransportAddress *taddr, __be16 port, | 330 | TransportAddress *taddr, __be16 port, |
331 | struct nf_conntrack_expect *exp) | 331 | struct nf_conntrack_expect *exp) |
332 | { | 332 | { |
333 | struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; | 333 | struct nf_ct_h323_master *info = nfct_help_data(ct); |
334 | int dir = CTINFO2DIR(ctinfo); | 334 | int dir = CTINFO2DIR(ctinfo); |
335 | u_int16_t nated_port = ntohs(port); | 335 | u_int16_t nated_port = ntohs(port); |
336 | 336 | ||
@@ -419,7 +419,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, | |||
419 | unsigned char **data, TransportAddress *taddr, int idx, | 419 | unsigned char **data, TransportAddress *taddr, int idx, |
420 | __be16 port, struct nf_conntrack_expect *exp) | 420 | __be16 port, struct nf_conntrack_expect *exp) |
421 | { | 421 | { |
422 | struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; | 422 | struct nf_ct_h323_master *info = nfct_help_data(ct); |
423 | int dir = CTINFO2DIR(ctinfo); | 423 | int dir = CTINFO2DIR(ctinfo); |
424 | u_int16_t nated_port = ntohs(port); | 424 | u_int16_t nated_port = ntohs(port); |
425 | union nf_inet_addr addr; | 425 | union nf_inet_addr addr; |
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index af65958f6308..2e59ad0b90ca 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c | |||
@@ -153,6 +153,19 @@ void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo, | |||
153 | } | 153 | } |
154 | EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); | 154 | EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); |
155 | 155 | ||
156 | void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, | ||
157 | u32 ctinfo, int off) | ||
158 | { | ||
159 | const struct tcphdr *th; | ||
160 | |||
161 | if (nf_ct_protonum(ct) != IPPROTO_TCP) | ||
162 | return; | ||
163 | |||
164 | th = (struct tcphdr *)(skb_network_header(skb)+ ip_hdrlen(skb)); | ||
165 | nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off); | ||
166 | } | ||
167 | EXPORT_SYMBOL_GPL(nf_nat_tcp_seq_adjust); | ||
168 | |||
156 | static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data, | 169 | static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data, |
157 | int datalen, __sum16 *check, int oldlen) | 170 | int datalen, __sum16 *check, int oldlen) |
158 | { | 171 | { |
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index c273d58980ae..388140881ebe 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c | |||
@@ -49,7 +49,7 @@ static void pptp_nat_expected(struct nf_conn *ct, | |||
49 | const struct nf_nat_pptp *nat_pptp_info; | 49 | const struct nf_nat_pptp *nat_pptp_info; |
50 | struct nf_nat_ipv4_range range; | 50 | struct nf_nat_ipv4_range range; |
51 | 51 | ||
52 | ct_pptp_info = &nfct_help(master)->help.ct_pptp_info; | 52 | ct_pptp_info = nfct_help_data(master); |
53 | nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info; | 53 | nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info; |
54 | 54 | ||
55 | /* And here goes the grand finale of corrosion... */ | 55 | /* And here goes the grand finale of corrosion... */ |
@@ -123,7 +123,7 @@ pptp_outbound_pkt(struct sk_buff *skb, | |||
123 | __be16 new_callid; | 123 | __be16 new_callid; |
124 | unsigned int cid_off; | 124 | unsigned int cid_off; |
125 | 125 | ||
126 | ct_pptp_info = &nfct_help(ct)->help.ct_pptp_info; | 126 | ct_pptp_info = nfct_help_data(ct); |
127 | nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; | 127 | nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; |
128 | 128 | ||
129 | new_callid = ct_pptp_info->pns_call_id; | 129 | new_callid = ct_pptp_info->pns_call_id; |
@@ -192,7 +192,7 @@ pptp_exp_gre(struct nf_conntrack_expect *expect_orig, | |||
192 | struct nf_ct_pptp_master *ct_pptp_info; | 192 | struct nf_ct_pptp_master *ct_pptp_info; |
193 | struct nf_nat_pptp *nat_pptp_info; | 193 | struct nf_nat_pptp *nat_pptp_info; |
194 | 194 | ||
195 | ct_pptp_info = &nfct_help(ct)->help.ct_pptp_info; | 195 | ct_pptp_info = nfct_help_data(ct); |
196 | nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; | 196 | nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; |
197 | 197 | ||
198 | /* save original PAC call ID in nat_info */ | 198 | /* save original PAC call ID in nat_info */ |
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 746edec8b86e..bac712293fd6 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c | |||
@@ -405,7 +405,7 @@ static unsigned char asn1_octets_decode(struct asn1_ctx *ctx, | |||
405 | 405 | ||
406 | ptr = *octets; | 406 | ptr = *octets; |
407 | while (ctx->pointer < eoc) { | 407 | while (ctx->pointer < eoc) { |
408 | if (!asn1_octet_decode(ctx, (unsigned char *)ptr++)) { | 408 | if (!asn1_octet_decode(ctx, ptr++)) { |
409 | kfree(*octets); | 409 | kfree(*octets); |
410 | *octets = NULL; | 410 | *octets = NULL; |
411 | return 0; | 411 | return 0; |
@@ -759,7 +759,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, | |||
759 | } | 759 | } |
760 | break; | 760 | break; |
761 | case SNMP_OBJECTID: | 761 | case SNMP_OBJECTID: |
762 | if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) { | 762 | if (!asn1_oid_decode(ctx, end, &lp, &len)) { |
763 | kfree(id); | 763 | kfree(id); |
764 | return 0; | 764 | return 0; |
765 | } | 765 | } |
diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c index a2901bf829c0..9dbb8d284f99 100644 --- a/net/ipv4/netfilter/nf_nat_tftp.c +++ b/net/ipv4/netfilter/nf_nat_tftp.c | |||
@@ -8,10 +8,10 @@ | |||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/udp.h> | 9 | #include <linux/udp.h> |
10 | 10 | ||
11 | #include <net/netfilter/nf_nat_helper.h> | ||
12 | #include <net/netfilter/nf_nat_rule.h> | ||
13 | #include <net/netfilter/nf_conntrack_helper.h> | 11 | #include <net/netfilter/nf_conntrack_helper.h> |
14 | #include <net/netfilter/nf_conntrack_expect.h> | 12 | #include <net/netfilter/nf_conntrack_expect.h> |
13 | #include <net/netfilter/nf_nat_helper.h> | ||
14 | #include <net/netfilter/nf_nat_rule.h> | ||
15 | #include <linux/netfilter/nf_conntrack_tftp.h> | 15 | #include <linux/netfilter/nf_conntrack_tftp.h> |
16 | 16 | ||
17 | MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>"); | 17 | MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>"); |
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 2c00e8bf684d..340fcf29a966 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c | |||
@@ -371,6 +371,7 @@ void ping_err(struct sk_buff *skb, u32 info) | |||
371 | break; | 371 | break; |
372 | case ICMP_DEST_UNREACH: | 372 | case ICMP_DEST_UNREACH: |
373 | if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ | 373 | if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ |
374 | ipv4_sk_update_pmtu(skb, sk, info); | ||
374 | if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) { | 375 | if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) { |
375 | err = EMSGSIZE; | 376 | err = EMSGSIZE; |
376 | harderr = 1; | 377 | harderr = 1; |
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 9ae5c01cd0b2..8918eff1426d 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c | |||
@@ -36,9 +36,7 @@ const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly; | |||
36 | 36 | ||
37 | int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) | 37 | int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) |
38 | { | 38 | { |
39 | int hash = protocol & (MAX_INET_PROTOS - 1); | 39 | return !cmpxchg((const struct net_protocol **)&inet_protos[protocol], |
40 | |||
41 | return !cmpxchg((const struct net_protocol **)&inet_protos[hash], | ||
42 | NULL, prot) ? 0 : -1; | 40 | NULL, prot) ? 0 : -1; |
43 | } | 41 | } |
44 | EXPORT_SYMBOL(inet_add_protocol); | 42 | EXPORT_SYMBOL(inet_add_protocol); |
@@ -49,9 +47,9 @@ EXPORT_SYMBOL(inet_add_protocol); | |||
49 | 47 | ||
50 | int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol) | 48 | int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol) |
51 | { | 49 | { |
52 | int ret, hash = protocol & (MAX_INET_PROTOS - 1); | 50 | int ret; |
53 | 51 | ||
54 | ret = (cmpxchg((const struct net_protocol **)&inet_protos[hash], | 52 | ret = (cmpxchg((const struct net_protocol **)&inet_protos[protocol], |
55 | prot, NULL) == prot) ? 0 : -1; | 53 | prot, NULL) == prot) ? 0 : -1; |
56 | 54 | ||
57 | synchronize_net(); | 55 | synchronize_net(); |
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 4032b818f3e4..659ddfb10947 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -216,6 +216,9 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) | |||
216 | int err = 0; | 216 | int err = 0; |
217 | int harderr = 0; | 217 | int harderr = 0; |
218 | 218 | ||
219 | if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) | ||
220 | ipv4_sk_update_pmtu(skb, sk, info); | ||
221 | |||
219 | /* Report error on raw socket, if: | 222 | /* Report error on raw socket, if: |
220 | 1. User requested ip_recverr. | 223 | 1. User requested ip_recverr. |
221 | 2. Socket is connected (otherwise the error indication | 224 | 2. Socket is connected (otherwise the error indication |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 98b30d08efe9..83d56a016625 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -162,10 +162,7 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) | |||
162 | struct inet_peer *peer; | 162 | struct inet_peer *peer; |
163 | u32 *p = NULL; | 163 | u32 *p = NULL; |
164 | 164 | ||
165 | if (!rt->peer) | 165 | peer = rt_get_peer_create(rt, rt->rt_dst); |
166 | rt_bind_peer(rt, rt->rt_dst, 1); | ||
167 | |||
168 | peer = rt->peer; | ||
169 | if (peer) { | 166 | if (peer) { |
170 | u32 *old_p = __DST_METRICS_PTR(old); | 167 | u32 *old_p = __DST_METRICS_PTR(old); |
171 | unsigned long prev, new; | 168 | unsigned long prev, new; |
@@ -443,7 +440,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) | |||
443 | r->rt_key_tos, | 440 | r->rt_key_tos, |
444 | -1, | 441 | -1, |
445 | HHUptod, | 442 | HHUptod, |
446 | r->rt_spec_dst, &len); | 443 | 0, &len); |
447 | 444 | ||
448 | seq_printf(seq, "%*s\n", 127 - len, ""); | 445 | seq_printf(seq, "%*s\n", 127 - len, ""); |
449 | } | 446 | } |
@@ -680,7 +677,7 @@ static inline int rt_fast_clean(struct rtable *rth) | |||
680 | static inline int rt_valuable(struct rtable *rth) | 677 | static inline int rt_valuable(struct rtable *rth) |
681 | { | 678 | { |
682 | return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || | 679 | return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || |
683 | (rth->peer && rth->peer->pmtu_expires); | 680 | (rt_has_peer(rth) && rt_peer_ptr(rth)->pmtu_expires); |
684 | } | 681 | } |
685 | 682 | ||
686 | static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) | 683 | static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) |
@@ -873,34 +870,22 @@ static void rt_check_expire(void) | |||
873 | while ((rth = rcu_dereference_protected(*rthp, | 870 | while ((rth = rcu_dereference_protected(*rthp, |
874 | lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) { | 871 | lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) { |
875 | prefetch(rth->dst.rt_next); | 872 | prefetch(rth->dst.rt_next); |
876 | if (rt_is_expired(rth)) { | 873 | if (rt_is_expired(rth) || |
874 | rt_may_expire(rth, tmo, ip_rt_gc_timeout)) { | ||
877 | *rthp = rth->dst.rt_next; | 875 | *rthp = rth->dst.rt_next; |
878 | rt_free(rth); | 876 | rt_free(rth); |
879 | continue; | 877 | continue; |
880 | } | 878 | } |
881 | if (rth->dst.expires) { | ||
882 | /* Entry is expired even if it is in use */ | ||
883 | if (time_before_eq(jiffies, rth->dst.expires)) { | ||
884 | nofree: | ||
885 | tmo >>= 1; | ||
886 | rthp = &rth->dst.rt_next; | ||
887 | /* | ||
888 | * We only count entries on | ||
889 | * a chain with equal hash inputs once | ||
890 | * so that entries for different QOS | ||
891 | * levels, and other non-hash input | ||
892 | * attributes don't unfairly skew | ||
893 | * the length computation | ||
894 | */ | ||
895 | length += has_noalias(rt_hash_table[i].chain, rth); | ||
896 | continue; | ||
897 | } | ||
898 | } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) | ||
899 | goto nofree; | ||
900 | 879 | ||
901 | /* Cleanup aged off entries. */ | 880 | /* We only count entries on a chain with equal |
902 | *rthp = rth->dst.rt_next; | 881 | * hash inputs once so that entries for |
903 | rt_free(rth); | 882 | * different QOS levels, and other non-hash |
883 | * input attributes don't unfairly skew the | ||
884 | * length computation | ||
885 | */ | ||
886 | tmo >>= 1; | ||
887 | rthp = &rth->dst.rt_next; | ||
888 | length += has_noalias(rt_hash_table[i].chain, rth); | ||
904 | } | 889 | } |
905 | spin_unlock_bh(rt_hash_lock_addr(i)); | 890 | spin_unlock_bh(rt_hash_lock_addr(i)); |
906 | sum += length; | 891 | sum += length; |
@@ -938,7 +923,7 @@ static void rt_cache_invalidate(struct net *net) | |||
938 | 923 | ||
939 | get_random_bytes(&shuffle, sizeof(shuffle)); | 924 | get_random_bytes(&shuffle, sizeof(shuffle)); |
940 | atomic_add(shuffle + 1U, &net->ipv4.rt_genid); | 925 | atomic_add(shuffle + 1U, &net->ipv4.rt_genid); |
941 | inetpeer_invalidate_tree(AF_INET); | 926 | inetpeer_invalidate_family(AF_INET); |
942 | } | 927 | } |
943 | 928 | ||
944 | /* | 929 | /* |
@@ -1159,7 +1144,7 @@ restart: | |||
1159 | candp = NULL; | 1144 | candp = NULL; |
1160 | now = jiffies; | 1145 | now = jiffies; |
1161 | 1146 | ||
1162 | if (!rt_caching(dev_net(rt->dst.dev))) { | 1147 | if (!rt_caching(dev_net(rt->dst.dev)) || (rt->dst.flags & DST_NOCACHE)) { |
1163 | /* | 1148 | /* |
1164 | * If we're not caching, just tell the caller we | 1149 | * If we're not caching, just tell the caller we |
1165 | * were successful and don't touch the route. The | 1150 | * were successful and don't touch the route. The |
@@ -1328,14 +1313,20 @@ static u32 rt_peer_genid(void) | |||
1328 | 1313 | ||
1329 | void rt_bind_peer(struct rtable *rt, __be32 daddr, int create) | 1314 | void rt_bind_peer(struct rtable *rt, __be32 daddr, int create) |
1330 | { | 1315 | { |
1316 | struct inet_peer_base *base; | ||
1331 | struct inet_peer *peer; | 1317 | struct inet_peer *peer; |
1332 | 1318 | ||
1333 | peer = inet_getpeer_v4(daddr, create); | 1319 | base = inetpeer_base_ptr(rt->_peer); |
1320 | if (!base) | ||
1321 | return; | ||
1334 | 1322 | ||
1335 | if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) | 1323 | peer = inet_getpeer_v4(base, daddr, create); |
1336 | inet_putpeer(peer); | 1324 | if (peer) { |
1337 | else | 1325 | if (!rt_set_peer(rt, peer)) |
1338 | rt->rt_peer_genid = rt_peer_genid(); | 1326 | inet_putpeer(peer); |
1327 | else | ||
1328 | rt->rt_peer_genid = rt_peer_genid(); | ||
1329 | } | ||
1339 | } | 1330 | } |
1340 | 1331 | ||
1341 | /* | 1332 | /* |
@@ -1363,14 +1354,13 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) | |||
1363 | struct rtable *rt = (struct rtable *) dst; | 1354 | struct rtable *rt = (struct rtable *) dst; |
1364 | 1355 | ||
1365 | if (rt && !(rt->dst.flags & DST_NOPEER)) { | 1356 | if (rt && !(rt->dst.flags & DST_NOPEER)) { |
1366 | if (rt->peer == NULL) | 1357 | struct inet_peer *peer = rt_get_peer_create(rt, rt->rt_dst); |
1367 | rt_bind_peer(rt, rt->rt_dst, 1); | ||
1368 | 1358 | ||
1369 | /* If peer is attached to destination, it is never detached, | 1359 | /* If peer is attached to destination, it is never detached, |
1370 | so that we need not to grab a lock to dereference it. | 1360 | so that we need not to grab a lock to dereference it. |
1371 | */ | 1361 | */ |
1372 | if (rt->peer) { | 1362 | if (peer) { |
1373 | iph->id = htons(inet_getid(rt->peer, more)); | 1363 | iph->id = htons(inet_getid(peer, more)); |
1374 | return; | 1364 | return; |
1375 | } | 1365 | } |
1376 | } else if (!rt) | 1366 | } else if (!rt) |
@@ -1480,10 +1470,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1480 | rt->rt_gateway != old_gw) | 1470 | rt->rt_gateway != old_gw) |
1481 | continue; | 1471 | continue; |
1482 | 1472 | ||
1483 | if (!rt->peer) | 1473 | peer = rt_get_peer_create(rt, rt->rt_dst); |
1484 | rt_bind_peer(rt, rt->rt_dst, 1); | ||
1485 | |||
1486 | peer = rt->peer; | ||
1487 | if (peer) { | 1474 | if (peer) { |
1488 | if (peer->redirect_learned.a4 != new_gw) { | 1475 | if (peer->redirect_learned.a4 != new_gw) { |
1489 | peer->redirect_learned.a4 = new_gw; | 1476 | peer->redirect_learned.a4 = new_gw; |
@@ -1539,8 +1526,10 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) | |||
1539 | rt_genid(dev_net(dst->dev))); | 1526 | rt_genid(dev_net(dst->dev))); |
1540 | rt_del(hash, rt); | 1527 | rt_del(hash, rt); |
1541 | ret = NULL; | 1528 | ret = NULL; |
1542 | } else if (rt->peer && peer_pmtu_expired(rt->peer)) { | 1529 | } else if (rt_has_peer(rt)) { |
1543 | dst_metric_set(dst, RTAX_MTU, rt->peer->pmtu_orig); | 1530 | struct inet_peer *peer = rt_peer_ptr(rt); |
1531 | if (peer_pmtu_expired(peer)) | ||
1532 | dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig); | ||
1544 | } | 1533 | } |
1545 | } | 1534 | } |
1546 | return ret; | 1535 | return ret; |
@@ -1578,9 +1567,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
1578 | log_martians = IN_DEV_LOG_MARTIANS(in_dev); | 1567 | log_martians = IN_DEV_LOG_MARTIANS(in_dev); |
1579 | rcu_read_unlock(); | 1568 | rcu_read_unlock(); |
1580 | 1569 | ||
1581 | if (!rt->peer) | 1570 | peer = rt_get_peer_create(rt, rt->rt_dst); |
1582 | rt_bind_peer(rt, rt->rt_dst, 1); | ||
1583 | peer = rt->peer; | ||
1584 | if (!peer) { | 1571 | if (!peer) { |
1585 | icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); | 1572 | icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); |
1586 | return; | 1573 | return; |
@@ -1622,12 +1609,28 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
1622 | 1609 | ||
1623 | static int ip_error(struct sk_buff *skb) | 1610 | static int ip_error(struct sk_buff *skb) |
1624 | { | 1611 | { |
1612 | struct in_device *in_dev = __in_dev_get_rcu(skb->dev); | ||
1625 | struct rtable *rt = skb_rtable(skb); | 1613 | struct rtable *rt = skb_rtable(skb); |
1626 | struct inet_peer *peer; | 1614 | struct inet_peer *peer; |
1627 | unsigned long now; | 1615 | unsigned long now; |
1616 | struct net *net; | ||
1628 | bool send; | 1617 | bool send; |
1629 | int code; | 1618 | int code; |
1630 | 1619 | ||
1620 | net = dev_net(rt->dst.dev); | ||
1621 | if (!IN_DEV_FORWARD(in_dev)) { | ||
1622 | switch (rt->dst.error) { | ||
1623 | case EHOSTUNREACH: | ||
1624 | IP_INC_STATS_BH(net, IPSTATS_MIB_INADDRERRORS); | ||
1625 | break; | ||
1626 | |||
1627 | case ENETUNREACH: | ||
1628 | IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES); | ||
1629 | break; | ||
1630 | } | ||
1631 | goto out; | ||
1632 | } | ||
1633 | |||
1631 | switch (rt->dst.error) { | 1634 | switch (rt->dst.error) { |
1632 | case EINVAL: | 1635 | case EINVAL: |
1633 | default: | 1636 | default: |
@@ -1637,17 +1640,14 @@ static int ip_error(struct sk_buff *skb) | |||
1637 | break; | 1640 | break; |
1638 | case ENETUNREACH: | 1641 | case ENETUNREACH: |
1639 | code = ICMP_NET_UNREACH; | 1642 | code = ICMP_NET_UNREACH; |
1640 | IP_INC_STATS_BH(dev_net(rt->dst.dev), | 1643 | IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES); |
1641 | IPSTATS_MIB_INNOROUTES); | ||
1642 | break; | 1644 | break; |
1643 | case EACCES: | 1645 | case EACCES: |
1644 | code = ICMP_PKT_FILTERED; | 1646 | code = ICMP_PKT_FILTERED; |
1645 | break; | 1647 | break; |
1646 | } | 1648 | } |
1647 | 1649 | ||
1648 | if (!rt->peer) | 1650 | peer = rt_get_peer_create(rt, rt->rt_dst); |
1649 | rt_bind_peer(rt, rt->rt_dst, 1); | ||
1650 | peer = rt->peer; | ||
1651 | 1651 | ||
1652 | send = true; | 1652 | send = true; |
1653 | if (peer) { | 1653 | if (peer) { |
@@ -1668,67 +1668,6 @@ out: kfree_skb(skb); | |||
1668 | return 0; | 1668 | return 0; |
1669 | } | 1669 | } |
1670 | 1670 | ||
1671 | /* | ||
1672 | * The last two values are not from the RFC but | ||
1673 | * are needed for AMPRnet AX.25 paths. | ||
1674 | */ | ||
1675 | |||
1676 | static const unsigned short mtu_plateau[] = | ||
1677 | {32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 }; | ||
1678 | |||
1679 | static inline unsigned short guess_mtu(unsigned short old_mtu) | ||
1680 | { | ||
1681 | int i; | ||
1682 | |||
1683 | for (i = 0; i < ARRAY_SIZE(mtu_plateau); i++) | ||
1684 | if (old_mtu > mtu_plateau[i]) | ||
1685 | return mtu_plateau[i]; | ||
1686 | return 68; | ||
1687 | } | ||
1688 | |||
1689 | unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph, | ||
1690 | unsigned short new_mtu, | ||
1691 | struct net_device *dev) | ||
1692 | { | ||
1693 | unsigned short old_mtu = ntohs(iph->tot_len); | ||
1694 | unsigned short est_mtu = 0; | ||
1695 | struct inet_peer *peer; | ||
1696 | |||
1697 | peer = inet_getpeer_v4(iph->daddr, 1); | ||
1698 | if (peer) { | ||
1699 | unsigned short mtu = new_mtu; | ||
1700 | |||
1701 | if (new_mtu < 68 || new_mtu >= old_mtu) { | ||
1702 | /* BSD 4.2 derived systems incorrectly adjust | ||
1703 | * tot_len by the IP header length, and report | ||
1704 | * a zero MTU in the ICMP message. | ||
1705 | */ | ||
1706 | if (mtu == 0 && | ||
1707 | old_mtu >= 68 + (iph->ihl << 2)) | ||
1708 | old_mtu -= iph->ihl << 2; | ||
1709 | mtu = guess_mtu(old_mtu); | ||
1710 | } | ||
1711 | |||
1712 | if (mtu < ip_rt_min_pmtu) | ||
1713 | mtu = ip_rt_min_pmtu; | ||
1714 | if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { | ||
1715 | unsigned long pmtu_expires; | ||
1716 | |||
1717 | pmtu_expires = jiffies + ip_rt_mtu_expires; | ||
1718 | if (!pmtu_expires) | ||
1719 | pmtu_expires = 1UL; | ||
1720 | |||
1721 | est_mtu = mtu; | ||
1722 | peer->pmtu_learned = mtu; | ||
1723 | peer->pmtu_expires = pmtu_expires; | ||
1724 | atomic_inc(&__rt_peer_genid); | ||
1725 | } | ||
1726 | |||
1727 | inet_putpeer(peer); | ||
1728 | } | ||
1729 | return est_mtu ? : new_mtu; | ||
1730 | } | ||
1731 | |||
1732 | static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer) | 1671 | static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer) |
1733 | { | 1672 | { |
1734 | unsigned long expires = ACCESS_ONCE(peer->pmtu_expires); | 1673 | unsigned long expires = ACCESS_ONCE(peer->pmtu_expires); |
@@ -1753,9 +1692,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) | |||
1753 | 1692 | ||
1754 | dst_confirm(dst); | 1693 | dst_confirm(dst); |
1755 | 1694 | ||
1756 | if (!rt->peer) | 1695 | peer = rt_get_peer_create(rt, rt->rt_dst); |
1757 | rt_bind_peer(rt, rt->rt_dst, 1); | ||
1758 | peer = rt->peer; | ||
1759 | if (peer) { | 1696 | if (peer) { |
1760 | unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires); | 1697 | unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires); |
1761 | 1698 | ||
@@ -1777,16 +1714,40 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) | |||
1777 | } | 1714 | } |
1778 | } | 1715 | } |
1779 | 1716 | ||
1717 | void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, | ||
1718 | int oif, u32 mark, u8 protocol, int flow_flags) | ||
1719 | { | ||
1720 | const struct iphdr *iph = (const struct iphdr *)skb->data; | ||
1721 | struct flowi4 fl4; | ||
1722 | struct rtable *rt; | ||
1723 | |||
1724 | flowi4_init_output(&fl4, oif, mark, RT_TOS(iph->tos), RT_SCOPE_UNIVERSE, | ||
1725 | protocol, flow_flags | FLOWI_FLAG_PRECOW_METRICS, | ||
1726 | iph->daddr, iph->saddr, 0, 0); | ||
1727 | rt = __ip_route_output_key(net, &fl4); | ||
1728 | if (!IS_ERR(rt)) { | ||
1729 | ip_rt_update_pmtu(&rt->dst, mtu); | ||
1730 | ip_rt_put(rt); | ||
1731 | } | ||
1732 | } | ||
1733 | EXPORT_SYMBOL_GPL(ipv4_update_pmtu); | ||
1734 | |||
1735 | void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) | ||
1736 | { | ||
1737 | const struct inet_sock *inet = inet_sk(sk); | ||
1738 | |||
1739 | return ipv4_update_pmtu(skb, sock_net(sk), mtu, | ||
1740 | sk->sk_bound_dev_if, sk->sk_mark, | ||
1741 | inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, | ||
1742 | inet_sk_flowi_flags(sk)); | ||
1743 | } | ||
1744 | EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); | ||
1780 | 1745 | ||
1781 | static void ipv4_validate_peer(struct rtable *rt) | 1746 | static void ipv4_validate_peer(struct rtable *rt) |
1782 | { | 1747 | { |
1783 | if (rt->rt_peer_genid != rt_peer_genid()) { | 1748 | if (rt->rt_peer_genid != rt_peer_genid()) { |
1784 | struct inet_peer *peer; | 1749 | struct inet_peer *peer = rt_get_peer(rt, rt->rt_dst); |
1785 | 1750 | ||
1786 | if (!rt->peer) | ||
1787 | rt_bind_peer(rt, rt->rt_dst, 0); | ||
1788 | |||
1789 | peer = rt->peer; | ||
1790 | if (peer) { | 1751 | if (peer) { |
1791 | check_peer_pmtu(&rt->dst, peer); | 1752 | check_peer_pmtu(&rt->dst, peer); |
1792 | 1753 | ||
@@ -1812,14 +1773,13 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) | |||
1812 | static void ipv4_dst_destroy(struct dst_entry *dst) | 1773 | static void ipv4_dst_destroy(struct dst_entry *dst) |
1813 | { | 1774 | { |
1814 | struct rtable *rt = (struct rtable *) dst; | 1775 | struct rtable *rt = (struct rtable *) dst; |
1815 | struct inet_peer *peer = rt->peer; | ||
1816 | 1776 | ||
1817 | if (rt->fi) { | 1777 | if (rt->fi) { |
1818 | fib_info_put(rt->fi); | 1778 | fib_info_put(rt->fi); |
1819 | rt->fi = NULL; | 1779 | rt->fi = NULL; |
1820 | } | 1780 | } |
1821 | if (peer) { | 1781 | if (rt_has_peer(rt)) { |
1822 | rt->peer = NULL; | 1782 | struct inet_peer *peer = rt_peer_ptr(rt); |
1823 | inet_putpeer(peer); | 1783 | inet_putpeer(peer); |
1824 | } | 1784 | } |
1825 | } | 1785 | } |
@@ -1832,8 +1792,11 @@ static void ipv4_link_failure(struct sk_buff *skb) | |||
1832 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); | 1792 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); |
1833 | 1793 | ||
1834 | rt = skb_rtable(skb); | 1794 | rt = skb_rtable(skb); |
1835 | if (rt && rt->peer && peer_pmtu_cleaned(rt->peer)) | 1795 | if (rt && rt_has_peer(rt)) { |
1836 | dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig); | 1796 | struct inet_peer *peer = rt_peer_ptr(rt); |
1797 | if (peer_pmtu_cleaned(peer)) | ||
1798 | dst_metric_set(&rt->dst, RTAX_MTU, peer->pmtu_orig); | ||
1799 | } | ||
1837 | } | 1800 | } |
1838 | 1801 | ||
1839 | static int ip_rt_bug(struct sk_buff *skb) | 1802 | static int ip_rt_bug(struct sk_buff *skb) |
@@ -1935,6 +1898,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) | |||
1935 | static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, | 1898 | static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, |
1936 | struct fib_info *fi) | 1899 | struct fib_info *fi) |
1937 | { | 1900 | { |
1901 | struct inet_peer_base *base; | ||
1938 | struct inet_peer *peer; | 1902 | struct inet_peer *peer; |
1939 | int create = 0; | 1903 | int create = 0; |
1940 | 1904 | ||
@@ -1944,8 +1908,12 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, | |||
1944 | if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) | 1908 | if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) |
1945 | create = 1; | 1909 | create = 1; |
1946 | 1910 | ||
1947 | rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create); | 1911 | base = inetpeer_base_ptr(rt->_peer); |
1912 | BUG_ON(!base); | ||
1913 | |||
1914 | peer = inet_getpeer_v4(base, rt->rt_dst, create); | ||
1948 | if (peer) { | 1915 | if (peer) { |
1916 | __rt_set_peer(rt, peer); | ||
1949 | rt->rt_peer_genid = rt_peer_genid(); | 1917 | rt->rt_peer_genid = rt_peer_genid(); |
1950 | if (inet_metrics_new(peer)) | 1918 | if (inet_metrics_new(peer)) |
1951 | memcpy(peer->metrics, fi->fib_metrics, | 1919 | memcpy(peer->metrics, fi->fib_metrics, |
@@ -1986,8 +1954,6 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, | |||
1986 | 1954 | ||
1987 | if (dst_mtu(dst) > IP_MAX_MTU) | 1955 | if (dst_mtu(dst) > IP_MAX_MTU) |
1988 | dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU); | 1956 | dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU); |
1989 | if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40) | ||
1990 | dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40); | ||
1991 | 1957 | ||
1992 | #ifdef CONFIG_IP_ROUTE_CLASSID | 1958 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1993 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 1959 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
@@ -2012,7 +1978,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2012 | { | 1978 | { |
2013 | unsigned int hash; | 1979 | unsigned int hash; |
2014 | struct rtable *rth; | 1980 | struct rtable *rth; |
2015 | __be32 spec_dst; | ||
2016 | struct in_device *in_dev = __in_dev_get_rcu(dev); | 1981 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
2017 | u32 itag = 0; | 1982 | u32 itag = 0; |
2018 | int err; | 1983 | int err; |
@@ -2023,16 +1988,18 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2023 | return -EINVAL; | 1988 | return -EINVAL; |
2024 | 1989 | ||
2025 | if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || | 1990 | if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || |
2026 | ipv4_is_loopback(saddr) || skb->protocol != htons(ETH_P_IP)) | 1991 | skb->protocol != htons(ETH_P_IP)) |
2027 | goto e_inval; | 1992 | goto e_inval; |
2028 | 1993 | ||
1994 | if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) | ||
1995 | if (ipv4_is_loopback(saddr)) | ||
1996 | goto e_inval; | ||
1997 | |||
2029 | if (ipv4_is_zeronet(saddr)) { | 1998 | if (ipv4_is_zeronet(saddr)) { |
2030 | if (!ipv4_is_local_multicast(daddr)) | 1999 | if (!ipv4_is_local_multicast(daddr)) |
2031 | goto e_inval; | 2000 | goto e_inval; |
2032 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); | ||
2033 | } else { | 2001 | } else { |
2034 | err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst, | 2002 | err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &itag); |
2035 | &itag); | ||
2036 | if (err < 0) | 2003 | if (err < 0) |
2037 | goto e_err; | 2004 | goto e_err; |
2038 | } | 2005 | } |
@@ -2059,9 +2026,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2059 | rth->rt_oif = 0; | 2026 | rth->rt_oif = 0; |
2060 | rth->rt_mark = skb->mark; | 2027 | rth->rt_mark = skb->mark; |
2061 | rth->rt_gateway = daddr; | 2028 | rth->rt_gateway = daddr; |
2062 | rth->rt_spec_dst= spec_dst; | ||
2063 | rth->rt_peer_genid = 0; | 2029 | rth->rt_peer_genid = 0; |
2064 | rth->peer = NULL; | 2030 | rt_init_peer(rth, dev_net(dev)->ipv4.peers); |
2065 | rth->fi = NULL; | 2031 | rth->fi = NULL; |
2066 | if (our) { | 2032 | if (our) { |
2067 | rth->dst.input= ip_local_deliver; | 2033 | rth->dst.input= ip_local_deliver; |
@@ -2123,7 +2089,6 @@ static int __mkroute_input(struct sk_buff *skb, | |||
2123 | int err; | 2089 | int err; |
2124 | struct in_device *out_dev; | 2090 | struct in_device *out_dev; |
2125 | unsigned int flags = 0; | 2091 | unsigned int flags = 0; |
2126 | __be32 spec_dst; | ||
2127 | u32 itag; | 2092 | u32 itag; |
2128 | 2093 | ||
2129 | /* get a working reference to the output device */ | 2094 | /* get a working reference to the output device */ |
@@ -2135,7 +2100,7 @@ static int __mkroute_input(struct sk_buff *skb, | |||
2135 | 2100 | ||
2136 | 2101 | ||
2137 | err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), | 2102 | err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), |
2138 | in_dev->dev, &spec_dst, &itag); | 2103 | in_dev->dev, &itag); |
2139 | if (err < 0) { | 2104 | if (err < 0) { |
2140 | ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, | 2105 | ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, |
2141 | saddr); | 2106 | saddr); |
@@ -2187,9 +2152,8 @@ static int __mkroute_input(struct sk_buff *skb, | |||
2187 | rth->rt_oif = 0; | 2152 | rth->rt_oif = 0; |
2188 | rth->rt_mark = skb->mark; | 2153 | rth->rt_mark = skb->mark; |
2189 | rth->rt_gateway = daddr; | 2154 | rth->rt_gateway = daddr; |
2190 | rth->rt_spec_dst= spec_dst; | ||
2191 | rth->rt_peer_genid = 0; | 2155 | rth->rt_peer_genid = 0; |
2192 | rth->peer = NULL; | 2156 | rt_init_peer(rth, &res->table->tb_peers); |
2193 | rth->fi = NULL; | 2157 | rth->fi = NULL; |
2194 | 2158 | ||
2195 | rth->dst.input = ip_forward; | 2159 | rth->dst.input = ip_forward; |
@@ -2253,7 +2217,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2253 | u32 itag = 0; | 2217 | u32 itag = 0; |
2254 | struct rtable *rth; | 2218 | struct rtable *rth; |
2255 | unsigned int hash; | 2219 | unsigned int hash; |
2256 | __be32 spec_dst; | ||
2257 | int err = -EINVAL; | 2220 | int err = -EINVAL; |
2258 | struct net *net = dev_net(dev); | 2221 | struct net *net = dev_net(dev); |
2259 | 2222 | ||
@@ -2266,8 +2229,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2266 | by fib_lookup. | 2229 | by fib_lookup. |
2267 | */ | 2230 | */ |
2268 | 2231 | ||
2269 | if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || | 2232 | if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) |
2270 | ipv4_is_loopback(saddr)) | ||
2271 | goto martian_source; | 2233 | goto martian_source; |
2272 | 2234 | ||
2273 | if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0)) | 2235 | if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0)) |
@@ -2279,9 +2241,17 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2279 | if (ipv4_is_zeronet(saddr)) | 2241 | if (ipv4_is_zeronet(saddr)) |
2280 | goto martian_source; | 2242 | goto martian_source; |
2281 | 2243 | ||
2282 | if (ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr)) | 2244 | if (ipv4_is_zeronet(daddr)) |
2283 | goto martian_destination; | 2245 | goto martian_destination; |
2284 | 2246 | ||
2247 | if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) { | ||
2248 | if (ipv4_is_loopback(daddr)) | ||
2249 | goto martian_destination; | ||
2250 | |||
2251 | if (ipv4_is_loopback(saddr)) | ||
2252 | goto martian_source; | ||
2253 | } | ||
2254 | |||
2285 | /* | 2255 | /* |
2286 | * Now we are ready to route packet. | 2256 | * Now we are ready to route packet. |
2287 | */ | 2257 | */ |
@@ -2293,11 +2263,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2293 | fl4.daddr = daddr; | 2263 | fl4.daddr = daddr; |
2294 | fl4.saddr = saddr; | 2264 | fl4.saddr = saddr; |
2295 | err = fib_lookup(net, &fl4, &res); | 2265 | err = fib_lookup(net, &fl4, &res); |
2296 | if (err != 0) { | 2266 | if (err != 0) |
2297 | if (!IN_DEV_FORWARD(in_dev)) | ||
2298 | goto e_hostunreach; | ||
2299 | goto no_route; | 2267 | goto no_route; |
2300 | } | ||
2301 | 2268 | ||
2302 | RT_CACHE_STAT_INC(in_slow_tot); | 2269 | RT_CACHE_STAT_INC(in_slow_tot); |
2303 | 2270 | ||
@@ -2307,17 +2274,16 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2307 | if (res.type == RTN_LOCAL) { | 2274 | if (res.type == RTN_LOCAL) { |
2308 | err = fib_validate_source(skb, saddr, daddr, tos, | 2275 | err = fib_validate_source(skb, saddr, daddr, tos, |
2309 | net->loopback_dev->ifindex, | 2276 | net->loopback_dev->ifindex, |
2310 | dev, &spec_dst, &itag); | 2277 | dev, &itag); |
2311 | if (err < 0) | 2278 | if (err < 0) |
2312 | goto martian_source_keep_err; | 2279 | goto martian_source_keep_err; |
2313 | if (err) | 2280 | if (err) |
2314 | flags |= RTCF_DIRECTSRC; | 2281 | flags |= RTCF_DIRECTSRC; |
2315 | spec_dst = daddr; | ||
2316 | goto local_input; | 2282 | goto local_input; |
2317 | } | 2283 | } |
2318 | 2284 | ||
2319 | if (!IN_DEV_FORWARD(in_dev)) | 2285 | if (!IN_DEV_FORWARD(in_dev)) |
2320 | goto e_hostunreach; | 2286 | goto no_route; |
2321 | if (res.type != RTN_UNICAST) | 2287 | if (res.type != RTN_UNICAST) |
2322 | goto martian_destination; | 2288 | goto martian_destination; |
2323 | 2289 | ||
@@ -2328,11 +2294,8 @@ brd_input: | |||
2328 | if (skb->protocol != htons(ETH_P_IP)) | 2294 | if (skb->protocol != htons(ETH_P_IP)) |
2329 | goto e_inval; | 2295 | goto e_inval; |
2330 | 2296 | ||
2331 | if (ipv4_is_zeronet(saddr)) | 2297 | if (!ipv4_is_zeronet(saddr)) { |
2332 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); | 2298 | err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &itag); |
2333 | else { | ||
2334 | err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst, | ||
2335 | &itag); | ||
2336 | if (err < 0) | 2299 | if (err < 0) |
2337 | goto martian_source_keep_err; | 2300 | goto martian_source_keep_err; |
2338 | if (err) | 2301 | if (err) |
@@ -2370,9 +2333,8 @@ local_input: | |||
2370 | rth->rt_oif = 0; | 2333 | rth->rt_oif = 0; |
2371 | rth->rt_mark = skb->mark; | 2334 | rth->rt_mark = skb->mark; |
2372 | rth->rt_gateway = daddr; | 2335 | rth->rt_gateway = daddr; |
2373 | rth->rt_spec_dst= spec_dst; | ||
2374 | rth->rt_peer_genid = 0; | 2336 | rth->rt_peer_genid = 0; |
2375 | rth->peer = NULL; | 2337 | rt_init_peer(rth, net->ipv4.peers); |
2376 | rth->fi = NULL; | 2338 | rth->fi = NULL; |
2377 | if (res.type == RTN_UNREACHABLE) { | 2339 | if (res.type == RTN_UNREACHABLE) { |
2378 | rth->dst.input= ip_error; | 2340 | rth->dst.input= ip_error; |
@@ -2388,7 +2350,6 @@ local_input: | |||
2388 | 2350 | ||
2389 | no_route: | 2351 | no_route: |
2390 | RT_CACHE_STAT_INC(in_no_route); | 2352 | RT_CACHE_STAT_INC(in_no_route); |
2391 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); | ||
2392 | res.type = RTN_UNREACHABLE; | 2353 | res.type = RTN_UNREACHABLE; |
2393 | if (err == -ESRCH) | 2354 | if (err == -ESRCH) |
2394 | err = -ENETUNREACH; | 2355 | err = -ENETUNREACH; |
@@ -2405,10 +2366,6 @@ martian_destination: | |||
2405 | &daddr, &saddr, dev->name); | 2366 | &daddr, &saddr, dev->name); |
2406 | #endif | 2367 | #endif |
2407 | 2368 | ||
2408 | e_hostunreach: | ||
2409 | err = -EHOSTUNREACH; | ||
2410 | goto out; | ||
2411 | |||
2412 | e_inval: | 2369 | e_inval: |
2413 | err = -EINVAL; | 2370 | err = -EINVAL; |
2414 | goto out; | 2371 | goto out; |
@@ -2520,9 +2477,14 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
2520 | u16 type = res->type; | 2477 | u16 type = res->type; |
2521 | struct rtable *rth; | 2478 | struct rtable *rth; |
2522 | 2479 | ||
2523 | if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK)) | 2480 | in_dev = __in_dev_get_rcu(dev_out); |
2481 | if (!in_dev) | ||
2524 | return ERR_PTR(-EINVAL); | 2482 | return ERR_PTR(-EINVAL); |
2525 | 2483 | ||
2484 | if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) | ||
2485 | if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK)) | ||
2486 | return ERR_PTR(-EINVAL); | ||
2487 | |||
2526 | if (ipv4_is_lbcast(fl4->daddr)) | 2488 | if (ipv4_is_lbcast(fl4->daddr)) |
2527 | type = RTN_BROADCAST; | 2489 | type = RTN_BROADCAST; |
2528 | else if (ipv4_is_multicast(fl4->daddr)) | 2490 | else if (ipv4_is_multicast(fl4->daddr)) |
@@ -2533,10 +2495,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
2533 | if (dev_out->flags & IFF_LOOPBACK) | 2495 | if (dev_out->flags & IFF_LOOPBACK) |
2534 | flags |= RTCF_LOCAL; | 2496 | flags |= RTCF_LOCAL; |
2535 | 2497 | ||
2536 | in_dev = __in_dev_get_rcu(dev_out); | ||
2537 | if (!in_dev) | ||
2538 | return ERR_PTR(-EINVAL); | ||
2539 | |||
2540 | if (type == RTN_BROADCAST) { | 2498 | if (type == RTN_BROADCAST) { |
2541 | flags |= RTCF_BROADCAST | RTCF_LOCAL; | 2499 | flags |= RTCF_BROADCAST | RTCF_LOCAL; |
2542 | fi = NULL; | 2500 | fi = NULL; |
@@ -2574,19 +2532,17 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
2574 | rth->rt_oif = orig_oif; | 2532 | rth->rt_oif = orig_oif; |
2575 | rth->rt_mark = fl4->flowi4_mark; | 2533 | rth->rt_mark = fl4->flowi4_mark; |
2576 | rth->rt_gateway = fl4->daddr; | 2534 | rth->rt_gateway = fl4->daddr; |
2577 | rth->rt_spec_dst= fl4->saddr; | ||
2578 | rth->rt_peer_genid = 0; | 2535 | rth->rt_peer_genid = 0; |
2579 | rth->peer = NULL; | 2536 | rt_init_peer(rth, (res->table ? |
2537 | &res->table->tb_peers : | ||
2538 | dev_net(dev_out)->ipv4.peers)); | ||
2580 | rth->fi = NULL; | 2539 | rth->fi = NULL; |
2581 | 2540 | ||
2582 | RT_CACHE_STAT_INC(out_slow_tot); | 2541 | RT_CACHE_STAT_INC(out_slow_tot); |
2583 | 2542 | ||
2584 | if (flags & RTCF_LOCAL) { | 2543 | if (flags & RTCF_LOCAL) |
2585 | rth->dst.input = ip_local_deliver; | 2544 | rth->dst.input = ip_local_deliver; |
2586 | rth->rt_spec_dst = fl4->daddr; | ||
2587 | } | ||
2588 | if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { | 2545 | if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { |
2589 | rth->rt_spec_dst = fl4->saddr; | ||
2590 | if (flags & RTCF_LOCAL && | 2546 | if (flags & RTCF_LOCAL && |
2591 | !(dev_out->flags & IFF_LOOPBACK)) { | 2547 | !(dev_out->flags & IFF_LOOPBACK)) { |
2592 | rth->dst.output = ip_mc_output; | 2548 | rth->dst.output = ip_mc_output; |
@@ -2605,6 +2561,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
2605 | 2561 | ||
2606 | rt_set_nexthop(rth, fl4, res, fi, type, 0); | 2562 | rt_set_nexthop(rth, fl4, res, fi, type, 0); |
2607 | 2563 | ||
2564 | if (fl4->flowi4_flags & FLOWI_FLAG_RT_NOCACHE) | ||
2565 | rth->dst.flags |= DST_NOCACHE; | ||
2566 | |||
2608 | return rth; | 2567 | return rth; |
2609 | } | 2568 | } |
2610 | 2569 | ||
@@ -2625,6 +2584,7 @@ static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) | |||
2625 | int orig_oif; | 2584 | int orig_oif; |
2626 | 2585 | ||
2627 | res.fi = NULL; | 2586 | res.fi = NULL; |
2587 | res.table = NULL; | ||
2628 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 2588 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
2629 | res.r = NULL; | 2589 | res.r = NULL; |
2630 | #endif | 2590 | #endif |
@@ -2730,6 +2690,7 @@ static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) | |||
2730 | 2690 | ||
2731 | if (fib_lookup(net, fl4, &res)) { | 2691 | if (fib_lookup(net, fl4, &res)) { |
2732 | res.fi = NULL; | 2692 | res.fi = NULL; |
2693 | res.table = NULL; | ||
2733 | if (fl4->flowi4_oif) { | 2694 | if (fl4->flowi4_oif) { |
2734 | /* Apparently, routing tables are wrong. Assume, | 2695 | /* Apparently, routing tables are wrong. Assume, |
2735 | that the destination is on link. | 2696 | that the destination is on link. |
@@ -2912,10 +2873,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or | |||
2912 | rt->rt_dst = ort->rt_dst; | 2873 | rt->rt_dst = ort->rt_dst; |
2913 | rt->rt_src = ort->rt_src; | 2874 | rt->rt_src = ort->rt_src; |
2914 | rt->rt_gateway = ort->rt_gateway; | 2875 | rt->rt_gateway = ort->rt_gateway; |
2915 | rt->rt_spec_dst = ort->rt_spec_dst; | 2876 | rt_transfer_peer(rt, ort); |
2916 | rt->peer = ort->peer; | ||
2917 | if (rt->peer) | ||
2918 | atomic_inc(&rt->peer->refcnt); | ||
2919 | rt->fi = ort->fi; | 2877 | rt->fi = ort->fi; |
2920 | if (rt->fi) | 2878 | if (rt->fi) |
2921 | atomic_inc(&rt->fi->fib_clntref); | 2879 | atomic_inc(&rt->fi->fib_clntref); |
@@ -2953,7 +2911,6 @@ static int rt_fill_info(struct net *net, | |||
2953 | struct rtmsg *r; | 2911 | struct rtmsg *r; |
2954 | struct nlmsghdr *nlh; | 2912 | struct nlmsghdr *nlh; |
2955 | unsigned long expires = 0; | 2913 | unsigned long expires = 0; |
2956 | const struct inet_peer *peer = rt->peer; | ||
2957 | u32 id = 0, ts = 0, tsage = 0, error; | 2914 | u32 id = 0, ts = 0, tsage = 0, error; |
2958 | 2915 | ||
2959 | nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); | 2916 | nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); |
@@ -2990,10 +2947,8 @@ static int rt_fill_info(struct net *net, | |||
2990 | nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid)) | 2947 | nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid)) |
2991 | goto nla_put_failure; | 2948 | goto nla_put_failure; |
2992 | #endif | 2949 | #endif |
2993 | if (rt_is_input_route(rt)) { | 2950 | if (!rt_is_input_route(rt) && |
2994 | if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_spec_dst)) | 2951 | rt->rt_src != rt->rt_key_src) { |
2995 | goto nla_put_failure; | ||
2996 | } else if (rt->rt_src != rt->rt_key_src) { | ||
2997 | if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_src)) | 2952 | if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_src)) |
2998 | goto nla_put_failure; | 2953 | goto nla_put_failure; |
2999 | } | 2954 | } |
@@ -3009,8 +2964,9 @@ static int rt_fill_info(struct net *net, | |||
3009 | goto nla_put_failure; | 2964 | goto nla_put_failure; |
3010 | 2965 | ||
3011 | error = rt->dst.error; | 2966 | error = rt->dst.error; |
3012 | if (peer) { | 2967 | if (rt_has_peer(rt)) { |
3013 | inet_peer_refcheck(rt->peer); | 2968 | const struct inet_peer *peer = rt_peer_ptr(rt); |
2969 | inet_peer_refcheck(peer); | ||
3014 | id = atomic_read(&peer->ip_id_count) & 0xffff; | 2970 | id = atomic_read(&peer->ip_id_count) & 0xffff; |
3015 | if (peer->tcp_ts_stamp) { | 2971 | if (peer->tcp_ts_stamp) { |
3016 | ts = peer->tcp_ts; | 2972 | ts = peer->tcp_ts; |
@@ -3400,6 +3356,30 @@ static __net_initdata struct pernet_operations rt_genid_ops = { | |||
3400 | .init = rt_genid_init, | 3356 | .init = rt_genid_init, |
3401 | }; | 3357 | }; |
3402 | 3358 | ||
3359 | static int __net_init ipv4_inetpeer_init(struct net *net) | ||
3360 | { | ||
3361 | struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL); | ||
3362 | |||
3363 | if (!bp) | ||
3364 | return -ENOMEM; | ||
3365 | inet_peer_base_init(bp); | ||
3366 | net->ipv4.peers = bp; | ||
3367 | return 0; | ||
3368 | } | ||
3369 | |||
3370 | static void __net_exit ipv4_inetpeer_exit(struct net *net) | ||
3371 | { | ||
3372 | struct inet_peer_base *bp = net->ipv4.peers; | ||
3373 | |||
3374 | net->ipv4.peers = NULL; | ||
3375 | inetpeer_invalidate_tree(bp); | ||
3376 | kfree(bp); | ||
3377 | } | ||
3378 | |||
3379 | static __net_initdata struct pernet_operations ipv4_inetpeer_ops = { | ||
3380 | .init = ipv4_inetpeer_init, | ||
3381 | .exit = ipv4_inetpeer_exit, | ||
3382 | }; | ||
3403 | 3383 | ||
3404 | #ifdef CONFIG_IP_ROUTE_CLASSID | 3384 | #ifdef CONFIG_IP_ROUTE_CLASSID |
3405 | struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; | 3385 | struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; |
@@ -3480,6 +3460,7 @@ int __init ip_rt_init(void) | |||
3480 | register_pernet_subsys(&sysctl_route_ops); | 3460 | register_pernet_subsys(&sysctl_route_ops); |
3481 | #endif | 3461 | #endif |
3482 | register_pernet_subsys(&rt_genid_ops); | 3462 | register_pernet_subsys(&rt_genid_ops); |
3463 | register_pernet_subsys(&ipv4_inetpeer_ops); | ||
3483 | return rc; | 3464 | return rc; |
3484 | } | 3465 | } |
3485 | 3466 | ||
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index ef32956ed655..12aa0c5867c4 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -301,6 +301,13 @@ static struct ctl_table ipv4_table[] = { | |||
301 | .proc_handler = proc_dointvec | 301 | .proc_handler = proc_dointvec |
302 | }, | 302 | }, |
303 | { | 303 | { |
304 | .procname = "ip_early_demux", | ||
305 | .data = &sysctl_ip_early_demux, | ||
306 | .maxlen = sizeof(int), | ||
307 | .mode = 0644, | ||
308 | .proc_handler = proc_dointvec | ||
309 | }, | ||
310 | { | ||
304 | .procname = "ip_dynaddr", | 311 | .procname = "ip_dynaddr", |
305 | .data = &sysctl_ip_dynaddr, | 312 | .data = &sysctl_ip_dynaddr, |
306 | .maxlen = sizeof(int), | 313 | .maxlen = sizeof(int), |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b224eb8bce8b..8416f8a68e65 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -5518,6 +5518,18 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
5518 | struct tcp_sock *tp = tcp_sk(sk); | 5518 | struct tcp_sock *tp = tcp_sk(sk); |
5519 | int res; | 5519 | int res; |
5520 | 5520 | ||
5521 | if (sk->sk_rx_dst) { | ||
5522 | struct dst_entry *dst = sk->sk_rx_dst; | ||
5523 | if (unlikely(dst->obsolete)) { | ||
5524 | if (dst->ops->check(dst, 0) == NULL) { | ||
5525 | dst_release(dst); | ||
5526 | sk->sk_rx_dst = NULL; | ||
5527 | } | ||
5528 | } | ||
5529 | } | ||
5530 | if (unlikely(sk->sk_rx_dst == NULL)) | ||
5531 | sk->sk_rx_dst = dst_clone(skb_dst(skb)); | ||
5532 | |||
5521 | /* | 5533 | /* |
5522 | * Header prediction. | 5534 | * Header prediction. |
5523 | * The code loosely follows the one in the famous | 5535 | * The code loosely follows the one in the famous |
@@ -5729,8 +5741,10 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) | |||
5729 | 5741 | ||
5730 | tcp_set_state(sk, TCP_ESTABLISHED); | 5742 | tcp_set_state(sk, TCP_ESTABLISHED); |
5731 | 5743 | ||
5732 | if (skb != NULL) | 5744 | if (skb != NULL) { |
5745 | sk->sk_rx_dst = dst_clone(skb_dst(skb)); | ||
5733 | security_inet_conn_established(sk, skb); | 5746 | security_inet_conn_established(sk, skb); |
5747 | } | ||
5734 | 5748 | ||
5735 | /* Make sure socket is routed, for correct metrics. */ | 5749 | /* Make sure socket is routed, for correct metrics. */ |
5736 | icsk->icsk_af_ops->rebuild_header(sk); | 5750 | icsk->icsk_af_ops->rebuild_header(sk); |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c8d28c433b2b..64568fa21d05 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -698,8 +698,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) | |||
698 | 698 | ||
699 | net = dev_net(skb_dst(skb)->dev); | 699 | net = dev_net(skb_dst(skb)->dev); |
700 | arg.tos = ip_hdr(skb)->tos; | 700 | arg.tos = ip_hdr(skb)->tos; |
701 | ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, | 701 | ip_send_unicast_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, |
702 | &arg, arg.iov[0].iov_len); | 702 | ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len); |
703 | 703 | ||
704 | TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); | 704 | TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); |
705 | TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); | 705 | TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); |
@@ -781,8 +781,8 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, | |||
781 | if (oif) | 781 | if (oif) |
782 | arg.bound_dev_if = oif; | 782 | arg.bound_dev_if = oif; |
783 | arg.tos = tos; | 783 | arg.tos = tos; |
784 | ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, | 784 | ip_send_unicast_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, |
785 | &arg, arg.iov[0].iov_len); | 785 | ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len); |
786 | 786 | ||
787 | TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); | 787 | TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); |
788 | } | 788 | } |
@@ -825,7 +825,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, | |||
825 | static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, | 825 | static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, |
826 | struct request_sock *req, | 826 | struct request_sock *req, |
827 | struct request_values *rvp, | 827 | struct request_values *rvp, |
828 | u16 queue_mapping) | 828 | u16 queue_mapping, |
829 | bool nocache) | ||
829 | { | 830 | { |
830 | const struct inet_request_sock *ireq = inet_rsk(req); | 831 | const struct inet_request_sock *ireq = inet_rsk(req); |
831 | struct flowi4 fl4; | 832 | struct flowi4 fl4; |
@@ -833,7 +834,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, | |||
833 | struct sk_buff * skb; | 834 | struct sk_buff * skb; |
834 | 835 | ||
835 | /* First, grab a route. */ | 836 | /* First, grab a route. */ |
836 | if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) | 837 | if (!dst && (dst = inet_csk_route_req(sk, &fl4, req, nocache)) == NULL) |
837 | return -1; | 838 | return -1; |
838 | 839 | ||
839 | skb = tcp_make_synack(sk, dst, req, rvp); | 840 | skb = tcp_make_synack(sk, dst, req, rvp); |
@@ -848,7 +849,6 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, | |||
848 | err = net_xmit_eval(err); | 849 | err = net_xmit_eval(err); |
849 | } | 850 | } |
850 | 851 | ||
851 | dst_release(dst); | ||
852 | return err; | 852 | return err; |
853 | } | 853 | } |
854 | 854 | ||
@@ -856,7 +856,7 @@ static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, | |||
856 | struct request_values *rvp) | 856 | struct request_values *rvp) |
857 | { | 857 | { |
858 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); | 858 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); |
859 | return tcp_v4_send_synack(sk, NULL, req, rvp, 0); | 859 | return tcp_v4_send_synack(sk, NULL, req, rvp, 0, false); |
860 | } | 860 | } |
861 | 861 | ||
862 | /* | 862 | /* |
@@ -1389,7 +1389,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1389 | */ | 1389 | */ |
1390 | if (tmp_opt.saw_tstamp && | 1390 | if (tmp_opt.saw_tstamp && |
1391 | tcp_death_row.sysctl_tw_recycle && | 1391 | tcp_death_row.sysctl_tw_recycle && |
1392 | (dst = inet_csk_route_req(sk, &fl4, req)) != NULL && | 1392 | (dst = inet_csk_route_req(sk, &fl4, req, want_cookie)) != NULL && |
1393 | fl4.daddr == saddr && | 1393 | fl4.daddr == saddr && |
1394 | (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) { | 1394 | (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) { |
1395 | inet_peer_refcheck(peer); | 1395 | inet_peer_refcheck(peer); |
@@ -1425,7 +1425,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1425 | 1425 | ||
1426 | if (tcp_v4_send_synack(sk, dst, req, | 1426 | if (tcp_v4_send_synack(sk, dst, req, |
1427 | (struct request_values *)&tmp_ext, | 1427 | (struct request_values *)&tmp_ext, |
1428 | skb_get_queue_mapping(skb)) || | 1428 | skb_get_queue_mapping(skb), |
1429 | want_cookie) || | ||
1429 | want_cookie) | 1430 | want_cookie) |
1430 | goto drop_and_free; | 1431 | goto drop_and_free; |
1431 | 1432 | ||
@@ -1672,6 +1673,51 @@ csum_err: | |||
1672 | } | 1673 | } |
1673 | EXPORT_SYMBOL(tcp_v4_do_rcv); | 1674 | EXPORT_SYMBOL(tcp_v4_do_rcv); |
1674 | 1675 | ||
1676 | void tcp_v4_early_demux(struct sk_buff *skb) | ||
1677 | { | ||
1678 | struct net *net = dev_net(skb->dev); | ||
1679 | const struct iphdr *iph; | ||
1680 | const struct tcphdr *th; | ||
1681 | struct net_device *dev; | ||
1682 | struct sock *sk; | ||
1683 | |||
1684 | if (skb->pkt_type != PACKET_HOST) | ||
1685 | return; | ||
1686 | |||
1687 | if (!pskb_may_pull(skb, ip_hdrlen(skb) + sizeof(struct tcphdr))) | ||
1688 | return; | ||
1689 | |||
1690 | iph = ip_hdr(skb); | ||
1691 | th = (struct tcphdr *) ((char *)iph + ip_hdrlen(skb)); | ||
1692 | |||
1693 | if (th->doff < sizeof(struct tcphdr) / 4) | ||
1694 | return; | ||
1695 | |||
1696 | if (!pskb_may_pull(skb, ip_hdrlen(skb) + th->doff * 4)) | ||
1697 | return; | ||
1698 | |||
1699 | dev = skb->dev; | ||
1700 | sk = __inet_lookup_established(net, &tcp_hashinfo, | ||
1701 | iph->saddr, th->source, | ||
1702 | iph->daddr, ntohs(th->dest), | ||
1703 | dev->ifindex); | ||
1704 | if (sk) { | ||
1705 | skb->sk = sk; | ||
1706 | skb->destructor = sock_edemux; | ||
1707 | if (sk->sk_state != TCP_TIME_WAIT) { | ||
1708 | struct dst_entry *dst = sk->sk_rx_dst; | ||
1709 | if (dst) | ||
1710 | dst = dst_check(dst, 0); | ||
1711 | if (dst) { | ||
1712 | struct rtable *rt = (struct rtable *) dst; | ||
1713 | |||
1714 | if (rt->rt_iif == dev->ifindex) | ||
1715 | skb_dst_set_noref(skb, dst); | ||
1716 | } | ||
1717 | } | ||
1718 | } | ||
1719 | } | ||
1720 | |||
1675 | /* | 1721 | /* |
1676 | * From tcp_input.c | 1722 | * From tcp_input.c |
1677 | */ | 1723 | */ |
@@ -1821,40 +1867,25 @@ do_time_wait: | |||
1821 | goto discard_it; | 1867 | goto discard_it; |
1822 | } | 1868 | } |
1823 | 1869 | ||
1824 | struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it) | 1870 | struct inet_peer *tcp_v4_get_peer(struct sock *sk) |
1825 | { | 1871 | { |
1826 | struct rtable *rt = (struct rtable *) __sk_dst_get(sk); | 1872 | struct rtable *rt = (struct rtable *) __sk_dst_get(sk); |
1827 | struct inet_sock *inet = inet_sk(sk); | 1873 | struct inet_sock *inet = inet_sk(sk); |
1828 | struct inet_peer *peer; | ||
1829 | 1874 | ||
1830 | if (!rt || | 1875 | /* If we don't have a valid cached route, or we're doing IP |
1831 | inet->cork.fl.u.ip4.daddr != inet->inet_daddr) { | 1876 | * options which make the IPv4 header destination address |
1832 | peer = inet_getpeer_v4(inet->inet_daddr, 1); | 1877 | * different from our peer's, do not bother with this. |
1833 | *release_it = true; | 1878 | */ |
1834 | } else { | 1879 | if (!rt || inet->cork.fl.u.ip4.daddr != inet->inet_daddr) |
1835 | if (!rt->peer) | 1880 | return NULL; |
1836 | rt_bind_peer(rt, inet->inet_daddr, 1); | 1881 | return rt_get_peer_create(rt, inet->inet_daddr); |
1837 | peer = rt->peer; | ||
1838 | *release_it = false; | ||
1839 | } | ||
1840 | |||
1841 | return peer; | ||
1842 | } | 1882 | } |
1843 | EXPORT_SYMBOL(tcp_v4_get_peer); | 1883 | EXPORT_SYMBOL(tcp_v4_get_peer); |
1844 | 1884 | ||
1845 | void *tcp_v4_tw_get_peer(struct sock *sk) | ||
1846 | { | ||
1847 | const struct inet_timewait_sock *tw = inet_twsk(sk); | ||
1848 | |||
1849 | return inet_getpeer_v4(tw->tw_daddr, 1); | ||
1850 | } | ||
1851 | EXPORT_SYMBOL(tcp_v4_tw_get_peer); | ||
1852 | |||
1853 | static struct timewait_sock_ops tcp_timewait_sock_ops = { | 1885 | static struct timewait_sock_ops tcp_timewait_sock_ops = { |
1854 | .twsk_obj_size = sizeof(struct tcp_timewait_sock), | 1886 | .twsk_obj_size = sizeof(struct tcp_timewait_sock), |
1855 | .twsk_unique = tcp_twsk_unique, | 1887 | .twsk_unique = tcp_twsk_unique, |
1856 | .twsk_destructor= tcp_twsk_destructor, | 1888 | .twsk_destructor= tcp_twsk_destructor, |
1857 | .twsk_getpeer = tcp_v4_tw_get_peer, | ||
1858 | }; | 1889 | }; |
1859 | 1890 | ||
1860 | const struct inet_connection_sock_af_ops ipv4_specific = { | 1891 | const struct inet_connection_sock_af_ops ipv4_specific = { |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b85d9fe7d663..72b7c63b1a39 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -60,9 +60,8 @@ static bool tcp_remember_stamp(struct sock *sk) | |||
60 | const struct inet_connection_sock *icsk = inet_csk(sk); | 60 | const struct inet_connection_sock *icsk = inet_csk(sk); |
61 | struct tcp_sock *tp = tcp_sk(sk); | 61 | struct tcp_sock *tp = tcp_sk(sk); |
62 | struct inet_peer *peer; | 62 | struct inet_peer *peer; |
63 | bool release_it; | ||
64 | 63 | ||
65 | peer = icsk->icsk_af_ops->get_peer(sk, &release_it); | 64 | peer = icsk->icsk_af_ops->get_peer(sk); |
66 | if (peer) { | 65 | if (peer) { |
67 | if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || | 66 | if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || |
68 | ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && | 67 | ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && |
@@ -70,8 +69,6 @@ static bool tcp_remember_stamp(struct sock *sk) | |||
70 | peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; | 69 | peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; |
71 | peer->tcp_ts = tp->rx_opt.ts_recent; | 70 | peer->tcp_ts = tp->rx_opt.ts_recent; |
72 | } | 71 | } |
73 | if (release_it) | ||
74 | inet_putpeer(peer); | ||
75 | return true; | 72 | return true; |
76 | } | 73 | } |
77 | 74 | ||
@@ -80,20 +77,19 @@ static bool tcp_remember_stamp(struct sock *sk) | |||
80 | 77 | ||
81 | static bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw) | 78 | static bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw) |
82 | { | 79 | { |
80 | const struct tcp_timewait_sock *tcptw; | ||
83 | struct sock *sk = (struct sock *) tw; | 81 | struct sock *sk = (struct sock *) tw; |
84 | struct inet_peer *peer; | 82 | struct inet_peer *peer; |
85 | 83 | ||
86 | peer = twsk_getpeer(sk); | 84 | tcptw = tcp_twsk(sk); |
85 | peer = tcptw->tw_peer; | ||
87 | if (peer) { | 86 | if (peer) { |
88 | const struct tcp_timewait_sock *tcptw = tcp_twsk(sk); | ||
89 | |||
90 | if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || | 87 | if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || |
91 | ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && | 88 | ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && |
92 | peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { | 89 | peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { |
93 | peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; | 90 | peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; |
94 | peer->tcp_ts = tcptw->tw_ts_recent; | 91 | peer->tcp_ts = tcptw->tw_ts_recent; |
95 | } | 92 | } |
96 | inet_putpeer(peer); | ||
97 | return true; | 93 | return true; |
98 | } | 94 | } |
99 | return false; | 95 | return false; |
@@ -317,9 +313,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) | |||
317 | const struct inet_connection_sock *icsk = inet_csk(sk); | 313 | const struct inet_connection_sock *icsk = inet_csk(sk); |
318 | const struct tcp_sock *tp = tcp_sk(sk); | 314 | const struct tcp_sock *tp = tcp_sk(sk); |
319 | bool recycle_ok = false; | 315 | bool recycle_ok = false; |
316 | bool recycle_on = false; | ||
320 | 317 | ||
321 | if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) | 318 | if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) { |
322 | recycle_ok = tcp_remember_stamp(sk); | 319 | recycle_ok = tcp_remember_stamp(sk); |
320 | recycle_on = true; | ||
321 | } | ||
323 | 322 | ||
324 | if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) | 323 | if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) |
325 | tw = inet_twsk_alloc(sk, state); | 324 | tw = inet_twsk_alloc(sk, state); |
@@ -327,8 +326,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) | |||
327 | if (tw != NULL) { | 326 | if (tw != NULL) { |
328 | struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); | 327 | struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); |
329 | const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); | 328 | const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); |
329 | struct inet_sock *inet = inet_sk(sk); | ||
330 | struct inet_peer *peer = NULL; | ||
330 | 331 | ||
331 | tw->tw_transparent = inet_sk(sk)->transparent; | 332 | tw->tw_transparent = inet->transparent; |
332 | tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; | 333 | tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; |
333 | tcptw->tw_rcv_nxt = tp->rcv_nxt; | 334 | tcptw->tw_rcv_nxt = tp->rcv_nxt; |
334 | tcptw->tw_snd_nxt = tp->snd_nxt; | 335 | tcptw->tw_snd_nxt = tp->snd_nxt; |
@@ -350,6 +351,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) | |||
350 | } | 351 | } |
351 | #endif | 352 | #endif |
352 | 353 | ||
354 | if (recycle_on) | ||
355 | peer = icsk->icsk_af_ops->get_peer(sk); | ||
356 | tcptw->tw_peer = peer; | ||
357 | if (peer) | ||
358 | atomic_inc(&peer->refcnt); | ||
359 | |||
353 | #ifdef CONFIG_TCP_MD5SIG | 360 | #ifdef CONFIG_TCP_MD5SIG |
354 | /* | 361 | /* |
355 | * The timewait bucket does not have the key DB from the | 362 | * The timewait bucket does not have the key DB from the |
@@ -401,8 +408,11 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) | |||
401 | 408 | ||
402 | void tcp_twsk_destructor(struct sock *sk) | 409 | void tcp_twsk_destructor(struct sock *sk) |
403 | { | 410 | { |
404 | #ifdef CONFIG_TCP_MD5SIG | ||
405 | struct tcp_timewait_sock *twsk = tcp_twsk(sk); | 411 | struct tcp_timewait_sock *twsk = tcp_twsk(sk); |
412 | |||
413 | if (twsk->tw_peer) | ||
414 | inet_putpeer(twsk->tw_peer); | ||
415 | #ifdef CONFIG_TCP_MD5SIG | ||
406 | if (twsk->tw_md5_key) { | 416 | if (twsk->tw_md5_key) { |
407 | tcp_free_md5sig_pool(); | 417 | tcp_free_md5sig_pool(); |
408 | kfree_rcu(twsk->tw_md5_key, rcu); | 418 | kfree_rcu(twsk->tw_md5_key, rcu); |
@@ -435,6 +445,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
435 | struct tcp_sock *oldtp = tcp_sk(sk); | 445 | struct tcp_sock *oldtp = tcp_sk(sk); |
436 | struct tcp_cookie_values *oldcvp = oldtp->cookie_values; | 446 | struct tcp_cookie_values *oldcvp = oldtp->cookie_values; |
437 | 447 | ||
448 | newsk->sk_rx_dst = dst_clone(skb_dst(skb)); | ||
449 | |||
438 | /* TCP Cookie Transactions require space for the cookie pair, | 450 | /* TCP Cookie Transactions require space for the cookie pair, |
439 | * as it differs for each connection. There is no need to | 451 | * as it differs for each connection. There is no need to |
440 | * copy any s_data_payload stored at the original socket. | 452 | * copy any s_data_payload stored at the original socket. |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 803cbfe82fbc..c465d3e51e28 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -2442,7 +2442,16 @@ int tcp_send_synack(struct sock *sk) | |||
2442 | return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); | 2442 | return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); |
2443 | } | 2443 | } |
2444 | 2444 | ||
2445 | /* Prepare a SYN-ACK. */ | 2445 | /** |
2446 | * tcp_make_synack - Prepare a SYN-ACK. | ||
2447 | * @sk: listener socket | ||
2448 | * @dst: dst entry attached to the SYNACK | ||
2449 | * @req: request_sock pointer | ||
2450 | * @rvp: request_values pointer | ||
2451 | * | ||
2452 | * Allocate one skb and build a SYNACK packet. | ||
2453 | * @dst is consumed : Caller should not use it again. | ||
2454 | */ | ||
2446 | struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | 2455 | struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, |
2447 | struct request_sock *req, | 2456 | struct request_sock *req, |
2448 | struct request_values *rvp) | 2457 | struct request_values *rvp) |
@@ -2461,14 +2470,15 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2461 | 2470 | ||
2462 | if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) | 2471 | if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) |
2463 | s_data_desired = cvp->s_data_desired; | 2472 | s_data_desired = cvp->s_data_desired; |
2464 | skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15 + s_data_desired, 1, GFP_ATOMIC); | 2473 | skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, GFP_ATOMIC); |
2465 | if (skb == NULL) | 2474 | if (unlikely(!skb)) { |
2475 | dst_release(dst); | ||
2466 | return NULL; | 2476 | return NULL; |
2467 | 2477 | } | |
2468 | /* Reserve space for headers. */ | 2478 | /* Reserve space for headers. */ |
2469 | skb_reserve(skb, MAX_TCP_HEADER); | 2479 | skb_reserve(skb, MAX_TCP_HEADER); |
2470 | 2480 | ||
2471 | skb_dst_set(skb, dst_clone(dst)); | 2481 | skb_dst_set(skb, dst); |
2472 | 2482 | ||
2473 | mss = dst_metric_advmss(dst); | 2483 | mss = dst_metric_advmss(dst); |
2474 | if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) | 2484 | if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index eaca73644e79..ee37d47d472e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -108,6 +108,7 @@ | |||
108 | #include <net/xfrm.h> | 108 | #include <net/xfrm.h> |
109 | #include <trace/events/udp.h> | 109 | #include <trace/events/udp.h> |
110 | #include <linux/static_key.h> | 110 | #include <linux/static_key.h> |
111 | #include <trace/events/skb.h> | ||
111 | #include "udp_impl.h" | 112 | #include "udp_impl.h" |
112 | 113 | ||
113 | struct udp_table udp_table __read_mostly; | 114 | struct udp_table udp_table __read_mostly; |
@@ -615,6 +616,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) | |||
615 | break; | 616 | break; |
616 | case ICMP_DEST_UNREACH: | 617 | case ICMP_DEST_UNREACH: |
617 | if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ | 618 | if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ |
619 | ipv4_sk_update_pmtu(skb, sk, info); | ||
618 | if (inet->pmtudisc != IP_PMTUDISC_DONT) { | 620 | if (inet->pmtudisc != IP_PMTUDISC_DONT) { |
619 | err = EMSGSIZE; | 621 | err = EMSGSIZE; |
620 | harderr = 1; | 622 | harderr = 1; |
@@ -1219,8 +1221,10 @@ try_again: | |||
1219 | goto csum_copy_err; | 1221 | goto csum_copy_err; |
1220 | } | 1222 | } |
1221 | 1223 | ||
1222 | if (err) | 1224 | if (unlikely(err)) { |
1225 | trace_kfree_skb(skb, udp_recvmsg); | ||
1223 | goto out_free; | 1226 | goto out_free; |
1227 | } | ||
1224 | 1228 | ||
1225 | if (!peeked) | 1229 | if (!peeked) |
1226 | UDP_INC_STATS_USER(sock_net(sk), | 1230 | UDP_INC_STATS_USER(sock_net(sk), |
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 0d3426cb5c4f..9815ea0bca7f 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
@@ -90,9 +90,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, | |||
90 | xdst->u.dst.dev = dev; | 90 | xdst->u.dst.dev = dev; |
91 | dev_hold(dev); | 91 | dev_hold(dev); |
92 | 92 | ||
93 | xdst->u.rt.peer = rt->peer; | 93 | rt_transfer_peer(&xdst->u.rt, rt); |
94 | if (rt->peer) | ||
95 | atomic_inc(&rt->peer->refcnt); | ||
96 | 94 | ||
97 | /* Sheit... I remember I did this right. Apparently, | 95 | /* Sheit... I remember I did this right. Apparently, |
98 | * it was magically lost, so this code needs audit */ | 96 | * it was magically lost, so this code needs audit */ |
@@ -102,7 +100,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, | |||
102 | xdst->u.rt.rt_src = rt->rt_src; | 100 | xdst->u.rt.rt_src = rt->rt_src; |
103 | xdst->u.rt.rt_dst = rt->rt_dst; | 101 | xdst->u.rt.rt_dst = rt->rt_dst; |
104 | xdst->u.rt.rt_gateway = rt->rt_gateway; | 102 | xdst->u.rt.rt_gateway = rt->rt_gateway; |
105 | xdst->u.rt.rt_spec_dst = rt->rt_spec_dst; | ||
106 | 103 | ||
107 | return 0; | 104 | return 0; |
108 | } | 105 | } |
@@ -212,8 +209,10 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) | |||
212 | 209 | ||
213 | dst_destroy_metrics_generic(dst); | 210 | dst_destroy_metrics_generic(dst); |
214 | 211 | ||
215 | if (likely(xdst->u.rt.peer)) | 212 | if (rt_has_peer(&xdst->u.rt)) { |
216 | inet_putpeer(xdst->u.rt.peer); | 213 | struct inet_peer *peer = rt_peer_ptr(&xdst->u.rt); |
214 | inet_putpeer(peer); | ||
215 | } | ||
217 | 216 | ||
218 | xfrm_dst_destroy(xdst); | 217 | xfrm_dst_destroy(xdst); |
219 | } | 218 | } |