about | summary | refs | log | tree | commit | diff | stats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/af_inet.c | 36
-rw-r--r-- net/ipv4/fib_frontend.c | 2
-rw-r--r-- net/ipv4/fib_semantics.c | 3
-rw-r--r-- net/ipv4/inet_connection_sock.c | 8
-rw-r--r-- net/ipv4/ip_forward.c | 54
-rw-r--r-- net/ipv4/ip_fragment.c | 5
-rw-r--r-- net/ipv4/ip_output.c | 67
-rw-r--r-- net/ipv4/ip_tunnel.c | 23
-rw-r--r-- net/ipv4/ip_tunnel_core.c | 4
-rw-r--r-- net/ipv4/ip_vti.c | 5
-rw-r--r-- net/ipv4/ipmr.c | 2
-rw-r--r-- net/ipv4/netfilter/ipt_rpfilter.c | 5
-rw-r--r-- net/ipv4/netfilter/nf_defrag_ipv4.c | 5
-rw-r--r-- net/ipv4/ping.c | 21
-rw-r--r-- net/ipv4/route.c | 11
-rw-r--r-- net/ipv4/sysctl_net_ipv4.c | 42
-rw-r--r-- net/ipv4/tcp_cubic.c | 2
-rw-r--r-- net/ipv4/tcp_output.c | 16
-rw-r--r-- net/ipv4/xfrm4_output.c | 34
-rw-r--r-- net/ipv4/xfrm4_protocol.c | 19
20 files changed, 206 insertions, 158 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8c54870db792..6d6dd345bc4d 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1650,6 +1650,39 @@ static int __init init_ipv4_mibs(void)
1650 return register_pernet_subsys(&ipv4_mib_ops); 1650 return register_pernet_subsys(&ipv4_mib_ops);
1651} 1651}
1652 1652
1653static __net_init int inet_init_net(struct net *net)
1654{
1655 /*
1656 * Set defaults for local port range
1657 */
1658 seqlock_init(&net->ipv4.ip_local_ports.lock);
1659 net->ipv4.ip_local_ports.range[0] = 32768;
1660 net->ipv4.ip_local_ports.range[1] = 61000;
1661
1662 seqlock_init(&net->ipv4.ping_group_range.lock);
1663 /*
1664 * Sane defaults - nobody may create ping sockets.
1665 * Boot scripts should set this to distro-specific group.
1666 */
1667 net->ipv4.ping_group_range.range[0] = make_kgid(&init_user_ns, 1);
1668 net->ipv4.ping_group_range.range[1] = make_kgid(&init_user_ns, 0);
1669 return 0;
1670}
1671
1672static __net_exit void inet_exit_net(struct net *net)
1673{
1674}
1675
1676static __net_initdata struct pernet_operations af_inet_ops = {
1677 .init = inet_init_net,
1678 .exit = inet_exit_net,
1679};
1680
1681static int __init init_inet_pernet_ops(void)
1682{
1683 return register_pernet_subsys(&af_inet_ops);
1684}
1685
1653static int ipv4_proc_init(void); 1686static int ipv4_proc_init(void);
1654 1687
1655/* 1688/*
@@ -1794,6 +1827,9 @@ static int __init inet_init(void)
1794 if (ip_mr_init()) 1827 if (ip_mr_init())
1795 pr_crit("%s: Cannot init ipv4 mroute\n", __func__); 1828 pr_crit("%s: Cannot init ipv4 mroute\n", __func__);
1796#endif 1829#endif
1830
1831 if (init_inet_pernet_ops())
1832 pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__);
1797 /* 1833 /*
1798 * Initialise per-cpu ipv4 mibs 1834 * Initialise per-cpu ipv4 mibs
1799 */ 1835 */
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 1a629f870274..255aa9946fe7 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -250,7 +250,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
250 bool dev_match; 250 bool dev_match;
251 251
252 fl4.flowi4_oif = 0; 252 fl4.flowi4_oif = 0;
253 fl4.flowi4_iif = oif; 253 fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
254 fl4.daddr = src; 254 fl4.daddr = src;
255 fl4.saddr = dst; 255 fl4.saddr = dst;
256 fl4.flowi4_tos = tos; 256 fl4.flowi4_tos = tos;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index b53f0bf84dca..b10cd43a4722 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -631,6 +631,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
631 .daddr = nh->nh_gw, 631 .daddr = nh->nh_gw,
632 .flowi4_scope = cfg->fc_scope + 1, 632 .flowi4_scope = cfg->fc_scope + 1,
633 .flowi4_oif = nh->nh_oif, 633 .flowi4_oif = nh->nh_oif,
634 .flowi4_iif = LOOPBACK_IFINDEX,
634 }; 635 };
635 636
636 /* It is not necessary, but requires a bit of thinking */ 637 /* It is not necessary, but requires a bit of thinking */
@@ -820,13 +821,13 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
820 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); 821 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
821 if (fi == NULL) 822 if (fi == NULL)
822 goto failure; 823 goto failure;
824 fib_info_cnt++;
823 if (cfg->fc_mx) { 825 if (cfg->fc_mx) {
824 fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); 826 fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
825 if (!fi->fib_metrics) 827 if (!fi->fib_metrics)
826 goto failure; 828 goto failure;
827 } else 829 } else
828 fi->fib_metrics = (u32 *) dst_default_metrics; 830 fi->fib_metrics = (u32 *) dst_default_metrics;
829 fib_info_cnt++;
830 831
831 fi->fib_net = hold_net(net); 832 fi->fib_net = hold_net(net);
832 fi->fib_protocol = cfg->fc_protocol; 833 fi->fib_protocol = cfg->fc_protocol;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 0d1e2cb877ec..a56b8e6e866a 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -37,11 +37,11 @@ void inet_get_local_port_range(struct net *net, int *low, int *high)
37 unsigned int seq; 37 unsigned int seq;
38 38
39 do { 39 do {
40 seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); 40 seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);
41 41
42 *low = net->ipv4.sysctl_local_ports.range[0]; 42 *low = net->ipv4.ip_local_ports.range[0];
43 *high = net->ipv4.sysctl_local_ports.range[1]; 43 *high = net->ipv4.ip_local_ports.range[1];
44 } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); 44 } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
45} 45}
46EXPORT_SYMBOL(inet_get_local_port_range); 46EXPORT_SYMBOL(inet_get_local_port_range);
47 47
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index be8abe73bb9f..6f111e48e11c 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -42,12 +42,12 @@
42static bool ip_may_fragment(const struct sk_buff *skb) 42static bool ip_may_fragment(const struct sk_buff *skb)
43{ 43{
44 return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) || 44 return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) ||
45 !skb->local_df; 45 skb->local_df;
46} 46}
47 47
48static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) 48static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
49{ 49{
50 if (skb->len <= mtu || skb->local_df) 50 if (skb->len <= mtu)
51 return false; 51 return false;
52 52
53 if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) 53 if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
@@ -56,53 +56,6 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
56 return true; 56 return true;
57} 57}
58 58
59static bool ip_gso_exceeds_dst_mtu(const struct sk_buff *skb)
60{
61 unsigned int mtu;
62
63 if (skb->local_df || !skb_is_gso(skb))
64 return false;
65
66 mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true);
67
68 /* if seglen > mtu, do software segmentation for IP fragmentation on
69 * output. DF bit cannot be set since ip_forward would have sent
70 * icmp error.
71 */
72 return skb_gso_network_seglen(skb) > mtu;
73}
74
75/* called if GSO skb needs to be fragmented on forward */
76static int ip_forward_finish_gso(struct sk_buff *skb)
77{
78 struct dst_entry *dst = skb_dst(skb);
79 netdev_features_t features;
80 struct sk_buff *segs;
81 int ret = 0;
82
83 features = netif_skb_dev_features(skb, dst->dev);
84 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
85 if (IS_ERR(segs)) {
86 kfree_skb(skb);
87 return -ENOMEM;
88 }
89
90 consume_skb(skb);
91
92 do {
93 struct sk_buff *nskb = segs->next;
94 int err;
95
96 segs->next = NULL;
97 err = dst_output(segs);
98
99 if (err && ret == 0)
100 ret = err;
101 segs = nskb;
102 } while (segs);
103
104 return ret;
105}
106 59
107static int ip_forward_finish(struct sk_buff *skb) 60static int ip_forward_finish(struct sk_buff *skb)
108{ 61{
@@ -114,9 +67,6 @@ static int ip_forward_finish(struct sk_buff *skb)
114 if (unlikely(opt->optlen)) 67 if (unlikely(opt->optlen))
115 ip_forward_options(skb); 68 ip_forward_options(skb);
116 69
117 if (ip_gso_exceeds_dst_mtu(skb))
118 return ip_forward_finish_gso(skb);
119
120 return dst_output(skb); 70 return dst_output(skb);
121} 71}
122 72
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index c10a3ce5cbff..ed32313e307c 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -232,8 +232,9 @@ static void ip_expire(unsigned long arg)
232 * "Fragment Reassembly Timeout" message, per RFC792. 232 * "Fragment Reassembly Timeout" message, per RFC792.
233 */ 233 */
234 if (qp->user == IP_DEFRAG_AF_PACKET || 234 if (qp->user == IP_DEFRAG_AF_PACKET ||
235 (qp->user == IP_DEFRAG_CONNTRACK_IN && 235 ((qp->user >= IP_DEFRAG_CONNTRACK_IN) &&
236 skb_rtable(head)->rt_type != RTN_LOCAL)) 236 (qp->user <= __IP_DEFRAG_CONNTRACK_IN_END) &&
237 (skb_rtable(head)->rt_type != RTN_LOCAL)))
237 goto out_rcu_unlock; 238 goto out_rcu_unlock;
238 239
239 240
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 1a0755fea491..a52f50187b54 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -101,17 +101,17 @@ int __ip_local_out(struct sk_buff *skb)
101 skb_dst(skb)->dev, dst_output); 101 skb_dst(skb)->dev, dst_output);
102} 102}
103 103
104int ip_local_out(struct sk_buff *skb) 104int ip_local_out_sk(struct sock *sk, struct sk_buff *skb)
105{ 105{
106 int err; 106 int err;
107 107
108 err = __ip_local_out(skb); 108 err = __ip_local_out(skb);
109 if (likely(err == 1)) 109 if (likely(err == 1))
110 err = dst_output(skb); 110 err = dst_output_sk(sk, skb);
111 111
112 return err; 112 return err;
113} 113}
114EXPORT_SYMBOL_GPL(ip_local_out); 114EXPORT_SYMBOL_GPL(ip_local_out_sk);
115 115
116static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) 116static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
117{ 117{
@@ -211,6 +211,48 @@ static inline int ip_finish_output2(struct sk_buff *skb)
211 return -EINVAL; 211 return -EINVAL;
212} 212}
213 213
214static int ip_finish_output_gso(struct sk_buff *skb)
215{
216 netdev_features_t features;
217 struct sk_buff *segs;
218 int ret = 0;
219
220 /* common case: locally created skb or seglen is <= mtu */
221 if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) ||
222 skb_gso_network_seglen(skb) <= ip_skb_dst_mtu(skb))
223 return ip_finish_output2(skb);
224
225 /* Slowpath - GSO segment length is exceeding the dst MTU.
226 *
227 * This can happen in two cases:
228 * 1) TCP GRO packet, DF bit not set
229 * 2) skb arrived via virtio-net, we thus get TSO/GSO skbs directly
230 * from host network stack.
231 */
232 features = netif_skb_features(skb);
233 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
234 if (IS_ERR(segs)) {
235 kfree_skb(skb);
236 return -ENOMEM;
237 }
238
239 consume_skb(skb);
240
241 do {
242 struct sk_buff *nskb = segs->next;
243 int err;
244
245 segs->next = NULL;
246 err = ip_fragment(segs, ip_finish_output2);
247
248 if (err && ret == 0)
249 ret = err;
250 segs = nskb;
251 } while (segs);
252
253 return ret;
254}
255
214static int ip_finish_output(struct sk_buff *skb) 256static int ip_finish_output(struct sk_buff *skb)
215{ 257{
216#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) 258#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
@@ -220,15 +262,17 @@ static int ip_finish_output(struct sk_buff *skb)
220 return dst_output(skb); 262 return dst_output(skb);
221 } 263 }
222#endif 264#endif
223 if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb)) 265 if (skb_is_gso(skb))
266 return ip_finish_output_gso(skb);
267
268 if (skb->len > ip_skb_dst_mtu(skb))
224 return ip_fragment(skb, ip_finish_output2); 269 return ip_fragment(skb, ip_finish_output2);
225 else 270
226 return ip_finish_output2(skb); 271 return ip_finish_output2(skb);
227} 272}
228 273
229int ip_mc_output(struct sk_buff *skb) 274int ip_mc_output(struct sock *sk, struct sk_buff *skb)
230{ 275{
231 struct sock *sk = skb->sk;
232 struct rtable *rt = skb_rtable(skb); 276 struct rtable *rt = skb_rtable(skb);
233 struct net_device *dev = rt->dst.dev; 277 struct net_device *dev = rt->dst.dev;
234 278
@@ -287,7 +331,7 @@ int ip_mc_output(struct sk_buff *skb)
287 !(IPCB(skb)->flags & IPSKB_REROUTED)); 331 !(IPCB(skb)->flags & IPSKB_REROUTED));
288} 332}
289 333
290int ip_output(struct sk_buff *skb) 334int ip_output(struct sock *sk, struct sk_buff *skb)
291{ 335{
292 struct net_device *dev = skb_dst(skb)->dev; 336 struct net_device *dev = skb_dst(skb)->dev;
293 337
@@ -315,9 +359,9 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4)
315 sizeof(fl4->saddr) + sizeof(fl4->daddr)); 359 sizeof(fl4->saddr) + sizeof(fl4->daddr));
316} 360}
317 361
318int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl) 362/* Note: skb->sk can be different from sk, in case of tunnels */
363int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
319{ 364{
320 struct sock *sk = skb->sk;
321 struct inet_sock *inet = inet_sk(sk); 365 struct inet_sock *inet = inet_sk(sk);
322 struct ip_options_rcu *inet_opt; 366 struct ip_options_rcu *inet_opt;
323 struct flowi4 *fl4; 367 struct flowi4 *fl4;
@@ -389,6 +433,7 @@ packet_routed:
389 ip_select_ident_more(skb, &rt->dst, sk, 433 ip_select_ident_more(skb, &rt->dst, sk,
390 (skb_shinfo(skb)->gso_segs ?: 1) - 1); 434 (skb_shinfo(skb)->gso_segs ?: 1) - 1);
391 435
436 /* TODO : should we use skb->sk here instead of sk ? */
392 skb->priority = sk->sk_priority; 437 skb->priority = sk->sk_priority;
393 skb->mark = sk->sk_mark; 438 skb->mark = sk->sk_mark;
394 439
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index e77381d1df9a..2acc2337d38b 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -442,6 +442,8 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
442 tunnel->i_seqno = ntohl(tpi->seq) + 1; 442 tunnel->i_seqno = ntohl(tpi->seq) + 1;
443 } 443 }
444 444
445 skb_reset_network_header(skb);
446
445 err = IP_ECN_decapsulate(iph, skb); 447 err = IP_ECN_decapsulate(iph, skb);
446 if (unlikely(err)) { 448 if (unlikely(err)) {
447 if (log_ecn_error) 449 if (log_ecn_error)
@@ -538,9 +540,10 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
538 unsigned int max_headroom; /* The extra header space needed */ 540 unsigned int max_headroom; /* The extra header space needed */
539 __be32 dst; 541 __be32 dst;
540 int err; 542 int err;
541 bool connected = true; 543 bool connected;
542 544
543 inner_iph = (const struct iphdr *)skb_inner_network_header(skb); 545 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
546 connected = (tunnel->parms.iph.daddr != 0);
544 547
545 dst = tnl_params->daddr; 548 dst = tnl_params->daddr;
546 if (dst == 0) { 549 if (dst == 0) {
@@ -670,7 +673,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
670 return; 673 return;
671 } 674 }
672 675
673 err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol, 676 err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
674 tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); 677 tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
675 iptunnel_xmit_stats(err, &dev->stats, dev->tstats); 678 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
676 679
@@ -722,19 +725,18 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
722int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) 725int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
723{ 726{
724 int err = 0; 727 int err = 0;
725 struct ip_tunnel *t; 728 struct ip_tunnel *t = netdev_priv(dev);
726 struct net *net = dev_net(dev); 729 struct net *net = t->net;
727 struct ip_tunnel *tunnel = netdev_priv(dev); 730 struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
728 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
729 731
730 BUG_ON(!itn->fb_tunnel_dev); 732 BUG_ON(!itn->fb_tunnel_dev);
731 switch (cmd) { 733 switch (cmd) {
732 case SIOCGETTUNNEL: 734 case SIOCGETTUNNEL:
733 t = NULL; 735 if (dev == itn->fb_tunnel_dev) {
734 if (dev == itn->fb_tunnel_dev)
735 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); 736 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
736 if (t == NULL) 737 if (t == NULL)
737 t = netdev_priv(dev); 738 t = netdev_priv(dev);
739 }
738 memcpy(p, &t->parms, sizeof(*p)); 740 memcpy(p, &t->parms, sizeof(*p));
739 break; 741 break;
740 742
@@ -881,6 +883,7 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
881 */ 883 */
882 if (!IS_ERR(itn->fb_tunnel_dev)) { 884 if (!IS_ERR(itn->fb_tunnel_dev)) {
883 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; 885 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
886 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
884 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev)); 887 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
885 } 888 }
886 rtnl_unlock(); 889 rtnl_unlock();
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index e0c2b1d2ea4e..bcf206c79005 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -46,7 +46,7 @@
46#include <net/netns/generic.h> 46#include <net/netns/generic.h>
47#include <net/rtnetlink.h> 47#include <net/rtnetlink.h>
48 48
49int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb, 49int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
50 __be32 src, __be32 dst, __u8 proto, 50 __be32 src, __be32 dst, __u8 proto,
51 __u8 tos, __u8 ttl, __be16 df, bool xnet) 51 __u8 tos, __u8 ttl, __be16 df, bool xnet)
52{ 52{
@@ -76,7 +76,7 @@ int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
76 iph->ttl = ttl; 76 iph->ttl = ttl;
77 __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1); 77 __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);
78 78
79 err = ip_local_out(skb); 79 err = ip_local_out_sk(sk, skb);
80 if (unlikely(net_xmit_eval(err))) 80 if (unlikely(net_xmit_eval(err)))
81 pkt_len = 0; 81 pkt_len = 0;
82 return pkt_len; 82 return pkt_len;
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index afcee51b90ed..13ef00f1e17b 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -239,6 +239,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
239static int vti4_err(struct sk_buff *skb, u32 info) 239static int vti4_err(struct sk_buff *skb, u32 info)
240{ 240{
241 __be32 spi; 241 __be32 spi;
242 __u32 mark;
242 struct xfrm_state *x; 243 struct xfrm_state *x;
243 struct ip_tunnel *tunnel; 244 struct ip_tunnel *tunnel;
244 struct ip_esp_hdr *esph; 245 struct ip_esp_hdr *esph;
@@ -254,6 +255,8 @@ static int vti4_err(struct sk_buff *skb, u32 info)
254 if (!tunnel) 255 if (!tunnel)
255 return -1; 256 return -1;
256 257
258 mark = be32_to_cpu(tunnel->parms.o_key);
259
257 switch (protocol) { 260 switch (protocol) {
258 case IPPROTO_ESP: 261 case IPPROTO_ESP:
259 esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2)); 262 esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
@@ -281,7 +284,7 @@ static int vti4_err(struct sk_buff *skb, u32 info)
281 return 0; 284 return 0;
282 } 285 }
283 286
284 x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, 287 x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr,
285 spi, protocol, AF_INET); 288 spi, protocol, AF_INET);
286 if (!x) 289 if (!x)
287 return 0; 290 return 0;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 28863570dd60..d84dc8d4c916 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -455,7 +455,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
455 struct mr_table *mrt; 455 struct mr_table *mrt;
456 struct flowi4 fl4 = { 456 struct flowi4 fl4 = {
457 .flowi4_oif = dev->ifindex, 457 .flowi4_oif = dev->ifindex,
458 .flowi4_iif = skb->skb_iif, 458 .flowi4_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
459 .flowi4_mark = skb->mark, 459 .flowi4_mark = skb->mark,
460 }; 460 };
461 int err; 461 int err;
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index c49dcd0284a0..4bfaedf9b34e 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -89,11 +89,8 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
89 if (ipv4_is_multicast(iph->daddr)) { 89 if (ipv4_is_multicast(iph->daddr)) {
90 if (ipv4_is_zeronet(iph->saddr)) 90 if (ipv4_is_zeronet(iph->saddr))
91 return ipv4_is_local_multicast(iph->daddr) ^ invert; 91 return ipv4_is_local_multicast(iph->daddr) ^ invert;
92 flow.flowi4_iif = 0;
93 } else {
94 flow.flowi4_iif = LOOPBACK_IFINDEX;
95 } 92 }
96 93 flow.flowi4_iif = LOOPBACK_IFINDEX;
97 flow.daddr = iph->saddr; 94 flow.daddr = iph->saddr;
98 flow.saddr = rpfilter_get_saddr(iph->daddr); 95 flow.saddr = rpfilter_get_saddr(iph->daddr);
99 flow.flowi4_oif = 0; 96 flow.flowi4_oif = 0;
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 12e13bd82b5b..f40f321b41fc 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -22,7 +22,6 @@
22#endif 22#endif
23#include <net/netfilter/nf_conntrack_zones.h> 23#include <net/netfilter/nf_conntrack_zones.h>
24 24
25/* Returns new sk_buff, or NULL */
26static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) 25static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
27{ 26{
28 int err; 27 int err;
@@ -33,8 +32,10 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
33 err = ip_defrag(skb, user); 32 err = ip_defrag(skb, user);
34 local_bh_enable(); 33 local_bh_enable();
35 34
36 if (!err) 35 if (!err) {
37 ip_send_check(ip_hdr(skb)); 36 ip_send_check(ip_hdr(skb));
37 skb->local_df = 1;
38 }
38 39
39 return err; 40 return err;
40} 41}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index f4b19e5dde54..044a0ddf6a79 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -236,15 +236,15 @@ exit:
236static void inet_get_ping_group_range_net(struct net *net, kgid_t *low, 236static void inet_get_ping_group_range_net(struct net *net, kgid_t *low,
237 kgid_t *high) 237 kgid_t *high)
238{ 238{
239 kgid_t *data = net->ipv4.sysctl_ping_group_range; 239 kgid_t *data = net->ipv4.ping_group_range.range;
240 unsigned int seq; 240 unsigned int seq;
241 241
242 do { 242 do {
243 seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); 243 seq = read_seqbegin(&net->ipv4.ping_group_range.lock);
244 244
245 *low = data[0]; 245 *low = data[0];
246 *high = data[1]; 246 *high = data[1];
247 } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); 247 } while (read_seqretry(&net->ipv4.ping_group_range.lock, seq));
248} 248}
249 249
250 250
@@ -252,26 +252,33 @@ int ping_init_sock(struct sock *sk)
252{ 252{
253 struct net *net = sock_net(sk); 253 struct net *net = sock_net(sk);
254 kgid_t group = current_egid(); 254 kgid_t group = current_egid();
255 struct group_info *group_info = get_current_groups(); 255 struct group_info *group_info;
256 int i, j, count = group_info->ngroups; 256 int i, j, count;
257 kgid_t low, high; 257 kgid_t low, high;
258 int ret = 0;
258 259
259 inet_get_ping_group_range_net(net, &low, &high); 260 inet_get_ping_group_range_net(net, &low, &high);
260 if (gid_lte(low, group) && gid_lte(group, high)) 261 if (gid_lte(low, group) && gid_lte(group, high))
261 return 0; 262 return 0;
262 263
264 group_info = get_current_groups();
265 count = group_info->ngroups;
263 for (i = 0; i < group_info->nblocks; i++) { 266 for (i = 0; i < group_info->nblocks; i++) {
264 int cp_count = min_t(int, NGROUPS_PER_BLOCK, count); 267 int cp_count = min_t(int, NGROUPS_PER_BLOCK, count);
265 for (j = 0; j < cp_count; j++) { 268 for (j = 0; j < cp_count; j++) {
266 kgid_t gid = group_info->blocks[i][j]; 269 kgid_t gid = group_info->blocks[i][j];
267 if (gid_lte(low, gid) && gid_lte(gid, high)) 270 if (gid_lte(low, gid) && gid_lte(gid, high))
268 return 0; 271 goto out_release_group;
269 } 272 }
270 273
271 count -= cp_count; 274 count -= cp_count;
272 } 275 }
273 276
274 return -EACCES; 277 ret = -EACCES;
278
279out_release_group:
280 put_group_info(group_info);
281 return ret;
275} 282}
276EXPORT_SYMBOL_GPL(ping_init_sock); 283EXPORT_SYMBOL_GPL(ping_init_sock);
277 284
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 34d094cadb11..5e676be3daeb 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1129,7 +1129,7 @@ static void ipv4_link_failure(struct sk_buff *skb)
1129 dst_set_expires(&rt->dst, 0); 1129 dst_set_expires(&rt->dst, 0);
1130} 1130}
1131 1131
1132static int ip_rt_bug(struct sk_buff *skb) 1132static int ip_rt_bug(struct sock *sk, struct sk_buff *skb)
1133{ 1133{
1134 pr_debug("%s: %pI4 -> %pI4, %s\n", 1134 pr_debug("%s: %pI4 -> %pI4, %s\n",
1135 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, 1135 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
@@ -1519,7 +1519,7 @@ static int __mkroute_input(struct sk_buff *skb,
1519 struct in_device *out_dev; 1519 struct in_device *out_dev;
1520 unsigned int flags = 0; 1520 unsigned int flags = 0;
1521 bool do_cache; 1521 bool do_cache;
1522 u32 itag; 1522 u32 itag = 0;
1523 1523
1524 /* get a working reference to the output device */ 1524 /* get a working reference to the output device */
1525 out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); 1525 out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
@@ -1700,8 +1700,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1700 1700
1701 if (res.type == RTN_LOCAL) { 1701 if (res.type == RTN_LOCAL) {
1702 err = fib_validate_source(skb, saddr, daddr, tos, 1702 err = fib_validate_source(skb, saddr, daddr, tos,
1703 LOOPBACK_IFINDEX, 1703 0, dev, in_dev, &itag);
1704 dev, in_dev, &itag);
1705 if (err < 0) 1704 if (err < 0)
1706 goto martian_source_keep_err; 1705 goto martian_source_keep_err;
1707 goto local_input; 1706 goto local_input;
@@ -2218,7 +2217,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
2218 2217
2219 new->__use = 1; 2218 new->__use = 1;
2220 new->input = dst_discard; 2219 new->input = dst_discard;
2221 new->output = dst_discard; 2220 new->output = dst_discard_sk;
2222 2221
2223 new->dev = ort->dst.dev; 2222 new->dev = ort->dst.dev;
2224 if (new->dev) 2223 if (new->dev)
@@ -2357,7 +2356,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
2357 } 2356 }
2358 } else 2357 } else
2359#endif 2358#endif
2360 if (nla_put_u32(skb, RTA_IIF, rt->rt_iif)) 2359 if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex))
2361 goto nla_put_failure; 2360 goto nla_put_failure;
2362 } 2361 }
2363 2362
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 44eba052b43d..5cde8f263d40 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -45,10 +45,10 @@ static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
45/* Update system visible IP port range */ 45/* Update system visible IP port range */
46static void set_local_port_range(struct net *net, int range[2]) 46static void set_local_port_range(struct net *net, int range[2])
47{ 47{
48 write_seqlock(&net->ipv4.sysctl_local_ports.lock); 48 write_seqlock(&net->ipv4.ip_local_ports.lock);
49 net->ipv4.sysctl_local_ports.range[0] = range[0]; 49 net->ipv4.ip_local_ports.range[0] = range[0];
50 net->ipv4.sysctl_local_ports.range[1] = range[1]; 50 net->ipv4.ip_local_ports.range[1] = range[1];
51 write_sequnlock(&net->ipv4.sysctl_local_ports.lock); 51 write_sequnlock(&net->ipv4.ip_local_ports.lock);
52} 52}
53 53
54/* Validate changes from /proc interface. */ 54/* Validate changes from /proc interface. */
@@ -57,7 +57,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write,
57 size_t *lenp, loff_t *ppos) 57 size_t *lenp, loff_t *ppos)
58{ 58{
59 struct net *net = 59 struct net *net =
60 container_of(table->data, struct net, ipv4.sysctl_local_ports.range); 60 container_of(table->data, struct net, ipv4.ip_local_ports.range);
61 int ret; 61 int ret;
62 int range[2]; 62 int range[2];
63 struct ctl_table tmp = { 63 struct ctl_table tmp = {
@@ -87,14 +87,14 @@ static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low
87{ 87{
88 kgid_t *data = table->data; 88 kgid_t *data = table->data;
89 struct net *net = 89 struct net *net =
90 container_of(table->data, struct net, ipv4.sysctl_ping_group_range); 90 container_of(table->data, struct net, ipv4.ping_group_range.range);
91 unsigned int seq; 91 unsigned int seq;
92 do { 92 do {
93 seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); 93 seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);
94 94
95 *low = data[0]; 95 *low = data[0];
96 *high = data[1]; 96 *high = data[1];
97 } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); 97 } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
98} 98}
99 99
100/* Update system visible IP port range */ 100/* Update system visible IP port range */
@@ -102,11 +102,11 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig
102{ 102{
103 kgid_t *data = table->data; 103 kgid_t *data = table->data;
104 struct net *net = 104 struct net *net =
105 container_of(table->data, struct net, ipv4.sysctl_ping_group_range); 105 container_of(table->data, struct net, ipv4.ping_group_range.range);
106 write_seqlock(&net->ipv4.sysctl_local_ports.lock); 106 write_seqlock(&net->ipv4.ip_local_ports.lock);
107 data[0] = low; 107 data[0] = low;
108 data[1] = high; 108 data[1] = high;
109 write_sequnlock(&net->ipv4.sysctl_local_ports.lock); 109 write_sequnlock(&net->ipv4.ip_local_ports.lock);
110} 110}
111 111
112/* Validate changes from /proc interface. */ 112/* Validate changes from /proc interface. */
@@ -805,7 +805,7 @@ static struct ctl_table ipv4_net_table[] = {
805 }, 805 },
806 { 806 {
807 .procname = "ping_group_range", 807 .procname = "ping_group_range",
808 .data = &init_net.ipv4.sysctl_ping_group_range, 808 .data = &init_net.ipv4.ping_group_range.range,
809 .maxlen = sizeof(gid_t)*2, 809 .maxlen = sizeof(gid_t)*2,
810 .mode = 0644, 810 .mode = 0644,
811 .proc_handler = ipv4_ping_group_range, 811 .proc_handler = ipv4_ping_group_range,
@@ -819,8 +819,8 @@ static struct ctl_table ipv4_net_table[] = {
819 }, 819 },
820 { 820 {
821 .procname = "ip_local_port_range", 821 .procname = "ip_local_port_range",
822 .maxlen = sizeof(init_net.ipv4.sysctl_local_ports.range), 822 .maxlen = sizeof(init_net.ipv4.ip_local_ports.range),
823 .data = &init_net.ipv4.sysctl_local_ports.range, 823 .data = &init_net.ipv4.ip_local_ports.range,
824 .mode = 0644, 824 .mode = 0644,
825 .proc_handler = ipv4_local_port_range, 825 .proc_handler = ipv4_local_port_range,
826 }, 826 },
@@ -858,20 +858,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
858 table[i].data += (void *)net - (void *)&init_net; 858 table[i].data += (void *)net - (void *)&init_net;
859 } 859 }
860 860
861 /*
862 * Sane defaults - nobody may create ping sockets.
863 * Boot scripts should set this to distro-specific group.
864 */
865 net->ipv4.sysctl_ping_group_range[0] = make_kgid(&init_user_ns, 1);
866 net->ipv4.sysctl_ping_group_range[1] = make_kgid(&init_user_ns, 0);
867
868 /*
869 * Set defaults for local port range
870 */
871 seqlock_init(&net->ipv4.sysctl_local_ports.lock);
872 net->ipv4.sysctl_local_ports.range[0] = 32768;
873 net->ipv4.sysctl_local_ports.range[1] = 61000;
874
875 net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); 861 net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
876 if (net->ipv4.ipv4_hdr == NULL) 862 if (net->ipv4.ipv4_hdr == NULL)
877 goto err_reg; 863 goto err_reg;
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 8bf224516ba2..b4f1b29b08bd 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -409,7 +409,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
409 ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT; 409 ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT;
410 ratio += cnt; 410 ratio += cnt;
411 411
412 ca->delayed_ack = min(ratio, ACK_RATIO_LIMIT); 412 ca->delayed_ack = clamp(ratio, 1U, ACK_RATIO_LIMIT);
413 } 413 }
414 414
415 /* Some calls are for duplicates without timetamps */ 415 /* Some calls are for duplicates without timetamps */
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 699fb102e971..12d6016bdd9a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -981,7 +981,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
981 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, 981 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
982 tcp_skb_pcount(skb)); 982 tcp_skb_pcount(skb));
983 983
984 err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl); 984 err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
985 if (likely(err <= 0)) 985 if (likely(err <= 0))
986 return err; 986 return err;
987 987
@@ -2441,8 +2441,14 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2441 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); 2441 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2442 } 2442 }
2443 2443
2444 if (likely(!err)) 2444 if (likely(!err)) {
2445 TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; 2445 TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
2446 /* Update global TCP statistics. */
2447 TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
2448 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2449 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
2450 tp->total_retrans++;
2451 }
2446 return err; 2452 return err;
2447} 2453}
2448 2454
@@ -2452,12 +2458,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2452 int err = __tcp_retransmit_skb(sk, skb); 2458 int err = __tcp_retransmit_skb(sk, skb);
2453 2459
2454 if (err == 0) { 2460 if (err == 0) {
2455 /* Update global TCP statistics. */
2456 TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
2457 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2458 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
2459 tp->total_retrans++;
2460
2461#if FASTRETRANS_DEBUG > 0 2461#if FASTRETRANS_DEBUG > 0
2462 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { 2462 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2463 net_dbg_ratelimited("retrans_out leaked\n"); 2463 net_dbg_ratelimited("retrans_out leaked\n");
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index baa0f63731fd..186a8ecf92fa 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -62,10 +62,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
62 if (err) 62 if (err)
63 return err; 63 return err;
64 64
65 memset(IPCB(skb), 0, sizeof(*IPCB(skb))); 65 IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
66 IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED;
67
68 skb->protocol = htons(ETH_P_IP);
69 66
70 return x->outer_mode->output2(x, skb); 67 return x->outer_mode->output2(x, skb);
71} 68}
@@ -73,27 +70,34 @@ EXPORT_SYMBOL(xfrm4_prepare_output);
73 70
74int xfrm4_output_finish(struct sk_buff *skb) 71int xfrm4_output_finish(struct sk_buff *skb)
75{ 72{
76#ifdef CONFIG_NETFILTER 73 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
77 if (!skb_dst(skb)->xfrm) { 74 skb->protocol = htons(ETH_P_IP);
78 IPCB(skb)->flags |= IPSKB_REROUTED;
79 return dst_output(skb);
80 }
81 75
76#ifdef CONFIG_NETFILTER
82 IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; 77 IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
83#endif 78#endif
84 79
85 skb->protocol = htons(ETH_P_IP);
86 return xfrm_output(skb); 80 return xfrm_output(skb);
87} 81}
88 82
89int xfrm4_output(struct sk_buff *skb) 83static int __xfrm4_output(struct sk_buff *skb)
90{ 84{
91 struct dst_entry *dst = skb_dst(skb); 85 struct xfrm_state *x = skb_dst(skb)->xfrm;
92 struct xfrm_state *x = dst->xfrm; 86
87#ifdef CONFIG_NETFILTER
88 if (!x) {
89 IPCB(skb)->flags |= IPSKB_REROUTED;
90 return dst_output(skb);
91 }
92#endif
93 93
94 return x->outer_mode->afinfo->output_finish(skb);
95}
96
97int xfrm4_output(struct sock *sk, struct sk_buff *skb)
98{
94 return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, 99 return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb,
95 NULL, dst->dev, 100 NULL, skb_dst(skb)->dev, __xfrm4_output,
96 x->outer_mode->afinfo->output_finish,
97 !(IPCB(skb)->flags & IPSKB_REROUTED)); 101 !(IPCB(skb)->flags & IPSKB_REROUTED));
98} 102}
99 103
diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c
index 7f7b243e8139..a2ce0101eaac 100644
--- a/net/ipv4/xfrm4_protocol.c
+++ b/net/ipv4/xfrm4_protocol.c
@@ -50,8 +50,12 @@ int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
50{ 50{
51 int ret; 51 int ret;
52 struct xfrm4_protocol *handler; 52 struct xfrm4_protocol *handler;
53 struct xfrm4_protocol __rcu **head = proto_handlers(protocol);
53 54
54 for_each_protocol_rcu(*proto_handlers(protocol), handler) 55 if (!head)
56 return 0;
57
58 for_each_protocol_rcu(*head, handler)
55 if ((ret = handler->cb_handler(skb, err)) <= 0) 59 if ((ret = handler->cb_handler(skb, err)) <= 0)
56 return ret; 60 return ret;
57 61
@@ -64,15 +68,20 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
64{ 68{
65 int ret; 69 int ret;
66 struct xfrm4_protocol *handler; 70 struct xfrm4_protocol *handler;
71 struct xfrm4_protocol __rcu **head = proto_handlers(nexthdr);
67 72
68 XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; 73 XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
69 XFRM_SPI_SKB_CB(skb)->family = AF_INET; 74 XFRM_SPI_SKB_CB(skb)->family = AF_INET;
70 XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); 75 XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
71 76
72 for_each_protocol_rcu(*proto_handlers(nexthdr), handler) 77 if (!head)
78 goto out;
79
80 for_each_protocol_rcu(*head, handler)
73 if ((ret = handler->input_handler(skb, nexthdr, spi, encap_type)) != -EINVAL) 81 if ((ret = handler->input_handler(skb, nexthdr, spi, encap_type)) != -EINVAL)
74 return ret; 82 return ret;
75 83
84out:
76 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 85 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
77 86
78 kfree_skb(skb); 87 kfree_skb(skb);
@@ -208,6 +217,9 @@ int xfrm4_protocol_register(struct xfrm4_protocol *handler,
208 int ret = -EEXIST; 217 int ret = -EEXIST;
209 int priority = handler->priority; 218 int priority = handler->priority;
210 219
220 if (!proto_handlers(protocol) || !netproto(protocol))
221 return -EINVAL;
222
211 mutex_lock(&xfrm4_protocol_mutex); 223 mutex_lock(&xfrm4_protocol_mutex);
212 224
213 if (!rcu_dereference_protected(*proto_handlers(protocol), 225 if (!rcu_dereference_protected(*proto_handlers(protocol),
@@ -250,6 +262,9 @@ int xfrm4_protocol_deregister(struct xfrm4_protocol *handler,
250 struct xfrm4_protocol *t; 262 struct xfrm4_protocol *t;
251 int ret = -ENOENT; 263 int ret = -ENOENT;
252 264
265 if (!proto_handlers(protocol) || !netproto(protocol))
266 return -EINVAL;
267
253 mutex_lock(&xfrm4_protocol_mutex); 268 mutex_lock(&xfrm4_protocol_mutex);
254 269
255 for (pprev = proto_handlers(protocol); 270 for (pprev = proto_handlers(protocol);