aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c36
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/inet_connection_sock.c8
-rw-r--r--net/ipv4/ip_forward.c54
-rw-r--r--net/ipv4/ip_fragment.c5
-rw-r--r--net/ipv4/ip_output.c51
-rw-r--r--net/ipv4/ip_tunnel.c6
-rw-r--r--net/ipv4/ip_vti.c5
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c5
-rw-r--r--net/ipv4/ping.c6
-rw-r--r--net/ipv4/route.c2
-rw-r--r--net/ipv4/sysctl_net_ipv4.c42
-rw-r--r--net/ipv4/tcp_cubic.c2
-rw-r--r--net/ipv4/tcp_input.c11
-rw-r--r--net/ipv4/tcp_output.c14
-rw-r--r--net/ipv4/xfrm4_output.c32
-rw-r--r--net/ipv4/xfrm4_protocol.c19
17 files changed, 172 insertions, 128 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8c54870db792..6d6dd345bc4d 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1650,6 +1650,39 @@ static int __init init_ipv4_mibs(void)
1650 return register_pernet_subsys(&ipv4_mib_ops); 1650 return register_pernet_subsys(&ipv4_mib_ops);
1651} 1651}
1652 1652
1653static __net_init int inet_init_net(struct net *net)
1654{
1655 /*
1656 * Set defaults for local port range
1657 */
1658 seqlock_init(&net->ipv4.ip_local_ports.lock);
1659 net->ipv4.ip_local_ports.range[0] = 32768;
1660 net->ipv4.ip_local_ports.range[1] = 61000;
1661
1662 seqlock_init(&net->ipv4.ping_group_range.lock);
1663 /*
1664 * Sane defaults - nobody may create ping sockets.
1665 * Boot scripts should set this to distro-specific group.
1666 */
1667 net->ipv4.ping_group_range.range[0] = make_kgid(&init_user_ns, 1);
1668 net->ipv4.ping_group_range.range[1] = make_kgid(&init_user_ns, 0);
1669 return 0;
1670}
1671
1672static __net_exit void inet_exit_net(struct net *net)
1673{
1674}
1675
1676static __net_initdata struct pernet_operations af_inet_ops = {
1677 .init = inet_init_net,
1678 .exit = inet_exit_net,
1679};
1680
1681static int __init init_inet_pernet_ops(void)
1682{
1683 return register_pernet_subsys(&af_inet_ops);
1684}
1685
1653static int ipv4_proc_init(void); 1686static int ipv4_proc_init(void);
1654 1687
1655/* 1688/*
@@ -1794,6 +1827,9 @@ static int __init inet_init(void)
1794 if (ip_mr_init()) 1827 if (ip_mr_init())
1795 pr_crit("%s: Cannot init ipv4 mroute\n", __func__); 1828 pr_crit("%s: Cannot init ipv4 mroute\n", __func__);
1796#endif 1829#endif
1830
1831 if (init_inet_pernet_ops())
1832 pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__);
1797 /* 1833 /*
1798 * Initialise per-cpu ipv4 mibs 1834 * Initialise per-cpu ipv4 mibs
1799 */ 1835 */
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 8a043f03c88e..b10cd43a4722 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -821,13 +821,13 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
821 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); 821 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
822 if (fi == NULL) 822 if (fi == NULL)
823 goto failure; 823 goto failure;
824 fib_info_cnt++;
824 if (cfg->fc_mx) { 825 if (cfg->fc_mx) {
825 fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); 826 fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
826 if (!fi->fib_metrics) 827 if (!fi->fib_metrics)
827 goto failure; 828 goto failure;
828 } else 829 } else
829 fi->fib_metrics = (u32 *) dst_default_metrics; 830 fi->fib_metrics = (u32 *) dst_default_metrics;
830 fib_info_cnt++;
831 831
832 fi->fib_net = hold_net(net); 832 fi->fib_net = hold_net(net);
833 fi->fib_protocol = cfg->fc_protocol; 833 fi->fib_protocol = cfg->fc_protocol;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 0d1e2cb877ec..a56b8e6e866a 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -37,11 +37,11 @@ void inet_get_local_port_range(struct net *net, int *low, int *high)
37 unsigned int seq; 37 unsigned int seq;
38 38
39 do { 39 do {
40 seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); 40 seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);
41 41
42 *low = net->ipv4.sysctl_local_ports.range[0]; 42 *low = net->ipv4.ip_local_ports.range[0];
43 *high = net->ipv4.sysctl_local_ports.range[1]; 43 *high = net->ipv4.ip_local_ports.range[1];
44 } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); 44 } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
45} 45}
46EXPORT_SYMBOL(inet_get_local_port_range); 46EXPORT_SYMBOL(inet_get_local_port_range);
47 47
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index be8abe73bb9f..6f111e48e11c 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -42,12 +42,12 @@
42static bool ip_may_fragment(const struct sk_buff *skb) 42static bool ip_may_fragment(const struct sk_buff *skb)
43{ 43{
44 return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) || 44 return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) ||
45 !skb->local_df; 45 skb->local_df;
46} 46}
47 47
48static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) 48static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
49{ 49{
50 if (skb->len <= mtu || skb->local_df) 50 if (skb->len <= mtu)
51 return false; 51 return false;
52 52
53 if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) 53 if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
@@ -56,53 +56,6 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
56 return true; 56 return true;
57} 57}
58 58
59static bool ip_gso_exceeds_dst_mtu(const struct sk_buff *skb)
60{
61 unsigned int mtu;
62
63 if (skb->local_df || !skb_is_gso(skb))
64 return false;
65
66 mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true);
67
68 /* if seglen > mtu, do software segmentation for IP fragmentation on
69 * output. DF bit cannot be set since ip_forward would have sent
70 * icmp error.
71 */
72 return skb_gso_network_seglen(skb) > mtu;
73}
74
75/* called if GSO skb needs to be fragmented on forward */
76static int ip_forward_finish_gso(struct sk_buff *skb)
77{
78 struct dst_entry *dst = skb_dst(skb);
79 netdev_features_t features;
80 struct sk_buff *segs;
81 int ret = 0;
82
83 features = netif_skb_dev_features(skb, dst->dev);
84 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
85 if (IS_ERR(segs)) {
86 kfree_skb(skb);
87 return -ENOMEM;
88 }
89
90 consume_skb(skb);
91
92 do {
93 struct sk_buff *nskb = segs->next;
94 int err;
95
96 segs->next = NULL;
97 err = dst_output(segs);
98
99 if (err && ret == 0)
100 ret = err;
101 segs = nskb;
102 } while (segs);
103
104 return ret;
105}
106 59
107static int ip_forward_finish(struct sk_buff *skb) 60static int ip_forward_finish(struct sk_buff *skb)
108{ 61{
@@ -114,9 +67,6 @@ static int ip_forward_finish(struct sk_buff *skb)
114 if (unlikely(opt->optlen)) 67 if (unlikely(opt->optlen))
115 ip_forward_options(skb); 68 ip_forward_options(skb);
116 69
117 if (ip_gso_exceeds_dst_mtu(skb))
118 return ip_forward_finish_gso(skb);
119
120 return dst_output(skb); 70 return dst_output(skb);
121} 71}
122 72
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index c10a3ce5cbff..ed32313e307c 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -232,8 +232,9 @@ static void ip_expire(unsigned long arg)
232 * "Fragment Reassembly Timeout" message, per RFC792. 232 * "Fragment Reassembly Timeout" message, per RFC792.
233 */ 233 */
234 if (qp->user == IP_DEFRAG_AF_PACKET || 234 if (qp->user == IP_DEFRAG_AF_PACKET ||
235 (qp->user == IP_DEFRAG_CONNTRACK_IN && 235 ((qp->user >= IP_DEFRAG_CONNTRACK_IN) &&
236 skb_rtable(head)->rt_type != RTN_LOCAL)) 236 (qp->user <= __IP_DEFRAG_CONNTRACK_IN_END) &&
237 (skb_rtable(head)->rt_type != RTN_LOCAL)))
237 goto out_rcu_unlock; 238 goto out_rcu_unlock;
238 239
239 240
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 1cbeba5edff9..a52f50187b54 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -211,6 +211,48 @@ static inline int ip_finish_output2(struct sk_buff *skb)
211 return -EINVAL; 211 return -EINVAL;
212} 212}
213 213
214static int ip_finish_output_gso(struct sk_buff *skb)
215{
216 netdev_features_t features;
217 struct sk_buff *segs;
218 int ret = 0;
219
220 /* common case: locally created skb or seglen is <= mtu */
221 if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) ||
222 skb_gso_network_seglen(skb) <= ip_skb_dst_mtu(skb))
223 return ip_finish_output2(skb);
224
225 /* Slowpath - GSO segment length is exceeding the dst MTU.
226 *
227 * This can happen in two cases:
228 * 1) TCP GRO packet, DF bit not set
229 * 2) skb arrived via virtio-net, we thus get TSO/GSO skbs directly
230 * from host network stack.
231 */
232 features = netif_skb_features(skb);
233 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
234 if (IS_ERR(segs)) {
235 kfree_skb(skb);
236 return -ENOMEM;
237 }
238
239 consume_skb(skb);
240
241 do {
242 struct sk_buff *nskb = segs->next;
243 int err;
244
245 segs->next = NULL;
246 err = ip_fragment(segs, ip_finish_output2);
247
248 if (err && ret == 0)
249 ret = err;
250 segs = nskb;
251 } while (segs);
252
253 return ret;
254}
255
214static int ip_finish_output(struct sk_buff *skb) 256static int ip_finish_output(struct sk_buff *skb)
215{ 257{
216#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) 258#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
@@ -220,10 +262,13 @@ static int ip_finish_output(struct sk_buff *skb)
220 return dst_output(skb); 262 return dst_output(skb);
221 } 263 }
222#endif 264#endif
223 if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb)) 265 if (skb_is_gso(skb))
266 return ip_finish_output_gso(skb);
267
268 if (skb->len > ip_skb_dst_mtu(skb))
224 return ip_fragment(skb, ip_finish_output2); 269 return ip_fragment(skb, ip_finish_output2);
225 else 270
226 return ip_finish_output2(skb); 271 return ip_finish_output2(skb);
227} 272}
228 273
229int ip_mc_output(struct sock *sk, struct sk_buff *skb) 274int ip_mc_output(struct sock *sk, struct sk_buff *skb)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index fa5b7519765f..2acc2337d38b 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -442,6 +442,8 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
442 tunnel->i_seqno = ntohl(tpi->seq) + 1; 442 tunnel->i_seqno = ntohl(tpi->seq) + 1;
443 } 443 }
444 444
445 skb_reset_network_header(skb);
446
445 err = IP_ECN_decapsulate(iph, skb); 447 err = IP_ECN_decapsulate(iph, skb);
446 if (unlikely(err)) { 448 if (unlikely(err)) {
447 if (log_ecn_error) 449 if (log_ecn_error)
@@ -538,9 +540,10 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
538 unsigned int max_headroom; /* The extra header space needed */ 540 unsigned int max_headroom; /* The extra header space needed */
539 __be32 dst; 541 __be32 dst;
540 int err; 542 int err;
541 bool connected = true; 543 bool connected;
542 544
543 inner_iph = (const struct iphdr *)skb_inner_network_header(skb); 545 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
546 connected = (tunnel->parms.iph.daddr != 0);
544 547
545 dst = tnl_params->daddr; 548 dst = tnl_params->daddr;
546 if (dst == 0) { 549 if (dst == 0) {
@@ -880,6 +883,7 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
880 */ 883 */
881 if (!IS_ERR(itn->fb_tunnel_dev)) { 884 if (!IS_ERR(itn->fb_tunnel_dev)) {
882 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; 885 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
886 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
883 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev)); 887 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
884 } 888 }
885 rtnl_unlock(); 889 rtnl_unlock();
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index afcee51b90ed..13ef00f1e17b 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -239,6 +239,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
239static int vti4_err(struct sk_buff *skb, u32 info) 239static int vti4_err(struct sk_buff *skb, u32 info)
240{ 240{
241 __be32 spi; 241 __be32 spi;
242 __u32 mark;
242 struct xfrm_state *x; 243 struct xfrm_state *x;
243 struct ip_tunnel *tunnel; 244 struct ip_tunnel *tunnel;
244 struct ip_esp_hdr *esph; 245 struct ip_esp_hdr *esph;
@@ -254,6 +255,8 @@ static int vti4_err(struct sk_buff *skb, u32 info)
254 if (!tunnel) 255 if (!tunnel)
255 return -1; 256 return -1;
256 257
258 mark = be32_to_cpu(tunnel->parms.o_key);
259
257 switch (protocol) { 260 switch (protocol) {
258 case IPPROTO_ESP: 261 case IPPROTO_ESP:
259 esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2)); 262 esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
@@ -281,7 +284,7 @@ static int vti4_err(struct sk_buff *skb, u32 info)
281 return 0; 284 return 0;
282 } 285 }
283 286
284 x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, 287 x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr,
285 spi, protocol, AF_INET); 288 spi, protocol, AF_INET);
286 if (!x) 289 if (!x)
287 return 0; 290 return 0;
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 12e13bd82b5b..f40f321b41fc 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -22,7 +22,6 @@
22#endif 22#endif
23#include <net/netfilter/nf_conntrack_zones.h> 23#include <net/netfilter/nf_conntrack_zones.h>
24 24
25/* Returns new sk_buff, or NULL */
26static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) 25static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
27{ 26{
28 int err; 27 int err;
@@ -33,8 +32,10 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
33 err = ip_defrag(skb, user); 32 err = ip_defrag(skb, user);
34 local_bh_enable(); 33 local_bh_enable();
35 34
36 if (!err) 35 if (!err) {
37 ip_send_check(ip_hdr(skb)); 36 ip_send_check(ip_hdr(skb));
37 skb->local_df = 1;
38 }
38 39
39 return err; 40 return err;
40} 41}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 8210964a9f19..044a0ddf6a79 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -236,15 +236,15 @@ exit:
236static void inet_get_ping_group_range_net(struct net *net, kgid_t *low, 236static void inet_get_ping_group_range_net(struct net *net, kgid_t *low,
237 kgid_t *high) 237 kgid_t *high)
238{ 238{
239 kgid_t *data = net->ipv4.sysctl_ping_group_range; 239 kgid_t *data = net->ipv4.ping_group_range.range;
240 unsigned int seq; 240 unsigned int seq;
241 241
242 do { 242 do {
243 seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); 243 seq = read_seqbegin(&net->ipv4.ping_group_range.lock);
244 244
245 *low = data[0]; 245 *low = data[0];
246 *high = data[1]; 246 *high = data[1];
247 } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); 247 } while (read_seqretry(&net->ipv4.ping_group_range.lock, seq));
248} 248}
249 249
250 250
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index db1e0da871f4..5e676be3daeb 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1519,7 +1519,7 @@ static int __mkroute_input(struct sk_buff *skb,
1519 struct in_device *out_dev; 1519 struct in_device *out_dev;
1520 unsigned int flags = 0; 1520 unsigned int flags = 0;
1521 bool do_cache; 1521 bool do_cache;
1522 u32 itag; 1522 u32 itag = 0;
1523 1523
1524 /* get a working reference to the output device */ 1524 /* get a working reference to the output device */
1525 out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); 1525 out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 44eba052b43d..5cde8f263d40 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -45,10 +45,10 @@ static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
45/* Update system visible IP port range */ 45/* Update system visible IP port range */
46static void set_local_port_range(struct net *net, int range[2]) 46static void set_local_port_range(struct net *net, int range[2])
47{ 47{
48 write_seqlock(&net->ipv4.sysctl_local_ports.lock); 48 write_seqlock(&net->ipv4.ip_local_ports.lock);
49 net->ipv4.sysctl_local_ports.range[0] = range[0]; 49 net->ipv4.ip_local_ports.range[0] = range[0];
50 net->ipv4.sysctl_local_ports.range[1] = range[1]; 50 net->ipv4.ip_local_ports.range[1] = range[1];
51 write_sequnlock(&net->ipv4.sysctl_local_ports.lock); 51 write_sequnlock(&net->ipv4.ip_local_ports.lock);
52} 52}
53 53
54/* Validate changes from /proc interface. */ 54/* Validate changes from /proc interface. */
@@ -57,7 +57,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write,
57 size_t *lenp, loff_t *ppos) 57 size_t *lenp, loff_t *ppos)
58{ 58{
59 struct net *net = 59 struct net *net =
60 container_of(table->data, struct net, ipv4.sysctl_local_ports.range); 60 container_of(table->data, struct net, ipv4.ip_local_ports.range);
61 int ret; 61 int ret;
62 int range[2]; 62 int range[2];
63 struct ctl_table tmp = { 63 struct ctl_table tmp = {
@@ -87,14 +87,14 @@ static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low
87{ 87{
88 kgid_t *data = table->data; 88 kgid_t *data = table->data;
89 struct net *net = 89 struct net *net =
90 container_of(table->data, struct net, ipv4.sysctl_ping_group_range); 90 container_of(table->data, struct net, ipv4.ping_group_range.range);
91 unsigned int seq; 91 unsigned int seq;
92 do { 92 do {
93 seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); 93 seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);
94 94
95 *low = data[0]; 95 *low = data[0];
96 *high = data[1]; 96 *high = data[1];
97 } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); 97 } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
98} 98}
99 99
100/* Update system visible IP port range */ 100/* Update system visible IP port range */
@@ -102,11 +102,11 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig
102{ 102{
103 kgid_t *data = table->data; 103 kgid_t *data = table->data;
104 struct net *net = 104 struct net *net =
105 container_of(table->data, struct net, ipv4.sysctl_ping_group_range); 105 container_of(table->data, struct net, ipv4.ping_group_range.range);
106 write_seqlock(&net->ipv4.sysctl_local_ports.lock); 106 write_seqlock(&net->ipv4.ip_local_ports.lock);
107 data[0] = low; 107 data[0] = low;
108 data[1] = high; 108 data[1] = high;
109 write_sequnlock(&net->ipv4.sysctl_local_ports.lock); 109 write_sequnlock(&net->ipv4.ip_local_ports.lock);
110} 110}
111 111
112/* Validate changes from /proc interface. */ 112/* Validate changes from /proc interface. */
@@ -805,7 +805,7 @@ static struct ctl_table ipv4_net_table[] = {
805 }, 805 },
806 { 806 {
807 .procname = "ping_group_range", 807 .procname = "ping_group_range",
808 .data = &init_net.ipv4.sysctl_ping_group_range, 808 .data = &init_net.ipv4.ping_group_range.range,
809 .maxlen = sizeof(gid_t)*2, 809 .maxlen = sizeof(gid_t)*2,
810 .mode = 0644, 810 .mode = 0644,
811 .proc_handler = ipv4_ping_group_range, 811 .proc_handler = ipv4_ping_group_range,
@@ -819,8 +819,8 @@ static struct ctl_table ipv4_net_table[] = {
819 }, 819 },
820 { 820 {
821 .procname = "ip_local_port_range", 821 .procname = "ip_local_port_range",
822 .maxlen = sizeof(init_net.ipv4.sysctl_local_ports.range), 822 .maxlen = sizeof(init_net.ipv4.ip_local_ports.range),
823 .data = &init_net.ipv4.sysctl_local_ports.range, 823 .data = &init_net.ipv4.ip_local_ports.range,
824 .mode = 0644, 824 .mode = 0644,
825 .proc_handler = ipv4_local_port_range, 825 .proc_handler = ipv4_local_port_range,
826 }, 826 },
@@ -858,20 +858,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
858 table[i].data += (void *)net - (void *)&init_net; 858 table[i].data += (void *)net - (void *)&init_net;
859 } 859 }
860 860
861 /*
862 * Sane defaults - nobody may create ping sockets.
863 * Boot scripts should set this to distro-specific group.
864 */
865 net->ipv4.sysctl_ping_group_range[0] = make_kgid(&init_user_ns, 1);
866 net->ipv4.sysctl_ping_group_range[1] = make_kgid(&init_user_ns, 0);
867
868 /*
869 * Set defaults for local port range
870 */
871 seqlock_init(&net->ipv4.sysctl_local_ports.lock);
872 net->ipv4.sysctl_local_ports.range[0] = 32768;
873 net->ipv4.sysctl_local_ports.range[1] = 61000;
874
875 net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); 861 net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
876 if (net->ipv4.ipv4_hdr == NULL) 862 if (net->ipv4.ipv4_hdr == NULL)
877 goto err_reg; 863 goto err_reg;
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 8bf224516ba2..b4f1b29b08bd 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -409,7 +409,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
409 ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT; 409 ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT;
410 ratio += cnt; 410 ratio += cnt;
411 411
412 ca->delayed_ack = min(ratio, ACK_RATIO_LIMIT); 412 ca->delayed_ack = clamp(ratio, 1U, ACK_RATIO_LIMIT);
413 } 413 }
414 414
415 /* Some calls are for duplicates without timetamps */ 415 /* Some calls are for duplicates without timetamps */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d6b46eb2f94c..3a26b3b23f16 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2684,13 +2684,12 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
2684 bool recovered = !before(tp->snd_una, tp->high_seq); 2684 bool recovered = !before(tp->snd_una, tp->high_seq);
2685 2685
2686 if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */ 2686 if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
2687 if (flag & FLAG_ORIG_SACK_ACKED) { 2687 /* Step 3.b. A timeout is spurious if not all data are
2688 /* Step 3.b. A timeout is spurious if not all data are 2688 * lost, i.e., never-retransmitted data are (s)acked.
2689 * lost, i.e., never-retransmitted data are (s)acked. 2689 */
2690 */ 2690 if (tcp_try_undo_loss(sk, flag & FLAG_ORIG_SACK_ACKED))
2691 tcp_try_undo_loss(sk, true);
2692 return; 2691 return;
2693 } 2692
2694 if (after(tp->snd_nxt, tp->high_seq) && 2693 if (after(tp->snd_nxt, tp->high_seq) &&
2695 (flag & FLAG_DATA_SACKED || is_dupack)) { 2694 (flag & FLAG_DATA_SACKED || is_dupack)) {
2696 tp->frto = 0; /* Loss was real: 2nd part of step 3.a */ 2695 tp->frto = 0; /* Loss was real: 2nd part of step 3.a */
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 025e25093984..12d6016bdd9a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2441,8 +2441,14 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2441 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); 2441 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2442 } 2442 }
2443 2443
2444 if (likely(!err)) 2444 if (likely(!err)) {
2445 TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; 2445 TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
2446 /* Update global TCP statistics. */
2447 TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
2448 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2449 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
2450 tp->total_retrans++;
2451 }
2446 return err; 2452 return err;
2447} 2453}
2448 2454
@@ -2452,12 +2458,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2452 int err = __tcp_retransmit_skb(sk, skb); 2458 int err = __tcp_retransmit_skb(sk, skb);
2453 2459
2454 if (err == 0) { 2460 if (err == 0) {
2455 /* Update global TCP statistics. */
2456 TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
2457 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2458 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
2459 tp->total_retrans++;
2460
2461#if FASTRETRANS_DEBUG > 0 2461#if FASTRETRANS_DEBUG > 0
2462 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { 2462 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2463 net_dbg_ratelimited("retrans_out leaked\n"); 2463 net_dbg_ratelimited("retrans_out leaked\n");
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 40e701f2e1e0..186a8ecf92fa 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -62,10 +62,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
62 if (err) 62 if (err)
63 return err; 63 return err;
64 64
65 memset(IPCB(skb), 0, sizeof(*IPCB(skb))); 65 IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
66 IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED;
67
68 skb->protocol = htons(ETH_P_IP);
69 66
70 return x->outer_mode->output2(x, skb); 67 return x->outer_mode->output2(x, skb);
71} 68}
@@ -73,27 +70,34 @@ EXPORT_SYMBOL(xfrm4_prepare_output);
73 70
74int xfrm4_output_finish(struct sk_buff *skb) 71int xfrm4_output_finish(struct sk_buff *skb)
75{ 72{
73 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
74 skb->protocol = htons(ETH_P_IP);
75
76#ifdef CONFIG_NETFILTER
77 IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
78#endif
79
80 return xfrm_output(skb);
81}
82
83static int __xfrm4_output(struct sk_buff *skb)
84{
85 struct xfrm_state *x = skb_dst(skb)->xfrm;
86
76#ifdef CONFIG_NETFILTER 87#ifdef CONFIG_NETFILTER
77 if (!skb_dst(skb)->xfrm) { 88 if (!x) {
78 IPCB(skb)->flags |= IPSKB_REROUTED; 89 IPCB(skb)->flags |= IPSKB_REROUTED;
79 return dst_output(skb); 90 return dst_output(skb);
80 } 91 }
81
82 IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
83#endif 92#endif
84 93
85 skb->protocol = htons(ETH_P_IP); 94 return x->outer_mode->afinfo->output_finish(skb);
86 return xfrm_output(skb);
87} 95}
88 96
89int xfrm4_output(struct sock *sk, struct sk_buff *skb) 97int xfrm4_output(struct sock *sk, struct sk_buff *skb)
90{ 98{
91 struct dst_entry *dst = skb_dst(skb);
92 struct xfrm_state *x = dst->xfrm;
93
94 return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, 99 return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb,
95 NULL, dst->dev, 100 NULL, skb_dst(skb)->dev, __xfrm4_output,
96 x->outer_mode->afinfo->output_finish,
97 !(IPCB(skb)->flags & IPSKB_REROUTED)); 101 !(IPCB(skb)->flags & IPSKB_REROUTED));
98} 102}
99 103
diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c
index 7f7b243e8139..a2ce0101eaac 100644
--- a/net/ipv4/xfrm4_protocol.c
+++ b/net/ipv4/xfrm4_protocol.c
@@ -50,8 +50,12 @@ int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
50{ 50{
51 int ret; 51 int ret;
52 struct xfrm4_protocol *handler; 52 struct xfrm4_protocol *handler;
53 struct xfrm4_protocol __rcu **head = proto_handlers(protocol);
53 54
54 for_each_protocol_rcu(*proto_handlers(protocol), handler) 55 if (!head)
56 return 0;
57
58 for_each_protocol_rcu(*head, handler)
55 if ((ret = handler->cb_handler(skb, err)) <= 0) 59 if ((ret = handler->cb_handler(skb, err)) <= 0)
56 return ret; 60 return ret;
57 61
@@ -64,15 +68,20 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
64{ 68{
65 int ret; 69 int ret;
66 struct xfrm4_protocol *handler; 70 struct xfrm4_protocol *handler;
71 struct xfrm4_protocol __rcu **head = proto_handlers(nexthdr);
67 72
68 XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; 73 XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
69 XFRM_SPI_SKB_CB(skb)->family = AF_INET; 74 XFRM_SPI_SKB_CB(skb)->family = AF_INET;
70 XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); 75 XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
71 76
72 for_each_protocol_rcu(*proto_handlers(nexthdr), handler) 77 if (!head)
78 goto out;
79
80 for_each_protocol_rcu(*head, handler)
73 if ((ret = handler->input_handler(skb, nexthdr, spi, encap_type)) != -EINVAL) 81 if ((ret = handler->input_handler(skb, nexthdr, spi, encap_type)) != -EINVAL)
74 return ret; 82 return ret;
75 83
84out:
76 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 85 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
77 86
78 kfree_skb(skb); 87 kfree_skb(skb);
@@ -208,6 +217,9 @@ int xfrm4_protocol_register(struct xfrm4_protocol *handler,
208 int ret = -EEXIST; 217 int ret = -EEXIST;
209 int priority = handler->priority; 218 int priority = handler->priority;
210 219
220 if (!proto_handlers(protocol) || !netproto(protocol))
221 return -EINVAL;
222
211 mutex_lock(&xfrm4_protocol_mutex); 223 mutex_lock(&xfrm4_protocol_mutex);
212 224
213 if (!rcu_dereference_protected(*proto_handlers(protocol), 225 if (!rcu_dereference_protected(*proto_handlers(protocol),
@@ -250,6 +262,9 @@ int xfrm4_protocol_deregister(struct xfrm4_protocol *handler,
250 struct xfrm4_protocol *t; 262 struct xfrm4_protocol *t;
251 int ret = -ENOENT; 263 int ret = -ENOENT;
252 264
265 if (!proto_handlers(protocol) || !netproto(protocol))
266 return -EINVAL;
267
253 mutex_lock(&xfrm4_protocol_mutex); 268 mutex_lock(&xfrm4_protocol_mutex);
254 269
255 for (pprev = proto_handlers(protocol); 270 for (pprev = proto_handlers(protocol);