about summary refs log tree commit diff stats
path: root/net/ipv4
diff options
context:
space:
mode:
author Ingo Molnar <mingo@kernel.org> 2013-01-24 06:47:48 -0500
committer Ingo Molnar <mingo@kernel.org> 2013-01-24 06:47:48 -0500
commit befddb21c845f8fb49e637997891ef97c6a869dc (patch)
tree 0e7629123184f2dd50291ad6d477b894175f0f26 /net/ipv4
parent e716efde75267eab919cdb2bef5b2cb77f305326 (diff)
parent 7d1f9aeff1ee4a20b1aeb377dd0f579fe9647619 (diff)
Merge tag 'v3.8-rc4' into irq/core
Merge Linux 3.8-rc4 before pulling in new commits - we were on an old v3.7 base.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/af_inet.c 93
-rw-r--r-- net/ipv4/arp.c 12
-rw-r--r-- net/ipv4/devinet.c 200
-rw-r--r-- net/ipv4/fib_frontend.c 2
-rw-r--r-- net/ipv4/fib_semantics.c 2
-rw-r--r-- net/ipv4/icmp.c 3
-rw-r--r-- net/ipv4/inet_connection_sock.c 41
-rw-r--r-- net/ipv4/inet_diag.c 164
-rw-r--r-- net/ipv4/inet_hashtables.c 36
-rw-r--r-- net/ipv4/ip_fragment.c 23
-rw-r--r-- net/ipv4/ip_gre.c 45
-rw-r--r-- net/ipv4/ip_options.c 6
-rw-r--r-- net/ipv4/ip_output.c 4
-rw-r--r-- net/ipv4/ip_sockglue.c 42
-rw-r--r-- net/ipv4/ip_vti.c 31
-rw-r--r-- net/ipv4/ipconfig.c 14
-rw-r--r-- net/ipv4/ipip.c 271
-rw-r--r-- net/ipv4/ipmr.c 141
-rw-r--r-- net/ipv4/netfilter/arp_tables.c 8
-rw-r--r-- net/ipv4/netfilter/ip_tables.c 8
-rw-r--r-- net/ipv4/netfilter/ipt_CLUSTERIP.c 9
-rw-r--r-- net/ipv4/netfilter/ipt_REJECT.c 1
-rw-r--r-- net/ipv4/netfilter/iptable_nat.c 19
-rw-r--r-- net/ipv4/protocol.c 21
-rw-r--r-- net/ipv4/route.c 36
-rw-r--r-- net/ipv4/syncookies.c 2
-rw-r--r-- net/ipv4/sysctl_net_ipv4.c 3
-rw-r--r-- net/ipv4/tcp.c 59
-rw-r--r-- net/ipv4/tcp_cong.c 5
-rw-r--r-- net/ipv4/tcp_illinois.c 8
-rw-r--r-- net/ipv4/tcp_input.c 83
-rw-r--r-- net/ipv4/tcp_ipv4.c 44
-rw-r--r-- net/ipv4/tcp_metrics.c 14
-rw-r--r-- net/ipv4/tcp_minisocks.c 8
-rw-r--r-- net/ipv4/tcp_output.c 24
-rw-r--r-- net/ipv4/tcp_timer.c 8
-rw-r--r-- net/ipv4/xfrm4_policy.c 13
37 files changed, 1117 insertions, 386 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 766c59658563..24b384b7903e 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -346,7 +346,8 @@ lookup_protocol:
346 } 346 }
347 347
348 err = -EPERM; 348 err = -EPERM;
349 if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW)) 349 if (sock->type == SOCK_RAW && !kern &&
350 !ns_capable(net->user_ns, CAP_NET_RAW))
350 goto out_rcu_unlock; 351 goto out_rcu_unlock;
351 352
352 err = -EAFNOSUPPORT; 353 err = -EAFNOSUPPORT;
@@ -473,6 +474,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
473 struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; 474 struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
474 struct sock *sk = sock->sk; 475 struct sock *sk = sock->sk;
475 struct inet_sock *inet = inet_sk(sk); 476 struct inet_sock *inet = inet_sk(sk);
477 struct net *net = sock_net(sk);
476 unsigned short snum; 478 unsigned short snum;
477 int chk_addr_ret; 479 int chk_addr_ret;
478 int err; 480 int err;
@@ -496,7 +498,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
496 goto out; 498 goto out;
497 } 499 }
498 500
499 chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); 501 chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr);
500 502
501 /* Not specified by any standard per-se, however it breaks too 503 /* Not specified by any standard per-se, however it breaks too
502 * many applications when removed. It is unfortunate since 504 * many applications when removed. It is unfortunate since
@@ -516,7 +518,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
516 518
517 snum = ntohs(addr->sin_port); 519 snum = ntohs(addr->sin_port);
518 err = -EACCES; 520 err = -EACCES;
519 if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) 521 if (snum && snum < PROT_SOCK &&
522 !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
520 goto out; 523 goto out;
521 524
522 /* We keep a pair of addresses. rcv_saddr is the one 525 /* We keep a pair of addresses. rcv_saddr is the one
@@ -1251,7 +1254,7 @@ EXPORT_SYMBOL(inet_sk_rebuild_header);
1251 1254
1252static int inet_gso_send_check(struct sk_buff *skb) 1255static int inet_gso_send_check(struct sk_buff *skb)
1253{ 1256{
1254 const struct net_protocol *ops; 1257 const struct net_offload *ops;
1255 const struct iphdr *iph; 1258 const struct iphdr *iph;
1256 int proto; 1259 int proto;
1257 int ihl; 1260 int ihl;
@@ -1275,9 +1278,9 @@ static int inet_gso_send_check(struct sk_buff *skb)
1275 err = -EPROTONOSUPPORT; 1278 err = -EPROTONOSUPPORT;
1276 1279
1277 rcu_read_lock(); 1280 rcu_read_lock();
1278 ops = rcu_dereference(inet_protos[proto]); 1281 ops = rcu_dereference(inet_offloads[proto]);
1279 if (likely(ops && ops->gso_send_check)) 1282 if (likely(ops && ops->callbacks.gso_send_check))
1280 err = ops->gso_send_check(skb); 1283 err = ops->callbacks.gso_send_check(skb);
1281 rcu_read_unlock(); 1284 rcu_read_unlock();
1282 1285
1283out: 1286out:
@@ -1288,7 +1291,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
1288 netdev_features_t features) 1291 netdev_features_t features)
1289{ 1292{
1290 struct sk_buff *segs = ERR_PTR(-EINVAL); 1293 struct sk_buff *segs = ERR_PTR(-EINVAL);
1291 const struct net_protocol *ops; 1294 const struct net_offload *ops;
1292 struct iphdr *iph; 1295 struct iphdr *iph;
1293 int proto; 1296 int proto;
1294 int ihl; 1297 int ihl;
@@ -1325,9 +1328,9 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
1325 segs = ERR_PTR(-EPROTONOSUPPORT); 1328 segs = ERR_PTR(-EPROTONOSUPPORT);
1326 1329
1327 rcu_read_lock(); 1330 rcu_read_lock();
1328 ops = rcu_dereference(inet_protos[proto]); 1331 ops = rcu_dereference(inet_offloads[proto]);
1329 if (likely(ops && ops->gso_segment)) 1332 if (likely(ops && ops->callbacks.gso_segment))
1330 segs = ops->gso_segment(skb, features); 1333 segs = ops->callbacks.gso_segment(skb, features);
1331 rcu_read_unlock(); 1334 rcu_read_unlock();
1332 1335
1333 if (!segs || IS_ERR(segs)) 1336 if (!segs || IS_ERR(segs))
@@ -1356,7 +1359,7 @@ out:
1356static struct sk_buff **inet_gro_receive(struct sk_buff **head, 1359static struct sk_buff **inet_gro_receive(struct sk_buff **head,
1357 struct sk_buff *skb) 1360 struct sk_buff *skb)
1358{ 1361{
1359 const struct net_protocol *ops; 1362 const struct net_offload *ops;
1360 struct sk_buff **pp = NULL; 1363 struct sk_buff **pp = NULL;
1361 struct sk_buff *p; 1364 struct sk_buff *p;
1362 const struct iphdr *iph; 1365 const struct iphdr *iph;
@@ -1378,8 +1381,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
1378 proto = iph->protocol; 1381 proto = iph->protocol;
1379 1382
1380 rcu_read_lock(); 1383 rcu_read_lock();
1381 ops = rcu_dereference(inet_protos[proto]); 1384 ops = rcu_dereference(inet_offloads[proto]);
1382 if (!ops || !ops->gro_receive) 1385 if (!ops || !ops->callbacks.gro_receive)
1383 goto out_unlock; 1386 goto out_unlock;
1384 1387
1385 if (*(u8 *)iph != 0x45) 1388 if (*(u8 *)iph != 0x45)
@@ -1420,7 +1423,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
1420 skb_gro_pull(skb, sizeof(*iph)); 1423 skb_gro_pull(skb, sizeof(*iph));
1421 skb_set_transport_header(skb, skb_gro_offset(skb)); 1424 skb_set_transport_header(skb, skb_gro_offset(skb));
1422 1425
1423 pp = ops->gro_receive(head, skb); 1426 pp = ops->callbacks.gro_receive(head, skb);
1424 1427
1425out_unlock: 1428out_unlock:
1426 rcu_read_unlock(); 1429 rcu_read_unlock();
@@ -1435,7 +1438,7 @@ static int inet_gro_complete(struct sk_buff *skb)
1435{ 1438{
1436 __be16 newlen = htons(skb->len - skb_network_offset(skb)); 1439 __be16 newlen = htons(skb->len - skb_network_offset(skb));
1437 struct iphdr *iph = ip_hdr(skb); 1440 struct iphdr *iph = ip_hdr(skb);
1438 const struct net_protocol *ops; 1441 const struct net_offload *ops;
1439 int proto = iph->protocol; 1442 int proto = iph->protocol;
1440 int err = -ENOSYS; 1443 int err = -ENOSYS;
1441 1444
@@ -1443,11 +1446,11 @@ static int inet_gro_complete(struct sk_buff *skb)
1443 iph->tot_len = newlen; 1446 iph->tot_len = newlen;
1444 1447
1445 rcu_read_lock(); 1448 rcu_read_lock();
1446 ops = rcu_dereference(inet_protos[proto]); 1449 ops = rcu_dereference(inet_offloads[proto]);
1447 if (WARN_ON(!ops || !ops->gro_complete)) 1450 if (WARN_ON(!ops || !ops->callbacks.gro_complete))
1448 goto out_unlock; 1451 goto out_unlock;
1449 1452
1450 err = ops->gro_complete(skb); 1453 err = ops->callbacks.gro_complete(skb);
1451 1454
1452out_unlock: 1455out_unlock:
1453 rcu_read_unlock(); 1456 rcu_read_unlock();
@@ -1558,23 +1561,33 @@ static const struct net_protocol tcp_protocol = {
1558 .early_demux = tcp_v4_early_demux, 1561 .early_demux = tcp_v4_early_demux,
1559 .handler = tcp_v4_rcv, 1562 .handler = tcp_v4_rcv,
1560 .err_handler = tcp_v4_err, 1563 .err_handler = tcp_v4_err,
1561 .gso_send_check = tcp_v4_gso_send_check,
1562 .gso_segment = tcp_tso_segment,
1563 .gro_receive = tcp4_gro_receive,
1564 .gro_complete = tcp4_gro_complete,
1565 .no_policy = 1, 1564 .no_policy = 1,
1566 .netns_ok = 1, 1565 .netns_ok = 1,
1567}; 1566};
1568 1567
1568static const struct net_offload tcp_offload = {
1569 .callbacks = {
1570 .gso_send_check = tcp_v4_gso_send_check,
1571 .gso_segment = tcp_tso_segment,
1572 .gro_receive = tcp4_gro_receive,
1573 .gro_complete = tcp4_gro_complete,
1574 },
1575};
1576
1569static const struct net_protocol udp_protocol = { 1577static const struct net_protocol udp_protocol = {
1570 .handler = udp_rcv, 1578 .handler = udp_rcv,
1571 .err_handler = udp_err, 1579 .err_handler = udp_err,
1572 .gso_send_check = udp4_ufo_send_check,
1573 .gso_segment = udp4_ufo_fragment,
1574 .no_policy = 1, 1580 .no_policy = 1,
1575 .netns_ok = 1, 1581 .netns_ok = 1,
1576}; 1582};
1577 1583
1584static const struct net_offload udp_offload = {
1585 .callbacks = {
1586 .gso_send_check = udp4_ufo_send_check,
1587 .gso_segment = udp4_ufo_fragment,
1588 },
1589};
1590
1578static const struct net_protocol icmp_protocol = { 1591static const struct net_protocol icmp_protocol = {
1579 .handler = icmp_rcv, 1592 .handler = icmp_rcv,
1580 .err_handler = ping_err, 1593 .err_handler = ping_err,
@@ -1659,13 +1672,35 @@ static int ipv4_proc_init(void);
1659 * IP protocol layer initialiser 1672 * IP protocol layer initialiser
1660 */ 1673 */
1661 1674
1675static struct packet_offload ip_packet_offload __read_mostly = {
1676 .type = cpu_to_be16(ETH_P_IP),
1677 .callbacks = {
1678 .gso_send_check = inet_gso_send_check,
1679 .gso_segment = inet_gso_segment,
1680 .gro_receive = inet_gro_receive,
1681 .gro_complete = inet_gro_complete,
1682 },
1683};
1684
1685static int __init ipv4_offload_init(void)
1686{
1687 /*
1688 * Add offloads
1689 */
1690 if (inet_add_offload(&udp_offload, IPPROTO_UDP) < 0)
1691 pr_crit("%s: Cannot add UDP protocol offload\n", __func__);
1692 if (inet_add_offload(&tcp_offload, IPPROTO_TCP) < 0)
1693 pr_crit("%s: Cannot add TCP protocol offlaod\n", __func__);
1694
1695 dev_add_offload(&ip_packet_offload);
1696 return 0;
1697}
1698
1699fs_initcall(ipv4_offload_init);
1700
1662static struct packet_type ip_packet_type __read_mostly = { 1701static struct packet_type ip_packet_type __read_mostly = {
1663 .type = cpu_to_be16(ETH_P_IP), 1702 .type = cpu_to_be16(ETH_P_IP),
1664 .func = ip_rcv, 1703 .func = ip_rcv,
1665 .gso_send_check = inet_gso_send_check,
1666 .gso_segment = inet_gso_segment,
1667 .gro_receive = inet_gro_receive,
1668 .gro_complete = inet_gro_complete,
1669}; 1704};
1670 1705
1671static int __init inet_init(void) 1706static int __init inet_init(void)
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 47800459e4cb..9547a273b9e9 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -321,7 +321,7 @@ static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb)
321static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) 321static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
322{ 322{
323 __be32 saddr = 0; 323 __be32 saddr = 0;
324 u8 *dst_ha = NULL; 324 u8 dst_ha[MAX_ADDR_LEN], *dst_hw = NULL;
325 struct net_device *dev = neigh->dev; 325 struct net_device *dev = neigh->dev;
326 __be32 target = *(__be32 *)neigh->primary_key; 326 __be32 target = *(__be32 *)neigh->primary_key;
327 int probes = atomic_read(&neigh->probes); 327 int probes = atomic_read(&neigh->probes);
@@ -363,8 +363,8 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
363 if (probes < 0) { 363 if (probes < 0) {
364 if (!(neigh->nud_state & NUD_VALID)) 364 if (!(neigh->nud_state & NUD_VALID))
365 pr_debug("trying to ucast probe in NUD_INVALID\n"); 365 pr_debug("trying to ucast probe in NUD_INVALID\n");
366 dst_ha = neigh->ha; 366 neigh_ha_snapshot(dst_ha, neigh, dev);
367 read_lock_bh(&neigh->lock); 367 dst_hw = dst_ha;
368 } else { 368 } else {
369 probes -= neigh->parms->app_probes; 369 probes -= neigh->parms->app_probes;
370 if (probes < 0) { 370 if (probes < 0) {
@@ -376,9 +376,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
376 } 376 }
377 377
378 arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr, 378 arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr,
379 dst_ha, dev->dev_addr, NULL); 379 dst_hw, dev->dev_addr, NULL);
380 if (dst_ha)
381 read_unlock_bh(&neigh->lock);
382} 380}
383 381
384static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip) 382static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)
@@ -1161,7 +1159,7 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1161 switch (cmd) { 1159 switch (cmd) {
1162 case SIOCDARP: 1160 case SIOCDARP:
1163 case SIOCSARP: 1161 case SIOCSARP:
1164 if (!capable(CAP_NET_ADMIN)) 1162 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1165 return -EPERM; 1163 return -EPERM;
1166 case SIOCGARP: 1164 case SIOCGARP:
1167 err = copy_from_user(&r, arg, sizeof(struct arpreq)); 1165 err = copy_from_user(&r, arg, sizeof(struct arpreq));
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 2a6abc163ed2..a8e4f2665d5e 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -55,6 +55,7 @@
55#include <linux/sysctl.h> 55#include <linux/sysctl.h>
56#endif 56#endif
57#include <linux/kmod.h> 57#include <linux/kmod.h>
58#include <linux/netconf.h>
58 59
59#include <net/arp.h> 60#include <net/arp.h>
60#include <net/ip.h> 61#include <net/ip.h>
@@ -723,7 +724,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
723 724
724 case SIOCSIFFLAGS: 725 case SIOCSIFFLAGS:
725 ret = -EPERM; 726 ret = -EPERM;
726 if (!capable(CAP_NET_ADMIN)) 727 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
727 goto out; 728 goto out;
728 break; 729 break;
729 case SIOCSIFADDR: /* Set interface address (and family) */ 730 case SIOCSIFADDR: /* Set interface address (and family) */
@@ -731,7 +732,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
731 case SIOCSIFDSTADDR: /* Set the destination address */ 732 case SIOCSIFDSTADDR: /* Set the destination address */
732 case SIOCSIFNETMASK: /* Set the netmask for the interface */ 733 case SIOCSIFNETMASK: /* Set the netmask for the interface */
733 ret = -EPERM; 734 ret = -EPERM;
734 if (!capable(CAP_NET_ADMIN)) 735 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
735 goto out; 736 goto out;
736 ret = -EINVAL; 737 ret = -EINVAL;
737 if (sin->sin_family != AF_INET) 738 if (sin->sin_family != AF_INET)
@@ -822,9 +823,9 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
822 if (!ifa) { 823 if (!ifa) {
823 ret = -ENOBUFS; 824 ret = -ENOBUFS;
824 ifa = inet_alloc_ifa(); 825 ifa = inet_alloc_ifa();
825 INIT_HLIST_NODE(&ifa->hash);
826 if (!ifa) 826 if (!ifa)
827 break; 827 break;
828 INIT_HLIST_NODE(&ifa->hash);
828 if (colon) 829 if (colon)
829 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ); 830 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
830 else 831 else
@@ -1442,6 +1443,155 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1442 return 0; 1443 return 0;
1443} 1444}
1444 1445
1446static int inet_netconf_msgsize_devconf(int type)
1447{
1448 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1449 + nla_total_size(4); /* NETCONFA_IFINDEX */
1450
1451 /* type -1 is used for ALL */
1452 if (type == -1 || type == NETCONFA_FORWARDING)
1453 size += nla_total_size(4);
1454 if (type == -1 || type == NETCONFA_RP_FILTER)
1455 size += nla_total_size(4);
1456 if (type == -1 || type == NETCONFA_MC_FORWARDING)
1457 size += nla_total_size(4);
1458
1459 return size;
1460}
1461
1462static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1463 struct ipv4_devconf *devconf, u32 portid,
1464 u32 seq, int event, unsigned int flags,
1465 int type)
1466{
1467 struct nlmsghdr *nlh;
1468 struct netconfmsg *ncm;
1469
1470 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1471 flags);
1472 if (nlh == NULL)
1473 return -EMSGSIZE;
1474
1475 ncm = nlmsg_data(nlh);
1476 ncm->ncm_family = AF_INET;
1477
1478 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1479 goto nla_put_failure;
1480
1481 /* type -1 is used for ALL */
1482 if ((type == -1 || type == NETCONFA_FORWARDING) &&
1483 nla_put_s32(skb, NETCONFA_FORWARDING,
1484 IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1485 goto nla_put_failure;
1486 if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1487 nla_put_s32(skb, NETCONFA_RP_FILTER,
1488 IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1489 goto nla_put_failure;
1490 if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1491 nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1492 IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1493 goto nla_put_failure;
1494
1495 return nlmsg_end(skb, nlh);
1496
1497nla_put_failure:
1498 nlmsg_cancel(skb, nlh);
1499 return -EMSGSIZE;
1500}
1501
1502void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1503 struct ipv4_devconf *devconf)
1504{
1505 struct sk_buff *skb;
1506 int err = -ENOBUFS;
1507
1508 skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1509 if (skb == NULL)
1510 goto errout;
1511
1512 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1513 RTM_NEWNETCONF, 0, type);
1514 if (err < 0) {
1515 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1516 WARN_ON(err == -EMSGSIZE);
1517 kfree_skb(skb);
1518 goto errout;
1519 }
1520 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1521 return;
1522errout:
1523 if (err < 0)
1524 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1525}
1526
1527static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1528 [NETCONFA_IFINDEX] = { .len = sizeof(int) },
1529 [NETCONFA_FORWARDING] = { .len = sizeof(int) },
1530 [NETCONFA_RP_FILTER] = { .len = sizeof(int) },
1531};
1532
1533static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1534 struct nlmsghdr *nlh,
1535 void *arg)
1536{
1537 struct net *net = sock_net(in_skb->sk);
1538 struct nlattr *tb[NETCONFA_MAX+1];
1539 struct netconfmsg *ncm;
1540 struct sk_buff *skb;
1541 struct ipv4_devconf *devconf;
1542 struct in_device *in_dev;
1543 struct net_device *dev;
1544 int ifindex;
1545 int err;
1546
1547 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1548 devconf_ipv4_policy);
1549 if (err < 0)
1550 goto errout;
1551
1552 err = EINVAL;
1553 if (!tb[NETCONFA_IFINDEX])
1554 goto errout;
1555
1556 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1557 switch (ifindex) {
1558 case NETCONFA_IFINDEX_ALL:
1559 devconf = net->ipv4.devconf_all;
1560 break;
1561 case NETCONFA_IFINDEX_DEFAULT:
1562 devconf = net->ipv4.devconf_dflt;
1563 break;
1564 default:
1565 dev = __dev_get_by_index(net, ifindex);
1566 if (dev == NULL)
1567 goto errout;
1568 in_dev = __in_dev_get_rtnl(dev);
1569 if (in_dev == NULL)
1570 goto errout;
1571 devconf = &in_dev->cnf;
1572 break;
1573 }
1574
1575 err = -ENOBUFS;
1576 skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1577 if (skb == NULL)
1578 goto errout;
1579
1580 err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1581 NETLINK_CB(in_skb).portid,
1582 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1583 -1);
1584 if (err < 0) {
1585 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1586 WARN_ON(err == -EMSGSIZE);
1587 kfree_skb(skb);
1588 goto errout;
1589 }
1590 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1591errout:
1592 return err;
1593}
1594
1445#ifdef CONFIG_SYSCTL 1595#ifdef CONFIG_SYSCTL
1446 1596
1447static void devinet_copy_dflt_conf(struct net *net, int i) 1597static void devinet_copy_dflt_conf(struct net *net, int i)
@@ -1467,6 +1617,12 @@ static void inet_forward_change(struct net *net)
1467 1617
1468 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; 1618 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1469 IPV4_DEVCONF_DFLT(net, FORWARDING) = on; 1619 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1620 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1621 NETCONFA_IFINDEX_ALL,
1622 net->ipv4.devconf_all);
1623 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1624 NETCONFA_IFINDEX_DEFAULT,
1625 net->ipv4.devconf_dflt);
1470 1626
1471 for_each_netdev(net, dev) { 1627 for_each_netdev(net, dev) {
1472 struct in_device *in_dev; 1628 struct in_device *in_dev;
@@ -1474,8 +1630,11 @@ static void inet_forward_change(struct net *net)
1474 dev_disable_lro(dev); 1630 dev_disable_lro(dev);
1475 rcu_read_lock(); 1631 rcu_read_lock();
1476 in_dev = __in_dev_get_rcu(dev); 1632 in_dev = __in_dev_get_rcu(dev);
1477 if (in_dev) 1633 if (in_dev) {
1478 IN_DEV_CONF_SET(in_dev, FORWARDING, on); 1634 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1635 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1636 dev->ifindex, &in_dev->cnf);
1637 }
1479 rcu_read_unlock(); 1638 rcu_read_unlock();
1480 } 1639 }
1481} 1640}
@@ -1501,6 +1660,23 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
1501 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1) 1660 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1502 if ((new_value == 0) && (old_value != 0)) 1661 if ((new_value == 0) && (old_value != 0))
1503 rt_cache_flush(net); 1662 rt_cache_flush(net);
1663 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1664 new_value != old_value) {
1665 int ifindex;
1666
1667 if (cnf == net->ipv4.devconf_dflt)
1668 ifindex = NETCONFA_IFINDEX_DEFAULT;
1669 else if (cnf == net->ipv4.devconf_all)
1670 ifindex = NETCONFA_IFINDEX_ALL;
1671 else {
1672 struct in_device *idev =
1673 container_of(cnf, struct in_device,
1674 cnf);
1675 ifindex = idev->dev->ifindex;
1676 }
1677 inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1678 ifindex, cnf);
1679 }
1504 } 1680 }
1505 1681
1506 return ret; 1682 return ret;
@@ -1527,15 +1703,23 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
1527 } 1703 }
1528 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { 1704 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1529 inet_forward_change(net); 1705 inet_forward_change(net);
1530 } else if (*valp) { 1706 } else {
1531 struct ipv4_devconf *cnf = ctl->extra1; 1707 struct ipv4_devconf *cnf = ctl->extra1;
1532 struct in_device *idev = 1708 struct in_device *idev =
1533 container_of(cnf, struct in_device, cnf); 1709 container_of(cnf, struct in_device, cnf);
1534 dev_disable_lro(idev->dev); 1710 if (*valp)
1711 dev_disable_lro(idev->dev);
1712 inet_netconf_notify_devconf(net,
1713 NETCONFA_FORWARDING,
1714 idev->dev->ifindex,
1715 cnf);
1535 } 1716 }
1536 rtnl_unlock(); 1717 rtnl_unlock();
1537 rt_cache_flush(net); 1718 rt_cache_flush(net);
1538 } 1719 } else
1720 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1721 NETCONFA_IFINDEX_DEFAULT,
1722 net->ipv4.devconf_dflt);
1539 } 1723 }
1540 1724
1541 return ret; 1725 return ret;
@@ -1809,5 +1993,7 @@ void __init devinet_init(void)
1809 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL); 1993 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1810 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL); 1994 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1811 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL); 1995 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
1996 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
1997 NULL, NULL);
1812} 1998}
1813 1999
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 825c608826de..5cd75e2dab2c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -488,7 +488,7 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
488 switch (cmd) { 488 switch (cmd) {
489 case SIOCADDRT: /* Add a route */ 489 case SIOCADDRT: /* Add a route */
490 case SIOCDELRT: /* Delete a route */ 490 case SIOCDELRT: /* Delete a route */
491 if (!capable(CAP_NET_ADMIN)) 491 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
492 return -EPERM; 492 return -EPERM;
493 493
494 if (copy_from_user(&rt, arg, sizeof(rt))) 494 if (copy_from_user(&rt, arg, sizeof(rt)))
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 71b125cd5db1..4797a800faf8 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -803,7 +803,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
803 unsigned int bytes; 803 unsigned int bytes;
804 804
805 if (!new_size) 805 if (!new_size)
806 new_size = 1; 806 new_size = 16;
807 bytes = new_size * sizeof(struct hlist_head *); 807 bytes = new_size * sizeof(struct hlist_head *);
808 new_info_hash = fib_info_hash_alloc(bytes); 808 new_info_hash = fib_info_hash_alloc(bytes);
809 new_laddrhash = fib_info_hash_alloc(bytes); 809 new_laddrhash = fib_info_hash_alloc(bytes);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index f2eccd531746..17ff9fd7cdda 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -257,7 +257,8 @@ static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
257 struct inet_peer *peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, 1); 257 struct inet_peer *peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, 1);
258 rc = inet_peer_xrlim_allow(peer, 258 rc = inet_peer_xrlim_allow(peer,
259 net->ipv4.sysctl_icmp_ratelimit); 259 net->ipv4.sysctl_icmp_ratelimit);
260 inet_putpeer(peer); 260 if (peer)
261 inet_putpeer(peer);
261 } 262 }
262out: 263out:
263 return rc; 264 return rc;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index d34ce2972c8f..d0670f00d524 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -521,21 +521,31 @@ static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
521 int *expire, int *resend) 521 int *expire, int *resend)
522{ 522{
523 if (!rskq_defer_accept) { 523 if (!rskq_defer_accept) {
524 *expire = req->retrans >= thresh; 524 *expire = req->num_timeout >= thresh;
525 *resend = 1; 525 *resend = 1;
526 return; 526 return;
527 } 527 }
528 *expire = req->retrans >= thresh && 528 *expire = req->num_timeout >= thresh &&
529 (!inet_rsk(req)->acked || req->retrans >= max_retries); 529 (!inet_rsk(req)->acked || req->num_timeout >= max_retries);
530 /* 530 /*
531 * Do not resend while waiting for data after ACK, 531 * Do not resend while waiting for data after ACK,
532 * start to resend on end of deferring period to give 532 * start to resend on end of deferring period to give
533 * last chance for data or ACK to create established socket. 533 * last chance for data or ACK to create established socket.
534 */ 534 */
535 *resend = !inet_rsk(req)->acked || 535 *resend = !inet_rsk(req)->acked ||
536 req->retrans >= rskq_defer_accept - 1; 536 req->num_timeout >= rskq_defer_accept - 1;
537} 537}
538 538
539int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req)
540{
541 int err = req->rsk_ops->rtx_syn_ack(parent, req, NULL);
542
543 if (!err)
544 req->num_retrans++;
545 return err;
546}
547EXPORT_SYMBOL(inet_rtx_syn_ack);
548
539void inet_csk_reqsk_queue_prune(struct sock *parent, 549void inet_csk_reqsk_queue_prune(struct sock *parent,
540 const unsigned long interval, 550 const unsigned long interval,
541 const unsigned long timeout, 551 const unsigned long timeout,
@@ -599,13 +609,14 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
599 req->rsk_ops->syn_ack_timeout(parent, req); 609 req->rsk_ops->syn_ack_timeout(parent, req);
600 if (!expire && 610 if (!expire &&
601 (!resend || 611 (!resend ||
602 !req->rsk_ops->rtx_syn_ack(parent, req, NULL) || 612 !inet_rtx_syn_ack(parent, req) ||
603 inet_rsk(req)->acked)) { 613 inet_rsk(req)->acked)) {
604 unsigned long timeo; 614 unsigned long timeo;
605 615
606 if (req->retrans++ == 0) 616 if (req->num_timeout++ == 0)
607 lopt->qlen_young--; 617 lopt->qlen_young--;
608 timeo = min((timeout << req->retrans), max_rto); 618 timeo = min(timeout << req->num_timeout,
619 max_rto);
609 req->expires = now + timeo; 620 req->expires = now + timeo;
610 reqp = &req->dl_next; 621 reqp = &req->dl_next;
611 continue; 622 continue;
@@ -699,6 +710,22 @@ void inet_csk_destroy_sock(struct sock *sk)
699} 710}
700EXPORT_SYMBOL(inet_csk_destroy_sock); 711EXPORT_SYMBOL(inet_csk_destroy_sock);
701 712
713/* This function allows to force a closure of a socket after the call to
714 * tcp/dccp_create_openreq_child().
715 */
716void inet_csk_prepare_forced_close(struct sock *sk)
717{
718 /* sk_clone_lock locked the socket and set refcnt to 2 */
719 bh_unlock_sock(sk);
720 sock_put(sk);
721
722 /* The below has to be done to allow calling inet_csk_destroy_sock */
723 sock_set_flag(sk, SOCK_DEAD);
724 percpu_counter_inc(sk->sk_prot->orphan_count);
725 inet_sk(sk)->inet_num = 0;
726}
727EXPORT_SYMBOL(inet_csk_prepare_forced_close);
728
702int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) 729int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
703{ 730{
704 struct inet_sock *inet = inet_sk(sk); 731 struct inet_sock *inet = inet_sk(sk);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 535584c00f91..7afa2c3c788f 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -44,6 +44,10 @@ struct inet_diag_entry {
44 u16 dport; 44 u16 dport;
45 u16 family; 45 u16 family;
46 u16 userlocks; 46 u16 userlocks;
47#if IS_ENABLED(CONFIG_IPV6)
48 struct in6_addr saddr_storage; /* for IPv4-mapped-IPv6 addresses */
49 struct in6_addr daddr_storage; /* for IPv4-mapped-IPv6 addresses */
50#endif
47}; 51};
48 52
49static DEFINE_MUTEX(inet_diag_table_mutex); 53static DEFINE_MUTEX(inet_diag_table_mutex);
@@ -105,6 +109,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
105 r->id.idiag_src[0] = inet->inet_rcv_saddr; 109 r->id.idiag_src[0] = inet->inet_rcv_saddr;
106 r->id.idiag_dst[0] = inet->inet_daddr; 110 r->id.idiag_dst[0] = inet->inet_daddr;
107 111
112 if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown))
113 goto errout;
114
108 /* IPv6 dual-stack sockets use inet->tos for IPv4 connections, 115 /* IPv6 dual-stack sockets use inet->tos for IPv4 connections,
109 * hence this needs to be included regardless of socket family. 116 * hence this needs to be included regardless of socket family.
110 */ 117 */
@@ -428,25 +435,31 @@ static int inet_diag_bc_run(const struct nlattr *_bc,
428 break; 435 break;
429 } 436 }
430 437
431 if (cond->prefix_len == 0)
432 break;
433
434 if (op->code == INET_DIAG_BC_S_COND) 438 if (op->code == INET_DIAG_BC_S_COND)
435 addr = entry->saddr; 439 addr = entry->saddr;
436 else 440 else
437 addr = entry->daddr; 441 addr = entry->daddr;
438 442
443 if (cond->family != AF_UNSPEC &&
444 cond->family != entry->family) {
445 if (entry->family == AF_INET6 &&
446 cond->family == AF_INET) {
447 if (addr[0] == 0 && addr[1] == 0 &&
448 addr[2] == htonl(0xffff) &&
449 bitstring_match(addr + 3,
450 cond->addr,
451 cond->prefix_len))
452 break;
453 }
454 yes = 0;
455 break;
456 }
457
458 if (cond->prefix_len == 0)
459 break;
439 if (bitstring_match(addr, cond->addr, 460 if (bitstring_match(addr, cond->addr,
440 cond->prefix_len)) 461 cond->prefix_len))
441 break; 462 break;
442 if (entry->family == AF_INET6 &&
443 cond->family == AF_INET) {
444 if (addr[0] == 0 && addr[1] == 0 &&
445 addr[2] == htonl(0xffff) &&
446 bitstring_match(addr + 3, cond->addr,
447 cond->prefix_len))
448 break;
449 }
450 yes = 0; 463 yes = 0;
451 break; 464 break;
452 } 465 }
@@ -509,6 +522,55 @@ static int valid_cc(const void *bc, int len, int cc)
509 return 0; 522 return 0;
510} 523}
511 524
525/* Validate an inet_diag_hostcond. */
526static bool valid_hostcond(const struct inet_diag_bc_op *op, int len,
527 int *min_len)
528{
529 int addr_len;
530 struct inet_diag_hostcond *cond;
531
532 /* Check hostcond space. */
533 *min_len += sizeof(struct inet_diag_hostcond);
534 if (len < *min_len)
535 return false;
536 cond = (struct inet_diag_hostcond *)(op + 1);
537
538 /* Check address family and address length. */
539 switch (cond->family) {
540 case AF_UNSPEC:
541 addr_len = 0;
542 break;
543 case AF_INET:
544 addr_len = sizeof(struct in_addr);
545 break;
546 case AF_INET6:
547 addr_len = sizeof(struct in6_addr);
548 break;
549 default:
550 return false;
551 }
552 *min_len += addr_len;
553 if (len < *min_len)
554 return false;
555
556 /* Check prefix length (in bits) vs address length (in bytes). */
557 if (cond->prefix_len > 8 * addr_len)
558 return false;
559
560 return true;
561}
562
563/* Validate a port comparison operator. */
564static inline bool valid_port_comparison(const struct inet_diag_bc_op *op,
565 int len, int *min_len)
566{
567 /* Port comparisons put the port in a follow-on inet_diag_bc_op. */
568 *min_len += sizeof(struct inet_diag_bc_op);
569 if (len < *min_len)
570 return false;
571 return true;
572}
573
512static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) 574static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
513{ 575{
514 const void *bc = bytecode; 576 const void *bc = bytecode;
@@ -516,29 +578,39 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
516 578
517 while (len > 0) { 579 while (len > 0) {
518 const struct inet_diag_bc_op *op = bc; 580 const struct inet_diag_bc_op *op = bc;
581 int min_len = sizeof(struct inet_diag_bc_op);
519 582
520//printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len); 583//printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len);
521 switch (op->code) { 584 switch (op->code) {
522 case INET_DIAG_BC_AUTO:
523 case INET_DIAG_BC_S_COND: 585 case INET_DIAG_BC_S_COND:
524 case INET_DIAG_BC_D_COND: 586 case INET_DIAG_BC_D_COND:
587 if (!valid_hostcond(bc, len, &min_len))
588 return -EINVAL;
589 break;
525 case INET_DIAG_BC_S_GE: 590 case INET_DIAG_BC_S_GE:
526 case INET_DIAG_BC_S_LE: 591 case INET_DIAG_BC_S_LE:
527 case INET_DIAG_BC_D_GE: 592 case INET_DIAG_BC_D_GE:
528 case INET_DIAG_BC_D_LE: 593 case INET_DIAG_BC_D_LE:
529 case INET_DIAG_BC_JMP: 594 if (!valid_port_comparison(bc, len, &min_len))
530 if (op->no < 4 || op->no > len + 4 || op->no & 3)
531 return -EINVAL;
532 if (op->no < len &&
533 !valid_cc(bytecode, bytecode_len, len - op->no))
534 return -EINVAL; 595 return -EINVAL;
535 break; 596 break;
597 case INET_DIAG_BC_AUTO:
598 case INET_DIAG_BC_JMP:
536 case INET_DIAG_BC_NOP: 599 case INET_DIAG_BC_NOP:
537 break; 600 break;
538 default: 601 default:
539 return -EINVAL; 602 return -EINVAL;
540 } 603 }
541 if (op->yes < 4 || op->yes > len + 4 || op->yes & 3) 604
605 if (op->code != INET_DIAG_BC_NOP) {
606 if (op->no < min_len || op->no > len + 4 || op->no & 3)
607 return -EINVAL;
608 if (op->no < len &&
609 !valid_cc(bytecode, bytecode_len, len - op->no))
610 return -EINVAL;
611 }
612
613 if (op->yes < min_len || op->yes > len + 4 || op->yes & 3)
542 return -EINVAL; 614 return -EINVAL;
543 bc += op->yes; 615 bc += op->yes;
544 len -= op->yes; 616 len -= op->yes;
@@ -596,6 +668,36 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
596 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); 668 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
597} 669}
598 670
671/* Get the IPv4, IPv6, or IPv4-mapped-IPv6 local and remote addresses
672 * from a request_sock. For IPv4-mapped-IPv6 we must map IPv4 to IPv6.
673 */
674static inline void inet_diag_req_addrs(const struct sock *sk,
675 const struct request_sock *req,
676 struct inet_diag_entry *entry)
677{
678 struct inet_request_sock *ireq = inet_rsk(req);
679
680#if IS_ENABLED(CONFIG_IPV6)
681 if (sk->sk_family == AF_INET6) {
682 if (req->rsk_ops->family == AF_INET6) {
683 entry->saddr = inet6_rsk(req)->loc_addr.s6_addr32;
684 entry->daddr = inet6_rsk(req)->rmt_addr.s6_addr32;
685 } else if (req->rsk_ops->family == AF_INET) {
686 ipv6_addr_set_v4mapped(ireq->loc_addr,
687 &entry->saddr_storage);
688 ipv6_addr_set_v4mapped(ireq->rmt_addr,
689 &entry->daddr_storage);
690 entry->saddr = entry->saddr_storage.s6_addr32;
691 entry->daddr = entry->daddr_storage.s6_addr32;
692 }
693 } else
694#endif
695 {
696 entry->saddr = &ireq->loc_addr;
697 entry->daddr = &ireq->rmt_addr;
698 }
699}
700
599static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, 701static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
600 struct request_sock *req, 702 struct request_sock *req,
601 struct user_namespace *user_ns, 703 struct user_namespace *user_ns,
@@ -617,7 +719,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
617 r->idiag_family = sk->sk_family; 719 r->idiag_family = sk->sk_family;
618 r->idiag_state = TCP_SYN_RECV; 720 r->idiag_state = TCP_SYN_RECV;
619 r->idiag_timer = 1; 721 r->idiag_timer = 1;
620 r->idiag_retrans = req->retrans; 722 r->idiag_retrans = req->num_retrans;
621 723
622 r->id.idiag_if = sk->sk_bound_dev_if; 724 r->id.idiag_if = sk->sk_bound_dev_if;
623 sock_diag_save_cookie(req, r->id.idiag_cookie); 725 sock_diag_save_cookie(req, r->id.idiag_cookie);
@@ -637,8 +739,10 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
637 r->idiag_inode = 0; 739 r->idiag_inode = 0;
638#if IS_ENABLED(CONFIG_IPV6) 740#if IS_ENABLED(CONFIG_IPV6)
639 if (r->idiag_family == AF_INET6) { 741 if (r->idiag_family == AF_INET6) {
640 *(struct in6_addr *)r->id.idiag_src = inet6_rsk(req)->loc_addr; 742 struct inet_diag_entry entry;
641 *(struct in6_addr *)r->id.idiag_dst = inet6_rsk(req)->rmt_addr; 743 inet_diag_req_addrs(sk, req, &entry);
744 memcpy(r->id.idiag_src, entry.saddr, sizeof(struct in6_addr));
745 memcpy(r->id.idiag_dst, entry.daddr, sizeof(struct in6_addr));
642 } 746 }
643#endif 747#endif
644 748
@@ -691,18 +795,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
691 continue; 795 continue;
692 796
693 if (bc) { 797 if (bc) {
694 entry.saddr = 798 inet_diag_req_addrs(sk, req, &entry);
695#if IS_ENABLED(CONFIG_IPV6)
696 (entry.family == AF_INET6) ?
697 inet6_rsk(req)->loc_addr.s6_addr32 :
698#endif
699 &ireq->loc_addr;
700 entry.daddr =
701#if IS_ENABLED(CONFIG_IPV6)
702 (entry.family == AF_INET6) ?
703 inet6_rsk(req)->rmt_addr.s6_addr32 :
704#endif
705 &ireq->rmt_addr;
706 entry.dport = ntohs(ireq->rmt_port); 799 entry.dport = ntohs(ireq->rmt_port);
707 800
708 if (!inet_diag_bc_run(bc, &entry)) 801 if (!inet_diag_bc_run(bc, &entry))
@@ -892,13 +985,16 @@ static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
892 struct inet_diag_req_v2 *r, struct nlattr *bc) 985 struct inet_diag_req_v2 *r, struct nlattr *bc)
893{ 986{
894 const struct inet_diag_handler *handler; 987 const struct inet_diag_handler *handler;
988 int err = 0;
895 989
896 handler = inet_diag_lock_handler(r->sdiag_protocol); 990 handler = inet_diag_lock_handler(r->sdiag_protocol);
897 if (!IS_ERR(handler)) 991 if (!IS_ERR(handler))
898 handler->dump(skb, cb, r, bc); 992 handler->dump(skb, cb, r, bc);
993 else
994 err = PTR_ERR(handler);
899 inet_diag_unlock_handler(handler); 995 inet_diag_unlock_handler(handler);
900 996
901 return skb->len; 997 return err ? : skb->len;
902} 998}
903 999
904static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) 1000static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 7880af970208..fa3ae8148710 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -237,12 +237,14 @@ struct sock *__inet_lookup_established(struct net *net,
237 rcu_read_lock(); 237 rcu_read_lock();
238begin: 238begin:
239 sk_nulls_for_each_rcu(sk, node, &head->chain) { 239 sk_nulls_for_each_rcu(sk, node, &head->chain) {
240 if (INET_MATCH(sk, net, hash, acookie, 240 if (sk->sk_hash != hash)
241 saddr, daddr, ports, dif)) { 241 continue;
242 if (likely(INET_MATCH(sk, net, acookie,
243 saddr, daddr, ports, dif))) {
242 if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) 244 if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
243 goto begintw; 245 goto begintw;
244 if (unlikely(!INET_MATCH(sk, net, hash, acookie, 246 if (unlikely(!INET_MATCH(sk, net, acookie,
245 saddr, daddr, ports, dif))) { 247 saddr, daddr, ports, dif))) {
246 sock_put(sk); 248 sock_put(sk);
247 goto begin; 249 goto begin;
248 } 250 }
@@ -260,14 +262,18 @@ begin:
260begintw: 262begintw:
261 /* Must check for a TIME_WAIT'er before going to listener hash. */ 263 /* Must check for a TIME_WAIT'er before going to listener hash. */
262 sk_nulls_for_each_rcu(sk, node, &head->twchain) { 264 sk_nulls_for_each_rcu(sk, node, &head->twchain) {
263 if (INET_TW_MATCH(sk, net, hash, acookie, 265 if (sk->sk_hash != hash)
264 saddr, daddr, ports, dif)) { 266 continue;
267 if (likely(INET_TW_MATCH(sk, net, acookie,
268 saddr, daddr, ports,
269 dif))) {
265 if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { 270 if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) {
266 sk = NULL; 271 sk = NULL;
267 goto out; 272 goto out;
268 } 273 }
269 if (unlikely(!INET_TW_MATCH(sk, net, hash, acookie, 274 if (unlikely(!INET_TW_MATCH(sk, net, acookie,
270 saddr, daddr, ports, dif))) { 275 saddr, daddr, ports,
276 dif))) {
271 sock_put(sk); 277 sock_put(sk);
272 goto begintw; 278 goto begintw;
273 } 279 }
@@ -314,10 +320,12 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
314 320
315 /* Check TIME-WAIT sockets first. */ 321 /* Check TIME-WAIT sockets first. */
316 sk_nulls_for_each(sk2, node, &head->twchain) { 322 sk_nulls_for_each(sk2, node, &head->twchain) {
317 tw = inet_twsk(sk2); 323 if (sk2->sk_hash != hash)
324 continue;
318 325
319 if (INET_TW_MATCH(sk2, net, hash, acookie, 326 if (likely(INET_TW_MATCH(sk2, net, acookie,
320 saddr, daddr, ports, dif)) { 327 saddr, daddr, ports, dif))) {
328 tw = inet_twsk(sk2);
321 if (twsk_unique(sk, sk2, twp)) 329 if (twsk_unique(sk, sk2, twp))
322 goto unique; 330 goto unique;
323 else 331 else
@@ -328,8 +336,10 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
328 336
329 /* And established part... */ 337 /* And established part... */
330 sk_nulls_for_each(sk2, node, &head->chain) { 338 sk_nulls_for_each(sk2, node, &head->chain) {
331 if (INET_MATCH(sk2, net, hash, acookie, 339 if (sk2->sk_hash != hash)
332 saddr, daddr, ports, dif)) 340 continue;
341 if (likely(INET_MATCH(sk2, net, acookie,
342 saddr, daddr, ports, dif)))
333 goto not_unique; 343 goto not_unique;
334 } 344 }
335 345
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 448e68546827..eb9d63a570cd 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -707,28 +707,27 @@ EXPORT_SYMBOL(ip_defrag);
707 707
708struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user) 708struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user)
709{ 709{
710 const struct iphdr *iph; 710 struct iphdr iph;
711 u32 len; 711 u32 len;
712 712
713 if (skb->protocol != htons(ETH_P_IP)) 713 if (skb->protocol != htons(ETH_P_IP))
714 return skb; 714 return skb;
715 715
716 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 716 if (!skb_copy_bits(skb, 0, &iph, sizeof(iph)))
717 return skb; 717 return skb;
718 718
719 iph = ip_hdr(skb); 719 if (iph.ihl < 5 || iph.version != 4)
720 if (iph->ihl < 5 || iph->version != 4)
721 return skb; 720 return skb;
722 if (!pskb_may_pull(skb, iph->ihl*4)) 721
723 return skb; 722 len = ntohs(iph.tot_len);
724 iph = ip_hdr(skb); 723 if (skb->len < len || len < (iph.ihl * 4))
725 len = ntohs(iph->tot_len);
726 if (skb->len < len || len < (iph->ihl * 4))
727 return skb; 724 return skb;
728 725
729 if (ip_is_fragment(ip_hdr(skb))) { 726 if (ip_is_fragment(&iph)) {
730 skb = skb_share_check(skb, GFP_ATOMIC); 727 skb = skb_share_check(skb, GFP_ATOMIC);
731 if (skb) { 728 if (skb) {
729 if (!pskb_may_pull(skb, iph.ihl*4))
730 return skb;
732 if (pskb_trim_rcsum(skb, len)) 731 if (pskb_trim_rcsum(skb, len))
733 return skb; 732 return skb;
734 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); 733 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
@@ -802,6 +801,10 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net)
802 table[0].data = &net->ipv4.frags.high_thresh; 801 table[0].data = &net->ipv4.frags.high_thresh;
803 table[1].data = &net->ipv4.frags.low_thresh; 802 table[1].data = &net->ipv4.frags.low_thresh;
804 table[2].data = &net->ipv4.frags.timeout; 803 table[2].data = &net->ipv4.frags.timeout;
804
805 /* Don't export sysctls to unprivileged users */
806 if (net->user_ns != &init_user_ns)
807 table[0].procname = NULL;
805 } 808 }
806 809
807 hdr = register_net_sysctl(net, "net/ipv4", table); 810 hdr = register_net_sysctl(net, "net/ipv4", table);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 7240f8e2dd45..303012adf9e6 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -164,21 +164,6 @@ struct ipgre_net {
164#define tunnels_r tunnels[2] 164#define tunnels_r tunnels[2]
165#define tunnels_l tunnels[1] 165#define tunnels_l tunnels[1]
166#define tunnels_wc tunnels[0] 166#define tunnels_wc tunnels[0]
167/*
168 * Locking : hash tables are protected by RCU and RTNL
169 */
170
171#define for_each_ip_tunnel_rcu(start) \
172 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
173
174/* often modified stats are per cpu, other are shared (netdev->stats) */
175struct pcpu_tstats {
176 u64 rx_packets;
177 u64 rx_bytes;
178 u64 tx_packets;
179 u64 tx_bytes;
180 struct u64_stats_sync syncp;
181};
182 167
183static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev, 168static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
184 struct rtnl_link_stats64 *tot) 169 struct rtnl_link_stats64 *tot)
@@ -250,7 +235,7 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
250 ARPHRD_ETHER : ARPHRD_IPGRE; 235 ARPHRD_ETHER : ARPHRD_IPGRE;
251 int score, cand_score = 4; 236 int score, cand_score = 4;
252 237
253 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) { 238 for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) {
254 if (local != t->parms.iph.saddr || 239 if (local != t->parms.iph.saddr ||
255 remote != t->parms.iph.daddr || 240 remote != t->parms.iph.daddr ||
256 !(t->dev->flags & IFF_UP)) 241 !(t->dev->flags & IFF_UP))
@@ -277,7 +262,7 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
277 } 262 }
278 } 263 }
279 264
280 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) { 265 for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) {
281 if (remote != t->parms.iph.daddr || 266 if (remote != t->parms.iph.daddr ||
282 !(t->dev->flags & IFF_UP)) 267 !(t->dev->flags & IFF_UP))
283 continue; 268 continue;
@@ -303,7 +288,7 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
303 } 288 }
304 } 289 }
305 290
306 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) { 291 for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) {
307 if ((local != t->parms.iph.saddr && 292 if ((local != t->parms.iph.saddr &&
308 (local != t->parms.iph.daddr || 293 (local != t->parms.iph.daddr ||
309 !ipv4_is_multicast(local))) || 294 !ipv4_is_multicast(local))) ||
@@ -331,7 +316,7 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
331 } 316 }
332 } 317 }
333 318
334 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) { 319 for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) {
335 if (t->parms.i_key != key || 320 if (t->parms.i_key != key ||
336 !(t->dev->flags & IFF_UP)) 321 !(t->dev->flags & IFF_UP))
337 continue; 322 continue;
@@ -753,7 +738,6 @@ drop:
753static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 738static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
754{ 739{
755 struct ip_tunnel *tunnel = netdev_priv(dev); 740 struct ip_tunnel *tunnel = netdev_priv(dev);
756 struct pcpu_tstats *tstats;
757 const struct iphdr *old_iph = ip_hdr(skb); 741 const struct iphdr *old_iph = ip_hdr(skb);
758 const struct iphdr *tiph; 742 const struct iphdr *tiph;
759 struct flowi4 fl4; 743 struct flowi4 fl4;
@@ -766,6 +750,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
766 int gre_hlen; 750 int gre_hlen;
767 __be32 dst; 751 __be32 dst;
768 int mtu; 752 int mtu;
753 u8 ttl;
769 754
770 if (skb->ip_summed == CHECKSUM_PARTIAL && 755 if (skb->ip_summed == CHECKSUM_PARTIAL &&
771 skb_checksum_help(skb)) 756 skb_checksum_help(skb))
@@ -776,7 +761,10 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
776 761
777 if (dev->header_ops && dev->type == ARPHRD_IPGRE) { 762 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
778 gre_hlen = 0; 763 gre_hlen = 0;
779 tiph = (const struct iphdr *)skb->data; 764 if (skb->protocol == htons(ETH_P_IP))
765 tiph = (const struct iphdr *)skb->data;
766 else
767 tiph = &tunnel->parms.iph;
780 } else { 768 } else {
781 gre_hlen = tunnel->hlen; 769 gre_hlen = tunnel->hlen;
782 tiph = &tunnel->parms.iph; 770 tiph = &tunnel->parms.iph;
@@ -828,6 +816,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
828 goto tx_error; 816 goto tx_error;
829 } 817 }
830 818
819 ttl = tiph->ttl;
831 tos = tiph->tos; 820 tos = tiph->tos;
832 if (tos == 1) { 821 if (tos == 1) {
833 tos = 0; 822 tos = 0;
@@ -920,11 +909,12 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
920 dev_kfree_skb(skb); 909 dev_kfree_skb(skb);
921 skb = new_skb; 910 skb = new_skb;
922 old_iph = ip_hdr(skb); 911 old_iph = ip_hdr(skb);
912 /* Warning : tiph value might point to freed memory */
923 } 913 }
924 914
925 skb_reset_transport_header(skb);
926 skb_push(skb, gre_hlen); 915 skb_push(skb, gre_hlen);
927 skb_reset_network_header(skb); 916 skb_reset_network_header(skb);
917 skb_set_transport_header(skb, sizeof(*iph));
928 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 918 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
929 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | 919 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
930 IPSKB_REROUTED); 920 IPSKB_REROUTED);
@@ -943,8 +933,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
943 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); 933 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
944 iph->daddr = fl4.daddr; 934 iph->daddr = fl4.daddr;
945 iph->saddr = fl4.saddr; 935 iph->saddr = fl4.saddr;
936 iph->ttl = ttl;
946 937
947 if ((iph->ttl = tiph->ttl) == 0) { 938 if (ttl == 0) {
948 if (skb->protocol == htons(ETH_P_IP)) 939 if (skb->protocol == htons(ETH_P_IP))
949 iph->ttl = old_iph->ttl; 940 iph->ttl = old_iph->ttl;
950#if IS_ENABLED(CONFIG_IPV6) 941#if IS_ENABLED(CONFIG_IPV6)
@@ -977,9 +968,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
977 } 968 }
978 } 969 }
979 970
980 nf_reset(skb); 971 iptunnel_xmit(skb, dev);
981 tstats = this_cpu_ptr(dev->tstats);
982 __IPTUNNEL_XMIT(tstats, &dev->stats);
983 return NETDEV_TX_OK; 972 return NETDEV_TX_OK;
984 973
985#if IS_ENABLED(CONFIG_IPV6) 974#if IS_ENABLED(CONFIG_IPV6)
@@ -1082,7 +1071,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1082 case SIOCADDTUNNEL: 1071 case SIOCADDTUNNEL:
1083 case SIOCCHGTUNNEL: 1072 case SIOCCHGTUNNEL:
1084 err = -EPERM; 1073 err = -EPERM;
1085 if (!capable(CAP_NET_ADMIN)) 1074 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1086 goto done; 1075 goto done;
1087 1076
1088 err = -EFAULT; 1077 err = -EFAULT;
@@ -1157,7 +1146,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1157 1146
1158 case SIOCDELTUNNEL: 1147 case SIOCDELTUNNEL:
1159 err = -EPERM; 1148 err = -EPERM;
1160 if (!capable(CAP_NET_ADMIN)) 1149 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1161 goto done; 1150 goto done;
1162 1151
1163 if (dev == ign->fb_tunnel_dev) { 1152 if (dev == ign->fb_tunnel_dev) {
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 1dc01f9793d5..f6289bf6f332 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -409,7 +409,7 @@ int ip_options_compile(struct net *net,
409 optptr[2] += 8; 409 optptr[2] += 8;
410 break; 410 break;
411 default: 411 default:
412 if (!skb && !capable(CAP_NET_RAW)) { 412 if (!skb && !ns_capable(net->user_ns, CAP_NET_RAW)) {
413 pp_ptr = optptr + 3; 413 pp_ptr = optptr + 3;
414 goto error; 414 goto error;
415 } 415 }
@@ -445,7 +445,7 @@ int ip_options_compile(struct net *net,
445 opt->router_alert = optptr - iph; 445 opt->router_alert = optptr - iph;
446 break; 446 break;
447 case IPOPT_CIPSO: 447 case IPOPT_CIPSO:
448 if ((!skb && !capable(CAP_NET_RAW)) || opt->cipso) { 448 if ((!skb && !ns_capable(net->user_ns, CAP_NET_RAW)) || opt->cipso) {
449 pp_ptr = optptr; 449 pp_ptr = optptr;
450 goto error; 450 goto error;
451 } 451 }
@@ -458,7 +458,7 @@ int ip_options_compile(struct net *net,
458 case IPOPT_SEC: 458 case IPOPT_SEC:
459 case IPOPT_SID: 459 case IPOPT_SID:
460 default: 460 default:
461 if (!skb && !capable(CAP_NET_RAW)) { 461 if (!skb && !ns_capable(net->user_ns, CAP_NET_RAW)) {
462 pp_ptr = optptr; 462 pp_ptr = optptr;
463 goto error; 463 goto error;
464 } 464 }
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6537a408a4fb..3e98ed2bff55 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -595,6 +595,10 @@ slow_path_clean:
595 } 595 }
596 596
597slow_path: 597slow_path:
598 /* for offloaded checksums cleanup checksum before fragmentation */
599 if ((skb->ip_summed == CHECKSUM_PARTIAL) && skb_checksum_help(skb))
600 goto fail;
601
598 left = skb->len - hlen; /* Space per frame */ 602 left = skb->len - hlen; /* Space per frame */
599 ptr = hlen; /* Where to start from */ 603 ptr = hlen; /* Where to start from */
600 604
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 5eea4a811042..d9c4f113d709 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -457,19 +457,28 @@ static int do_ip_setsockopt(struct sock *sk, int level,
457 struct inet_sock *inet = inet_sk(sk); 457 struct inet_sock *inet = inet_sk(sk);
458 int val = 0, err; 458 int val = 0, err;
459 459
460 if (((1<<optname) & ((1<<IP_PKTINFO) | (1<<IP_RECVTTL) | 460 switch (optname) {
461 (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) | 461 case IP_PKTINFO:
462 (1<<IP_RETOPTS) | (1<<IP_TOS) | 462 case IP_RECVTTL:
463 (1<<IP_TTL) | (1<<IP_HDRINCL) | 463 case IP_RECVOPTS:
464 (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) | 464 case IP_RECVTOS:
465 (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | 465 case IP_RETOPTS:
466 (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) | 466 case IP_TOS:
467 (1<<IP_MINTTL) | (1<<IP_NODEFRAG))) || 467 case IP_TTL:
468 optname == IP_UNICAST_IF || 468 case IP_HDRINCL:
469 optname == IP_MULTICAST_TTL || 469 case IP_MTU_DISCOVER:
470 optname == IP_MULTICAST_ALL || 470 case IP_RECVERR:
471 optname == IP_MULTICAST_LOOP || 471 case IP_ROUTER_ALERT:
472 optname == IP_RECVORIGDSTADDR) { 472 case IP_FREEBIND:
473 case IP_PASSSEC:
474 case IP_TRANSPARENT:
475 case IP_MINTTL:
476 case IP_NODEFRAG:
477 case IP_UNICAST_IF:
478 case IP_MULTICAST_TTL:
479 case IP_MULTICAST_ALL:
480 case IP_MULTICAST_LOOP:
481 case IP_RECVORIGDSTADDR:
473 if (optlen >= sizeof(int)) { 482 if (optlen >= sizeof(int)) {
474 if (get_user(val, (int __user *) optval)) 483 if (get_user(val, (int __user *) optval))
475 return -EFAULT; 484 return -EFAULT;
@@ -581,7 +590,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
581 case IP_TTL: 590 case IP_TTL:
582 if (optlen < 1) 591 if (optlen < 1)
583 goto e_inval; 592 goto e_inval;
584 if (val != -1 && (val < 0 || val > 255)) 593 if (val != -1 && (val < 1 || val > 255))
585 goto e_inval; 594 goto e_inval;
586 inet->uc_ttl = val; 595 inet->uc_ttl = val;
587 break; 596 break;
@@ -980,13 +989,14 @@ mc_msf_out:
980 case IP_IPSEC_POLICY: 989 case IP_IPSEC_POLICY:
981 case IP_XFRM_POLICY: 990 case IP_XFRM_POLICY:
982 err = -EPERM; 991 err = -EPERM;
983 if (!capable(CAP_NET_ADMIN)) 992 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
984 break; 993 break;
985 err = xfrm_user_policy(sk, optname, optval, optlen); 994 err = xfrm_user_policy(sk, optname, optval, optlen);
986 break; 995 break;
987 996
988 case IP_TRANSPARENT: 997 case IP_TRANSPARENT:
989 if (!!val && !capable(CAP_NET_RAW) && !capable(CAP_NET_ADMIN)) { 998 if (!!val && !ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
999 !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
990 err = -EPERM; 1000 err = -EPERM;
991 break; 1001 break;
992 } 1002 }
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 1831092f999f..c3a4233c0ac2 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -66,20 +66,6 @@ static void vti_tunnel_setup(struct net_device *dev);
66static void vti_dev_free(struct net_device *dev); 66static void vti_dev_free(struct net_device *dev);
67static int vti_tunnel_bind_dev(struct net_device *dev); 67static int vti_tunnel_bind_dev(struct net_device *dev);
68 68
69/* Locking : hash tables are protected by RCU and RTNL */
70
71#define for_each_ip_tunnel_rcu(start) \
72 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
73
74/* often modified stats are per cpu, other are shared (netdev->stats) */
75struct pcpu_tstats {
76 u64 rx_packets;
77 u64 rx_bytes;
78 u64 tx_packets;
79 u64 tx_bytes;
80 struct u64_stats_sync syncp;
81};
82
83#define VTI_XMIT(stats1, stats2) do { \ 69#define VTI_XMIT(stats1, stats2) do { \
84 int err; \ 70 int err; \
85 int pkt_len = skb->len; \ 71 int pkt_len = skb->len; \
@@ -142,19 +128,19 @@ static struct ip_tunnel *vti_tunnel_lookup(struct net *net,
142 struct ip_tunnel *t; 128 struct ip_tunnel *t;
143 struct vti_net *ipn = net_generic(net, vti_net_id); 129 struct vti_net *ipn = net_generic(net, vti_net_id);
144 130
145 for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1]) 131 for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
146 if (local == t->parms.iph.saddr && 132 if (local == t->parms.iph.saddr &&
147 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) 133 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
148 return t; 134 return t;
149 for_each_ip_tunnel_rcu(ipn->tunnels_r[h0]) 135 for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
150 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) 136 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
151 return t; 137 return t;
152 138
153 for_each_ip_tunnel_rcu(ipn->tunnels_l[h1]) 139 for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
154 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) 140 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
155 return t; 141 return t;
156 142
157 for_each_ip_tunnel_rcu(ipn->tunnels_wc[0]) 143 for_each_ip_tunnel_rcu(t, ipn->tunnels_wc[0])
158 if (t && (t->dev->flags&IFF_UP)) 144 if (t && (t->dev->flags&IFF_UP))
159 return t; 145 return t;
160 return NULL; 146 return NULL;
@@ -338,12 +324,17 @@ static int vti_rcv(struct sk_buff *skb)
338 if (tunnel != NULL) { 324 if (tunnel != NULL) {
339 struct pcpu_tstats *tstats; 325 struct pcpu_tstats *tstats;
340 326
327 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
328 return -1;
329
341 tstats = this_cpu_ptr(tunnel->dev->tstats); 330 tstats = this_cpu_ptr(tunnel->dev->tstats);
342 u64_stats_update_begin(&tstats->syncp); 331 u64_stats_update_begin(&tstats->syncp);
343 tstats->rx_packets++; 332 tstats->rx_packets++;
344 tstats->rx_bytes += skb->len; 333 tstats->rx_bytes += skb->len;
345 u64_stats_update_end(&tstats->syncp); 334 u64_stats_update_end(&tstats->syncp);
346 335
336 skb->mark = 0;
337 secpath_reset(skb);
347 skb->dev = tunnel->dev; 338 skb->dev = tunnel->dev;
348 return 1; 339 return 1;
349 } 340 }
@@ -497,7 +488,7 @@ vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
497 case SIOCADDTUNNEL: 488 case SIOCADDTUNNEL:
498 case SIOCCHGTUNNEL: 489 case SIOCCHGTUNNEL:
499 err = -EPERM; 490 err = -EPERM;
500 if (!capable(CAP_NET_ADMIN)) 491 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
501 goto done; 492 goto done;
502 493
503 err = -EFAULT; 494 err = -EFAULT;
@@ -562,7 +553,7 @@ vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
562 553
563 case SIOCDELTUNNEL: 554 case SIOCDELTUNNEL:
564 err = -EPERM; 555 err = -EPERM;
565 if (!capable(CAP_NET_ADMIN)) 556 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
566 goto done; 557 goto done;
567 558
568 if (dev == ipn->fb_tunnel_dev) { 559 if (dev == ipn->fb_tunnel_dev) {
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 798358b10717..a2e50ae80b53 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -136,6 +136,8 @@ __be32 ic_myaddr = NONE; /* My IP address */
136static __be32 ic_netmask = NONE; /* Netmask for local subnet */ 136static __be32 ic_netmask = NONE; /* Netmask for local subnet */
137__be32 ic_gateway = NONE; /* Gateway IP address */ 137__be32 ic_gateway = NONE; /* Gateway IP address */
138 138
139__be32 ic_addrservaddr = NONE; /* IP Address of the IP addresses'server */
140
139__be32 ic_servaddr = NONE; /* Boot server IP address */ 141__be32 ic_servaddr = NONE; /* Boot server IP address */
140 142
141__be32 root_server_addr = NONE; /* Address of NFS server */ 143__be32 root_server_addr = NONE; /* Address of NFS server */
@@ -558,6 +560,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
558 if (ic_myaddr == NONE) 560 if (ic_myaddr == NONE)
559 ic_myaddr = tip; 561 ic_myaddr = tip;
560 ic_servaddr = sip; 562 ic_servaddr = sip;
563 ic_addrservaddr = sip;
561 ic_got_reply = IC_RARP; 564 ic_got_reply = IC_RARP;
562 565
563drop_unlock: 566drop_unlock:
@@ -1068,7 +1071,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
1068 ic_servaddr = server_id; 1071 ic_servaddr = server_id;
1069#ifdef IPCONFIG_DEBUG 1072#ifdef IPCONFIG_DEBUG
1070 printk("DHCP: Offered address %pI4 by server %pI4\n", 1073 printk("DHCP: Offered address %pI4 by server %pI4\n",
1071 &ic_myaddr, &ic_servaddr); 1074 &ic_myaddr, &b->iph.saddr);
1072#endif 1075#endif
1073 /* The DHCP indicated server address takes 1076 /* The DHCP indicated server address takes
1074 * precedence over the bootp header one if 1077 * precedence over the bootp header one if
@@ -1113,6 +1116,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
1113 ic_dev = dev; 1116 ic_dev = dev;
1114 ic_myaddr = b->your_ip; 1117 ic_myaddr = b->your_ip;
1115 ic_servaddr = b->server_ip; 1118 ic_servaddr = b->server_ip;
1119 ic_addrservaddr = b->iph.saddr;
1116 if (ic_gateway == NONE && b->relay_ip) 1120 if (ic_gateway == NONE && b->relay_ip)
1117 ic_gateway = b->relay_ip; 1121 ic_gateway = b->relay_ip;
1118 if (ic_nameservers[0] == NONE) 1122 if (ic_nameservers[0] == NONE)
@@ -1268,7 +1272,7 @@ static int __init ic_dynamic(void)
1268 printk("IP-Config: Got %s answer from %pI4, ", 1272 printk("IP-Config: Got %s answer from %pI4, ",
1269 ((ic_got_reply & IC_RARP) ? "RARP" 1273 ((ic_got_reply & IC_RARP) ? "RARP"
1270 : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"), 1274 : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"),
1271 &ic_servaddr); 1275 &ic_addrservaddr);
1272 pr_cont("my address is %pI4\n", &ic_myaddr); 1276 pr_cont("my address is %pI4\n", &ic_myaddr);
1273 1277
1274 return 0; 1278 return 0;
@@ -1500,8 +1504,10 @@ static int __init ip_auto_config(void)
1500 * Clue in the operator. 1504 * Clue in the operator.
1501 */ 1505 */
1502 pr_info("IP-Config: Complete:\n"); 1506 pr_info("IP-Config: Complete:\n");
1503 pr_info(" device=%s, addr=%pI4, mask=%pI4, gw=%pI4\n", 1507
1504 ic_dev->name, &ic_myaddr, &ic_netmask, &ic_gateway); 1508 pr_info(" device=%s, hwaddr=%*phC, ipaddr=%pI4, mask=%pI4, gw=%pI4\n",
1509 ic_dev->name, ic_dev->addr_len, ic_dev->dev_addr,
1510 &ic_myaddr, &ic_netmask, &ic_gateway);
1505 pr_info(" host=%s, domain=%s, nis-domain=%s\n", 1511 pr_info(" host=%s, domain=%s, nis-domain=%s\n",
1506 utsname()->nodename, ic_domain, utsname()->domainname); 1512 utsname()->nodename, ic_domain, utsname()->domainname);
1507 pr_info(" bootserver=%pI4, rootserver=%pI4, rootpath=%s", 1513 pr_info(" bootserver=%pI4, rootserver=%pI4, rootpath=%s",
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index e15b45297c09..191fc24a745a 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -138,22 +138,7 @@ struct ipip_net {
138static int ipip_tunnel_init(struct net_device *dev); 138static int ipip_tunnel_init(struct net_device *dev);
139static void ipip_tunnel_setup(struct net_device *dev); 139static void ipip_tunnel_setup(struct net_device *dev);
140static void ipip_dev_free(struct net_device *dev); 140static void ipip_dev_free(struct net_device *dev);
141 141static struct rtnl_link_ops ipip_link_ops __read_mostly;
142/*
143 * Locking : hash tables are protected by RCU and RTNL
144 */
145
146#define for_each_ip_tunnel_rcu(start) \
147 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
148
149/* often modified stats are per cpu, other are shared (netdev->stats) */
150struct pcpu_tstats {
151 u64 rx_packets;
152 u64 rx_bytes;
153 u64 tx_packets;
154 u64 tx_bytes;
155 struct u64_stats_sync syncp;
156};
157 142
158static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev, 143static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
159 struct rtnl_link_stats64 *tot) 144 struct rtnl_link_stats64 *tot)
@@ -197,16 +182,16 @@ static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
197 struct ip_tunnel *t; 182 struct ip_tunnel *t;
198 struct ipip_net *ipn = net_generic(net, ipip_net_id); 183 struct ipip_net *ipn = net_generic(net, ipip_net_id);
199 184
200 for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1]) 185 for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
201 if (local == t->parms.iph.saddr && 186 if (local == t->parms.iph.saddr &&
202 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) 187 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
203 return t; 188 return t;
204 189
205 for_each_ip_tunnel_rcu(ipn->tunnels_r[h0]) 190 for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
206 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) 191 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
207 return t; 192 return t;
208 193
209 for_each_ip_tunnel_rcu(ipn->tunnels_l[h1]) 194 for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
210 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) 195 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
211 return t; 196 return t;
212 197
@@ -264,6 +249,32 @@ static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
264 rcu_assign_pointer(*tp, t); 249 rcu_assign_pointer(*tp, t);
265} 250}
266 251
252static int ipip_tunnel_create(struct net_device *dev)
253{
254 struct ip_tunnel *t = netdev_priv(dev);
255 struct net *net = dev_net(dev);
256 struct ipip_net *ipn = net_generic(net, ipip_net_id);
257 int err;
258
259 err = ipip_tunnel_init(dev);
260 if (err < 0)
261 goto out;
262
263 err = register_netdevice(dev);
264 if (err < 0)
265 goto out;
266
267 strcpy(t->parms.name, dev->name);
268 dev->rtnl_link_ops = &ipip_link_ops;
269
270 dev_hold(dev);
271 ipip_tunnel_link(ipn, t);
272 return 0;
273
274out:
275 return err;
276}
277
267static struct ip_tunnel *ipip_tunnel_locate(struct net *net, 278static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
268 struct ip_tunnel_parm *parms, int create) 279 struct ip_tunnel_parm *parms, int create)
269{ 280{
@@ -298,16 +309,9 @@ static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
298 nt = netdev_priv(dev); 309 nt = netdev_priv(dev);
299 nt->parms = *parms; 310 nt->parms = *parms;
300 311
301 if (ipip_tunnel_init(dev) < 0) 312 if (ipip_tunnel_create(dev) < 0)
302 goto failed_free; 313 goto failed_free;
303 314
304 if (register_netdevice(dev) < 0)
305 goto failed_free;
306
307 strcpy(nt->parms.name, dev->name);
308
309 dev_hold(dev);
310 ipip_tunnel_link(ipn, nt);
311 return nt; 315 return nt;
312 316
313failed_free: 317failed_free:
@@ -463,7 +467,6 @@ drop:
463static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 467static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
464{ 468{
465 struct ip_tunnel *tunnel = netdev_priv(dev); 469 struct ip_tunnel *tunnel = netdev_priv(dev);
466 struct pcpu_tstats *tstats;
467 const struct iphdr *tiph = &tunnel->parms.iph; 470 const struct iphdr *tiph = &tunnel->parms.iph;
468 u8 tos = tunnel->parms.iph.tos; 471 u8 tos = tunnel->parms.iph.tos;
469 __be16 df = tiph->frag_off; 472 __be16 df = tiph->frag_off;
@@ -479,6 +482,10 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
479 if (skb->protocol != htons(ETH_P_IP)) 482 if (skb->protocol != htons(ETH_P_IP))
480 goto tx_error; 483 goto tx_error;
481 484
485 if (skb->ip_summed == CHECKSUM_PARTIAL &&
486 skb_checksum_help(skb))
487 goto tx_error;
488
482 if (tos & 1) 489 if (tos & 1)
483 tos = old_iph->tos; 490 tos = old_iph->tos;
484 491
@@ -586,9 +593,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
586 if ((iph->ttl = tiph->ttl) == 0) 593 if ((iph->ttl = tiph->ttl) == 0)
587 iph->ttl = old_iph->ttl; 594 iph->ttl = old_iph->ttl;
588 595
589 nf_reset(skb); 596 iptunnel_xmit(skb, dev);
590 tstats = this_cpu_ptr(dev->tstats);
591 __IPTUNNEL_XMIT(tstats, &dev->stats);
592 return NETDEV_TX_OK; 597 return NETDEV_TX_OK;
593 598
594tx_error_icmp: 599tx_error_icmp:
@@ -635,6 +640,28 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
635 dev->iflink = tunnel->parms.link; 640 dev->iflink = tunnel->parms.link;
636} 641}
637 642
643static void ipip_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
644{
645 struct net *net = dev_net(t->dev);
646 struct ipip_net *ipn = net_generic(net, ipip_net_id);
647
648 ipip_tunnel_unlink(ipn, t);
649 synchronize_net();
650 t->parms.iph.saddr = p->iph.saddr;
651 t->parms.iph.daddr = p->iph.daddr;
652 memcpy(t->dev->dev_addr, &p->iph.saddr, 4);
653 memcpy(t->dev->broadcast, &p->iph.daddr, 4);
654 ipip_tunnel_link(ipn, t);
655 t->parms.iph.ttl = p->iph.ttl;
656 t->parms.iph.tos = p->iph.tos;
657 t->parms.iph.frag_off = p->iph.frag_off;
658 if (t->parms.link != p->link) {
659 t->parms.link = p->link;
660 ipip_tunnel_bind_dev(t->dev);
661 }
662 netdev_state_change(t->dev);
663}
664
638static int 665static int
639ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) 666ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
640{ 667{
@@ -664,7 +691,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
664 case SIOCADDTUNNEL: 691 case SIOCADDTUNNEL:
665 case SIOCCHGTUNNEL: 692 case SIOCCHGTUNNEL:
666 err = -EPERM; 693 err = -EPERM;
667 if (!capable(CAP_NET_ADMIN)) 694 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
668 goto done; 695 goto done;
669 696
670 err = -EFAULT; 697 err = -EFAULT;
@@ -693,29 +720,13 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
693 break; 720 break;
694 } 721 }
695 t = netdev_priv(dev); 722 t = netdev_priv(dev);
696 ipip_tunnel_unlink(ipn, t);
697 synchronize_net();
698 t->parms.iph.saddr = p.iph.saddr;
699 t->parms.iph.daddr = p.iph.daddr;
700 memcpy(dev->dev_addr, &p.iph.saddr, 4);
701 memcpy(dev->broadcast, &p.iph.daddr, 4);
702 ipip_tunnel_link(ipn, t);
703 netdev_state_change(dev);
704 } 723 }
724
725 ipip_tunnel_update(t, &p);
705 } 726 }
706 727
707 if (t) { 728 if (t) {
708 err = 0; 729 err = 0;
709 if (cmd == SIOCCHGTUNNEL) {
710 t->parms.iph.ttl = p.iph.ttl;
711 t->parms.iph.tos = p.iph.tos;
712 t->parms.iph.frag_off = p.iph.frag_off;
713 if (t->parms.link != p.link) {
714 t->parms.link = p.link;
715 ipip_tunnel_bind_dev(dev);
716 netdev_state_change(dev);
717 }
718 }
719 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) 730 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
720 err = -EFAULT; 731 err = -EFAULT;
721 } else 732 } else
@@ -724,7 +735,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
724 735
725 case SIOCDELTUNNEL: 736 case SIOCDELTUNNEL:
726 err = -EPERM; 737 err = -EPERM;
727 if (!capable(CAP_NET_ADMIN)) 738 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
728 goto done; 739 goto done;
729 740
730 if (dev == ipn->fb_tunnel_dev) { 741 if (dev == ipn->fb_tunnel_dev) {
@@ -773,6 +784,11 @@ static void ipip_dev_free(struct net_device *dev)
773 free_netdev(dev); 784 free_netdev(dev);
774} 785}
775 786
787#define IPIP_FEATURES (NETIF_F_SG | \
788 NETIF_F_FRAGLIST | \
789 NETIF_F_HIGHDMA | \
790 NETIF_F_HW_CSUM)
791
776static void ipip_tunnel_setup(struct net_device *dev) 792static void ipip_tunnel_setup(struct net_device *dev)
777{ 793{
778 dev->netdev_ops = &ipip_netdev_ops; 794 dev->netdev_ops = &ipip_netdev_ops;
@@ -787,6 +803,9 @@ static void ipip_tunnel_setup(struct net_device *dev)
787 dev->features |= NETIF_F_NETNS_LOCAL; 803 dev->features |= NETIF_F_NETNS_LOCAL;
788 dev->features |= NETIF_F_LLTX; 804 dev->features |= NETIF_F_LLTX;
789 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; 805 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
806
807 dev->features |= IPIP_FEATURES;
808 dev->hw_features |= IPIP_FEATURES;
790} 809}
791 810
792static int ipip_tunnel_init(struct net_device *dev) 811static int ipip_tunnel_init(struct net_device *dev)
@@ -829,6 +848,142 @@ static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
829 return 0; 848 return 0;
830} 849}
831 850
851static void ipip_netlink_parms(struct nlattr *data[],
852 struct ip_tunnel_parm *parms)
853{
854 memset(parms, 0, sizeof(*parms));
855
856 parms->iph.version = 4;
857 parms->iph.protocol = IPPROTO_IPIP;
858 parms->iph.ihl = 5;
859
860 if (!data)
861 return;
862
863 if (data[IFLA_IPTUN_LINK])
864 parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
865
866 if (data[IFLA_IPTUN_LOCAL])
867 parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]);
868
869 if (data[IFLA_IPTUN_REMOTE])
870 parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]);
871
872 if (data[IFLA_IPTUN_TTL]) {
873 parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
874 if (parms->iph.ttl)
875 parms->iph.frag_off = htons(IP_DF);
876 }
877
878 if (data[IFLA_IPTUN_TOS])
879 parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
880
881 if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
882 parms->iph.frag_off = htons(IP_DF);
883}
884
885static int ipip_newlink(struct net *src_net, struct net_device *dev,
886 struct nlattr *tb[], struct nlattr *data[])
887{
888 struct net *net = dev_net(dev);
889 struct ip_tunnel *nt;
890
891 nt = netdev_priv(dev);
892 ipip_netlink_parms(data, &nt->parms);
893
894 if (ipip_tunnel_locate(net, &nt->parms, 0))
895 return -EEXIST;
896
897 return ipip_tunnel_create(dev);
898}
899
900static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
901 struct nlattr *data[])
902{
903 struct ip_tunnel *t;
904 struct ip_tunnel_parm p;
905 struct net *net = dev_net(dev);
906 struct ipip_net *ipn = net_generic(net, ipip_net_id);
907
908 if (dev == ipn->fb_tunnel_dev)
909 return -EINVAL;
910
911 ipip_netlink_parms(data, &p);
912
913 if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
914 (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
915 return -EINVAL;
916
917 t = ipip_tunnel_locate(net, &p, 0);
918
919 if (t) {
920 if (t->dev != dev)
921 return -EEXIST;
922 } else
923 t = netdev_priv(dev);
924
925 ipip_tunnel_update(t, &p);
926 return 0;
927}
928
929static size_t ipip_get_size(const struct net_device *dev)
930{
931 return
932 /* IFLA_IPTUN_LINK */
933 nla_total_size(4) +
934 /* IFLA_IPTUN_LOCAL */
935 nla_total_size(4) +
936 /* IFLA_IPTUN_REMOTE */
937 nla_total_size(4) +
938 /* IFLA_IPTUN_TTL */
939 nla_total_size(1) +
940 /* IFLA_IPTUN_TOS */
941 nla_total_size(1) +
942 /* IFLA_IPTUN_PMTUDISC */
943 nla_total_size(1) +
944 0;
945}
946
947static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
948{
949 struct ip_tunnel *tunnel = netdev_priv(dev);
950 struct ip_tunnel_parm *parm = &tunnel->parms;
951
952 if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
953 nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
954 nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
955 nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
956 nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
957 nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
958 !!(parm->iph.frag_off & htons(IP_DF))))
959 goto nla_put_failure;
960 return 0;
961
962nla_put_failure:
963 return -EMSGSIZE;
964}
965
966static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
967 [IFLA_IPTUN_LINK] = { .type = NLA_U32 },
968 [IFLA_IPTUN_LOCAL] = { .type = NLA_U32 },
969 [IFLA_IPTUN_REMOTE] = { .type = NLA_U32 },
970 [IFLA_IPTUN_TTL] = { .type = NLA_U8 },
971 [IFLA_IPTUN_TOS] = { .type = NLA_U8 },
972 [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
973};
974
975static struct rtnl_link_ops ipip_link_ops __read_mostly = {
976 .kind = "ipip",
977 .maxtype = IFLA_IPTUN_MAX,
978 .policy = ipip_policy,
979 .priv_size = sizeof(struct ip_tunnel),
980 .setup = ipip_tunnel_setup,
981 .newlink = ipip_newlink,
982 .changelink = ipip_changelink,
983 .get_size = ipip_get_size,
984 .fill_info = ipip_fill_info,
985};
986
832static struct xfrm_tunnel ipip_handler __read_mostly = { 987static struct xfrm_tunnel ipip_handler __read_mostly = {
833 .handler = ipip_rcv, 988 .handler = ipip_rcv,
834 .err_handler = ipip_err, 989 .err_handler = ipip_err,
@@ -925,14 +1080,26 @@ static int __init ipip_init(void)
925 return err; 1080 return err;
926 err = xfrm4_tunnel_register(&ipip_handler, AF_INET); 1081 err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
927 if (err < 0) { 1082 if (err < 0) {
928 unregister_pernet_device(&ipip_net_ops);
929 pr_info("%s: can't register tunnel\n", __func__); 1083 pr_info("%s: can't register tunnel\n", __func__);
1084 goto xfrm_tunnel_failed;
930 } 1085 }
1086 err = rtnl_link_register(&ipip_link_ops);
1087 if (err < 0)
1088 goto rtnl_link_failed;
1089
1090out:
931 return err; 1091 return err;
1092
1093rtnl_link_failed:
1094 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
1095xfrm_tunnel_failed:
1096 unregister_pernet_device(&ipip_net_ops);
1097 goto out;
932} 1098}
933 1099
934static void __exit ipip_fini(void) 1100static void __exit ipip_fini(void)
935{ 1101{
1102 rtnl_link_unregister(&ipip_link_ops);
936 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET)) 1103 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
937 pr_info("%s: can't deregister tunnel\n", __func__); 1104 pr_info("%s: can't deregister tunnel\n", __func__);
938 1105
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 6168c4dc58b1..a9454cbd953c 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -65,6 +65,7 @@
65#include <net/checksum.h> 65#include <net/checksum.h>
66#include <net/netlink.h> 66#include <net/netlink.h>
67#include <net/fib_rules.h> 67#include <net/fib_rules.h>
68#include <linux/netconf.h>
68 69
69#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) 70#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
70#define CONFIG_IP_PIMSM 1 71#define CONFIG_IP_PIMSM 1
@@ -83,8 +84,8 @@ struct mr_table {
83 struct vif_device vif_table[MAXVIFS]; 84 struct vif_device vif_table[MAXVIFS];
84 int maxvif; 85 int maxvif;
85 atomic_t cache_resolve_queue_len; 86 atomic_t cache_resolve_queue_len;
86 int mroute_do_assert; 87 bool mroute_do_assert;
87 int mroute_do_pim; 88 bool mroute_do_pim;
88#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) 89#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
89 int mroute_reg_vif_num; 90 int mroute_reg_vif_num;
90#endif 91#endif
@@ -133,6 +134,8 @@ static int ipmr_cache_report(struct mr_table *mrt,
133 struct sk_buff *pkt, vifi_t vifi, int assert); 134 struct sk_buff *pkt, vifi_t vifi, int assert);
134static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 135static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
135 struct mfc_cache *c, struct rtmsg *rtm); 136 struct mfc_cache *c, struct rtmsg *rtm);
137static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
138 int cmd);
136static void mroute_clean_tables(struct mr_table *mrt); 139static void mroute_clean_tables(struct mr_table *mrt);
137static void ipmr_expire_process(unsigned long arg); 140static void ipmr_expire_process(unsigned long arg);
138 141
@@ -582,6 +585,9 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
582 in_dev = __in_dev_get_rtnl(dev); 585 in_dev = __in_dev_get_rtnl(dev);
583 if (in_dev) { 586 if (in_dev) {
584 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--; 587 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
588 inet_netconf_notify_devconf(dev_net(dev),
589 NETCONFA_MC_FORWARDING,
590 dev->ifindex, &in_dev->cnf);
585 ip_rt_multicast_event(in_dev); 591 ip_rt_multicast_event(in_dev);
586 } 592 }
587 593
@@ -665,6 +671,7 @@ static void ipmr_expire_process(unsigned long arg)
665 } 671 }
666 672
667 list_del(&c->list); 673 list_del(&c->list);
674 mroute_netlink_event(mrt, c, RTM_DELROUTE);
668 ipmr_destroy_unres(mrt, c); 675 ipmr_destroy_unres(mrt, c);
669 } 676 }
670 677
@@ -772,6 +779,8 @@ static int vif_add(struct net *net, struct mr_table *mrt,
772 return -EADDRNOTAVAIL; 779 return -EADDRNOTAVAIL;
773 } 780 }
774 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; 781 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
782 inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, dev->ifindex,
783 &in_dev->cnf);
775 ip_rt_multicast_event(in_dev); 784 ip_rt_multicast_event(in_dev);
776 785
777 /* Fill in the VIF structures */ 786 /* Fill in the VIF structures */
@@ -1020,6 +1029,7 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
1020 1029
1021 atomic_inc(&mrt->cache_resolve_queue_len); 1030 atomic_inc(&mrt->cache_resolve_queue_len);
1022 list_add(&c->list, &mrt->mfc_unres_queue); 1031 list_add(&c->list, &mrt->mfc_unres_queue);
1032 mroute_netlink_event(mrt, c, RTM_NEWROUTE);
1023 1033
1024 if (atomic_read(&mrt->cache_resolve_queue_len) == 1) 1034 if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
1025 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires); 1035 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
@@ -1054,7 +1064,7 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
1054 if (c->mfc_origin == mfc->mfcc_origin.s_addr && 1064 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1055 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { 1065 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1056 list_del_rcu(&c->list); 1066 list_del_rcu(&c->list);
1057 1067 mroute_netlink_event(mrt, c, RTM_DELROUTE);
1058 ipmr_cache_free(c); 1068 ipmr_cache_free(c);
1059 return 0; 1069 return 0;
1060 } 1070 }
@@ -1089,6 +1099,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1089 if (!mrtsock) 1099 if (!mrtsock)
1090 c->mfc_flags |= MFC_STATIC; 1100 c->mfc_flags |= MFC_STATIC;
1091 write_unlock_bh(&mrt_lock); 1101 write_unlock_bh(&mrt_lock);
1102 mroute_netlink_event(mrt, c, RTM_NEWROUTE);
1092 return 0; 1103 return 0;
1093 } 1104 }
1094 1105
@@ -1131,6 +1142,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1131 ipmr_cache_resolve(net, mrt, uc, c); 1142 ipmr_cache_resolve(net, mrt, uc, c);
1132 ipmr_cache_free(uc); 1143 ipmr_cache_free(uc);
1133 } 1144 }
1145 mroute_netlink_event(mrt, c, RTM_NEWROUTE);
1134 return 0; 1146 return 0;
1135} 1147}
1136 1148
@@ -1159,6 +1171,7 @@ static void mroute_clean_tables(struct mr_table *mrt)
1159 if (c->mfc_flags & MFC_STATIC) 1171 if (c->mfc_flags & MFC_STATIC)
1160 continue; 1172 continue;
1161 list_del_rcu(&c->list); 1173 list_del_rcu(&c->list);
1174 mroute_netlink_event(mrt, c, RTM_DELROUTE);
1162 ipmr_cache_free(c); 1175 ipmr_cache_free(c);
1163 } 1176 }
1164 } 1177 }
@@ -1167,6 +1180,7 @@ static void mroute_clean_tables(struct mr_table *mrt)
1167 spin_lock_bh(&mfc_unres_lock); 1180 spin_lock_bh(&mfc_unres_lock);
1168 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { 1181 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
1169 list_del(&c->list); 1182 list_del(&c->list);
1183 mroute_netlink_event(mrt, c, RTM_DELROUTE);
1170 ipmr_destroy_unres(mrt, c); 1184 ipmr_destroy_unres(mrt, c);
1171 } 1185 }
1172 spin_unlock_bh(&mfc_unres_lock); 1186 spin_unlock_bh(&mfc_unres_lock);
@@ -1185,6 +1199,9 @@ static void mrtsock_destruct(struct sock *sk)
1185 ipmr_for_each_table(mrt, net) { 1199 ipmr_for_each_table(mrt, net) {
1186 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1200 if (sk == rtnl_dereference(mrt->mroute_sk)) {
1187 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; 1201 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
1202 inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
1203 NETCONFA_IFINDEX_ALL,
1204 net->ipv4.devconf_all);
1188 RCU_INIT_POINTER(mrt->mroute_sk, NULL); 1205 RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1189 mroute_clean_tables(mrt); 1206 mroute_clean_tables(mrt);
1190 } 1207 }
@@ -1207,23 +1224,24 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1207 struct net *net = sock_net(sk); 1224 struct net *net = sock_net(sk);
1208 struct mr_table *mrt; 1225 struct mr_table *mrt;
1209 1226
1227 if (sk->sk_type != SOCK_RAW ||
1228 inet_sk(sk)->inet_num != IPPROTO_IGMP)
1229 return -EOPNOTSUPP;
1230
1210 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1231 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1211 if (mrt == NULL) 1232 if (mrt == NULL)
1212 return -ENOENT; 1233 return -ENOENT;
1213 1234
1214 if (optname != MRT_INIT) { 1235 if (optname != MRT_INIT) {
1215 if (sk != rcu_access_pointer(mrt->mroute_sk) && 1236 if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1216 !capable(CAP_NET_ADMIN)) 1237 !ns_capable(net->user_ns, CAP_NET_ADMIN))
1217 return -EACCES; 1238 return -EACCES;
1218 } 1239 }
1219 1240
1220 switch (optname) { 1241 switch (optname) {
1221 case MRT_INIT: 1242 case MRT_INIT:
1222 if (sk->sk_type != SOCK_RAW ||
1223 inet_sk(sk)->inet_num != IPPROTO_IGMP)
1224 return -EOPNOTSUPP;
1225 if (optlen != sizeof(int)) 1243 if (optlen != sizeof(int))
1226 return -ENOPROTOOPT; 1244 return -EINVAL;
1227 1245
1228 rtnl_lock(); 1246 rtnl_lock();
1229 if (rtnl_dereference(mrt->mroute_sk)) { 1247 if (rtnl_dereference(mrt->mroute_sk)) {
@@ -1235,6 +1253,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1235 if (ret == 0) { 1253 if (ret == 0) {
1236 rcu_assign_pointer(mrt->mroute_sk, sk); 1254 rcu_assign_pointer(mrt->mroute_sk, sk);
1237 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; 1255 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
1256 inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
1257 NETCONFA_IFINDEX_ALL,
1258 net->ipv4.devconf_all);
1238 } 1259 }
1239 rtnl_unlock(); 1260 rtnl_unlock();
1240 return ret; 1261 return ret;
@@ -1284,9 +1305,11 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1284 case MRT_ASSERT: 1305 case MRT_ASSERT:
1285 { 1306 {
1286 int v; 1307 int v;
1308 if (optlen != sizeof(v))
1309 return -EINVAL;
1287 if (get_user(v, (int __user *)optval)) 1310 if (get_user(v, (int __user *)optval))
1288 return -EFAULT; 1311 return -EFAULT;
1289 mrt->mroute_do_assert = (v) ? 1 : 0; 1312 mrt->mroute_do_assert = v;
1290 return 0; 1313 return 0;
1291 } 1314 }
1292#ifdef CONFIG_IP_PIMSM 1315#ifdef CONFIG_IP_PIMSM
@@ -1294,9 +1317,11 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1294 { 1317 {
1295 int v; 1318 int v;
1296 1319
1320 if (optlen != sizeof(v))
1321 return -EINVAL;
1297 if (get_user(v, (int __user *)optval)) 1322 if (get_user(v, (int __user *)optval))
1298 return -EFAULT; 1323 return -EFAULT;
1299 v = (v) ? 1 : 0; 1324 v = !!v;
1300 1325
1301 rtnl_lock(); 1326 rtnl_lock();
1302 ret = 0; 1327 ret = 0;
@@ -1318,6 +1343,10 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1318 if (get_user(v, (u32 __user *)optval)) 1343 if (get_user(v, (u32 __user *)optval))
1319 return -EFAULT; 1344 return -EFAULT;
1320 1345
1346 /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */
1347 if (v != RT_TABLE_DEFAULT && v >= 1000000000)
1348 return -EINVAL;
1349
1321 rtnl_lock(); 1350 rtnl_lock();
1322 ret = 0; 1351 ret = 0;
1323 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1352 if (sk == rtnl_dereference(mrt->mroute_sk)) {
@@ -1325,7 +1354,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1325 } else { 1354 } else {
1326 if (!ipmr_new_table(net, v)) 1355 if (!ipmr_new_table(net, v))
1327 ret = -ENOMEM; 1356 ret = -ENOMEM;
1328 raw_sk(sk)->ipmr_table = v; 1357 else
1358 raw_sk(sk)->ipmr_table = v;
1329 } 1359 }
1330 rtnl_unlock(); 1360 rtnl_unlock();
1331 return ret; 1361 return ret;
@@ -1351,6 +1381,10 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
1351 struct net *net = sock_net(sk); 1381 struct net *net = sock_net(sk);
1352 struct mr_table *mrt; 1382 struct mr_table *mrt;
1353 1383
1384 if (sk->sk_type != SOCK_RAW ||
1385 inet_sk(sk)->inet_num != IPPROTO_IGMP)
1386 return -EOPNOTSUPP;
1387
1354 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1388 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1355 if (mrt == NULL) 1389 if (mrt == NULL)
1356 return -ENOENT; 1390 return -ENOENT;
@@ -2020,6 +2054,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2020 int ct; 2054 int ct;
2021 struct rtnexthop *nhp; 2055 struct rtnexthop *nhp;
2022 struct nlattr *mp_attr; 2056 struct nlattr *mp_attr;
2057 struct rta_mfc_stats mfcs;
2023 2058
2024 /* If cache is unresolved, don't try to parse IIF and OIF */ 2059 /* If cache is unresolved, don't try to parse IIF and OIF */
2025 if (c->mfc_parent >= MAXVIFS) 2060 if (c->mfc_parent >= MAXVIFS)
@@ -2048,6 +2083,12 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2048 2083
2049 nla_nest_end(skb, mp_attr); 2084 nla_nest_end(skb, mp_attr);
2050 2085
2086 mfcs.mfcs_packets = c->mfc_un.res.pkt;
2087 mfcs.mfcs_bytes = c->mfc_un.res.bytes;
2088 mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
2089 if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
2090 return -EMSGSIZE;
2091
2051 rtm->rtm_type = RTN_MULTICAST; 2092 rtm->rtm_type = RTN_MULTICAST;
2052 return 1; 2093 return 1;
2053} 2094}
@@ -2117,12 +2158,13 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
2117} 2158}
2118 2159
2119static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2160static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2120 u32 portid, u32 seq, struct mfc_cache *c) 2161 u32 portid, u32 seq, struct mfc_cache *c, int cmd)
2121{ 2162{
2122 struct nlmsghdr *nlh; 2163 struct nlmsghdr *nlh;
2123 struct rtmsg *rtm; 2164 struct rtmsg *rtm;
2165 int err;
2124 2166
2125 nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); 2167 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI);
2126 if (nlh == NULL) 2168 if (nlh == NULL)
2127 return -EMSGSIZE; 2169 return -EMSGSIZE;
2128 2170
@@ -2136,13 +2178,18 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2136 goto nla_put_failure; 2178 goto nla_put_failure;
2137 rtm->rtm_type = RTN_MULTICAST; 2179 rtm->rtm_type = RTN_MULTICAST;
2138 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2180 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2139 rtm->rtm_protocol = RTPROT_UNSPEC; 2181 if (c->mfc_flags & MFC_STATIC)
2182 rtm->rtm_protocol = RTPROT_STATIC;
2183 else
2184 rtm->rtm_protocol = RTPROT_MROUTED;
2140 rtm->rtm_flags = 0; 2185 rtm->rtm_flags = 0;
2141 2186
2142 if (nla_put_be32(skb, RTA_SRC, c->mfc_origin) || 2187 if (nla_put_be32(skb, RTA_SRC, c->mfc_origin) ||
2143 nla_put_be32(skb, RTA_DST, c->mfc_mcastgrp)) 2188 nla_put_be32(skb, RTA_DST, c->mfc_mcastgrp))
2144 goto nla_put_failure; 2189 goto nla_put_failure;
2145 if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0) 2190 err = __ipmr_fill_mroute(mrt, skb, c, rtm);
2191 /* do not break the dump if cache is unresolved */
2192 if (err < 0 && err != -ENOENT)
2146 goto nla_put_failure; 2193 goto nla_put_failure;
2147 2194
2148 return nlmsg_end(skb, nlh); 2195 return nlmsg_end(skb, nlh);
@@ -2152,6 +2199,52 @@ nla_put_failure:
2152 return -EMSGSIZE; 2199 return -EMSGSIZE;
2153} 2200}
2154 2201
2202static size_t mroute_msgsize(bool unresolved, int maxvif)
2203{
2204 size_t len =
2205 NLMSG_ALIGN(sizeof(struct rtmsg))
2206 + nla_total_size(4) /* RTA_TABLE */
2207 + nla_total_size(4) /* RTA_SRC */
2208 + nla_total_size(4) /* RTA_DST */
2209 ;
2210
2211 if (!unresolved)
2212 len = len
2213 + nla_total_size(4) /* RTA_IIF */
2214 + nla_total_size(0) /* RTA_MULTIPATH */
2215 + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2216 /* RTA_MFC_STATS */
2217 + nla_total_size(sizeof(struct rta_mfc_stats))
2218 ;
2219
2220 return len;
2221}
2222
2223static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
2224 int cmd)
2225{
2226 struct net *net = read_pnet(&mrt->net);
2227 struct sk_buff *skb;
2228 int err = -ENOBUFS;
2229
2230 skb = nlmsg_new(mroute_msgsize(mfc->mfc_parent >= MAXVIFS, mrt->maxvif),
2231 GFP_ATOMIC);
2232 if (skb == NULL)
2233 goto errout;
2234
2235 err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd);
2236 if (err < 0)
2237 goto errout;
2238
2239 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE, NULL, GFP_ATOMIC);
2240 return;
2241
2242errout:
2243 kfree_skb(skb);
2244 if (err < 0)
2245 rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err);
2246}
2247
2155static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) 2248static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2156{ 2249{
2157 struct net *net = sock_net(skb->sk); 2250 struct net *net = sock_net(skb->sk);
@@ -2178,13 +2271,29 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2178 if (ipmr_fill_mroute(mrt, skb, 2271 if (ipmr_fill_mroute(mrt, skb,
2179 NETLINK_CB(cb->skb).portid, 2272 NETLINK_CB(cb->skb).portid,
2180 cb->nlh->nlmsg_seq, 2273 cb->nlh->nlmsg_seq,
2181 mfc) < 0) 2274 mfc, RTM_NEWROUTE) < 0)
2182 goto done; 2275 goto done;
2183next_entry: 2276next_entry:
2184 e++; 2277 e++;
2185 } 2278 }
2186 e = s_e = 0; 2279 e = s_e = 0;
2187 } 2280 }
2281 spin_lock_bh(&mfc_unres_lock);
2282 list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
2283 if (e < s_e)
2284 goto next_entry2;
2285 if (ipmr_fill_mroute(mrt, skb,
2286 NETLINK_CB(cb->skb).portid,
2287 cb->nlh->nlmsg_seq,
2288 mfc, RTM_NEWROUTE) < 0) {
2289 spin_unlock_bh(&mfc_unres_lock);
2290 goto done;
2291 }
2292next_entry2:
2293 e++;
2294 }
2295 spin_unlock_bh(&mfc_unres_lock);
2296 e = s_e = 0;
2188 s_h = 0; 2297 s_h = 0;
2189next_table: 2298next_table:
2190 t++; 2299 t++;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 97e61eadf580..3ea4127404d6 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1533,7 +1533,7 @@ static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user,
1533{ 1533{
1534 int ret; 1534 int ret;
1535 1535
1536 if (!capable(CAP_NET_ADMIN)) 1536 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1537 return -EPERM; 1537 return -EPERM;
1538 1538
1539 switch (cmd) { 1539 switch (cmd) {
@@ -1677,7 +1677,7 @@ static int compat_do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user,
1677{ 1677{
1678 int ret; 1678 int ret;
1679 1679
1680 if (!capable(CAP_NET_ADMIN)) 1680 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1681 return -EPERM; 1681 return -EPERM;
1682 1682
1683 switch (cmd) { 1683 switch (cmd) {
@@ -1698,7 +1698,7 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned
1698{ 1698{
1699 int ret; 1699 int ret;
1700 1700
1701 if (!capable(CAP_NET_ADMIN)) 1701 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1702 return -EPERM; 1702 return -EPERM;
1703 1703
1704 switch (cmd) { 1704 switch (cmd) {
@@ -1722,7 +1722,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
1722{ 1722{
1723 int ret; 1723 int ret;
1724 1724
1725 if (!capable(CAP_NET_ADMIN)) 1725 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1726 return -EPERM; 1726 return -EPERM;
1727 1727
1728 switch (cmd) { 1728 switch (cmd) {
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 170b1fdd6b72..17c5e06da662 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1846,7 +1846,7 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
1846{ 1846{
1847 int ret; 1847 int ret;
1848 1848
1849 if (!capable(CAP_NET_ADMIN)) 1849 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1850 return -EPERM; 1850 return -EPERM;
1851 1851
1852 switch (cmd) { 1852 switch (cmd) {
@@ -1961,7 +1961,7 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1961{ 1961{
1962 int ret; 1962 int ret;
1963 1963
1964 if (!capable(CAP_NET_ADMIN)) 1964 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1965 return -EPERM; 1965 return -EPERM;
1966 1966
1967 switch (cmd) { 1967 switch (cmd) {
@@ -1983,7 +1983,7 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1983{ 1983{
1984 int ret; 1984 int ret;
1985 1985
1986 if (!capable(CAP_NET_ADMIN)) 1986 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1987 return -EPERM; 1987 return -EPERM;
1988 1988
1989 switch (cmd) { 1989 switch (cmd) {
@@ -2008,7 +2008,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2008{ 2008{
2009 int ret; 2009 int ret;
2010 2010
2011 if (!capable(CAP_NET_ADMIN)) 2011 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2012 return -EPERM; 2012 return -EPERM;
2013 2013
2014 switch (cmd) { 2014 switch (cmd) {
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index fe5daea5214d..75e33a7048f8 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -661,6 +661,7 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
661#define PROC_WRITELEN 10 661#define PROC_WRITELEN 10
662 char buffer[PROC_WRITELEN+1]; 662 char buffer[PROC_WRITELEN+1];
663 unsigned long nodenum; 663 unsigned long nodenum;
664 int rc;
664 665
665 if (size > PROC_WRITELEN) 666 if (size > PROC_WRITELEN)
666 return -EIO; 667 return -EIO;
@@ -669,11 +670,15 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
669 buffer[size] = 0; 670 buffer[size] = 0;
670 671
671 if (*buffer == '+') { 672 if (*buffer == '+') {
672 nodenum = simple_strtoul(buffer+1, NULL, 10); 673 rc = kstrtoul(buffer+1, 10, &nodenum);
674 if (rc)
675 return rc;
673 if (clusterip_add_node(c, nodenum)) 676 if (clusterip_add_node(c, nodenum))
674 return -ENOMEM; 677 return -ENOMEM;
675 } else if (*buffer == '-') { 678 } else if (*buffer == '-') {
676 nodenum = simple_strtoul(buffer+1, NULL,10); 679 rc = kstrtoul(buffer+1, 10, &nodenum);
680 if (rc)
681 return rc;
677 if (clusterip_del_node(c, nodenum)) 682 if (clusterip_del_node(c, nodenum))
678 return -ENOENT; 683 return -ENOENT;
679 } else 684 } else
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 51f13f8ec724..04b18c1ac345 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -81,6 +81,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
81 niph->saddr = oiph->daddr; 81 niph->saddr = oiph->daddr;
82 niph->daddr = oiph->saddr; 82 niph->daddr = oiph->saddr;
83 83
84 skb_reset_transport_header(nskb);
84 tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); 85 tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
85 memset(tcph, 0, sizeof(*tcph)); 86 memset(tcph, 0, sizeof(*tcph));
86 tcph->source = oth->dest; 87 tcph->source = oth->dest;
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 9e0ffaf1d942..eeaff7e4acb5 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -124,19 +124,28 @@ nf_nat_ipv4_fn(unsigned int hooknum,
124 ret = nf_nat_rule_find(skb, hooknum, in, out, ct); 124 ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
125 if (ret != NF_ACCEPT) 125 if (ret != NF_ACCEPT)
126 return ret; 126 return ret;
127 } else 127 } else {
128 pr_debug("Already setup manip %s for ct %p\n", 128 pr_debug("Already setup manip %s for ct %p\n",
129 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", 129 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
130 ct); 130 ct);
131 if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
132 goto oif_changed;
133 }
131 break; 134 break;
132 135
133 default: 136 default:
134 /* ESTABLISHED */ 137 /* ESTABLISHED */
135 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || 138 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
136 ctinfo == IP_CT_ESTABLISHED_REPLY); 139 ctinfo == IP_CT_ESTABLISHED_REPLY);
140 if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
141 goto oif_changed;
137 } 142 }
138 143
139 return nf_nat_packet(ct, ctinfo, hooknum, skb); 144 return nf_nat_packet(ct, ctinfo, hooknum, skb);
145
146oif_changed:
147 nf_ct_kill_acct(ct, ctinfo, skb);
148 return NF_DROP;
140} 149}
141 150
142static unsigned int 151static unsigned int
@@ -184,7 +193,8 @@ nf_nat_ipv4_out(unsigned int hooknum,
184 193
185 if ((ct->tuplehash[dir].tuple.src.u3.ip != 194 if ((ct->tuplehash[dir].tuple.src.u3.ip !=
186 ct->tuplehash[!dir].tuple.dst.u3.ip) || 195 ct->tuplehash[!dir].tuple.dst.u3.ip) ||
187 (ct->tuplehash[dir].tuple.src.u.all != 196 (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
197 ct->tuplehash[dir].tuple.src.u.all !=
188 ct->tuplehash[!dir].tuple.dst.u.all)) 198 ct->tuplehash[!dir].tuple.dst.u.all))
189 if (nf_xfrm_me_harder(skb, AF_INET) < 0) 199 if (nf_xfrm_me_harder(skb, AF_INET) < 0)
190 ret = NF_DROP; 200 ret = NF_DROP;
@@ -221,6 +231,7 @@ nf_nat_ipv4_local_fn(unsigned int hooknum,
221 } 231 }
222#ifdef CONFIG_XFRM 232#ifdef CONFIG_XFRM
223 else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && 233 else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
234 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
224 ct->tuplehash[dir].tuple.dst.u.all != 235 ct->tuplehash[dir].tuple.dst.u.all !=
225 ct->tuplehash[!dir].tuple.src.u.all) 236 ct->tuplehash[!dir].tuple.src.u.all)
226 if (nf_xfrm_me_harder(skb, AF_INET) < 0) 237 if (nf_xfrm_me_harder(skb, AF_INET) < 0)
@@ -274,9 +285,7 @@ static int __net_init iptable_nat_net_init(struct net *net)
274 return -ENOMEM; 285 return -ENOMEM;
275 net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl); 286 net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl);
276 kfree(repl); 287 kfree(repl);
277 if (IS_ERR(net->ipv4.nat_table)) 288 return PTR_RET(net->ipv4.nat_table);
278 return PTR_ERR(net->ipv4.nat_table);
279 return 0;
280} 289}
281 290
282static void __net_exit iptable_nat_net_exit(struct net *net) 291static void __net_exit iptable_nat_net_exit(struct net *net)
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 8918eff1426d..0f9d09f54bd9 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -29,6 +29,7 @@
29#include <net/protocol.h> 29#include <net/protocol.h>
30 30
31const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly; 31const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
32const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly;
32 33
33/* 34/*
34 * Add a protocol handler to the hash tables 35 * Add a protocol handler to the hash tables
@@ -41,6 +42,13 @@ int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
41} 42}
42EXPORT_SYMBOL(inet_add_protocol); 43EXPORT_SYMBOL(inet_add_protocol);
43 44
45int inet_add_offload(const struct net_offload *prot, unsigned char protocol)
46{
47 return !cmpxchg((const struct net_offload **)&inet_offloads[protocol],
48 NULL, prot) ? 0 : -1;
49}
50EXPORT_SYMBOL(inet_add_offload);
51
44/* 52/*
45 * Remove a protocol from the hash tables. 53 * Remove a protocol from the hash tables.
46 */ 54 */
@@ -57,3 +65,16 @@ int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol)
57 return ret; 65 return ret;
58} 66}
59EXPORT_SYMBOL(inet_del_protocol); 67EXPORT_SYMBOL(inet_del_protocol);
68
69int inet_del_offload(const struct net_offload *prot, unsigned char protocol)
70{
71 int ret;
72
73 ret = (cmpxchg((const struct net_offload **)&inet_offloads[protocol],
74 prot, NULL) == prot) ? 0 : -1;
75
76 synchronize_net();
77
78 return ret;
79}
80EXPORT_SYMBOL(inet_del_offload);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a8c651216fa6..844a9ef60dbd 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1785,6 +1785,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
1785 if (dev_out->flags & IFF_LOOPBACK) 1785 if (dev_out->flags & IFF_LOOPBACK)
1786 flags |= RTCF_LOCAL; 1786 flags |= RTCF_LOCAL;
1787 1787
1788 do_cache = true;
1788 if (type == RTN_BROADCAST) { 1789 if (type == RTN_BROADCAST) {
1789 flags |= RTCF_BROADCAST | RTCF_LOCAL; 1790 flags |= RTCF_BROADCAST | RTCF_LOCAL;
1790 fi = NULL; 1791 fi = NULL;
@@ -1793,6 +1794,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
1793 if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr, 1794 if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
1794 fl4->flowi4_proto)) 1795 fl4->flowi4_proto))
1795 flags &= ~RTCF_LOCAL; 1796 flags &= ~RTCF_LOCAL;
1797 else
1798 do_cache = false;
1796 /* If multicast route do not exist use 1799 /* If multicast route do not exist use
1797 * default one, but do not gateway in this case. 1800 * default one, but do not gateway in this case.
1798 * Yes, it is hack. 1801 * Yes, it is hack.
@@ -1802,8 +1805,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
1802 } 1805 }
1803 1806
1804 fnhe = NULL; 1807 fnhe = NULL;
1805 do_cache = fi != NULL; 1808 do_cache &= fi != NULL;
1806 if (fi) { 1809 if (do_cache) {
1807 struct rtable __rcu **prth; 1810 struct rtable __rcu **prth;
1808 struct fib_nh *nh = &FIB_RES_NH(*res); 1811 struct fib_nh *nh = &FIB_RES_NH(*res);
1809 1812
@@ -2229,8 +2232,27 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
2229 error = rt->dst.error; 2232 error = rt->dst.error;
2230 2233
2231 if (rt_is_input_route(rt)) { 2234 if (rt_is_input_route(rt)) {
2232 if (nla_put_u32(skb, RTA_IIF, rt->rt_iif)) 2235#ifdef CONFIG_IP_MROUTE
2233 goto nla_put_failure; 2236 if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
2237 IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
2238 int err = ipmr_get_route(net, skb,
2239 fl4->saddr, fl4->daddr,
2240 r, nowait);
2241 if (err <= 0) {
2242 if (!nowait) {
2243 if (err == 0)
2244 return 0;
2245 goto nla_put_failure;
2246 } else {
2247 if (err == -EMSGSIZE)
2248 goto nla_put_failure;
2249 error = err;
2250 }
2251 }
2252 } else
2253#endif
2254 if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
2255 goto nla_put_failure;
2234 } 2256 }
2235 2257
2236 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0) 2258 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
@@ -2493,6 +2515,10 @@ static __net_init int sysctl_route_net_init(struct net *net)
2493 tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL); 2515 tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
2494 if (tbl == NULL) 2516 if (tbl == NULL)
2495 goto err_dup; 2517 goto err_dup;
2518
2519 /* Don't export sysctls to unprivileged users */
2520 if (net->user_ns != &init_user_ns)
2521 tbl[0].procname = NULL;
2496 } 2522 }
2497 tbl[0].extra1 = net; 2523 tbl[0].extra1 = net;
2498 2524
@@ -2597,7 +2623,7 @@ int __init ip_rt_init(void)
2597 pr_err("Unable to create route proc files\n"); 2623 pr_err("Unable to create route proc files\n");
2598#ifdef CONFIG_XFRM 2624#ifdef CONFIG_XFRM
2599 xfrm_init(); 2625 xfrm_init();
2600 xfrm4_init(ip_rt_max_size); 2626 xfrm4_init();
2601#endif 2627#endif
2602 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL); 2628 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);
2603 2629
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index ba48e799b031..b236ef04914f 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -340,7 +340,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
340 } 340 }
341 341
342 req->expires = 0UL; 342 req->expires = 0UL;
343 req->retrans = 0; 343 req->num_retrans = 0;
344 344
345 /* 345 /*
346 * We need to lookup the route here to get at the correct 346 * We need to lookup the route here to get at the correct
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 63d4eccc674d..d84400b65049 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -883,6 +883,9 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
883 table[6].data = 883 table[6].data =
884 &net->ipv4.sysctl_ping_group_range; 884 &net->ipv4.sysctl_ping_group_range;
885 885
886 /* Don't export sysctls to unprivileged users */
887 if (net->user_ns != &init_user_ns)
888 table[0].procname = NULL;
886 } 889 }
887 890
888 /* 891 /*
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 197c0008503c..2aa69c8ae60c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -536,13 +536,14 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
536{ 536{
537 struct tcp_sock *tp = tcp_sk(sk); 537 struct tcp_sock *tp = tcp_sk(sk);
538 int answ; 538 int answ;
539 bool slow;
539 540
540 switch (cmd) { 541 switch (cmd) {
541 case SIOCINQ: 542 case SIOCINQ:
542 if (sk->sk_state == TCP_LISTEN) 543 if (sk->sk_state == TCP_LISTEN)
543 return -EINVAL; 544 return -EINVAL;
544 545
545 lock_sock(sk); 546 slow = lock_sock_fast(sk);
546 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) 547 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
547 answ = 0; 548 answ = 0;
548 else if (sock_flag(sk, SOCK_URGINLINE) || 549 else if (sock_flag(sk, SOCK_URGINLINE) ||
@@ -557,7 +558,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
557 answ--; 558 answ--;
558 } else 559 } else
559 answ = tp->urg_seq - tp->copied_seq; 560 answ = tp->urg_seq - tp->copied_seq;
560 release_sock(sk); 561 unlock_sock_fast(sk, slow);
561 break; 562 break;
562 case SIOCATMARK: 563 case SIOCATMARK:
563 answ = tp->urg_data && tp->urg_seq == tp->copied_seq; 564 answ = tp->urg_data && tp->urg_seq == tp->copied_seq;
@@ -830,8 +831,8 @@ static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
830 return mss_now; 831 return mss_now;
831} 832}
832 833
833static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, 834static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
834 size_t psize, int flags) 835 size_t size, int flags)
835{ 836{
836 struct tcp_sock *tp = tcp_sk(sk); 837 struct tcp_sock *tp = tcp_sk(sk);
837 int mss_now, size_goal; 838 int mss_now, size_goal;
@@ -858,12 +859,9 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
858 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) 859 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
859 goto out_err; 860 goto out_err;
860 861
861 while (psize > 0) { 862 while (size > 0) {
862 struct sk_buff *skb = tcp_write_queue_tail(sk); 863 struct sk_buff *skb = tcp_write_queue_tail(sk);
863 struct page *page = pages[poffset / PAGE_SIZE];
864 int copy, i; 864 int copy, i;
865 int offset = poffset % PAGE_SIZE;
866 int size = min_t(size_t, psize, PAGE_SIZE - offset);
867 bool can_coalesce; 865 bool can_coalesce;
868 866
869 if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) { 867 if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) {
@@ -912,8 +910,8 @@ new_segment:
912 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; 910 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
913 911
914 copied += copy; 912 copied += copy;
915 poffset += copy; 913 offset += copy;
916 if (!(psize -= copy)) 914 if (!(size -= copy))
917 goto out; 915 goto out;
918 916
919 if (skb->len < size_goal || (flags & MSG_OOB)) 917 if (skb->len < size_goal || (flags & MSG_OOB))
@@ -960,7 +958,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset,
960 flags); 958 flags);
961 959
962 lock_sock(sk); 960 lock_sock(sk);
963 res = do_tcp_sendpages(sk, &page, offset, size, flags); 961 res = do_tcp_sendpages(sk, page, offset, size, flags);
964 release_sock(sk); 962 release_sock(sk);
965 return res; 963 return res;
966} 964}
@@ -1212,7 +1210,7 @@ new_segment:
1212wait_for_sndbuf: 1210wait_for_sndbuf:
1213 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 1211 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1214wait_for_memory: 1212wait_for_memory:
1215 if (copied && likely(!tp->repair)) 1213 if (copied)
1216 tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); 1214 tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
1217 1215
1218 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) 1216 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
@@ -1223,7 +1221,7 @@ wait_for_memory:
1223 } 1221 }
1224 1222
1225out: 1223out:
1226 if (copied && likely(!tp->repair)) 1224 if (copied)
1227 tcp_push(sk, flags, mss_now, tp->nonagle); 1225 tcp_push(sk, flags, mss_now, tp->nonagle);
1228 release_sock(sk); 1226 release_sock(sk);
1229 return copied + copied_syn; 1227 return copied + copied_syn;
@@ -1430,12 +1428,12 @@ static void tcp_service_net_dma(struct sock *sk, bool wait)
1430} 1428}
1431#endif 1429#endif
1432 1430
1433static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) 1431static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
1434{ 1432{
1435 struct sk_buff *skb; 1433 struct sk_buff *skb;
1436 u32 offset; 1434 u32 offset;
1437 1435
1438 skb_queue_walk(&sk->sk_receive_queue, skb) { 1436 while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
1439 offset = seq - TCP_SKB_CB(skb)->seq; 1437 offset = seq - TCP_SKB_CB(skb)->seq;
1440 if (tcp_hdr(skb)->syn) 1438 if (tcp_hdr(skb)->syn)
1441 offset--; 1439 offset--;
@@ -1443,6 +1441,11 @@ static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
1443 *off = offset; 1441 *off = offset;
1444 return skb; 1442 return skb;
1445 } 1443 }
1444 /* This looks weird, but this can happen if TCP collapsing
1445 * splitted a fat GRO packet, while we released socket lock
1446 * in skb_splice_bits()
1447 */
1448 sk_eat_skb(sk, skb, false);
1446 } 1449 }
1447 return NULL; 1450 return NULL;
1448} 1451}
@@ -1484,7 +1487,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
1484 break; 1487 break;
1485 } 1488 }
1486 used = recv_actor(desc, skb, offset, len); 1489 used = recv_actor(desc, skb, offset, len);
1487 if (used < 0) { 1490 if (used <= 0) {
1488 if (!copied) 1491 if (!copied)
1489 copied = used; 1492 copied = used;
1490 break; 1493 break;
@@ -1493,15 +1496,19 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
1493 copied += used; 1496 copied += used;
1494 offset += used; 1497 offset += used;
1495 } 1498 }
1496 /* 1499 /* If recv_actor drops the lock (e.g. TCP splice
1497 * If recv_actor drops the lock (e.g. TCP splice
1498 * receive) the skb pointer might be invalid when 1500 * receive) the skb pointer might be invalid when
1499 * getting here: tcp_collapse might have deleted it 1501 * getting here: tcp_collapse might have deleted it
1500 * while aggregating skbs from the socket queue. 1502 * while aggregating skbs from the socket queue.
1501 */ 1503 */
1502 skb = tcp_recv_skb(sk, seq-1, &offset); 1504 skb = tcp_recv_skb(sk, seq - 1, &offset);
1503 if (!skb || (offset+1 != skb->len)) 1505 if (!skb)
1504 break; 1506 break;
1507 /* TCP coalescing might have appended data to the skb.
1508 * Try to splice more frags
1509 */
1510 if (offset + 1 != skb->len)
1511 continue;
1505 } 1512 }
1506 if (tcp_hdr(skb)->fin) { 1513 if (tcp_hdr(skb)->fin) {
1507 sk_eat_skb(sk, skb, false); 1514 sk_eat_skb(sk, skb, false);
@@ -1518,8 +1525,10 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
1518 tcp_rcv_space_adjust(sk); 1525 tcp_rcv_space_adjust(sk);
1519 1526
1520 /* Clean up data we have read: This will do ACK frames. */ 1527 /* Clean up data we have read: This will do ACK frames. */
1521 if (copied > 0) 1528 if (copied > 0) {
1529 tcp_recv_skb(sk, seq, &offset);
1522 tcp_cleanup_rbuf(sk, copied); 1530 tcp_cleanup_rbuf(sk, copied);
1531 }
1523 return copied; 1532 return copied;
1524} 1533}
1525EXPORT_SYMBOL(tcp_read_sock); 1534EXPORT_SYMBOL(tcp_read_sock);
@@ -2303,7 +2312,7 @@ void tcp_sock_destruct(struct sock *sk)
2303 2312
2304static inline bool tcp_can_repair_sock(const struct sock *sk) 2313static inline bool tcp_can_repair_sock(const struct sock *sk)
2305{ 2314{
2306 return capable(CAP_NET_ADMIN) && 2315 return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) &&
2307 ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED)); 2316 ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED));
2308} 2317}
2309 2318
@@ -3589,8 +3598,7 @@ void __init tcp_init(void)
3589 alloc_large_system_hash("TCP established", 3598 alloc_large_system_hash("TCP established",
3590 sizeof(struct inet_ehash_bucket), 3599 sizeof(struct inet_ehash_bucket),
3591 thash_entries, 3600 thash_entries,
3592 (totalram_pages >= 128 * 1024) ? 3601 17, /* one slot per 128 KB of memory */
3593 13 : 15,
3594 0, 3602 0,
3595 NULL, 3603 NULL,
3596 &tcp_hashinfo.ehash_mask, 3604 &tcp_hashinfo.ehash_mask,
@@ -3606,8 +3614,7 @@ void __init tcp_init(void)
3606 alloc_large_system_hash("TCP bind", 3614 alloc_large_system_hash("TCP bind",
3607 sizeof(struct inet_bind_hashbucket), 3615 sizeof(struct inet_bind_hashbucket),
3608 tcp_hashinfo.ehash_mask + 1, 3616 tcp_hashinfo.ehash_mask + 1,
3609 (totalram_pages >= 128 * 1024) ? 3617 17, /* one slot per 128 KB of memory */
3610 13 : 15,
3611 0, 3618 0,
3612 &tcp_hashinfo.bhash_size, 3619 &tcp_hashinfo.bhash_size,
3613 NULL, 3620 NULL,
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 1432cdb0644c..291f2ed7cc31 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Plugable TCP congestion control support and newReno 2 * Plugable TCP congestion control support and newReno
3 * congestion control. 3 * congestion control.
4 * Based on ideas from I/O scheduler suport and Web100. 4 * Based on ideas from I/O scheduler support and Web100.
5 * 5 *
6 * Copyright (C) 2005 Stephen Hemminger <shemminger@osdl.org> 6 * Copyright (C) 2005 Stephen Hemminger <shemminger@osdl.org>
7 */ 7 */
@@ -259,7 +259,8 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
259 if (!ca) 259 if (!ca)
260 err = -ENOENT; 260 err = -ENOENT;
261 261
262 else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || capable(CAP_NET_ADMIN))) 262 else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
263 ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)))
263 err = -EPERM; 264 err = -EPERM;
264 265
265 else if (!try_module_get(ca->owner)) 266 else if (!try_module_get(ca->owner))
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 813b43a76fec..834857f3c871 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -313,11 +313,13 @@ static void tcp_illinois_info(struct sock *sk, u32 ext,
313 .tcpv_rttcnt = ca->cnt_rtt, 313 .tcpv_rttcnt = ca->cnt_rtt,
314 .tcpv_minrtt = ca->base_rtt, 314 .tcpv_minrtt = ca->base_rtt,
315 }; 315 };
316 u64 t = ca->sum_rtt;
317 316
318 do_div(t, ca->cnt_rtt); 317 if (info.tcpv_rttcnt > 0) {
319 info.tcpv_rtt = t; 318 u64 t = ca->sum_rtt;
320 319
320 do_div(t, info.tcpv_rttcnt);
321 info.tcpv_rtt = t;
322 }
321 nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); 323 nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
322 } 324 }
323} 325}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1db663983587..18f97ca76b00 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3552,6 +3552,24 @@ static bool tcp_process_frto(struct sock *sk, int flag)
3552 return false; 3552 return false;
3553} 3553}
3554 3554
3555/* RFC 5961 7 [ACK Throttling] */
3556static void tcp_send_challenge_ack(struct sock *sk)
3557{
3558 /* unprotected vars, we dont care of overwrites */
3559 static u32 challenge_timestamp;
3560 static unsigned int challenge_count;
3561 u32 now = jiffies / HZ;
3562
3563 if (now != challenge_timestamp) {
3564 challenge_timestamp = now;
3565 challenge_count = 0;
3566 }
3567 if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
3568 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
3569 tcp_send_ack(sk);
3570 }
3571}
3572
3555/* This routine deals with incoming acks, but not outgoing ones. */ 3573/* This routine deals with incoming acks, but not outgoing ones. */
3556static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) 3574static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3557{ 3575{
@@ -3571,8 +3589,14 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3571 /* If the ack is older than previous acks 3589 /* If the ack is older than previous acks
3572 * then we can probably ignore it. 3590 * then we can probably ignore it.
3573 */ 3591 */
3574 if (before(ack, prior_snd_una)) 3592 if (before(ack, prior_snd_una)) {
3593 /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */
3594 if (before(ack, prior_snd_una - tp->max_window)) {
3595 tcp_send_challenge_ack(sk);
3596 return -1;
3597 }
3575 goto old_ack; 3598 goto old_ack;
3599 }
3576 3600
3577 /* If the ack includes data we haven't sent yet, discard 3601 /* If the ack includes data we haven't sent yet, discard
3578 * this segment (RFC793 Section 3.9). 3602 * this segment (RFC793 Section 3.9).
@@ -4529,6 +4553,9 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
4529 struct tcphdr *th; 4553 struct tcphdr *th;
4530 bool fragstolen; 4554 bool fragstolen;
4531 4555
4556 if (size == 0)
4557 return 0;
4558
4532 skb = alloc_skb(size + sizeof(*th), sk->sk_allocation); 4559 skb = alloc_skb(size + sizeof(*th), sk->sk_allocation);
4533 if (!skb) 4560 if (!skb)
4534 goto err; 4561 goto err;
@@ -5241,23 +5268,6 @@ out:
5241} 5268}
5242#endif /* CONFIG_NET_DMA */ 5269#endif /* CONFIG_NET_DMA */
5243 5270
5244static void tcp_send_challenge_ack(struct sock *sk)
5245{
5246 /* unprotected vars, we dont care of overwrites */
5247 static u32 challenge_timestamp;
5248 static unsigned int challenge_count;
5249 u32 now = jiffies / HZ;
5250
5251 if (now != challenge_timestamp) {
5252 challenge_timestamp = now;
5253 challenge_count = 0;
5254 }
5255 if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
5256 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
5257 tcp_send_ack(sk);
5258 }
5259}
5260
5261/* Does PAWS and seqno based validation of an incoming segment, flags will 5271/* Does PAWS and seqno based validation of an incoming segment, flags will
5262 * play significant role here. 5272 * play significant role here.
5263 */ 5273 */
@@ -5310,11 +5320,6 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
5310 goto discard; 5320 goto discard;
5311 } 5321 }
5312 5322
5313 /* ts_recent update must be made after we are sure that the packet
5314 * is in window.
5315 */
5316 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
5317
5318 /* step 3: check security and precedence [ignored] */ 5323 /* step 3: check security and precedence [ignored] */
5319 5324
5320 /* step 4: Check for a SYN 5325 /* step 4: Check for a SYN
@@ -5538,6 +5543,9 @@ slow_path:
5538 if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb)) 5543 if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
5539 goto csum_error; 5544 goto csum_error;
5540 5545
5546 if (!th->ack && !th->rst)
5547 goto discard;
5548
5541 /* 5549 /*
5542 * Standard slow path. 5550 * Standard slow path.
5543 */ 5551 */
@@ -5546,9 +5554,14 @@ slow_path:
5546 return 0; 5554 return 0;
5547 5555
5548step5: 5556step5:
5549 if (th->ack && tcp_ack(sk, skb, FLAG_SLOWPATH) < 0) 5557 if (tcp_ack(sk, skb, FLAG_SLOWPATH) < 0)
5550 goto discard; 5558 goto discard;
5551 5559
5560 /* ts_recent update must be made after we are sure that the packet
5561 * is in window.
5562 */
5563 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
5564
5552 tcp_rcv_rtt_measure_ts(sk, skb); 5565 tcp_rcv_rtt_measure_ts(sk, skb);
5553 5566
5554 /* Process urgent data. */ 5567 /* Process urgent data. */
@@ -5642,7 +5655,11 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
5642 tcp_fastopen_cache_set(sk, mss, cookie, syn_drop); 5655 tcp_fastopen_cache_set(sk, mss, cookie, syn_drop);
5643 5656
5644 if (data) { /* Retransmit unacked data in SYN */ 5657 if (data) { /* Retransmit unacked data in SYN */
5645 tcp_retransmit_skb(sk, data); 5658 tcp_for_write_queue_from(data, sk) {
5659 if (data == tcp_send_head(sk) ||
5660 __tcp_retransmit_skb(sk, data))
5661 break;
5662 }
5646 tcp_rearm_rto(sk); 5663 tcp_rearm_rto(sk);
5647 return true; 5664 return true;
5648 } 5665 }
@@ -5970,11 +5987,15 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5970 if (tcp_check_req(sk, skb, req, NULL, true) == NULL) 5987 if (tcp_check_req(sk, skb, req, NULL, true) == NULL)
5971 goto discard; 5988 goto discard;
5972 } 5989 }
5990
5991 if (!th->ack && !th->rst)
5992 goto discard;
5993
5973 if (!tcp_validate_incoming(sk, skb, th, 0)) 5994 if (!tcp_validate_incoming(sk, skb, th, 0))
5974 return 0; 5995 return 0;
5975 5996
5976 /* step 5: check the ACK field */ 5997 /* step 5: check the ACK field */
5977 if (th->ack) { 5998 if (true) {
5978 int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0; 5999 int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0;
5979 6000
5980 switch (sk->sk_state) { 6001 switch (sk->sk_state) {
@@ -5985,7 +6006,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5985 */ 6006 */
5986 if (req) { 6007 if (req) {
5987 tcp_synack_rtt_meas(sk, req); 6008 tcp_synack_rtt_meas(sk, req);
5988 tp->total_retrans = req->retrans; 6009 tp->total_retrans = req->num_retrans;
5989 6010
5990 reqsk_fastopen_remove(sk, req, false); 6011 reqsk_fastopen_remove(sk, req, false);
5991 } else { 6012 } else {
@@ -6124,8 +6145,12 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
6124 } 6145 }
6125 break; 6146 break;
6126 } 6147 }
6127 } else 6148 }
6128 goto discard; 6149
6150 /* ts_recent update must be made after we are sure that the packet
6151 * is in window.
6152 */
6153 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
6129 6154
6130 /* step 6: check the URG bit */ 6155 /* step 6: check the URG bit */
6131 tcp_urg(sk, skb, th); 6156 tcp_urg(sk, skb, th);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0c4a64355603..54139fa514e6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -138,14 +138,6 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
138} 138}
139EXPORT_SYMBOL_GPL(tcp_twsk_unique); 139EXPORT_SYMBOL_GPL(tcp_twsk_unique);
140 140
141static int tcp_repair_connect(struct sock *sk)
142{
143 tcp_connect_init(sk);
144 tcp_finish_connect(sk, NULL);
145
146 return 0;
147}
148
149/* This will initiate an outgoing connection. */ 141/* This will initiate an outgoing connection. */
150int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 142int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
151{ 143{
@@ -250,10 +242,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
250 242
251 inet->inet_id = tp->write_seq ^ jiffies; 243 inet->inet_id = tp->write_seq ^ jiffies;
252 244
253 if (likely(!tp->repair)) 245 err = tcp_connect(sk);
254 err = tcp_connect(sk);
255 else
256 err = tcp_repair_connect(sk);
257 246
258 rt = NULL; 247 rt = NULL;
259 if (err) 248 if (err)
@@ -877,10 +866,13 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
877} 866}
878 867
879static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, 868static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
880 struct request_values *rvp) 869 struct request_values *rvp)
881{ 870{
882 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); 871 int res = tcp_v4_send_synack(sk, NULL, req, rvp, 0, false);
883 return tcp_v4_send_synack(sk, NULL, req, rvp, 0, false); 872
873 if (!res)
874 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
875 return res;
884} 876}
885 877
886/* 878/*
@@ -1070,7 +1062,7 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
1070} 1062}
1071EXPORT_SYMBOL(tcp_md5_do_del); 1063EXPORT_SYMBOL(tcp_md5_do_del);
1072 1064
1073void tcp_clear_md5_list(struct sock *sk) 1065static void tcp_clear_md5_list(struct sock *sk)
1074{ 1066{
1075 struct tcp_sock *tp = tcp_sk(sk); 1067 struct tcp_sock *tp = tcp_sk(sk);
1076 struct tcp_md5sig_key *key; 1068 struct tcp_md5sig_key *key;
@@ -1386,7 +1378,8 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk,
1386 struct sock *child; 1378 struct sock *child;
1387 int err; 1379 int err;
1388 1380
1389 req->retrans = 0; 1381 req->num_retrans = 0;
1382 req->num_timeout = 0;
1390 req->sk = NULL; 1383 req->sk = NULL;
1391 1384
1392 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); 1385 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
@@ -1741,7 +1734,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1741 1734
1742 tcp_initialize_rcv_mss(newsk); 1735 tcp_initialize_rcv_mss(newsk);
1743 tcp_synack_rtt_meas(newsk, req); 1736 tcp_synack_rtt_meas(newsk, req);
1744 newtp->total_retrans = req->retrans; 1737 newtp->total_retrans = req->num_retrans;
1745 1738
1746#ifdef CONFIG_TCP_MD5SIG 1739#ifdef CONFIG_TCP_MD5SIG
1747 /* Copy over the MD5 key from the original socket */ 1740 /* Copy over the MD5 key from the original socket */
@@ -1774,10 +1767,8 @@ exit:
1774 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 1767 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1775 return NULL; 1768 return NULL;
1776put_and_exit: 1769put_and_exit:
1777 tcp_clear_xmit_timers(newsk); 1770 inet_csk_prepare_forced_close(newsk);
1778 tcp_cleanup_congestion_control(newsk); 1771 tcp_done(newsk);
1779 bh_unlock_sock(newsk);
1780 sock_put(newsk);
1781 goto exit; 1772 goto exit;
1782} 1773}
1783EXPORT_SYMBOL(tcp_v4_syn_recv_sock); 1774EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
@@ -1919,7 +1910,6 @@ EXPORT_SYMBOL(tcp_v4_do_rcv);
1919 1910
1920void tcp_v4_early_demux(struct sk_buff *skb) 1911void tcp_v4_early_demux(struct sk_buff *skb)
1921{ 1912{
1922 struct net *net = dev_net(skb->dev);
1923 const struct iphdr *iph; 1913 const struct iphdr *iph;
1924 const struct tcphdr *th; 1914 const struct tcphdr *th;
1925 struct sock *sk; 1915 struct sock *sk;
@@ -1927,16 +1917,16 @@ void tcp_v4_early_demux(struct sk_buff *skb)
1927 if (skb->pkt_type != PACKET_HOST) 1917 if (skb->pkt_type != PACKET_HOST)
1928 return; 1918 return;
1929 1919
1930 if (!pskb_may_pull(skb, ip_hdrlen(skb) + sizeof(struct tcphdr))) 1920 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1931 return; 1921 return;
1932 1922
1933 iph = ip_hdr(skb); 1923 iph = ip_hdr(skb);
1934 th = (struct tcphdr *) ((char *)iph + ip_hdrlen(skb)); 1924 th = tcp_hdr(skb);
1935 1925
1936 if (th->doff < sizeof(struct tcphdr) / 4) 1926 if (th->doff < sizeof(struct tcphdr) / 4)
1937 return; 1927 return;
1938 1928
1939 sk = __inet_lookup_established(net, &tcp_hashinfo, 1929 sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1940 iph->saddr, th->source, 1930 iph->saddr, th->source,
1941 iph->daddr, ntohs(th->dest), 1931 iph->daddr, ntohs(th->dest),
1942 skb->skb_iif); 1932 skb->skb_iif);
@@ -2640,7 +2630,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req,
2640 0, 0, /* could print option size, but that is af dependent. */ 2630 0, 0, /* could print option size, but that is af dependent. */
2641 1, /* timers active (only the expire timer) */ 2631 1, /* timers active (only the expire timer) */
2642 jiffies_delta_to_clock_t(delta), 2632 jiffies_delta_to_clock_t(delta),
2643 req->retrans, 2633 req->num_timeout,
2644 from_kuid_munged(seq_user_ns(f), uid), 2634 from_kuid_munged(seq_user_ns(f), uid),
2645 0, /* non standard timer */ 2635 0, /* non standard timer */
2646 0, /* open_requests have no inode */ 2636 0, /* open_requests have no inode */
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 4c752a6e0bcd..f696d7c2e9fa 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -1,7 +1,6 @@
1#include <linux/rcupdate.h> 1#include <linux/rcupdate.h>
2#include <linux/spinlock.h> 2#include <linux/spinlock.h>
3#include <linux/jiffies.h> 3#include <linux/jiffies.h>
4#include <linux/bootmem.h>
5#include <linux/module.h> 4#include <linux/module.h>
6#include <linux/cache.h> 5#include <linux/cache.h>
7#include <linux/slab.h> 6#include <linux/slab.h>
@@ -9,6 +8,7 @@
9#include <linux/tcp.h> 8#include <linux/tcp.h>
10#include <linux/hash.h> 9#include <linux/hash.h>
11#include <linux/tcp_metrics.h> 10#include <linux/tcp_metrics.h>
11#include <linux/vmalloc.h>
12 12
13#include <net/inet_connection_sock.h> 13#include <net/inet_connection_sock.h>
14#include <net/net_namespace.h> 14#include <net/net_namespace.h>
@@ -864,7 +864,7 @@ static int parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr,
864 } 864 }
865 a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV6]; 865 a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV6];
866 if (a) { 866 if (a) {
867 if (nla_len(a) != sizeof(sizeof(struct in6_addr))) 867 if (nla_len(a) != sizeof(struct in6_addr))
868 return -EINVAL; 868 return -EINVAL;
869 addr->family = AF_INET6; 869 addr->family = AF_INET6;
870 memcpy(addr->addr.a6, nla_data(a), sizeof(addr->addr.a6)); 870 memcpy(addr->addr.a6, nla_data(a), sizeof(addr->addr.a6));
@@ -1034,7 +1034,10 @@ static int __net_init tcp_net_metrics_init(struct net *net)
1034 net->ipv4.tcp_metrics_hash_log = order_base_2(slots); 1034 net->ipv4.tcp_metrics_hash_log = order_base_2(slots);
1035 size = sizeof(struct tcpm_hash_bucket) << net->ipv4.tcp_metrics_hash_log; 1035 size = sizeof(struct tcpm_hash_bucket) << net->ipv4.tcp_metrics_hash_log;
1036 1036
1037 net->ipv4.tcp_metrics_hash = kzalloc(size, GFP_KERNEL); 1037 net->ipv4.tcp_metrics_hash = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
1038 if (!net->ipv4.tcp_metrics_hash)
1039 net->ipv4.tcp_metrics_hash = vzalloc(size);
1040
1038 if (!net->ipv4.tcp_metrics_hash) 1041 if (!net->ipv4.tcp_metrics_hash)
1039 return -ENOMEM; 1042 return -ENOMEM;
1040 1043
@@ -1055,7 +1058,10 @@ static void __net_exit tcp_net_metrics_exit(struct net *net)
1055 tm = next; 1058 tm = next;
1056 } 1059 }
1057 } 1060 }
1058 kfree(net->ipv4.tcp_metrics_hash); 1061 if (is_vmalloc_addr(net->ipv4.tcp_metrics_hash))
1062 vfree(net->ipv4.tcp_metrics_hash);
1063 else
1064 kfree(net->ipv4.tcp_metrics_hash);
1059} 1065}
1060 1066
1061static __net_initdata struct pernet_operations tcp_net_metrics_ops = { 1067static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a7302d974f32..f35f2dfb6401 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -553,7 +553,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
553 * it can be estimated (approximately) 553 * it can be estimated (approximately)
554 * from another data. 554 * from another data.
555 */ 555 */
556 tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans); 556 tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->num_timeout);
557 paws_reject = tcp_paws_reject(&tmp_opt, th->rst); 557 paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
558 } 558 }
559 } 559 }
@@ -582,7 +582,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
582 * Note that even if there is new data in the SYN packet 582 * Note that even if there is new data in the SYN packet
583 * they will be thrown away too. 583 * they will be thrown away too.
584 */ 584 */
585 req->rsk_ops->rtx_syn_ack(sk, req, NULL); 585 inet_rtx_syn_ack(sk, req);
586 return NULL; 586 return NULL;
587 } 587 }
588 588
@@ -696,7 +696,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
696 /* Got ACK for our SYNACK, so update baseline for SYNACK RTT sample. */ 696 /* Got ACK for our SYNACK, so update baseline for SYNACK RTT sample. */
697 if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr) 697 if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr)
698 tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr; 698 tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr;
699 else if (req->retrans) /* don't take RTT sample if retrans && ~TS */ 699 else if (req->num_retrans) /* don't take RTT sample if retrans && ~TS */
700 tcp_rsk(req)->snt_synack = 0; 700 tcp_rsk(req)->snt_synack = 0;
701 701
702 /* For Fast Open no more processing is needed (sk is the 702 /* For Fast Open no more processing is needed (sk is the
@@ -706,7 +706,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
706 return sk; 706 return sk;
707 707
708 /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */ 708 /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */
709 if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && 709 if (req->num_timeout < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
710 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { 710 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
711 inet_rsk(req)->acked = 1; 711 inet_rsk(req)->acked = 1;
712 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); 712 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index cfe6ffe1c177..5d451593ef16 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1986,6 +1986,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1986 tso_segs = tcp_init_tso_segs(sk, skb, mss_now); 1986 tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
1987 BUG_ON(!tso_segs); 1987 BUG_ON(!tso_segs);
1988 1988
1989 if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE)
1990 goto repair; /* Skip network transmission */
1991
1989 cwnd_quota = tcp_cwnd_test(tp, skb); 1992 cwnd_quota = tcp_cwnd_test(tp, skb);
1990 if (!cwnd_quota) 1993 if (!cwnd_quota)
1991 break; 1994 break;
@@ -2026,6 +2029,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
2026 if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) 2029 if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
2027 break; 2030 break;
2028 2031
2032repair:
2029 /* Advance the send_head. This one is sent out. 2033 /* Advance the send_head. This one is sent out.
2030 * This call will increment packets_out. 2034 * This call will increment packets_out.
2031 */ 2035 */
@@ -2305,12 +2309,11 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2305 * state updates are done by the caller. Returns non-zero if an 2309 * state updates are done by the caller. Returns non-zero if an
2306 * error occurred which prevented the send. 2310 * error occurred which prevented the send.
2307 */ 2311 */
2308int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) 2312int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2309{ 2313{
2310 struct tcp_sock *tp = tcp_sk(sk); 2314 struct tcp_sock *tp = tcp_sk(sk);
2311 struct inet_connection_sock *icsk = inet_csk(sk); 2315 struct inet_connection_sock *icsk = inet_csk(sk);
2312 unsigned int cur_mss; 2316 unsigned int cur_mss;
2313 int err;
2314 2317
2315 /* Inconslusive MTU probe */ 2318 /* Inconslusive MTU probe */
2316 if (icsk->icsk_mtup.probe_size) { 2319 if (icsk->icsk_mtup.probe_size) {
@@ -2383,11 +2386,17 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2383 if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) { 2386 if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) {
2384 struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, 2387 struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
2385 GFP_ATOMIC); 2388 GFP_ATOMIC);
2386 err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : 2389 return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
2387 -ENOBUFS; 2390 -ENOBUFS;
2388 } else { 2391 } else {
2389 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); 2392 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2390 } 2393 }
2394}
2395
2396int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2397{
2398 struct tcp_sock *tp = tcp_sk(sk);
2399 int err = __tcp_retransmit_skb(sk, skb);
2391 2400
2392 if (err == 0) { 2401 if (err == 0) {
2393 /* Update global TCP statistics. */ 2402 /* Update global TCP statistics. */
@@ -2983,6 +2992,11 @@ int tcp_connect(struct sock *sk)
2983 2992
2984 tcp_connect_init(sk); 2993 tcp_connect_init(sk);
2985 2994
2995 if (unlikely(tp->repair)) {
2996 tcp_finish_connect(sk, NULL);
2997 return 0;
2998 }
2999
2986 buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation); 3000 buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
2987 if (unlikely(buff == NULL)) 3001 if (unlikely(buff == NULL))
2988 return -ENOBUFS; 3002 return -ENOBUFS;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index d47c1b4421a3..b78aac30c498 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -318,7 +318,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
318 req = tcp_sk(sk)->fastopen_rsk; 318 req = tcp_sk(sk)->fastopen_rsk;
319 req->rsk_ops->syn_ack_timeout(sk, req); 319 req->rsk_ops->syn_ack_timeout(sk, req);
320 320
321 if (req->retrans >= max_retries) { 321 if (req->num_timeout >= max_retries) {
322 tcp_write_err(sk); 322 tcp_write_err(sk);
323 return; 323 return;
324 } 324 }
@@ -327,10 +327,10 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
327 * regular retransmit because if the child socket has been accepted 327 * regular retransmit because if the child socket has been accepted
328 * it's not good to give up too easily. 328 * it's not good to give up too easily.
329 */ 329 */
330 req->rsk_ops->rtx_syn_ack(sk, req, NULL); 330 inet_rtx_syn_ack(sk, req);
331 req->retrans++; 331 req->num_timeout++;
332 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 332 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
333 TCP_TIMEOUT_INIT << req->retrans, TCP_RTO_MAX); 333 TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
334} 334}
335 335
336/* 336/*
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 05c5ab8d983c..3be0ac2c1920 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -279,19 +279,8 @@ static void __exit xfrm4_policy_fini(void)
279 xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo); 279 xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo);
280} 280}
281 281
282void __init xfrm4_init(int rt_max_size) 282void __init xfrm4_init(void)
283{ 283{
284 /*
285 * Select a default value for the gc_thresh based on the main route
286 * table hash size. It seems to me the worst case scenario is when
287 * we have ipsec operating in transport mode, in which we create a
288 * dst_entry per socket. The xfrm gc algorithm starts trying to remove
289 * entries at gc_thresh, and prevents new allocations as 2*gc_thresh
290 * so lets set an initial xfrm gc_thresh value at the rt_max_size/2.
291 * That will let us store an ipsec connection per route table entry,
292 * and start cleaning when were 1/2 full
293 */
294 xfrm4_dst_ops.gc_thresh = rt_max_size/2;
295 dst_entries_init(&xfrm4_dst_ops); 284 dst_entries_init(&xfrm4_dst_ops);
296 285
297 xfrm4_state_init(); 286 xfrm4_state_init();