aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-12-12 21:07:07 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2012-12-12 21:07:07 -0500
commit6be35c700f742e911ecedd07fcc43d4439922334 (patch)
treeca9f37214d204465fcc2d79c82efd291e357c53c /net/ipv4
parente37aa63e87bd581f9be5555ed0ba83f5295c92fc (diff)
parent520dfe3a3645257bf83660f672c47f8558f3d4c4 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking changes from David Miller: 1) Allow to dump, monitor, and change the bridge multicast database using netlink. From Cong Wang. 2) RFC 5961 TCP blind data injection attack mitigation, from Eric Dumazet. 3) Networking user namespace support from Eric W. Biederman. 4) tuntap/virtio-net multiqueue support by Jason Wang. 5) Support for checksum offload of encapsulated packets (basically, tunneled traffic can still be checksummed by HW). From Joseph Gasparakis. 6) Allow BPF filter access to VLAN tags, from Eric Dumazet and Daniel Borkmann. 7) Bridge port parameters over netlink and BPDU blocking support from Stephen Hemminger. 8) Improve data access patterns during inet socket demux by rearranging socket layout, from Eric Dumazet. 9) TIPC protocol updates and cleanups from Ying Xue, Paul Gortmaker, and Jon Maloy. 10) Update TCP socket hash sizing to be more in line with current day realities. The existing heuristics were chosen a decade ago. From Eric Dumazet. 11) Fix races, queue bloat, and excessive wakeups in ATM and associated drivers, from Krzysztof Mazur and David Woodhouse. 12) Support DOVE (Distributed Overlay Virtual Ethernet) extensions in VXLAN driver, from David Stevens. 13) Add "oops_only" mode to netconsole, from Amerigo Wang. 14) Support set and query of VEB/VEPA bridge mode via PF_BRIDGE, also allow DCB netlink to work on namespaces other than the initial namespace. From John Fastabend. 15) Support PTP in the Tigon3 driver, from Matt Carlson. 16) tun/vhost zero copy fixes and improvements, plus turn it on by default, from Michael S. Tsirkin. 17) Support per-association statistics in SCTP, from Michele Baldessari. And many, many, driver updates, cleanups, and improvements. Too numerous to mention individually. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1722 commits) net/mlx4_en: Add support for destination MAC in steering rules net/mlx4_en: Use generic etherdevice.h functions. 
net: ethtool: Add destination MAC address to flow steering API bridge: add support of adding and deleting mdb entries bridge: notify mdb changes via netlink ndisc: Unexport ndisc_{build,send}_skb(). uapi: add missing netconf.h to export list pkt_sched: avoid requeues if possible solos-pci: fix double-free of TX skb in DMA mode bnx2: Fix accidental reversions. bna: Driver Version Updated to 3.1.2.1 bna: Firmware update bna: Add RX State bna: Rx Page Based Allocation bna: TX Intr Coalescing Fix bna: Tx and Rx Optimizations bna: Code Cleanup and Enhancements ath9k: check pdata variable before dereferencing it ath5k: RX timestamp is reported at end of frame ath9k_htc: RX timestamp is reported at end of frame ...
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c93
-rw-r--r--net/ipv4/arp.c2
-rw-r--r--net/ipv4/devinet.c198
-rw-r--r--net/ipv4/fib_frontend.c2
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/inet_connection_sock.c25
-rw-r--r--net/ipv4/inet_diag.c5
-rw-r--r--net/ipv4/inet_hashtables.c36
-rw-r--r--net/ipv4/ip_fragment.c4
-rw-r--r--net/ipv4/ip_gre.c32
-rw-r--r--net/ipv4/ip_options.c6
-rw-r--r--net/ipv4/ip_output.c4
-rw-r--r--net/ipv4/ip_sockglue.c5
-rw-r--r--net/ipv4/ip_vti.c26
-rw-r--r--net/ipv4/ipconfig.c6
-rw-r--r--net/ipv4/ipip.c271
-rw-r--r--net/ipv4/ipmr.c137
-rw-r--r--net/ipv4/netfilter/arp_tables.c8
-rw-r--r--net/ipv4/netfilter/ip_tables.c8
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c9
-rw-r--r--net/ipv4/netfilter/iptable_nat.c8
-rw-r--r--net/ipv4/protocol.c21
-rw-r--r--net/ipv4/route.c27
-rw-r--r--net/ipv4/syncookies.c2
-rw-r--r--net/ipv4/sysctl_net_ipv4.c3
-rw-r--r--net/ipv4/tcp.c25
-rw-r--r--net/ipv4/tcp_cong.c3
-rw-r--r--net/ipv4/tcp_input.c45
-rw-r--r--net/ipv4/tcp_ipv4.c38
-rw-r--r--net/ipv4/tcp_minisocks.c8
-rw-r--r--net/ipv4/tcp_output.c5
-rw-r--r--net/ipv4/tcp_timer.c8
32 files changed, 815 insertions, 257 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 766c59658563..24b384b7903e 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -346,7 +346,8 @@ lookup_protocol:
346 } 346 }
347 347
348 err = -EPERM; 348 err = -EPERM;
349 if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW)) 349 if (sock->type == SOCK_RAW && !kern &&
350 !ns_capable(net->user_ns, CAP_NET_RAW))
350 goto out_rcu_unlock; 351 goto out_rcu_unlock;
351 352
352 err = -EAFNOSUPPORT; 353 err = -EAFNOSUPPORT;
@@ -473,6 +474,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
473 struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; 474 struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
474 struct sock *sk = sock->sk; 475 struct sock *sk = sock->sk;
475 struct inet_sock *inet = inet_sk(sk); 476 struct inet_sock *inet = inet_sk(sk);
477 struct net *net = sock_net(sk);
476 unsigned short snum; 478 unsigned short snum;
477 int chk_addr_ret; 479 int chk_addr_ret;
478 int err; 480 int err;
@@ -496,7 +498,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
496 goto out; 498 goto out;
497 } 499 }
498 500
499 chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); 501 chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr);
500 502
501 /* Not specified by any standard per-se, however it breaks too 503 /* Not specified by any standard per-se, however it breaks too
502 * many applications when removed. It is unfortunate since 504 * many applications when removed. It is unfortunate since
@@ -516,7 +518,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
516 518
517 snum = ntohs(addr->sin_port); 519 snum = ntohs(addr->sin_port);
518 err = -EACCES; 520 err = -EACCES;
519 if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) 521 if (snum && snum < PROT_SOCK &&
522 !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
520 goto out; 523 goto out;
521 524
522 /* We keep a pair of addresses. rcv_saddr is the one 525 /* We keep a pair of addresses. rcv_saddr is the one
@@ -1251,7 +1254,7 @@ EXPORT_SYMBOL(inet_sk_rebuild_header);
1251 1254
1252static int inet_gso_send_check(struct sk_buff *skb) 1255static int inet_gso_send_check(struct sk_buff *skb)
1253{ 1256{
1254 const struct net_protocol *ops; 1257 const struct net_offload *ops;
1255 const struct iphdr *iph; 1258 const struct iphdr *iph;
1256 int proto; 1259 int proto;
1257 int ihl; 1260 int ihl;
@@ -1275,9 +1278,9 @@ static int inet_gso_send_check(struct sk_buff *skb)
1275 err = -EPROTONOSUPPORT; 1278 err = -EPROTONOSUPPORT;
1276 1279
1277 rcu_read_lock(); 1280 rcu_read_lock();
1278 ops = rcu_dereference(inet_protos[proto]); 1281 ops = rcu_dereference(inet_offloads[proto]);
1279 if (likely(ops && ops->gso_send_check)) 1282 if (likely(ops && ops->callbacks.gso_send_check))
1280 err = ops->gso_send_check(skb); 1283 err = ops->callbacks.gso_send_check(skb);
1281 rcu_read_unlock(); 1284 rcu_read_unlock();
1282 1285
1283out: 1286out:
@@ -1288,7 +1291,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
1288 netdev_features_t features) 1291 netdev_features_t features)
1289{ 1292{
1290 struct sk_buff *segs = ERR_PTR(-EINVAL); 1293 struct sk_buff *segs = ERR_PTR(-EINVAL);
1291 const struct net_protocol *ops; 1294 const struct net_offload *ops;
1292 struct iphdr *iph; 1295 struct iphdr *iph;
1293 int proto; 1296 int proto;
1294 int ihl; 1297 int ihl;
@@ -1325,9 +1328,9 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
1325 segs = ERR_PTR(-EPROTONOSUPPORT); 1328 segs = ERR_PTR(-EPROTONOSUPPORT);
1326 1329
1327 rcu_read_lock(); 1330 rcu_read_lock();
1328 ops = rcu_dereference(inet_protos[proto]); 1331 ops = rcu_dereference(inet_offloads[proto]);
1329 if (likely(ops && ops->gso_segment)) 1332 if (likely(ops && ops->callbacks.gso_segment))
1330 segs = ops->gso_segment(skb, features); 1333 segs = ops->callbacks.gso_segment(skb, features);
1331 rcu_read_unlock(); 1334 rcu_read_unlock();
1332 1335
1333 if (!segs || IS_ERR(segs)) 1336 if (!segs || IS_ERR(segs))
@@ -1356,7 +1359,7 @@ out:
1356static struct sk_buff **inet_gro_receive(struct sk_buff **head, 1359static struct sk_buff **inet_gro_receive(struct sk_buff **head,
1357 struct sk_buff *skb) 1360 struct sk_buff *skb)
1358{ 1361{
1359 const struct net_protocol *ops; 1362 const struct net_offload *ops;
1360 struct sk_buff **pp = NULL; 1363 struct sk_buff **pp = NULL;
1361 struct sk_buff *p; 1364 struct sk_buff *p;
1362 const struct iphdr *iph; 1365 const struct iphdr *iph;
@@ -1378,8 +1381,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
1378 proto = iph->protocol; 1381 proto = iph->protocol;
1379 1382
1380 rcu_read_lock(); 1383 rcu_read_lock();
1381 ops = rcu_dereference(inet_protos[proto]); 1384 ops = rcu_dereference(inet_offloads[proto]);
1382 if (!ops || !ops->gro_receive) 1385 if (!ops || !ops->callbacks.gro_receive)
1383 goto out_unlock; 1386 goto out_unlock;
1384 1387
1385 if (*(u8 *)iph != 0x45) 1388 if (*(u8 *)iph != 0x45)
@@ -1420,7 +1423,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
1420 skb_gro_pull(skb, sizeof(*iph)); 1423 skb_gro_pull(skb, sizeof(*iph));
1421 skb_set_transport_header(skb, skb_gro_offset(skb)); 1424 skb_set_transport_header(skb, skb_gro_offset(skb));
1422 1425
1423 pp = ops->gro_receive(head, skb); 1426 pp = ops->callbacks.gro_receive(head, skb);
1424 1427
1425out_unlock: 1428out_unlock:
1426 rcu_read_unlock(); 1429 rcu_read_unlock();
@@ -1435,7 +1438,7 @@ static int inet_gro_complete(struct sk_buff *skb)
1435{ 1438{
1436 __be16 newlen = htons(skb->len - skb_network_offset(skb)); 1439 __be16 newlen = htons(skb->len - skb_network_offset(skb));
1437 struct iphdr *iph = ip_hdr(skb); 1440 struct iphdr *iph = ip_hdr(skb);
1438 const struct net_protocol *ops; 1441 const struct net_offload *ops;
1439 int proto = iph->protocol; 1442 int proto = iph->protocol;
1440 int err = -ENOSYS; 1443 int err = -ENOSYS;
1441 1444
@@ -1443,11 +1446,11 @@ static int inet_gro_complete(struct sk_buff *skb)
1443 iph->tot_len = newlen; 1446 iph->tot_len = newlen;
1444 1447
1445 rcu_read_lock(); 1448 rcu_read_lock();
1446 ops = rcu_dereference(inet_protos[proto]); 1449 ops = rcu_dereference(inet_offloads[proto]);
1447 if (WARN_ON(!ops || !ops->gro_complete)) 1450 if (WARN_ON(!ops || !ops->callbacks.gro_complete))
1448 goto out_unlock; 1451 goto out_unlock;
1449 1452
1450 err = ops->gro_complete(skb); 1453 err = ops->callbacks.gro_complete(skb);
1451 1454
1452out_unlock: 1455out_unlock:
1453 rcu_read_unlock(); 1456 rcu_read_unlock();
@@ -1558,23 +1561,33 @@ static const struct net_protocol tcp_protocol = {
1558 .early_demux = tcp_v4_early_demux, 1561 .early_demux = tcp_v4_early_demux,
1559 .handler = tcp_v4_rcv, 1562 .handler = tcp_v4_rcv,
1560 .err_handler = tcp_v4_err, 1563 .err_handler = tcp_v4_err,
1561 .gso_send_check = tcp_v4_gso_send_check,
1562 .gso_segment = tcp_tso_segment,
1563 .gro_receive = tcp4_gro_receive,
1564 .gro_complete = tcp4_gro_complete,
1565 .no_policy = 1, 1564 .no_policy = 1,
1566 .netns_ok = 1, 1565 .netns_ok = 1,
1567}; 1566};
1568 1567
1568static const struct net_offload tcp_offload = {
1569 .callbacks = {
1570 .gso_send_check = tcp_v4_gso_send_check,
1571 .gso_segment = tcp_tso_segment,
1572 .gro_receive = tcp4_gro_receive,
1573 .gro_complete = tcp4_gro_complete,
1574 },
1575};
1576
1569static const struct net_protocol udp_protocol = { 1577static const struct net_protocol udp_protocol = {
1570 .handler = udp_rcv, 1578 .handler = udp_rcv,
1571 .err_handler = udp_err, 1579 .err_handler = udp_err,
1572 .gso_send_check = udp4_ufo_send_check,
1573 .gso_segment = udp4_ufo_fragment,
1574 .no_policy = 1, 1580 .no_policy = 1,
1575 .netns_ok = 1, 1581 .netns_ok = 1,
1576}; 1582};
1577 1583
1584static const struct net_offload udp_offload = {
1585 .callbacks = {
1586 .gso_send_check = udp4_ufo_send_check,
1587 .gso_segment = udp4_ufo_fragment,
1588 },
1589};
1590
1578static const struct net_protocol icmp_protocol = { 1591static const struct net_protocol icmp_protocol = {
1579 .handler = icmp_rcv, 1592 .handler = icmp_rcv,
1580 .err_handler = ping_err, 1593 .err_handler = ping_err,
@@ -1659,13 +1672,35 @@ static int ipv4_proc_init(void);
1659 * IP protocol layer initialiser 1672 * IP protocol layer initialiser
1660 */ 1673 */
1661 1674
1675static struct packet_offload ip_packet_offload __read_mostly = {
1676 .type = cpu_to_be16(ETH_P_IP),
1677 .callbacks = {
1678 .gso_send_check = inet_gso_send_check,
1679 .gso_segment = inet_gso_segment,
1680 .gro_receive = inet_gro_receive,
1681 .gro_complete = inet_gro_complete,
1682 },
1683};
1684
1685static int __init ipv4_offload_init(void)
1686{
1687 /*
1688 * Add offloads
1689 */
1690 if (inet_add_offload(&udp_offload, IPPROTO_UDP) < 0)
1691 pr_crit("%s: Cannot add UDP protocol offload\n", __func__);
1692 if (inet_add_offload(&tcp_offload, IPPROTO_TCP) < 0)
1693 pr_crit("%s: Cannot add TCP protocol offload\n", __func__);
1694
1695 dev_add_offload(&ip_packet_offload);
1696 return 0;
1697}
1698
1699fs_initcall(ipv4_offload_init);
1700
1662static struct packet_type ip_packet_type __read_mostly = { 1701static struct packet_type ip_packet_type __read_mostly = {
1663 .type = cpu_to_be16(ETH_P_IP), 1702 .type = cpu_to_be16(ETH_P_IP),
1664 .func = ip_rcv, 1703 .func = ip_rcv,
1665 .gso_send_check = inet_gso_send_check,
1666 .gso_segment = inet_gso_segment,
1667 .gro_receive = inet_gro_receive,
1668 .gro_complete = inet_gro_complete,
1669}; 1704};
1670 1705
1671static int __init inet_init(void) 1706static int __init inet_init(void)
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 47800459e4cb..ce6fbdfd40b8 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1161,7 +1161,7 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1161 switch (cmd) { 1161 switch (cmd) {
1162 case SIOCDARP: 1162 case SIOCDARP:
1163 case SIOCSARP: 1163 case SIOCSARP:
1164 if (!capable(CAP_NET_ADMIN)) 1164 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1165 return -EPERM; 1165 return -EPERM;
1166 case SIOCGARP: 1166 case SIOCGARP:
1167 err = copy_from_user(&r, arg, sizeof(struct arpreq)); 1167 err = copy_from_user(&r, arg, sizeof(struct arpreq));
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 2a6abc163ed2..cc06a47f1216 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -55,6 +55,7 @@
55#include <linux/sysctl.h> 55#include <linux/sysctl.h>
56#endif 56#endif
57#include <linux/kmod.h> 57#include <linux/kmod.h>
58#include <linux/netconf.h>
58 59
59#include <net/arp.h> 60#include <net/arp.h>
60#include <net/ip.h> 61#include <net/ip.h>
@@ -723,7 +724,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
723 724
724 case SIOCSIFFLAGS: 725 case SIOCSIFFLAGS:
725 ret = -EPERM; 726 ret = -EPERM;
726 if (!capable(CAP_NET_ADMIN)) 727 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
727 goto out; 728 goto out;
728 break; 729 break;
729 case SIOCSIFADDR: /* Set interface address (and family) */ 730 case SIOCSIFADDR: /* Set interface address (and family) */
@@ -731,7 +732,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
731 case SIOCSIFDSTADDR: /* Set the destination address */ 732 case SIOCSIFDSTADDR: /* Set the destination address */
732 case SIOCSIFNETMASK: /* Set the netmask for the interface */ 733 case SIOCSIFNETMASK: /* Set the netmask for the interface */
733 ret = -EPERM; 734 ret = -EPERM;
734 if (!capable(CAP_NET_ADMIN)) 735 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
735 goto out; 736 goto out;
736 ret = -EINVAL; 737 ret = -EINVAL;
737 if (sin->sin_family != AF_INET) 738 if (sin->sin_family != AF_INET)
@@ -1442,6 +1443,155 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1442 return 0; 1443 return 0;
1443} 1444}
1444 1445
1446static int inet_netconf_msgsize_devconf(int type)
1447{
1448 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1449 + nla_total_size(4); /* NETCONFA_IFINDEX */
1450
1451 /* type -1 is used for ALL */
1452 if (type == -1 || type == NETCONFA_FORWARDING)
1453 size += nla_total_size(4);
1454 if (type == -1 || type == NETCONFA_RP_FILTER)
1455 size += nla_total_size(4);
1456 if (type == -1 || type == NETCONFA_MC_FORWARDING)
1457 size += nla_total_size(4);
1458
1459 return size;
1460}
1461
1462static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1463 struct ipv4_devconf *devconf, u32 portid,
1464 u32 seq, int event, unsigned int flags,
1465 int type)
1466{
1467 struct nlmsghdr *nlh;
1468 struct netconfmsg *ncm;
1469
1470 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1471 flags);
1472 if (nlh == NULL)
1473 return -EMSGSIZE;
1474
1475 ncm = nlmsg_data(nlh);
1476 ncm->ncm_family = AF_INET;
1477
1478 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1479 goto nla_put_failure;
1480
1481 /* type -1 is used for ALL */
1482 if ((type == -1 || type == NETCONFA_FORWARDING) &&
1483 nla_put_s32(skb, NETCONFA_FORWARDING,
1484 IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1485 goto nla_put_failure;
1486 if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1487 nla_put_s32(skb, NETCONFA_RP_FILTER,
1488 IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1489 goto nla_put_failure;
1490 if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1491 nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1492 IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1493 goto nla_put_failure;
1494
1495 return nlmsg_end(skb, nlh);
1496
1497nla_put_failure:
1498 nlmsg_cancel(skb, nlh);
1499 return -EMSGSIZE;
1500}
1501
1502void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1503 struct ipv4_devconf *devconf)
1504{
1505 struct sk_buff *skb;
1506 int err = -ENOBUFS;
1507
1508 skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1509 if (skb == NULL)
1510 goto errout;
1511
1512 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1513 RTM_NEWNETCONF, 0, type);
1514 if (err < 0) {
1515 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1516 WARN_ON(err == -EMSGSIZE);
1517 kfree_skb(skb);
1518 goto errout;
1519 }
1520 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1521 return;
1522errout:
1523 if (err < 0)
1524 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1525}
1526
1527static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1528 [NETCONFA_IFINDEX] = { .len = sizeof(int) },
1529 [NETCONFA_FORWARDING] = { .len = sizeof(int) },
1530 [NETCONFA_RP_FILTER] = { .len = sizeof(int) },
1531};
1532
1533static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1534 struct nlmsghdr *nlh,
1535 void *arg)
1536{
1537 struct net *net = sock_net(in_skb->sk);
1538 struct nlattr *tb[NETCONFA_MAX+1];
1539 struct netconfmsg *ncm;
1540 struct sk_buff *skb;
1541 struct ipv4_devconf *devconf;
1542 struct in_device *in_dev;
1543 struct net_device *dev;
1544 int ifindex;
1545 int err;
1546
1547 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1548 devconf_ipv4_policy);
1549 if (err < 0)
1550 goto errout;
1551
1552 err = -EINVAL;
1553 if (!tb[NETCONFA_IFINDEX])
1554 goto errout;
1555
1556 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1557 switch (ifindex) {
1558 case NETCONFA_IFINDEX_ALL:
1559 devconf = net->ipv4.devconf_all;
1560 break;
1561 case NETCONFA_IFINDEX_DEFAULT:
1562 devconf = net->ipv4.devconf_dflt;
1563 break;
1564 default:
1565 dev = __dev_get_by_index(net, ifindex);
1566 if (dev == NULL)
1567 goto errout;
1568 in_dev = __in_dev_get_rtnl(dev);
1569 if (in_dev == NULL)
1570 goto errout;
1571 devconf = &in_dev->cnf;
1572 break;
1573 }
1574
1575 err = -ENOBUFS;
1576 skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1577 if (skb == NULL)
1578 goto errout;
1579
1580 err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1581 NETLINK_CB(in_skb).portid,
1582 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1583 -1);
1584 if (err < 0) {
1585 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1586 WARN_ON(err == -EMSGSIZE);
1587 kfree_skb(skb);
1588 goto errout;
1589 }
1590 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1591errout:
1592 return err;
1593}
1594
1445#ifdef CONFIG_SYSCTL 1595#ifdef CONFIG_SYSCTL
1446 1596
1447static void devinet_copy_dflt_conf(struct net *net, int i) 1597static void devinet_copy_dflt_conf(struct net *net, int i)
@@ -1467,6 +1617,12 @@ static void inet_forward_change(struct net *net)
1467 1617
1468 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; 1618 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1469 IPV4_DEVCONF_DFLT(net, FORWARDING) = on; 1619 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1620 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1621 NETCONFA_IFINDEX_ALL,
1622 net->ipv4.devconf_all);
1623 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1624 NETCONFA_IFINDEX_DEFAULT,
1625 net->ipv4.devconf_dflt);
1470 1626
1471 for_each_netdev(net, dev) { 1627 for_each_netdev(net, dev) {
1472 struct in_device *in_dev; 1628 struct in_device *in_dev;
@@ -1474,8 +1630,11 @@ static void inet_forward_change(struct net *net)
1474 dev_disable_lro(dev); 1630 dev_disable_lro(dev);
1475 rcu_read_lock(); 1631 rcu_read_lock();
1476 in_dev = __in_dev_get_rcu(dev); 1632 in_dev = __in_dev_get_rcu(dev);
1477 if (in_dev) 1633 if (in_dev) {
1478 IN_DEV_CONF_SET(in_dev, FORWARDING, on); 1634 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1635 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1636 dev->ifindex, &in_dev->cnf);
1637 }
1479 rcu_read_unlock(); 1638 rcu_read_unlock();
1480 } 1639 }
1481} 1640}
@@ -1501,6 +1660,23 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
1501 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1) 1660 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1502 if ((new_value == 0) && (old_value != 0)) 1661 if ((new_value == 0) && (old_value != 0))
1503 rt_cache_flush(net); 1662 rt_cache_flush(net);
1663 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1664 new_value != old_value) {
1665 int ifindex;
1666
1667 if (cnf == net->ipv4.devconf_dflt)
1668 ifindex = NETCONFA_IFINDEX_DEFAULT;
1669 else if (cnf == net->ipv4.devconf_all)
1670 ifindex = NETCONFA_IFINDEX_ALL;
1671 else {
1672 struct in_device *idev =
1673 container_of(cnf, struct in_device,
1674 cnf);
1675 ifindex = idev->dev->ifindex;
1676 }
1677 inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1678 ifindex, cnf);
1679 }
1504 } 1680 }
1505 1681
1506 return ret; 1682 return ret;
@@ -1527,15 +1703,23 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
1527 } 1703 }
1528 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { 1704 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1529 inet_forward_change(net); 1705 inet_forward_change(net);
1530 } else if (*valp) { 1706 } else {
1531 struct ipv4_devconf *cnf = ctl->extra1; 1707 struct ipv4_devconf *cnf = ctl->extra1;
1532 struct in_device *idev = 1708 struct in_device *idev =
1533 container_of(cnf, struct in_device, cnf); 1709 container_of(cnf, struct in_device, cnf);
1534 dev_disable_lro(idev->dev); 1710 if (*valp)
1711 dev_disable_lro(idev->dev);
1712 inet_netconf_notify_devconf(net,
1713 NETCONFA_FORWARDING,
1714 idev->dev->ifindex,
1715 cnf);
1535 } 1716 }
1536 rtnl_unlock(); 1717 rtnl_unlock();
1537 rt_cache_flush(net); 1718 rt_cache_flush(net);
1538 } 1719 } else
1720 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1721 NETCONFA_IFINDEX_DEFAULT,
1722 net->ipv4.devconf_dflt);
1539 } 1723 }
1540 1724
1541 return ret; 1725 return ret;
@@ -1809,5 +1993,7 @@ void __init devinet_init(void)
1809 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL); 1993 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1810 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL); 1994 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1811 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL); 1995 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
1996 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
1997 NULL, NULL);
1812} 1998}
1813 1999
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 825c608826de..5cd75e2dab2c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -488,7 +488,7 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
488 switch (cmd) { 488 switch (cmd) {
489 case SIOCADDRT: /* Add a route */ 489 case SIOCADDRT: /* Add a route */
490 case SIOCDELRT: /* Delete a route */ 490 case SIOCDELRT: /* Delete a route */
491 if (!capable(CAP_NET_ADMIN)) 491 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
492 return -EPERM; 492 return -EPERM;
493 493
494 if (copy_from_user(&rt, arg, sizeof(rt))) 494 if (copy_from_user(&rt, arg, sizeof(rt)))
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 71b125cd5db1..4797a800faf8 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -803,7 +803,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
803 unsigned int bytes; 803 unsigned int bytes;
804 804
805 if (!new_size) 805 if (!new_size)
806 new_size = 1; 806 new_size = 16;
807 bytes = new_size * sizeof(struct hlist_head *); 807 bytes = new_size * sizeof(struct hlist_head *);
808 new_info_hash = fib_info_hash_alloc(bytes); 808 new_info_hash = fib_info_hash_alloc(bytes);
809 new_laddrhash = fib_info_hash_alloc(bytes); 809 new_laddrhash = fib_info_hash_alloc(bytes);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index d34ce2972c8f..2026542d6836 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -521,21 +521,31 @@ static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
521 int *expire, int *resend) 521 int *expire, int *resend)
522{ 522{
523 if (!rskq_defer_accept) { 523 if (!rskq_defer_accept) {
524 *expire = req->retrans >= thresh; 524 *expire = req->num_timeout >= thresh;
525 *resend = 1; 525 *resend = 1;
526 return; 526 return;
527 } 527 }
528 *expire = req->retrans >= thresh && 528 *expire = req->num_timeout >= thresh &&
529 (!inet_rsk(req)->acked || req->retrans >= max_retries); 529 (!inet_rsk(req)->acked || req->num_timeout >= max_retries);
530 /* 530 /*
531 * Do not resend while waiting for data after ACK, 531 * Do not resend while waiting for data after ACK,
532 * start to resend on end of deferring period to give 532 * start to resend on end of deferring period to give
533 * last chance for data or ACK to create established socket. 533 * last chance for data or ACK to create established socket.
534 */ 534 */
535 *resend = !inet_rsk(req)->acked || 535 *resend = !inet_rsk(req)->acked ||
536 req->retrans >= rskq_defer_accept - 1; 536 req->num_timeout >= rskq_defer_accept - 1;
537} 537}
538 538
539int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req)
540{
541 int err = req->rsk_ops->rtx_syn_ack(parent, req, NULL);
542
543 if (!err)
544 req->num_retrans++;
545 return err;
546}
547EXPORT_SYMBOL(inet_rtx_syn_ack);
548
539void inet_csk_reqsk_queue_prune(struct sock *parent, 549void inet_csk_reqsk_queue_prune(struct sock *parent,
540 const unsigned long interval, 550 const unsigned long interval,
541 const unsigned long timeout, 551 const unsigned long timeout,
@@ -599,13 +609,14 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
599 req->rsk_ops->syn_ack_timeout(parent, req); 609 req->rsk_ops->syn_ack_timeout(parent, req);
600 if (!expire && 610 if (!expire &&
601 (!resend || 611 (!resend ||
602 !req->rsk_ops->rtx_syn_ack(parent, req, NULL) || 612 !inet_rtx_syn_ack(parent, req) ||
603 inet_rsk(req)->acked)) { 613 inet_rsk(req)->acked)) {
604 unsigned long timeo; 614 unsigned long timeo;
605 615
606 if (req->retrans++ == 0) 616 if (req->num_timeout++ == 0)
607 lopt->qlen_young--; 617 lopt->qlen_young--;
608 timeo = min((timeout << req->retrans), max_rto); 618 timeo = min(timeout << req->num_timeout,
619 max_rto);
609 req->expires = now + timeo; 620 req->expires = now + timeo;
610 reqp = &req->dl_next; 621 reqp = &req->dl_next;
611 continue; 622 continue;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index e23e16dc501d..7afa2c3c788f 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -109,6 +109,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
109 r->id.idiag_src[0] = inet->inet_rcv_saddr; 109 r->id.idiag_src[0] = inet->inet_rcv_saddr;
110 r->id.idiag_dst[0] = inet->inet_daddr; 110 r->id.idiag_dst[0] = inet->inet_daddr;
111 111
112 if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown))
113 goto errout;
114
112 /* IPv6 dual-stack sockets use inet->tos for IPv4 connections, 115 /* IPv6 dual-stack sockets use inet->tos for IPv4 connections,
113 * hence this needs to be included regardless of socket family. 116 * hence this needs to be included regardless of socket family.
114 */ 117 */
@@ -716,7 +719,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
716 r->idiag_family = sk->sk_family; 719 r->idiag_family = sk->sk_family;
717 r->idiag_state = TCP_SYN_RECV; 720 r->idiag_state = TCP_SYN_RECV;
718 r->idiag_timer = 1; 721 r->idiag_timer = 1;
719 r->idiag_retrans = req->retrans; 722 r->idiag_retrans = req->num_retrans;
720 723
721 r->id.idiag_if = sk->sk_bound_dev_if; 724 r->id.idiag_if = sk->sk_bound_dev_if;
722 sock_diag_save_cookie(req, r->id.idiag_cookie); 725 sock_diag_save_cookie(req, r->id.idiag_cookie);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 7880af970208..fa3ae8148710 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -237,12 +237,14 @@ struct sock *__inet_lookup_established(struct net *net,
237 rcu_read_lock(); 237 rcu_read_lock();
238begin: 238begin:
239 sk_nulls_for_each_rcu(sk, node, &head->chain) { 239 sk_nulls_for_each_rcu(sk, node, &head->chain) {
240 if (INET_MATCH(sk, net, hash, acookie, 240 if (sk->sk_hash != hash)
241 saddr, daddr, ports, dif)) { 241 continue;
242 if (likely(INET_MATCH(sk, net, acookie,
243 saddr, daddr, ports, dif))) {
242 if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) 244 if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
243 goto begintw; 245 goto begintw;
244 if (unlikely(!INET_MATCH(sk, net, hash, acookie, 246 if (unlikely(!INET_MATCH(sk, net, acookie,
245 saddr, daddr, ports, dif))) { 247 saddr, daddr, ports, dif))) {
246 sock_put(sk); 248 sock_put(sk);
247 goto begin; 249 goto begin;
248 } 250 }
@@ -260,14 +262,18 @@ begin:
260begintw: 262begintw:
261 /* Must check for a TIME_WAIT'er before going to listener hash. */ 263 /* Must check for a TIME_WAIT'er before going to listener hash. */
262 sk_nulls_for_each_rcu(sk, node, &head->twchain) { 264 sk_nulls_for_each_rcu(sk, node, &head->twchain) {
263 if (INET_TW_MATCH(sk, net, hash, acookie, 265 if (sk->sk_hash != hash)
264 saddr, daddr, ports, dif)) { 266 continue;
267 if (likely(INET_TW_MATCH(sk, net, acookie,
268 saddr, daddr, ports,
269 dif))) {
265 if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { 270 if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) {
266 sk = NULL; 271 sk = NULL;
267 goto out; 272 goto out;
268 } 273 }
269 if (unlikely(!INET_TW_MATCH(sk, net, hash, acookie, 274 if (unlikely(!INET_TW_MATCH(sk, net, acookie,
270 saddr, daddr, ports, dif))) { 275 saddr, daddr, ports,
276 dif))) {
271 sock_put(sk); 277 sock_put(sk);
272 goto begintw; 278 goto begintw;
273 } 279 }
@@ -314,10 +320,12 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
314 320
315 /* Check TIME-WAIT sockets first. */ 321 /* Check TIME-WAIT sockets first. */
316 sk_nulls_for_each(sk2, node, &head->twchain) { 322 sk_nulls_for_each(sk2, node, &head->twchain) {
317 tw = inet_twsk(sk2); 323 if (sk2->sk_hash != hash)
324 continue;
318 325
319 if (INET_TW_MATCH(sk2, net, hash, acookie, 326 if (likely(INET_TW_MATCH(sk2, net, acookie,
320 saddr, daddr, ports, dif)) { 327 saddr, daddr, ports, dif))) {
328 tw = inet_twsk(sk2);
321 if (twsk_unique(sk, sk2, twp)) 329 if (twsk_unique(sk, sk2, twp))
322 goto unique; 330 goto unique;
323 else 331 else
@@ -328,8 +336,10 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
328 336
329 /* And established part... */ 337 /* And established part... */
330 sk_nulls_for_each(sk2, node, &head->chain) { 338 sk_nulls_for_each(sk2, node, &head->chain) {
331 if (INET_MATCH(sk2, net, hash, acookie, 339 if (sk2->sk_hash != hash)
332 saddr, daddr, ports, dif)) 340 continue;
341 if (likely(INET_MATCH(sk2, net, acookie,
342 saddr, daddr, ports, dif)))
333 goto not_unique; 343 goto not_unique;
334 } 344 }
335 345
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 8d5cc75dac88..eb9d63a570cd 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -801,6 +801,10 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net)
801 table[0].data = &net->ipv4.frags.high_thresh; 801 table[0].data = &net->ipv4.frags.high_thresh;
802 table[1].data = &net->ipv4.frags.low_thresh; 802 table[1].data = &net->ipv4.frags.low_thresh;
803 table[2].data = &net->ipv4.frags.timeout; 803 table[2].data = &net->ipv4.frags.timeout;
804
805 /* Don't export sysctls to unprivileged users */
806 if (net->user_ns != &init_user_ns)
807 table[0].procname = NULL;
804 } 808 }
805 809
806 hdr = register_net_sysctl(net, "net/ipv4", table); 810 hdr = register_net_sysctl(net, "net/ipv4", table);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 7240f8e2dd45..a85ae2f7a21c 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -164,21 +164,6 @@ struct ipgre_net {
164#define tunnels_r tunnels[2] 164#define tunnels_r tunnels[2]
165#define tunnels_l tunnels[1] 165#define tunnels_l tunnels[1]
166#define tunnels_wc tunnels[0] 166#define tunnels_wc tunnels[0]
167/*
168 * Locking : hash tables are protected by RCU and RTNL
169 */
170
171#define for_each_ip_tunnel_rcu(start) \
172 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
173
174/* often modified stats are per cpu, other are shared (netdev->stats) */
175struct pcpu_tstats {
176 u64 rx_packets;
177 u64 rx_bytes;
178 u64 tx_packets;
179 u64 tx_bytes;
180 struct u64_stats_sync syncp;
181};
182 167
183static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev, 168static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
184 struct rtnl_link_stats64 *tot) 169 struct rtnl_link_stats64 *tot)
@@ -250,7 +235,7 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
250 ARPHRD_ETHER : ARPHRD_IPGRE; 235 ARPHRD_ETHER : ARPHRD_IPGRE;
251 int score, cand_score = 4; 236 int score, cand_score = 4;
252 237
253 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) { 238 for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) {
254 if (local != t->parms.iph.saddr || 239 if (local != t->parms.iph.saddr ||
255 remote != t->parms.iph.daddr || 240 remote != t->parms.iph.daddr ||
256 !(t->dev->flags & IFF_UP)) 241 !(t->dev->flags & IFF_UP))
@@ -277,7 +262,7 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
277 } 262 }
278 } 263 }
279 264
280 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) { 265 for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) {
281 if (remote != t->parms.iph.daddr || 266 if (remote != t->parms.iph.daddr ||
282 !(t->dev->flags & IFF_UP)) 267 !(t->dev->flags & IFF_UP))
283 continue; 268 continue;
@@ -303,7 +288,7 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
303 } 288 }
304 } 289 }
305 290
306 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) { 291 for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) {
307 if ((local != t->parms.iph.saddr && 292 if ((local != t->parms.iph.saddr &&
308 (local != t->parms.iph.daddr || 293 (local != t->parms.iph.daddr ||
309 !ipv4_is_multicast(local))) || 294 !ipv4_is_multicast(local))) ||
@@ -331,7 +316,7 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
331 } 316 }
332 } 317 }
333 318
334 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) { 319 for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) {
335 if (t->parms.i_key != key || 320 if (t->parms.i_key != key ||
336 !(t->dev->flags & IFF_UP)) 321 !(t->dev->flags & IFF_UP))
337 continue; 322 continue;
@@ -753,7 +738,6 @@ drop:
753static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 738static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
754{ 739{
755 struct ip_tunnel *tunnel = netdev_priv(dev); 740 struct ip_tunnel *tunnel = netdev_priv(dev);
756 struct pcpu_tstats *tstats;
757 const struct iphdr *old_iph = ip_hdr(skb); 741 const struct iphdr *old_iph = ip_hdr(skb);
758 const struct iphdr *tiph; 742 const struct iphdr *tiph;
759 struct flowi4 fl4; 743 struct flowi4 fl4;
@@ -977,9 +961,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
977 } 961 }
978 } 962 }
979 963
980 nf_reset(skb); 964 iptunnel_xmit(skb, dev);
981 tstats = this_cpu_ptr(dev->tstats);
982 __IPTUNNEL_XMIT(tstats, &dev->stats);
983 return NETDEV_TX_OK; 965 return NETDEV_TX_OK;
984 966
985#if IS_ENABLED(CONFIG_IPV6) 967#if IS_ENABLED(CONFIG_IPV6)
@@ -1082,7 +1064,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1082 case SIOCADDTUNNEL: 1064 case SIOCADDTUNNEL:
1083 case SIOCCHGTUNNEL: 1065 case SIOCCHGTUNNEL:
1084 err = -EPERM; 1066 err = -EPERM;
1085 if (!capable(CAP_NET_ADMIN)) 1067 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1086 goto done; 1068 goto done;
1087 1069
1088 err = -EFAULT; 1070 err = -EFAULT;
@@ -1157,7 +1139,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1157 1139
1158 case SIOCDELTUNNEL: 1140 case SIOCDELTUNNEL:
1159 err = -EPERM; 1141 err = -EPERM;
1160 if (!capable(CAP_NET_ADMIN)) 1142 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1161 goto done; 1143 goto done;
1162 1144
1163 if (dev == ign->fb_tunnel_dev) { 1145 if (dev == ign->fb_tunnel_dev) {
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 1dc01f9793d5..f6289bf6f332 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -409,7 +409,7 @@ int ip_options_compile(struct net *net,
409 optptr[2] += 8; 409 optptr[2] += 8;
410 break; 410 break;
411 default: 411 default:
412 if (!skb && !capable(CAP_NET_RAW)) { 412 if (!skb && !ns_capable(net->user_ns, CAP_NET_RAW)) {
413 pp_ptr = optptr + 3; 413 pp_ptr = optptr + 3;
414 goto error; 414 goto error;
415 } 415 }
@@ -445,7 +445,7 @@ int ip_options_compile(struct net *net,
445 opt->router_alert = optptr - iph; 445 opt->router_alert = optptr - iph;
446 break; 446 break;
447 case IPOPT_CIPSO: 447 case IPOPT_CIPSO:
448 if ((!skb && !capable(CAP_NET_RAW)) || opt->cipso) { 448 if ((!skb && !ns_capable(net->user_ns, CAP_NET_RAW)) || opt->cipso) {
449 pp_ptr = optptr; 449 pp_ptr = optptr;
450 goto error; 450 goto error;
451 } 451 }
@@ -458,7 +458,7 @@ int ip_options_compile(struct net *net,
458 case IPOPT_SEC: 458 case IPOPT_SEC:
459 case IPOPT_SID: 459 case IPOPT_SID:
460 default: 460 default:
461 if (!skb && !capable(CAP_NET_RAW)) { 461 if (!skb && !ns_capable(net->user_ns, CAP_NET_RAW)) {
462 pp_ptr = optptr; 462 pp_ptr = optptr;
463 goto error; 463 goto error;
464 } 464 }
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6537a408a4fb..3e98ed2bff55 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -595,6 +595,10 @@ slow_path_clean:
595 } 595 }
596 596
597slow_path: 597slow_path:
598 /* for offloaded checksums cleanup checksum before fragmentation */
599 if ((skb->ip_summed == CHECKSUM_PARTIAL) && skb_checksum_help(skb))
600 goto fail;
601
598 left = skb->len - hlen; /* Space per frame */ 602 left = skb->len - hlen; /* Space per frame */
599 ptr = hlen; /* Where to start from */ 603 ptr = hlen; /* Where to start from */
600 604
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 14bbfcf717ac..3c9d20880283 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -989,13 +989,14 @@ mc_msf_out:
989 case IP_IPSEC_POLICY: 989 case IP_IPSEC_POLICY:
990 case IP_XFRM_POLICY: 990 case IP_XFRM_POLICY:
991 err = -EPERM; 991 err = -EPERM;
992 if (!capable(CAP_NET_ADMIN)) 992 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
993 break; 993 break;
994 err = xfrm_user_policy(sk, optname, optval, optlen); 994 err = xfrm_user_policy(sk, optname, optval, optlen);
995 break; 995 break;
996 996
997 case IP_TRANSPARENT: 997 case IP_TRANSPARENT:
998 if (!!val && !capable(CAP_NET_RAW) && !capable(CAP_NET_ADMIN)) { 998 if (!!val && !ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
999 !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
999 err = -EPERM; 1000 err = -EPERM;
1000 break; 1001 break;
1001 } 1002 }
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 858fddf6482a..c3a4233c0ac2 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -66,20 +66,6 @@ static void vti_tunnel_setup(struct net_device *dev);
66static void vti_dev_free(struct net_device *dev); 66static void vti_dev_free(struct net_device *dev);
67static int vti_tunnel_bind_dev(struct net_device *dev); 67static int vti_tunnel_bind_dev(struct net_device *dev);
68 68
69/* Locking : hash tables are protected by RCU and RTNL */
70
71#define for_each_ip_tunnel_rcu(start) \
72 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
73
74/* often modified stats are per cpu, other are shared (netdev->stats) */
75struct pcpu_tstats {
76 u64 rx_packets;
77 u64 rx_bytes;
78 u64 tx_packets;
79 u64 tx_bytes;
80 struct u64_stats_sync syncp;
81};
82
83#define VTI_XMIT(stats1, stats2) do { \ 69#define VTI_XMIT(stats1, stats2) do { \
84 int err; \ 70 int err; \
85 int pkt_len = skb->len; \ 71 int pkt_len = skb->len; \
@@ -142,19 +128,19 @@ static struct ip_tunnel *vti_tunnel_lookup(struct net *net,
142 struct ip_tunnel *t; 128 struct ip_tunnel *t;
143 struct vti_net *ipn = net_generic(net, vti_net_id); 129 struct vti_net *ipn = net_generic(net, vti_net_id);
144 130
145 for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1]) 131 for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
146 if (local == t->parms.iph.saddr && 132 if (local == t->parms.iph.saddr &&
147 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) 133 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
148 return t; 134 return t;
149 for_each_ip_tunnel_rcu(ipn->tunnels_r[h0]) 135 for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
150 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) 136 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
151 return t; 137 return t;
152 138
153 for_each_ip_tunnel_rcu(ipn->tunnels_l[h1]) 139 for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
154 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) 140 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
155 return t; 141 return t;
156 142
157 for_each_ip_tunnel_rcu(ipn->tunnels_wc[0]) 143 for_each_ip_tunnel_rcu(t, ipn->tunnels_wc[0])
158 if (t && (t->dev->flags&IFF_UP)) 144 if (t && (t->dev->flags&IFF_UP))
159 return t; 145 return t;
160 return NULL; 146 return NULL;
@@ -502,7 +488,7 @@ vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
502 case SIOCADDTUNNEL: 488 case SIOCADDTUNNEL:
503 case SIOCCHGTUNNEL: 489 case SIOCCHGTUNNEL:
504 err = -EPERM; 490 err = -EPERM;
505 if (!capable(CAP_NET_ADMIN)) 491 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
506 goto done; 492 goto done;
507 493
508 err = -EFAULT; 494 err = -EFAULT;
@@ -567,7 +553,7 @@ vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
567 553
568 case SIOCDELTUNNEL: 554 case SIOCDELTUNNEL:
569 err = -EPERM; 555 err = -EPERM;
570 if (!capable(CAP_NET_ADMIN)) 556 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
571 goto done; 557 goto done;
572 558
573 if (dev == ipn->fb_tunnel_dev) { 559 if (dev == ipn->fb_tunnel_dev) {
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 798358b10717..d763701cff1b 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1500,8 +1500,10 @@ static int __init ip_auto_config(void)
1500 * Clue in the operator. 1500 * Clue in the operator.
1501 */ 1501 */
1502 pr_info("IP-Config: Complete:\n"); 1502 pr_info("IP-Config: Complete:\n");
1503 pr_info(" device=%s, addr=%pI4, mask=%pI4, gw=%pI4\n", 1503
1504 ic_dev->name, &ic_myaddr, &ic_netmask, &ic_gateway); 1504 pr_info(" device=%s, hwaddr=%*phC, ipaddr=%pI4, mask=%pI4, gw=%pI4\n",
1505 ic_dev->name, ic_dev->addr_len, ic_dev->dev_addr,
1506 &ic_myaddr, &ic_netmask, &ic_gateway);
1505 pr_info(" host=%s, domain=%s, nis-domain=%s\n", 1507 pr_info(" host=%s, domain=%s, nis-domain=%s\n",
1506 utsname()->nodename, ic_domain, utsname()->domainname); 1508 utsname()->nodename, ic_domain, utsname()->domainname);
1507 pr_info(" bootserver=%pI4, rootserver=%pI4, rootpath=%s", 1509 pr_info(" bootserver=%pI4, rootserver=%pI4, rootpath=%s",
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index e15b45297c09..191fc24a745a 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -138,22 +138,7 @@ struct ipip_net {
138static int ipip_tunnel_init(struct net_device *dev); 138static int ipip_tunnel_init(struct net_device *dev);
139static void ipip_tunnel_setup(struct net_device *dev); 139static void ipip_tunnel_setup(struct net_device *dev);
140static void ipip_dev_free(struct net_device *dev); 140static void ipip_dev_free(struct net_device *dev);
141 141static struct rtnl_link_ops ipip_link_ops __read_mostly;
142/*
143 * Locking : hash tables are protected by RCU and RTNL
144 */
145
146#define for_each_ip_tunnel_rcu(start) \
147 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
148
149/* often modified stats are per cpu, other are shared (netdev->stats) */
150struct pcpu_tstats {
151 u64 rx_packets;
152 u64 rx_bytes;
153 u64 tx_packets;
154 u64 tx_bytes;
155 struct u64_stats_sync syncp;
156};
157 142
158static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev, 143static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
159 struct rtnl_link_stats64 *tot) 144 struct rtnl_link_stats64 *tot)
@@ -197,16 +182,16 @@ static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
197 struct ip_tunnel *t; 182 struct ip_tunnel *t;
198 struct ipip_net *ipn = net_generic(net, ipip_net_id); 183 struct ipip_net *ipn = net_generic(net, ipip_net_id);
199 184
200 for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1]) 185 for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
201 if (local == t->parms.iph.saddr && 186 if (local == t->parms.iph.saddr &&
202 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) 187 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
203 return t; 188 return t;
204 189
205 for_each_ip_tunnel_rcu(ipn->tunnels_r[h0]) 190 for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
206 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) 191 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
207 return t; 192 return t;
208 193
209 for_each_ip_tunnel_rcu(ipn->tunnels_l[h1]) 194 for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
210 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) 195 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
211 return t; 196 return t;
212 197
@@ -264,6 +249,32 @@ static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
264 rcu_assign_pointer(*tp, t); 249 rcu_assign_pointer(*tp, t);
265} 250}
266 251
252static int ipip_tunnel_create(struct net_device *dev)
253{
254 struct ip_tunnel *t = netdev_priv(dev);
255 struct net *net = dev_net(dev);
256 struct ipip_net *ipn = net_generic(net, ipip_net_id);
257 int err;
258
259 err = ipip_tunnel_init(dev);
260 if (err < 0)
261 goto out;
262
263 err = register_netdevice(dev);
264 if (err < 0)
265 goto out;
266
267 strcpy(t->parms.name, dev->name);
268 dev->rtnl_link_ops = &ipip_link_ops;
269
270 dev_hold(dev);
271 ipip_tunnel_link(ipn, t);
272 return 0;
273
274out:
275 return err;
276}
277
267static struct ip_tunnel *ipip_tunnel_locate(struct net *net, 278static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
268 struct ip_tunnel_parm *parms, int create) 279 struct ip_tunnel_parm *parms, int create)
269{ 280{
@@ -298,16 +309,9 @@ static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
298 nt = netdev_priv(dev); 309 nt = netdev_priv(dev);
299 nt->parms = *parms; 310 nt->parms = *parms;
300 311
301 if (ipip_tunnel_init(dev) < 0) 312 if (ipip_tunnel_create(dev) < 0)
302 goto failed_free; 313 goto failed_free;
303 314
304 if (register_netdevice(dev) < 0)
305 goto failed_free;
306
307 strcpy(nt->parms.name, dev->name);
308
309 dev_hold(dev);
310 ipip_tunnel_link(ipn, nt);
311 return nt; 315 return nt;
312 316
313failed_free: 317failed_free:
@@ -463,7 +467,6 @@ drop:
463static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 467static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
464{ 468{
465 struct ip_tunnel *tunnel = netdev_priv(dev); 469 struct ip_tunnel *tunnel = netdev_priv(dev);
466 struct pcpu_tstats *tstats;
467 const struct iphdr *tiph = &tunnel->parms.iph; 470 const struct iphdr *tiph = &tunnel->parms.iph;
468 u8 tos = tunnel->parms.iph.tos; 471 u8 tos = tunnel->parms.iph.tos;
469 __be16 df = tiph->frag_off; 472 __be16 df = tiph->frag_off;
@@ -479,6 +482,10 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
479 if (skb->protocol != htons(ETH_P_IP)) 482 if (skb->protocol != htons(ETH_P_IP))
480 goto tx_error; 483 goto tx_error;
481 484
485 if (skb->ip_summed == CHECKSUM_PARTIAL &&
486 skb_checksum_help(skb))
487 goto tx_error;
488
482 if (tos & 1) 489 if (tos & 1)
483 tos = old_iph->tos; 490 tos = old_iph->tos;
484 491
@@ -586,9 +593,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
586 if ((iph->ttl = tiph->ttl) == 0) 593 if ((iph->ttl = tiph->ttl) == 0)
587 iph->ttl = old_iph->ttl; 594 iph->ttl = old_iph->ttl;
588 595
589 nf_reset(skb); 596 iptunnel_xmit(skb, dev);
590 tstats = this_cpu_ptr(dev->tstats);
591 __IPTUNNEL_XMIT(tstats, &dev->stats);
592 return NETDEV_TX_OK; 597 return NETDEV_TX_OK;
593 598
594tx_error_icmp: 599tx_error_icmp:
@@ -635,6 +640,28 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
635 dev->iflink = tunnel->parms.link; 640 dev->iflink = tunnel->parms.link;
636} 641}
637 642
643static void ipip_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
644{
645 struct net *net = dev_net(t->dev);
646 struct ipip_net *ipn = net_generic(net, ipip_net_id);
647
648 ipip_tunnel_unlink(ipn, t);
649 synchronize_net();
650 t->parms.iph.saddr = p->iph.saddr;
651 t->parms.iph.daddr = p->iph.daddr;
652 memcpy(t->dev->dev_addr, &p->iph.saddr, 4);
653 memcpy(t->dev->broadcast, &p->iph.daddr, 4);
654 ipip_tunnel_link(ipn, t);
655 t->parms.iph.ttl = p->iph.ttl;
656 t->parms.iph.tos = p->iph.tos;
657 t->parms.iph.frag_off = p->iph.frag_off;
658 if (t->parms.link != p->link) {
659 t->parms.link = p->link;
660 ipip_tunnel_bind_dev(t->dev);
661 }
662 netdev_state_change(t->dev);
663}
664
638static int 665static int
639ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) 666ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
640{ 667{
@@ -664,7 +691,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
664 case SIOCADDTUNNEL: 691 case SIOCADDTUNNEL:
665 case SIOCCHGTUNNEL: 692 case SIOCCHGTUNNEL:
666 err = -EPERM; 693 err = -EPERM;
667 if (!capable(CAP_NET_ADMIN)) 694 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
668 goto done; 695 goto done;
669 696
670 err = -EFAULT; 697 err = -EFAULT;
@@ -693,29 +720,13 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
693 break; 720 break;
694 } 721 }
695 t = netdev_priv(dev); 722 t = netdev_priv(dev);
696 ipip_tunnel_unlink(ipn, t);
697 synchronize_net();
698 t->parms.iph.saddr = p.iph.saddr;
699 t->parms.iph.daddr = p.iph.daddr;
700 memcpy(dev->dev_addr, &p.iph.saddr, 4);
701 memcpy(dev->broadcast, &p.iph.daddr, 4);
702 ipip_tunnel_link(ipn, t);
703 netdev_state_change(dev);
704 } 723 }
724
725 ipip_tunnel_update(t, &p);
705 } 726 }
706 727
707 if (t) { 728 if (t) {
708 err = 0; 729 err = 0;
709 if (cmd == SIOCCHGTUNNEL) {
710 t->parms.iph.ttl = p.iph.ttl;
711 t->parms.iph.tos = p.iph.tos;
712 t->parms.iph.frag_off = p.iph.frag_off;
713 if (t->parms.link != p.link) {
714 t->parms.link = p.link;
715 ipip_tunnel_bind_dev(dev);
716 netdev_state_change(dev);
717 }
718 }
719 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) 730 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
720 err = -EFAULT; 731 err = -EFAULT;
721 } else 732 } else
@@ -724,7 +735,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
724 735
725 case SIOCDELTUNNEL: 736 case SIOCDELTUNNEL:
726 err = -EPERM; 737 err = -EPERM;
727 if (!capable(CAP_NET_ADMIN)) 738 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
728 goto done; 739 goto done;
729 740
730 if (dev == ipn->fb_tunnel_dev) { 741 if (dev == ipn->fb_tunnel_dev) {
@@ -773,6 +784,11 @@ static void ipip_dev_free(struct net_device *dev)
773 free_netdev(dev); 784 free_netdev(dev);
774} 785}
775 786
787#define IPIP_FEATURES (NETIF_F_SG | \
788 NETIF_F_FRAGLIST | \
789 NETIF_F_HIGHDMA | \
790 NETIF_F_HW_CSUM)
791
776static void ipip_tunnel_setup(struct net_device *dev) 792static void ipip_tunnel_setup(struct net_device *dev)
777{ 793{
778 dev->netdev_ops = &ipip_netdev_ops; 794 dev->netdev_ops = &ipip_netdev_ops;
@@ -787,6 +803,9 @@ static void ipip_tunnel_setup(struct net_device *dev)
787 dev->features |= NETIF_F_NETNS_LOCAL; 803 dev->features |= NETIF_F_NETNS_LOCAL;
788 dev->features |= NETIF_F_LLTX; 804 dev->features |= NETIF_F_LLTX;
789 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; 805 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
806
807 dev->features |= IPIP_FEATURES;
808 dev->hw_features |= IPIP_FEATURES;
790} 809}
791 810
792static int ipip_tunnel_init(struct net_device *dev) 811static int ipip_tunnel_init(struct net_device *dev)
@@ -829,6 +848,142 @@ static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
829 return 0; 848 return 0;
830} 849}
831 850
851static void ipip_netlink_parms(struct nlattr *data[],
852 struct ip_tunnel_parm *parms)
853{
854 memset(parms, 0, sizeof(*parms));
855
856 parms->iph.version = 4;
857 parms->iph.protocol = IPPROTO_IPIP;
858 parms->iph.ihl = 5;
859
860 if (!data)
861 return;
862
863 if (data[IFLA_IPTUN_LINK])
864 parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
865
866 if (data[IFLA_IPTUN_LOCAL])
867 parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]);
868
869 if (data[IFLA_IPTUN_REMOTE])
870 parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]);
871
872 if (data[IFLA_IPTUN_TTL]) {
873 parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
874 if (parms->iph.ttl)
875 parms->iph.frag_off = htons(IP_DF);
876 }
877
878 if (data[IFLA_IPTUN_TOS])
879 parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
880
881 if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
882 parms->iph.frag_off = htons(IP_DF);
883}
884
885static int ipip_newlink(struct net *src_net, struct net_device *dev,
886 struct nlattr *tb[], struct nlattr *data[])
887{
888 struct net *net = dev_net(dev);
889 struct ip_tunnel *nt;
890
891 nt = netdev_priv(dev);
892 ipip_netlink_parms(data, &nt->parms);
893
894 if (ipip_tunnel_locate(net, &nt->parms, 0))
895 return -EEXIST;
896
897 return ipip_tunnel_create(dev);
898}
899
900static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
901 struct nlattr *data[])
902{
903 struct ip_tunnel *t;
904 struct ip_tunnel_parm p;
905 struct net *net = dev_net(dev);
906 struct ipip_net *ipn = net_generic(net, ipip_net_id);
907
908 if (dev == ipn->fb_tunnel_dev)
909 return -EINVAL;
910
911 ipip_netlink_parms(data, &p);
912
913 if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
914 (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
915 return -EINVAL;
916
917 t = ipip_tunnel_locate(net, &p, 0);
918
919 if (t) {
920 if (t->dev != dev)
921 return -EEXIST;
922 } else
923 t = netdev_priv(dev);
924
925 ipip_tunnel_update(t, &p);
926 return 0;
927}
928
929static size_t ipip_get_size(const struct net_device *dev)
930{
931 return
932 /* IFLA_IPTUN_LINK */
933 nla_total_size(4) +
934 /* IFLA_IPTUN_LOCAL */
935 nla_total_size(4) +
936 /* IFLA_IPTUN_REMOTE */
937 nla_total_size(4) +
938 /* IFLA_IPTUN_TTL */
939 nla_total_size(1) +
940 /* IFLA_IPTUN_TOS */
941 nla_total_size(1) +
942 /* IFLA_IPTUN_PMTUDISC */
943 nla_total_size(1) +
944 0;
945}
946
947static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
948{
949 struct ip_tunnel *tunnel = netdev_priv(dev);
950 struct ip_tunnel_parm *parm = &tunnel->parms;
951
952 if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
953 nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
954 nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
955 nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
956 nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
957 nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
958 !!(parm->iph.frag_off & htons(IP_DF))))
959 goto nla_put_failure;
960 return 0;
961
962nla_put_failure:
963 return -EMSGSIZE;
964}
965
966static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
967 [IFLA_IPTUN_LINK] = { .type = NLA_U32 },
968 [IFLA_IPTUN_LOCAL] = { .type = NLA_U32 },
969 [IFLA_IPTUN_REMOTE] = { .type = NLA_U32 },
970 [IFLA_IPTUN_TTL] = { .type = NLA_U8 },
971 [IFLA_IPTUN_TOS] = { .type = NLA_U8 },
972 [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
973};
974
975static struct rtnl_link_ops ipip_link_ops __read_mostly = {
976 .kind = "ipip",
977 .maxtype = IFLA_IPTUN_MAX,
978 .policy = ipip_policy,
979 .priv_size = sizeof(struct ip_tunnel),
980 .setup = ipip_tunnel_setup,
981 .newlink = ipip_newlink,
982 .changelink = ipip_changelink,
983 .get_size = ipip_get_size,
984 .fill_info = ipip_fill_info,
985};
986
832static struct xfrm_tunnel ipip_handler __read_mostly = { 987static struct xfrm_tunnel ipip_handler __read_mostly = {
833 .handler = ipip_rcv, 988 .handler = ipip_rcv,
834 .err_handler = ipip_err, 989 .err_handler = ipip_err,
@@ -925,14 +1080,26 @@ static int __init ipip_init(void)
925 return err; 1080 return err;
926 err = xfrm4_tunnel_register(&ipip_handler, AF_INET); 1081 err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
927 if (err < 0) { 1082 if (err < 0) {
928 unregister_pernet_device(&ipip_net_ops);
929 pr_info("%s: can't register tunnel\n", __func__); 1083 pr_info("%s: can't register tunnel\n", __func__);
1084 goto xfrm_tunnel_failed;
930 } 1085 }
1086 err = rtnl_link_register(&ipip_link_ops);
1087 if (err < 0)
1088 goto rtnl_link_failed;
1089
1090out:
931 return err; 1091 return err;
1092
1093rtnl_link_failed:
1094 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
1095xfrm_tunnel_failed:
1096 unregister_pernet_device(&ipip_net_ops);
1097 goto out;
932} 1098}
933 1099
934static void __exit ipip_fini(void) 1100static void __exit ipip_fini(void)
935{ 1101{
1102 rtnl_link_unregister(&ipip_link_ops);
936 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET)) 1103 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
937 pr_info("%s: can't deregister tunnel\n", __func__); 1104 pr_info("%s: can't deregister tunnel\n", __func__);
938 1105
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 3eab2b2ffd34..a9454cbd953c 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -65,6 +65,7 @@
65#include <net/checksum.h> 65#include <net/checksum.h>
66#include <net/netlink.h> 66#include <net/netlink.h>
67#include <net/fib_rules.h> 67#include <net/fib_rules.h>
68#include <linux/netconf.h>
68 69
69#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) 70#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
70#define CONFIG_IP_PIMSM 1 71#define CONFIG_IP_PIMSM 1
@@ -83,8 +84,8 @@ struct mr_table {
83 struct vif_device vif_table[MAXVIFS]; 84 struct vif_device vif_table[MAXVIFS];
84 int maxvif; 85 int maxvif;
85 atomic_t cache_resolve_queue_len; 86 atomic_t cache_resolve_queue_len;
86 int mroute_do_assert; 87 bool mroute_do_assert;
87 int mroute_do_pim; 88 bool mroute_do_pim;
88#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) 89#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
89 int mroute_reg_vif_num; 90 int mroute_reg_vif_num;
90#endif 91#endif
@@ -133,6 +134,8 @@ static int ipmr_cache_report(struct mr_table *mrt,
133 struct sk_buff *pkt, vifi_t vifi, int assert); 134 struct sk_buff *pkt, vifi_t vifi, int assert);
134static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 135static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
135 struct mfc_cache *c, struct rtmsg *rtm); 136 struct mfc_cache *c, struct rtmsg *rtm);
137static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
138 int cmd);
136static void mroute_clean_tables(struct mr_table *mrt); 139static void mroute_clean_tables(struct mr_table *mrt);
137static void ipmr_expire_process(unsigned long arg); 140static void ipmr_expire_process(unsigned long arg);
138 141
@@ -582,6 +585,9 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
582 in_dev = __in_dev_get_rtnl(dev); 585 in_dev = __in_dev_get_rtnl(dev);
583 if (in_dev) { 586 if (in_dev) {
584 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--; 587 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
588 inet_netconf_notify_devconf(dev_net(dev),
589 NETCONFA_MC_FORWARDING,
590 dev->ifindex, &in_dev->cnf);
585 ip_rt_multicast_event(in_dev); 591 ip_rt_multicast_event(in_dev);
586 } 592 }
587 593
@@ -665,6 +671,7 @@ static void ipmr_expire_process(unsigned long arg)
665 } 671 }
666 672
667 list_del(&c->list); 673 list_del(&c->list);
674 mroute_netlink_event(mrt, c, RTM_DELROUTE);
668 ipmr_destroy_unres(mrt, c); 675 ipmr_destroy_unres(mrt, c);
669 } 676 }
670 677
@@ -772,6 +779,8 @@ static int vif_add(struct net *net, struct mr_table *mrt,
772 return -EADDRNOTAVAIL; 779 return -EADDRNOTAVAIL;
773 } 780 }
774 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; 781 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
782 inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, dev->ifindex,
783 &in_dev->cnf);
775 ip_rt_multicast_event(in_dev); 784 ip_rt_multicast_event(in_dev);
776 785
777 /* Fill in the VIF structures */ 786 /* Fill in the VIF structures */
@@ -1020,6 +1029,7 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
1020 1029
1021 atomic_inc(&mrt->cache_resolve_queue_len); 1030 atomic_inc(&mrt->cache_resolve_queue_len);
1022 list_add(&c->list, &mrt->mfc_unres_queue); 1031 list_add(&c->list, &mrt->mfc_unres_queue);
1032 mroute_netlink_event(mrt, c, RTM_NEWROUTE);
1023 1033
1024 if (atomic_read(&mrt->cache_resolve_queue_len) == 1) 1034 if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
1025 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires); 1035 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
@@ -1054,7 +1064,7 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
1054 if (c->mfc_origin == mfc->mfcc_origin.s_addr && 1064 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1055 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { 1065 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1056 list_del_rcu(&c->list); 1066 list_del_rcu(&c->list);
1057 1067 mroute_netlink_event(mrt, c, RTM_DELROUTE);
1058 ipmr_cache_free(c); 1068 ipmr_cache_free(c);
1059 return 0; 1069 return 0;
1060 } 1070 }
@@ -1089,6 +1099,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1089 if (!mrtsock) 1099 if (!mrtsock)
1090 c->mfc_flags |= MFC_STATIC; 1100 c->mfc_flags |= MFC_STATIC;
1091 write_unlock_bh(&mrt_lock); 1101 write_unlock_bh(&mrt_lock);
1102 mroute_netlink_event(mrt, c, RTM_NEWROUTE);
1092 return 0; 1103 return 0;
1093 } 1104 }
1094 1105
@@ -1131,6 +1142,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1131 ipmr_cache_resolve(net, mrt, uc, c); 1142 ipmr_cache_resolve(net, mrt, uc, c);
1132 ipmr_cache_free(uc); 1143 ipmr_cache_free(uc);
1133 } 1144 }
1145 mroute_netlink_event(mrt, c, RTM_NEWROUTE);
1134 return 0; 1146 return 0;
1135} 1147}
1136 1148
@@ -1159,6 +1171,7 @@ static void mroute_clean_tables(struct mr_table *mrt)
1159 if (c->mfc_flags & MFC_STATIC) 1171 if (c->mfc_flags & MFC_STATIC)
1160 continue; 1172 continue;
1161 list_del_rcu(&c->list); 1173 list_del_rcu(&c->list);
1174 mroute_netlink_event(mrt, c, RTM_DELROUTE);
1162 ipmr_cache_free(c); 1175 ipmr_cache_free(c);
1163 } 1176 }
1164 } 1177 }
@@ -1167,6 +1180,7 @@ static void mroute_clean_tables(struct mr_table *mrt)
1167 spin_lock_bh(&mfc_unres_lock); 1180 spin_lock_bh(&mfc_unres_lock);
1168 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { 1181 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
1169 list_del(&c->list); 1182 list_del(&c->list);
1183 mroute_netlink_event(mrt, c, RTM_DELROUTE);
1170 ipmr_destroy_unres(mrt, c); 1184 ipmr_destroy_unres(mrt, c);
1171 } 1185 }
1172 spin_unlock_bh(&mfc_unres_lock); 1186 spin_unlock_bh(&mfc_unres_lock);
@@ -1185,6 +1199,9 @@ static void mrtsock_destruct(struct sock *sk)
1185 ipmr_for_each_table(mrt, net) { 1199 ipmr_for_each_table(mrt, net) {
1186 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1200 if (sk == rtnl_dereference(mrt->mroute_sk)) {
1187 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; 1201 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
1202 inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
1203 NETCONFA_IFINDEX_ALL,
1204 net->ipv4.devconf_all);
1188 RCU_INIT_POINTER(mrt->mroute_sk, NULL); 1205 RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1189 mroute_clean_tables(mrt); 1206 mroute_clean_tables(mrt);
1190 } 1207 }
@@ -1207,23 +1224,24 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1207 struct net *net = sock_net(sk); 1224 struct net *net = sock_net(sk);
1208 struct mr_table *mrt; 1225 struct mr_table *mrt;
1209 1226
1227 if (sk->sk_type != SOCK_RAW ||
1228 inet_sk(sk)->inet_num != IPPROTO_IGMP)
1229 return -EOPNOTSUPP;
1230
1210 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1231 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1211 if (mrt == NULL) 1232 if (mrt == NULL)
1212 return -ENOENT; 1233 return -ENOENT;
1213 1234
1214 if (optname != MRT_INIT) { 1235 if (optname != MRT_INIT) {
1215 if (sk != rcu_access_pointer(mrt->mroute_sk) && 1236 if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1216 !capable(CAP_NET_ADMIN)) 1237 !ns_capable(net->user_ns, CAP_NET_ADMIN))
1217 return -EACCES; 1238 return -EACCES;
1218 } 1239 }
1219 1240
1220 switch (optname) { 1241 switch (optname) {
1221 case MRT_INIT: 1242 case MRT_INIT:
1222 if (sk->sk_type != SOCK_RAW ||
1223 inet_sk(sk)->inet_num != IPPROTO_IGMP)
1224 return -EOPNOTSUPP;
1225 if (optlen != sizeof(int)) 1243 if (optlen != sizeof(int))
1226 return -ENOPROTOOPT; 1244 return -EINVAL;
1227 1245
1228 rtnl_lock(); 1246 rtnl_lock();
1229 if (rtnl_dereference(mrt->mroute_sk)) { 1247 if (rtnl_dereference(mrt->mroute_sk)) {
@@ -1235,6 +1253,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1235 if (ret == 0) { 1253 if (ret == 0) {
1236 rcu_assign_pointer(mrt->mroute_sk, sk); 1254 rcu_assign_pointer(mrt->mroute_sk, sk);
1237 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; 1255 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
1256 inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
1257 NETCONFA_IFINDEX_ALL,
1258 net->ipv4.devconf_all);
1238 } 1259 }
1239 rtnl_unlock(); 1260 rtnl_unlock();
1240 return ret; 1261 return ret;
@@ -1284,9 +1305,11 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1284 case MRT_ASSERT: 1305 case MRT_ASSERT:
1285 { 1306 {
1286 int v; 1307 int v;
1308 if (optlen != sizeof(v))
1309 return -EINVAL;
1287 if (get_user(v, (int __user *)optval)) 1310 if (get_user(v, (int __user *)optval))
1288 return -EFAULT; 1311 return -EFAULT;
1289 mrt->mroute_do_assert = (v) ? 1 : 0; 1312 mrt->mroute_do_assert = v;
1290 return 0; 1313 return 0;
1291 } 1314 }
1292#ifdef CONFIG_IP_PIMSM 1315#ifdef CONFIG_IP_PIMSM
@@ -1294,9 +1317,11 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1294 { 1317 {
1295 int v; 1318 int v;
1296 1319
1320 if (optlen != sizeof(v))
1321 return -EINVAL;
1297 if (get_user(v, (int __user *)optval)) 1322 if (get_user(v, (int __user *)optval))
1298 return -EFAULT; 1323 return -EFAULT;
1299 v = (v) ? 1 : 0; 1324 v = !!v;
1300 1325
1301 rtnl_lock(); 1326 rtnl_lock();
1302 ret = 0; 1327 ret = 0;
@@ -1329,7 +1354,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1329 } else { 1354 } else {
1330 if (!ipmr_new_table(net, v)) 1355 if (!ipmr_new_table(net, v))
1331 ret = -ENOMEM; 1356 ret = -ENOMEM;
1332 raw_sk(sk)->ipmr_table = v; 1357 else
1358 raw_sk(sk)->ipmr_table = v;
1333 } 1359 }
1334 rtnl_unlock(); 1360 rtnl_unlock();
1335 return ret; 1361 return ret;
@@ -1355,6 +1381,10 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
1355 struct net *net = sock_net(sk); 1381 struct net *net = sock_net(sk);
1356 struct mr_table *mrt; 1382 struct mr_table *mrt;
1357 1383
1384 if (sk->sk_type != SOCK_RAW ||
1385 inet_sk(sk)->inet_num != IPPROTO_IGMP)
1386 return -EOPNOTSUPP;
1387
1358 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1388 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1359 if (mrt == NULL) 1389 if (mrt == NULL)
1360 return -ENOENT; 1390 return -ENOENT;
@@ -2024,6 +2054,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2024 int ct; 2054 int ct;
2025 struct rtnexthop *nhp; 2055 struct rtnexthop *nhp;
2026 struct nlattr *mp_attr; 2056 struct nlattr *mp_attr;
2057 struct rta_mfc_stats mfcs;
2027 2058
2028 /* If cache is unresolved, don't try to parse IIF and OIF */ 2059 /* If cache is unresolved, don't try to parse IIF and OIF */
2029 if (c->mfc_parent >= MAXVIFS) 2060 if (c->mfc_parent >= MAXVIFS)
@@ -2052,6 +2083,12 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2052 2083
2053 nla_nest_end(skb, mp_attr); 2084 nla_nest_end(skb, mp_attr);
2054 2085
2086 mfcs.mfcs_packets = c->mfc_un.res.pkt;
2087 mfcs.mfcs_bytes = c->mfc_un.res.bytes;
2088 mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
2089 if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
2090 return -EMSGSIZE;
2091
2055 rtm->rtm_type = RTN_MULTICAST; 2092 rtm->rtm_type = RTN_MULTICAST;
2056 return 1; 2093 return 1;
2057} 2094}
@@ -2121,12 +2158,13 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
2121} 2158}
2122 2159
2123static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 2160static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2124 u32 portid, u32 seq, struct mfc_cache *c) 2161 u32 portid, u32 seq, struct mfc_cache *c, int cmd)
2125{ 2162{
2126 struct nlmsghdr *nlh; 2163 struct nlmsghdr *nlh;
2127 struct rtmsg *rtm; 2164 struct rtmsg *rtm;
2165 int err;
2128 2166
2129 nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); 2167 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI);
2130 if (nlh == NULL) 2168 if (nlh == NULL)
2131 return -EMSGSIZE; 2169 return -EMSGSIZE;
2132 2170
@@ -2140,13 +2178,18 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2140 goto nla_put_failure; 2178 goto nla_put_failure;
2141 rtm->rtm_type = RTN_MULTICAST; 2179 rtm->rtm_type = RTN_MULTICAST;
2142 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2180 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2143 rtm->rtm_protocol = RTPROT_UNSPEC; 2181 if (c->mfc_flags & MFC_STATIC)
2182 rtm->rtm_protocol = RTPROT_STATIC;
2183 else
2184 rtm->rtm_protocol = RTPROT_MROUTED;
2144 rtm->rtm_flags = 0; 2185 rtm->rtm_flags = 0;
2145 2186
2146 if (nla_put_be32(skb, RTA_SRC, c->mfc_origin) || 2187 if (nla_put_be32(skb, RTA_SRC, c->mfc_origin) ||
2147 nla_put_be32(skb, RTA_DST, c->mfc_mcastgrp)) 2188 nla_put_be32(skb, RTA_DST, c->mfc_mcastgrp))
2148 goto nla_put_failure; 2189 goto nla_put_failure;
2149 if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0) 2190 err = __ipmr_fill_mroute(mrt, skb, c, rtm);
2191 /* do not break the dump if cache is unresolved */
2192 if (err < 0 && err != -ENOENT)
2150 goto nla_put_failure; 2193 goto nla_put_failure;
2151 2194
2152 return nlmsg_end(skb, nlh); 2195 return nlmsg_end(skb, nlh);
@@ -2156,6 +2199,52 @@ nla_put_failure:
2156 return -EMSGSIZE; 2199 return -EMSGSIZE;
2157} 2200}
2158 2201
2202static size_t mroute_msgsize(bool unresolved, int maxvif)
2203{
2204 size_t len =
2205 NLMSG_ALIGN(sizeof(struct rtmsg))
2206 + nla_total_size(4) /* RTA_TABLE */
2207 + nla_total_size(4) /* RTA_SRC */
2208 + nla_total_size(4) /* RTA_DST */
2209 ;
2210
2211 if (!unresolved)
2212 len = len
2213 + nla_total_size(4) /* RTA_IIF */
2214 + nla_total_size(0) /* RTA_MULTIPATH */
2215 + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2216 /* RTA_MFC_STATS */
2217 + nla_total_size(sizeof(struct rta_mfc_stats))
2218 ;
2219
2220 return len;
2221}
2222
2223static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
2224 int cmd)
2225{
2226 struct net *net = read_pnet(&mrt->net);
2227 struct sk_buff *skb;
2228 int err = -ENOBUFS;
2229
2230 skb = nlmsg_new(mroute_msgsize(mfc->mfc_parent >= MAXVIFS, mrt->maxvif),
2231 GFP_ATOMIC);
2232 if (skb == NULL)
2233 goto errout;
2234
2235 err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd);
2236 if (err < 0)
2237 goto errout;
2238
2239 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE, NULL, GFP_ATOMIC);
2240 return;
2241
2242errout:
2243 kfree_skb(skb);
2244 if (err < 0)
2245 rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err);
2246}
2247
2159static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) 2248static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2160{ 2249{
2161 struct net *net = sock_net(skb->sk); 2250 struct net *net = sock_net(skb->sk);
@@ -2182,13 +2271,29 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2182 if (ipmr_fill_mroute(mrt, skb, 2271 if (ipmr_fill_mroute(mrt, skb,
2183 NETLINK_CB(cb->skb).portid, 2272 NETLINK_CB(cb->skb).portid,
2184 cb->nlh->nlmsg_seq, 2273 cb->nlh->nlmsg_seq,
2185 mfc) < 0) 2274 mfc, RTM_NEWROUTE) < 0)
2186 goto done; 2275 goto done;
2187next_entry: 2276next_entry:
2188 e++; 2277 e++;
2189 } 2278 }
2190 e = s_e = 0; 2279 e = s_e = 0;
2191 } 2280 }
2281 spin_lock_bh(&mfc_unres_lock);
2282 list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
2283 if (e < s_e)
2284 goto next_entry2;
2285 if (ipmr_fill_mroute(mrt, skb,
2286 NETLINK_CB(cb->skb).portid,
2287 cb->nlh->nlmsg_seq,
2288 mfc, RTM_NEWROUTE) < 0) {
2289 spin_unlock_bh(&mfc_unres_lock);
2290 goto done;
2291 }
2292next_entry2:
2293 e++;
2294 }
2295 spin_unlock_bh(&mfc_unres_lock);
2296 e = s_e = 0;
2192 s_h = 0; 2297 s_h = 0;
2193next_table: 2298next_table:
2194 t++; 2299 t++;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 97e61eadf580..3ea4127404d6 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1533,7 +1533,7 @@ static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user,
1533{ 1533{
1534 int ret; 1534 int ret;
1535 1535
1536 if (!capable(CAP_NET_ADMIN)) 1536 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1537 return -EPERM; 1537 return -EPERM;
1538 1538
1539 switch (cmd) { 1539 switch (cmd) {
@@ -1677,7 +1677,7 @@ static int compat_do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user,
1677{ 1677{
1678 int ret; 1678 int ret;
1679 1679
1680 if (!capable(CAP_NET_ADMIN)) 1680 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1681 return -EPERM; 1681 return -EPERM;
1682 1682
1683 switch (cmd) { 1683 switch (cmd) {
@@ -1698,7 +1698,7 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned
1698{ 1698{
1699 int ret; 1699 int ret;
1700 1700
1701 if (!capable(CAP_NET_ADMIN)) 1701 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1702 return -EPERM; 1702 return -EPERM;
1703 1703
1704 switch (cmd) { 1704 switch (cmd) {
@@ -1722,7 +1722,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
1722{ 1722{
1723 int ret; 1723 int ret;
1724 1724
1725 if (!capable(CAP_NET_ADMIN)) 1725 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1726 return -EPERM; 1726 return -EPERM;
1727 1727
1728 switch (cmd) { 1728 switch (cmd) {
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 170b1fdd6b72..17c5e06da662 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1846,7 +1846,7 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
1846{ 1846{
1847 int ret; 1847 int ret;
1848 1848
1849 if (!capable(CAP_NET_ADMIN)) 1849 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1850 return -EPERM; 1850 return -EPERM;
1851 1851
1852 switch (cmd) { 1852 switch (cmd) {
@@ -1961,7 +1961,7 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1961{ 1961{
1962 int ret; 1962 int ret;
1963 1963
1964 if (!capable(CAP_NET_ADMIN)) 1964 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1965 return -EPERM; 1965 return -EPERM;
1966 1966
1967 switch (cmd) { 1967 switch (cmd) {
@@ -1983,7 +1983,7 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1983{ 1983{
1984 int ret; 1984 int ret;
1985 1985
1986 if (!capable(CAP_NET_ADMIN)) 1986 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1987 return -EPERM; 1987 return -EPERM;
1988 1988
1989 switch (cmd) { 1989 switch (cmd) {
@@ -2008,7 +2008,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2008{ 2008{
2009 int ret; 2009 int ret;
2010 2010
2011 if (!capable(CAP_NET_ADMIN)) 2011 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2012 return -EPERM; 2012 return -EPERM;
2013 2013
2014 switch (cmd) { 2014 switch (cmd) {
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index fe5daea5214d..75e33a7048f8 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -661,6 +661,7 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
661#define PROC_WRITELEN 10 661#define PROC_WRITELEN 10
662 char buffer[PROC_WRITELEN+1]; 662 char buffer[PROC_WRITELEN+1];
663 unsigned long nodenum; 663 unsigned long nodenum;
664 int rc;
664 665
665 if (size > PROC_WRITELEN) 666 if (size > PROC_WRITELEN)
666 return -EIO; 667 return -EIO;
@@ -669,11 +670,15 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
669 buffer[size] = 0; 670 buffer[size] = 0;
670 671
671 if (*buffer == '+') { 672 if (*buffer == '+') {
672 nodenum = simple_strtoul(buffer+1, NULL, 10); 673 rc = kstrtoul(buffer+1, 10, &nodenum);
674 if (rc)
675 return rc;
673 if (clusterip_add_node(c, nodenum)) 676 if (clusterip_add_node(c, nodenum))
674 return -ENOMEM; 677 return -ENOMEM;
675 } else if (*buffer == '-') { 678 } else if (*buffer == '-') {
676 nodenum = simple_strtoul(buffer+1, NULL,10); 679 rc = kstrtoul(buffer+1, 10, &nodenum);
680 if (rc)
681 return rc;
677 if (clusterip_del_node(c, nodenum)) 682 if (clusterip_del_node(c, nodenum))
678 return -ENOENT; 683 return -ENOENT;
679 } else 684 } else
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index a82047282dbb..da2c8a368f68 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -134,6 +134,10 @@ nf_nat_ipv4_fn(unsigned int hooknum,
134 /* ESTABLISHED */ 134 /* ESTABLISHED */
135 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || 135 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
136 ctinfo == IP_CT_ESTABLISHED_REPLY); 136 ctinfo == IP_CT_ESTABLISHED_REPLY);
137 if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) {
138 nf_ct_kill_acct(ct, ctinfo, skb);
139 return NF_DROP;
140 }
137 } 141 }
138 142
139 return nf_nat_packet(ct, ctinfo, hooknum, skb); 143 return nf_nat_packet(ct, ctinfo, hooknum, skb);
@@ -276,9 +280,7 @@ static int __net_init iptable_nat_net_init(struct net *net)
276 return -ENOMEM; 280 return -ENOMEM;
277 net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl); 281 net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl);
278 kfree(repl); 282 kfree(repl);
279 if (IS_ERR(net->ipv4.nat_table)) 283 return PTR_RET(net->ipv4.nat_table);
280 return PTR_ERR(net->ipv4.nat_table);
281 return 0;
282} 284}
283 285
284static void __net_exit iptable_nat_net_exit(struct net *net) 286static void __net_exit iptable_nat_net_exit(struct net *net)
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 8918eff1426d..0f9d09f54bd9 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -29,6 +29,7 @@
29#include <net/protocol.h> 29#include <net/protocol.h>
30 30
31const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly; 31const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
32const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly;
32 33
33/* 34/*
34 * Add a protocol handler to the hash tables 35 * Add a protocol handler to the hash tables
@@ -41,6 +42,13 @@ int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
41} 42}
42EXPORT_SYMBOL(inet_add_protocol); 43EXPORT_SYMBOL(inet_add_protocol);
43 44
45int inet_add_offload(const struct net_offload *prot, unsigned char protocol)
46{
47 return !cmpxchg((const struct net_offload **)&inet_offloads[protocol],
48 NULL, prot) ? 0 : -1;
49}
50EXPORT_SYMBOL(inet_add_offload);
51
44/* 52/*
45 * Remove a protocol from the hash tables. 53 * Remove a protocol from the hash tables.
46 */ 54 */
@@ -57,3 +65,16 @@ int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol)
57 return ret; 65 return ret;
58} 66}
59EXPORT_SYMBOL(inet_del_protocol); 67EXPORT_SYMBOL(inet_del_protocol);
68
69int inet_del_offload(const struct net_offload *prot, unsigned char protocol)
70{
71 int ret;
72
73 ret = (cmpxchg((const struct net_offload **)&inet_offloads[protocol],
74 prot, NULL) == prot) ? 0 : -1;
75
76 synchronize_net();
77
78 return ret;
79}
80EXPORT_SYMBOL(inet_del_offload);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index df251424d816..844a9ef60dbd 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2232,8 +2232,27 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
2232 error = rt->dst.error; 2232 error = rt->dst.error;
2233 2233
2234 if (rt_is_input_route(rt)) { 2234 if (rt_is_input_route(rt)) {
2235 if (nla_put_u32(skb, RTA_IIF, rt->rt_iif)) 2235#ifdef CONFIG_IP_MROUTE
2236 goto nla_put_failure; 2236 if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
2237 IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
2238 int err = ipmr_get_route(net, skb,
2239 fl4->saddr, fl4->daddr,
2240 r, nowait);
2241 if (err <= 0) {
2242 if (!nowait) {
2243 if (err == 0)
2244 return 0;
2245 goto nla_put_failure;
2246 } else {
2247 if (err == -EMSGSIZE)
2248 goto nla_put_failure;
2249 error = err;
2250 }
2251 }
2252 } else
2253#endif
2254 if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
2255 goto nla_put_failure;
2237 } 2256 }
2238 2257
2239 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0) 2258 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
@@ -2496,6 +2515,10 @@ static __net_init int sysctl_route_net_init(struct net *net)
2496 tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL); 2515 tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
2497 if (tbl == NULL) 2516 if (tbl == NULL)
2498 goto err_dup; 2517 goto err_dup;
2518
2519 /* Don't export sysctls to unprivileged users */
2520 if (net->user_ns != &init_user_ns)
2521 tbl[0].procname = NULL;
2499 } 2522 }
2500 tbl[0].extra1 = net; 2523 tbl[0].extra1 = net;
2501 2524
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index ba48e799b031..b236ef04914f 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -340,7 +340,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
340 } 340 }
341 341
342 req->expires = 0UL; 342 req->expires = 0UL;
343 req->retrans = 0; 343 req->num_retrans = 0;
344 344
345 /* 345 /*
346 * We need to lookup the route here to get at the correct 346 * We need to lookup the route here to get at the correct
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 63d4eccc674d..d84400b65049 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -883,6 +883,9 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
883 table[6].data = 883 table[6].data =
884 &net->ipv4.sysctl_ping_group_range; 884 &net->ipv4.sysctl_ping_group_range;
885 885
886 /* Don't export sysctls to unprivileged users */
887 if (net->user_ns != &init_user_ns)
888 table[0].procname = NULL;
886 } 889 }
887 890
888 /* 891 /*
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e457c7ab2e28..1ca253635f7a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -536,13 +536,14 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
536{ 536{
537 struct tcp_sock *tp = tcp_sk(sk); 537 struct tcp_sock *tp = tcp_sk(sk);
538 int answ; 538 int answ;
539 bool slow;
539 540
540 switch (cmd) { 541 switch (cmd) {
541 case SIOCINQ: 542 case SIOCINQ:
542 if (sk->sk_state == TCP_LISTEN) 543 if (sk->sk_state == TCP_LISTEN)
543 return -EINVAL; 544 return -EINVAL;
544 545
545 lock_sock(sk); 546 slow = lock_sock_fast(sk);
546 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) 547 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
547 answ = 0; 548 answ = 0;
548 else if (sock_flag(sk, SOCK_URGINLINE) || 549 else if (sock_flag(sk, SOCK_URGINLINE) ||
@@ -557,7 +558,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
557 answ--; 558 answ--;
558 } else 559 } else
559 answ = tp->urg_seq - tp->copied_seq; 560 answ = tp->urg_seq - tp->copied_seq;
560 release_sock(sk); 561 unlock_sock_fast(sk, slow);
561 break; 562 break;
562 case SIOCATMARK: 563 case SIOCATMARK:
563 answ = tp->urg_data && tp->urg_seq == tp->copied_seq; 564 answ = tp->urg_data && tp->urg_seq == tp->copied_seq;
@@ -1490,15 +1491,19 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
1490 copied += used; 1491 copied += used;
1491 offset += used; 1492 offset += used;
1492 } 1493 }
1493 /* 1494 /* If recv_actor drops the lock (e.g. TCP splice
1494 * If recv_actor drops the lock (e.g. TCP splice
1495 * receive) the skb pointer might be invalid when 1495 * receive) the skb pointer might be invalid when
1496 * getting here: tcp_collapse might have deleted it 1496 * getting here: tcp_collapse might have deleted it
1497 * while aggregating skbs from the socket queue. 1497 * while aggregating skbs from the socket queue.
1498 */ 1498 */
1499 skb = tcp_recv_skb(sk, seq-1, &offset); 1499 skb = tcp_recv_skb(sk, seq - 1, &offset);
1500 if (!skb || (offset+1 != skb->len)) 1500 if (!skb)
1501 break; 1501 break;
1502 /* TCP coalescing might have appended data to the skb.
1503 * Try to splice more frags
1504 */
1505 if (offset + 1 != skb->len)
1506 continue;
1502 } 1507 }
1503 if (tcp_hdr(skb)->fin) { 1508 if (tcp_hdr(skb)->fin) {
1504 sk_eat_skb(sk, skb, false); 1509 sk_eat_skb(sk, skb, false);
@@ -2300,7 +2305,7 @@ void tcp_sock_destruct(struct sock *sk)
2300 2305
2301static inline bool tcp_can_repair_sock(const struct sock *sk) 2306static inline bool tcp_can_repair_sock(const struct sock *sk)
2302{ 2307{
2303 return capable(CAP_NET_ADMIN) && 2308 return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) &&
2304 ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED)); 2309 ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED));
2305} 2310}
2306 2311
@@ -3586,8 +3591,7 @@ void __init tcp_init(void)
3586 alloc_large_system_hash("TCP established", 3591 alloc_large_system_hash("TCP established",
3587 sizeof(struct inet_ehash_bucket), 3592 sizeof(struct inet_ehash_bucket),
3588 thash_entries, 3593 thash_entries,
3589 (totalram_pages >= 128 * 1024) ? 3594 17, /* one slot per 128 KB of memory */
3590 13 : 15,
3591 0, 3595 0,
3592 NULL, 3596 NULL,
3593 &tcp_hashinfo.ehash_mask, 3597 &tcp_hashinfo.ehash_mask,
@@ -3603,8 +3607,7 @@ void __init tcp_init(void)
3603 alloc_large_system_hash("TCP bind", 3607 alloc_large_system_hash("TCP bind",
3604 sizeof(struct inet_bind_hashbucket), 3608 sizeof(struct inet_bind_hashbucket),
3605 tcp_hashinfo.ehash_mask + 1, 3609 tcp_hashinfo.ehash_mask + 1,
3606 (totalram_pages >= 128 * 1024) ? 3610 17, /* one slot per 128 KB of memory */
3607 13 : 15,
3608 0, 3611 0,
3609 &tcp_hashinfo.bhash_size, 3612 &tcp_hashinfo.bhash_size,
3610 NULL, 3613 NULL,
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 1432cdb0644c..baf28611b334 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -259,7 +259,8 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
259 if (!ca) 259 if (!ca)
260 err = -ENOENT; 260 err = -ENOENT;
261 261
262 else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || capable(CAP_NET_ADMIN))) 262 else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
263 ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)))
263 err = -EPERM; 264 err = -EPERM;
264 265
265 else if (!try_module_get(ca->owner)) 266 else if (!try_module_get(ca->owner))
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 181fc8234a52..a13692560e63 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3552,6 +3552,24 @@ static bool tcp_process_frto(struct sock *sk, int flag)
3552 return false; 3552 return false;
3553} 3553}
3554 3554
3555/* RFC 5961 7 [ACK Throttling] */
3556static void tcp_send_challenge_ack(struct sock *sk)
3557{
3558 /* unprotected vars, we dont care of overwrites */
3559 static u32 challenge_timestamp;
3560 static unsigned int challenge_count;
3561 u32 now = jiffies / HZ;
3562
3563 if (now != challenge_timestamp) {
3564 challenge_timestamp = now;
3565 challenge_count = 0;
3566 }
3567 if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
3568 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
3569 tcp_send_ack(sk);
3570 }
3571}
3572
3555/* This routine deals with incoming acks, but not outgoing ones. */ 3573/* This routine deals with incoming acks, but not outgoing ones. */
3556static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) 3574static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3557{ 3575{
@@ -3571,8 +3589,14 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3571 /* If the ack is older than previous acks 3589 /* If the ack is older than previous acks
3572 * then we can probably ignore it. 3590 * then we can probably ignore it.
3573 */ 3591 */
3574 if (before(ack, prior_snd_una)) 3592 if (before(ack, prior_snd_una)) {
3593 /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */
3594 if (before(ack, prior_snd_una - tp->max_window)) {
3595 tcp_send_challenge_ack(sk);
3596 return -1;
3597 }
3575 goto old_ack; 3598 goto old_ack;
3599 }
3576 3600
3577 /* If the ack includes data we haven't sent yet, discard 3601 /* If the ack includes data we haven't sent yet, discard
3578 * this segment (RFC793 Section 3.9). 3602 * this segment (RFC793 Section 3.9).
@@ -5244,23 +5268,6 @@ out:
5244} 5268}
5245#endif /* CONFIG_NET_DMA */ 5269#endif /* CONFIG_NET_DMA */
5246 5270
5247static void tcp_send_challenge_ack(struct sock *sk)
5248{
5249 /* unprotected vars, we dont care of overwrites */
5250 static u32 challenge_timestamp;
5251 static unsigned int challenge_count;
5252 u32 now = jiffies / HZ;
5253
5254 if (now != challenge_timestamp) {
5255 challenge_timestamp = now;
5256 challenge_count = 0;
5257 }
5258 if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
5259 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
5260 tcp_send_ack(sk);
5261 }
5262}
5263
5264/* Does PAWS and seqno based validation of an incoming segment, flags will 5271/* Does PAWS and seqno based validation of an incoming segment, flags will
5265 * play significant role here. 5272 * play significant role here.
5266 */ 5273 */
@@ -5992,7 +5999,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5992 */ 5999 */
5993 if (req) { 6000 if (req) {
5994 tcp_synack_rtt_meas(sk, req); 6001 tcp_synack_rtt_meas(sk, req);
5995 tp->total_retrans = req->retrans; 6002 tp->total_retrans = req->num_retrans;
5996 6003
5997 reqsk_fastopen_remove(sk, req, false); 6004 reqsk_fastopen_remove(sk, req, false);
5998 } else { 6005 } else {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0c4a64355603..1ed230716d51 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -138,14 +138,6 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
138} 138}
139EXPORT_SYMBOL_GPL(tcp_twsk_unique); 139EXPORT_SYMBOL_GPL(tcp_twsk_unique);
140 140
141static int tcp_repair_connect(struct sock *sk)
142{
143 tcp_connect_init(sk);
144 tcp_finish_connect(sk, NULL);
145
146 return 0;
147}
148
149/* This will initiate an outgoing connection. */ 141/* This will initiate an outgoing connection. */
150int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 142int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
151{ 143{
@@ -250,10 +242,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
250 242
251 inet->inet_id = tp->write_seq ^ jiffies; 243 inet->inet_id = tp->write_seq ^ jiffies;
252 244
253 if (likely(!tp->repair)) 245 err = tcp_connect(sk);
254 err = tcp_connect(sk);
255 else
256 err = tcp_repair_connect(sk);
257 246
258 rt = NULL; 247 rt = NULL;
259 if (err) 248 if (err)
@@ -877,10 +866,13 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
877} 866}
878 867
879static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, 868static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
880 struct request_values *rvp) 869 struct request_values *rvp)
881{ 870{
882 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); 871 int res = tcp_v4_send_synack(sk, NULL, req, rvp, 0, false);
883 return tcp_v4_send_synack(sk, NULL, req, rvp, 0, false); 872
873 if (!res)
874 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
875 return res;
884} 876}
885 877
886/* 878/*
@@ -1070,7 +1062,7 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
1070} 1062}
1071EXPORT_SYMBOL(tcp_md5_do_del); 1063EXPORT_SYMBOL(tcp_md5_do_del);
1072 1064
1073void tcp_clear_md5_list(struct sock *sk) 1065static void tcp_clear_md5_list(struct sock *sk)
1074{ 1066{
1075 struct tcp_sock *tp = tcp_sk(sk); 1067 struct tcp_sock *tp = tcp_sk(sk);
1076 struct tcp_md5sig_key *key; 1068 struct tcp_md5sig_key *key;
@@ -1386,7 +1378,8 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk,
1386 struct sock *child; 1378 struct sock *child;
1387 int err; 1379 int err;
1388 1380
1389 req->retrans = 0; 1381 req->num_retrans = 0;
1382 req->num_timeout = 0;
1390 req->sk = NULL; 1383 req->sk = NULL;
1391 1384
1392 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); 1385 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
@@ -1741,7 +1734,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1741 1734
1742 tcp_initialize_rcv_mss(newsk); 1735 tcp_initialize_rcv_mss(newsk);
1743 tcp_synack_rtt_meas(newsk, req); 1736 tcp_synack_rtt_meas(newsk, req);
1744 newtp->total_retrans = req->retrans; 1737 newtp->total_retrans = req->num_retrans;
1745 1738
1746#ifdef CONFIG_TCP_MD5SIG 1739#ifdef CONFIG_TCP_MD5SIG
1747 /* Copy over the MD5 key from the original socket */ 1740 /* Copy over the MD5 key from the original socket */
@@ -1919,7 +1912,6 @@ EXPORT_SYMBOL(tcp_v4_do_rcv);
1919 1912
1920void tcp_v4_early_demux(struct sk_buff *skb) 1913void tcp_v4_early_demux(struct sk_buff *skb)
1921{ 1914{
1922 struct net *net = dev_net(skb->dev);
1923 const struct iphdr *iph; 1915 const struct iphdr *iph;
1924 const struct tcphdr *th; 1916 const struct tcphdr *th;
1925 struct sock *sk; 1917 struct sock *sk;
@@ -1927,16 +1919,16 @@ void tcp_v4_early_demux(struct sk_buff *skb)
1927 if (skb->pkt_type != PACKET_HOST) 1919 if (skb->pkt_type != PACKET_HOST)
1928 return; 1920 return;
1929 1921
1930 if (!pskb_may_pull(skb, ip_hdrlen(skb) + sizeof(struct tcphdr))) 1922 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1931 return; 1923 return;
1932 1924
1933 iph = ip_hdr(skb); 1925 iph = ip_hdr(skb);
1934 th = (struct tcphdr *) ((char *)iph + ip_hdrlen(skb)); 1926 th = tcp_hdr(skb);
1935 1927
1936 if (th->doff < sizeof(struct tcphdr) / 4) 1928 if (th->doff < sizeof(struct tcphdr) / 4)
1937 return; 1929 return;
1938 1930
1939 sk = __inet_lookup_established(net, &tcp_hashinfo, 1931 sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1940 iph->saddr, th->source, 1932 iph->saddr, th->source,
1941 iph->daddr, ntohs(th->dest), 1933 iph->daddr, ntohs(th->dest),
1942 skb->skb_iif); 1934 skb->skb_iif);
@@ -2640,7 +2632,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req,
2640 0, 0, /* could print option size, but that is af dependent. */ 2632 0, 0, /* could print option size, but that is af dependent. */
2641 1, /* timers active (only the expire timer) */ 2633 1, /* timers active (only the expire timer) */
2642 jiffies_delta_to_clock_t(delta), 2634 jiffies_delta_to_clock_t(delta),
2643 req->retrans, 2635 req->num_timeout,
2644 from_kuid_munged(seq_user_ns(f), uid), 2636 from_kuid_munged(seq_user_ns(f), uid),
2645 0, /* non standard timer */ 2637 0, /* non standard timer */
2646 0, /* open_requests have no inode */ 2638 0, /* open_requests have no inode */
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a7302d974f32..f35f2dfb6401 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -553,7 +553,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
553 * it can be estimated (approximately) 553 * it can be estimated (approximately)
554 * from another data. 554 * from another data.
555 */ 555 */
556 tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans); 556 tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->num_timeout);
557 paws_reject = tcp_paws_reject(&tmp_opt, th->rst); 557 paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
558 } 558 }
559 } 559 }
@@ -582,7 +582,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
582 * Note that even if there is new data in the SYN packet 582 * Note that even if there is new data in the SYN packet
583 * they will be thrown away too. 583 * they will be thrown away too.
584 */ 584 */
585 req->rsk_ops->rtx_syn_ack(sk, req, NULL); 585 inet_rtx_syn_ack(sk, req);
586 return NULL; 586 return NULL;
587 } 587 }
588 588
@@ -696,7 +696,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
696 /* Got ACK for our SYNACK, so update baseline for SYNACK RTT sample. */ 696 /* Got ACK for our SYNACK, so update baseline for SYNACK RTT sample. */
697 if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr) 697 if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr)
698 tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr; 698 tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr;
699 else if (req->retrans) /* don't take RTT sample if retrans && ~TS */ 699 else if (req->num_retrans) /* don't take RTT sample if retrans && ~TS */
700 tcp_rsk(req)->snt_synack = 0; 700 tcp_rsk(req)->snt_synack = 0;
701 701
702 /* For Fast Open no more processing is needed (sk is the 702 /* For Fast Open no more processing is needed (sk is the
@@ -706,7 +706,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
706 return sk; 706 return sk;
707 707
708 /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */ 708 /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */
709 if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && 709 if (req->num_timeout < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
710 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { 710 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
711 inet_rsk(req)->acked = 1; 711 inet_rsk(req)->acked = 1;
712 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); 712 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 948ac275b9b5..5d451593ef16 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2992,6 +2992,11 @@ int tcp_connect(struct sock *sk)
2992 2992
2993 tcp_connect_init(sk); 2993 tcp_connect_init(sk);
2994 2994
2995 if (unlikely(tp->repair)) {
2996 tcp_finish_connect(sk, NULL);
2997 return 0;
2998 }
2999
2995 buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation); 3000 buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
2996 if (unlikely(buff == NULL)) 3001 if (unlikely(buff == NULL))
2997 return -ENOBUFS; 3002 return -ENOBUFS;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index d47c1b4421a3..b78aac30c498 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -318,7 +318,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
318 req = tcp_sk(sk)->fastopen_rsk; 318 req = tcp_sk(sk)->fastopen_rsk;
319 req->rsk_ops->syn_ack_timeout(sk, req); 319 req->rsk_ops->syn_ack_timeout(sk, req);
320 320
321 if (req->retrans >= max_retries) { 321 if (req->num_timeout >= max_retries) {
322 tcp_write_err(sk); 322 tcp_write_err(sk);
323 return; 323 return;
324 } 324 }
@@ -327,10 +327,10 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
327 * regular retransmit because if the child socket has been accepted 327 * regular retransmit because if the child socket has been accepted
328 * it's not good to give up too easily. 328 * it's not good to give up too easily.
329 */ 329 */
330 req->rsk_ops->rtx_syn_ack(sk, req, NULL); 330 inet_rtx_syn_ack(sk, req);
331 req->retrans++; 331 req->num_timeout++;
332 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 332 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
333 TCP_TIMEOUT_INIT << req->retrans, TCP_RTO_MAX); 333 TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
334} 334}
335 335
336/* 336/*