path: root/net/ipv4
Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/Kconfig                                 |    8
-rw-r--r--  net/ipv4/af_inet.c                               |    4
-rw-r--r--  net/ipv4/ah4.c                                   |    6
-rw-r--r--  net/ipv4/arp.c                                   |   26
-rw-r--r--  net/ipv4/devinet.c                               |   66
-rw-r--r--  net/ipv4/fib_frontend.c                          |   10
-rw-r--r--  net/ipv4/fib_rules.c                             |   16
-rw-r--r--  net/ipv4/fib_semantics.c                         |   47
-rw-r--r--  net/ipv4/icmp.c                                  |   20
-rw-r--r--  net/ipv4/igmp.c                                  |   18
-rw-r--r--  net/ipv4/inet_connection_sock.c                  |   29
-rw-r--r--  net/ipv4/inet_diag.c                             |    4
-rw-r--r--  net/ipv4/inet_hashtables.c                       |    2
-rw-r--r--  net/ipv4/inet_timewait_sock.c                    |    6
-rw-r--r--  net/ipv4/ip_forward.c                            |    4
-rw-r--r--  net/ipv4/ip_fragment.c                           |    9
-rw-r--r--  net/ipv4/ip_gre.c                                |  102
-rw-r--r--  net/ipv4/ip_input.c                              |   13
-rw-r--r--  net/ipv4/ip_options.c                            |   32
-rw-r--r--  net/ipv4/ip_output.c                             |    4
-rw-r--r--  net/ipv4/ip_sockglue.c                           |   19
-rw-r--r--  net/ipv4/ipconfig.c                              |   15
-rw-r--r--  net/ipv4/ipip.c                                  |   57
-rw-r--r--  net/ipv4/ipmr.c                                  |   12
-rw-r--r--  net/ipv4/netfilter.c                             |   12
-rw-r--r--  net/ipv4/netfilter/Makefile                      |    3
-rw-r--r--  net/ipv4/netfilter/arp_tables.c                  |    7
-rw-r--r--  net/ipv4/netfilter/ip_queue.c                    |  639
-rw-r--r--  net/ipv4/netfilter/ip_tables.c                   |    5
-rw-r--r--  net/ipv4/netfilter/ipt_CLUSTERIP.c               |    3
-rw-r--r--  net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c   |    7
-rw-r--r--  net/ipv4/netfilter/nf_conntrack_proto_icmp.c     |   12
-rw-r--r--  net/ipv4/netfilter/nf_nat_h323.c                 |   26
-rw-r--r--  net/ipv4/netfilter/nf_nat_sip.c                  |    2
-rw-r--r--  net/ipv4/netfilter/nf_nat_snmp_basic.c           |    8
-rw-r--r--  net/ipv4/ping.c                                  |   10
-rw-r--r--  net/ipv4/raw.c                                   |    2
-rw-r--r--  net/ipv4/route.c                                 |  152
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c                       |   26
-rw-r--r--  net/ipv4/tcp.c                                   |  273
-rw-r--r--  net/ipv4/tcp_input.c                             |  461
-rw-r--r--  net/ipv4/tcp_ipv4.c                              |   90
-rw-r--r--  net/ipv4/tcp_minisocks.c                         |    1
-rw-r--r--  net/ipv4/tcp_output.c                            |   76
-rw-r--r--  net/ipv4/tcp_probe.c                             |    4
-rw-r--r--  net/ipv4/tcp_timer.c                             |    5
-rw-r--r--  net/ipv4/udp.c                                   |   20
-rw-r--r--  net/ipv4/udp_impl.h                              |    2
-rw-r--r--  net/ipv4/xfrm4_policy.c                          |    6
49 files changed, 1088 insertions, 1293 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index d183262943d9..20f1cb5c8aba 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -262,8 +262,8 @@ config ARPD
 	bool "IP: ARP daemon support"
 	---help---
 	  The kernel maintains an internal cache which maps IP addresses to
-	  hardware addresses on the local network, so that Ethernet/Token Ring/
-	  etc. frames are sent to the proper address on the physical networking
+	  hardware addresses on the local network, so that Ethernet
+	  frames are sent to the proper address on the physical networking
 	  layer. Normally, kernel uses the ARP protocol to resolve these
 	  mappings.
 
@@ -312,7 +312,7 @@ config SYN_COOKIES
 
 config INET_AH
 	tristate "IP: AH transformation"
-	select XFRM
+	select XFRM_ALGO
 	select CRYPTO
 	select CRYPTO_HMAC
 	select CRYPTO_MD5
@@ -324,7 +324,7 @@ config INET_AH
 
 config INET_ESP
 	tristate "IP: ESP transformation"
-	select XFRM
+	select XFRM_ALGO
 	select CRYPTO
 	select CRYPTO_AUTHENC
 	select CRYPTO_HMAC
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 10e3751466b5..c8f7aee587d1 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -350,7 +350,7 @@ lookup_protocol:
 	err = 0;
 	sk->sk_no_check = answer_no_check;
 	if (INET_PROTOSW_REUSE & answer_flags)
-		sk->sk_reuse = 1;
+		sk->sk_reuse = SK_CAN_REUSE;
 
 	inet = inet_sk(sk);
 	inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
@@ -541,7 +541,7 @@ out:
 }
 EXPORT_SYMBOL(inet_bind);
 
-int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
+int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
 		       int addr_len, int flags)
 {
 	struct sock *sk = sock->sk;
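
Note: sk->sk_reuse used to be a plain boolean; this series makes it a three-state value so that kernel-internal sockets can bypass bind conflicts entirely (see the SK_FORCE_REUSE check in inet_connection_sock.c below). A sketch of the three states as the diff uses them; the names match the patches, the comments are my reading rather than authoritative:

	enum {
		SK_NO_REUSE,	/* address reuse never allowed */
		SK_CAN_REUSE,	/* reuse allowed under normal SO_REUSEADDR rules */
		SK_FORCE_REUSE,	/* reuse always allowed (kernel sockets) */
	};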
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index fd508b526014..e8f2617ecd47 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -77,7 +77,7 @@ static inline struct scatterlist *ah_req_sg(struct crypto_ahash *ahash,
 
 static int ip_clear_mutable_options(const struct iphdr *iph, __be32 *daddr)
 {
-	unsigned char * optptr = (unsigned char*)(iph+1);
+	unsigned char *optptr = (unsigned char *)(iph+1);
 	int  l = iph->ihl*4 - sizeof(struct iphdr);
 	int  optlen;
 
@@ -406,8 +406,8 @@ static void ah4_err(struct sk_buff *skb, u32 info)
 			   ah->spi, IPPROTO_AH, AF_INET);
 	if (!x)
 		return;
-	printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n",
-	       ntohl(ah->spi), ntohl(iph->daddr));
+	pr_debug("pmtu discovery on SA AH/%08x/%08x\n",
+		 ntohl(ah->spi), ntohl(iph->daddr));
 	xfrm_state_put(x);
 }
 
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 18d9b81ecb1a..cda37be02f8d 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -73,6 +73,8 @@
  *		Jesper D. Brouer:       Proxy ARP PVLAN RFC 3069 support.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/string.h>
@@ -89,7 +91,6 @@
 #include <linux/etherdevice.h>
 #include <linux/fddidevice.h>
 #include <linux/if_arp.h>
-#include <linux/trdevice.h>
 #include <linux/skbuff.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
@@ -193,9 +194,6 @@ int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir)
 	case ARPHRD_IEEE802:
 		ip_eth_mc_map(addr, haddr);
 		return 0;
-	case ARPHRD_IEEE802_TR:
-		ip_tr_mc_map(addr, haddr);
-		return 0;
 	case ARPHRD_INFINIBAND:
 		ip_ib_mc_map(addr, dev->broadcast, haddr);
 		return 0;
@@ -364,8 +362,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
 		probes -= neigh->parms->ucast_probes;
 		if (probes < 0) {
 			if (!(neigh->nud_state & NUD_VALID))
-				printk(KERN_DEBUG
-				       "trying to ucast probe in NUD_INVALID\n");
+				pr_debug("trying to ucast probe in NUD_INVALID\n");
 			dst_ha = neigh->ha;
 			read_lock_bh(&neigh->lock);
 		} else {
@@ -452,7 +449,7 @@ static int arp_set_predefined(int addr_hint, unsigned char *haddr,
 {
 	switch (addr_hint) {
 	case RTN_LOCAL:
-		printk(KERN_DEBUG "ARP: arp called for own IP address\n");
+		pr_debug("arp called for own IP address\n");
 		memcpy(haddr, dev->dev_addr, dev->addr_len);
 		return 1;
 	case RTN_MULTICAST:
@@ -473,7 +470,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
 	struct neighbour *n;
 
 	if (!skb_dst(skb)) {
-		printk(KERN_DEBUG "arp_find is called with dst==NULL\n");
+		pr_debug("arp_find is called with dst==NULL\n");
 		kfree_skb(skb);
 		return 1;
 	}
@@ -648,12 +645,6 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
 		arp->ar_pro = htons(ETH_P_IP);
 		break;
 #endif
-#if IS_ENABLED(CONFIG_TR)
-	case ARPHRD_IEEE802_TR:
-		arp->ar_hrd = htons(ARPHRD_IEEE802);
-		arp->ar_pro = htons(ETH_P_IP);
-		break;
-#endif
 	}
 
 	arp->ar_hln = dev->addr_len;
@@ -751,11 +742,10 @@ static int arp_process(struct sk_buff *skb)
 			goto out;
 		break;
 	case ARPHRD_ETHER:
-	case ARPHRD_IEEE802_TR:
 	case ARPHRD_FDDI:
 	case ARPHRD_IEEE802:
 		/*
-		 * ETHERNET, Token Ring and Fibre Channel (which are IEEE 802
+		 * ETHERNET, and Fibre Channel (which are IEEE 802
 		 * devices, according to RFC 2625) devices will accept ARP
 		 * hardware types of either 1 (Ethernet) or 6 (IEEE 802.2).
 		 * This is the case also of FDDI, where the RFC 1390 says that
@@ -1059,7 +1049,7 @@ static int arp_req_set(struct net *net, struct arpreq *r,
 	neigh = __neigh_lookup_errno(&arp_tbl, &ip, dev);
 	err = PTR_ERR(neigh);
 	if (!IS_ERR(neigh)) {
-		unsigned state = NUD_STALE;
+		unsigned int state = NUD_STALE;
 		if (r->arp_flags & ATF_PERM)
 			state = NUD_PERMANENT;
 		err = neigh_update(neigh, (r->arp_flags & ATF_COM) ?
@@ -1071,7 +1061,7 @@ static int arp_req_set(struct net *net, struct arpreq *r,
 	return err;
 }
 
-static unsigned arp_state_to_flags(struct neighbour *neigh)
+static unsigned int arp_state_to_flags(struct neighbour *neigh)
 {
 	if (neigh->nud_state&NUD_PERMANENT)
 		return ATF_PERM | ATF_COM;
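
Note: the arp.c logging changes rely on the pr_fmt convention added at the top of the file: defining pr_fmt() before the first include makes every later pr_*() call prepend the module name, which is why the literal "ARP: " prefix disappears from the messages themselves. A minimal sketch of the pattern, with a hypothetical module:

	/* must be defined before <linux/printk.h> is pulled in */
	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

	#include <linux/module.h>

	static void example(void)
	{
		pr_debug("probing\n");	/* logged as "mymod: probing" */
	}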
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 6e447ff94dfa..10e15a144e95 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -217,8 +217,7 @@ void in_dev_finish_destroy(struct in_device *idev)
 	WARN_ON(idev->ifa_list);
 	WARN_ON(idev->mc_list);
 #ifdef NET_REFCNT_DEBUG
-	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
-	       idev, dev ? dev->name : "NIL");
+	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
 #endif
 	dev_put(dev);
 	if (!idev->dead)
@@ -1125,7 +1124,7 @@ skip:
 	}
 }
 
-static inline bool inetdev_valid_mtu(unsigned mtu)
+static inline bool inetdev_valid_mtu(unsigned int mtu)
 {
 	return mtu >= 68;
 }
@@ -1174,7 +1173,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 
 	switch (event) {
 	case NETDEV_REGISTER:
-		printk(KERN_DEBUG "inetdev_event: bug\n");
+		pr_debug("%s: bug\n", __func__);
 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
 		break;
 	case NETDEV_UP:
@@ -1266,17 +1265,15 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
 	ifm->ifa_scope = ifa->ifa_scope;
 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
 
-	if (ifa->ifa_address)
-		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
-
-	if (ifa->ifa_local)
-		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
-
-	if (ifa->ifa_broadcast)
-		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
-
-	if (ifa->ifa_label[0])
-		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
+	if ((ifa->ifa_address &&
+	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
+	    (ifa->ifa_local &&
+	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
+	    (ifa->ifa_broadcast &&
+	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
+	    (ifa->ifa_label[0] &&
+	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)))
+		goto nla_put_failure;
 
 	return nlmsg_end(skb, nlh);
 
@@ -1587,7 +1584,6 @@ static int ipv4_doint_and_flush(ctl_table *ctl, int write,
 static struct devinet_sysctl_table {
 	struct ctl_table_header *sysctl_header;
 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
-	char *dev_name;
 } devinet_sysctl = {
 	.devinet_vars = {
 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
@@ -1629,16 +1625,7 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
 {
 	int i;
 	struct devinet_sysctl_table *t;
-
-#define DEVINET_CTL_PATH_DEV	3
-
-	struct ctl_path devinet_ctl_path[] = {
-		{ .procname = "net",  },
-		{ .procname = "ipv4", },
-		{ .procname = "conf", },
-		{ /* to be set */ },
-		{ },
-	};
+	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
 
 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
 	if (!t)
@@ -1650,27 +1637,15 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
 		t->devinet_vars[i].extra2 = net;
 	}
 
-	/*
-	 * Make a copy of dev_name, because '.procname' is regarded as const
-	 * by sysctl and we wouldn't want anyone to change it under our feet
-	 * (see SIOCSIFNAME).
-	 */
-	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
-	if (!t->dev_name)
-		goto free;
-
-	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
+	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
 
-	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
-			t->devinet_vars);
+	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
 	if (!t->sysctl_header)
-		goto free_procname;
+		goto free;
 
 	p->sysctl = t;
 	return 0;
 
-free_procname:
-	kfree(t->dev_name);
 free:
 	kfree(t);
 out:
@@ -1686,7 +1661,6 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
 
 	cnf->sysctl = NULL;
 	unregister_net_sysctl_table(t->sysctl_header);
-	kfree(t->dev_name);
 	kfree(t);
 }
 
@@ -1716,12 +1690,6 @@ static struct ctl_table ctl_forward_entry[] = {
 	},
 	{ },
 };
-
-static __net_initdata struct ctl_path net_ipv4_path[] = {
-	{ .procname = "net", },
-	{ .procname = "ipv4", },
-	{ },
-};
 #endif
 
 static __net_init int devinet_init_net(struct net *net)
@@ -1767,7 +1735,7 @@ static __net_init int devinet_init_net(struct net *net)
 		goto err_reg_dflt;
 
 	err = -ENOMEM;
-	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
+	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
 	if (forw_hdr == NULL)
 		goto err_reg_ctl;
 	net->ipv4.forw_hdr = forw_hdr;
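
Note: this is the recurring netlink conversion in this series (it repeats in fib_rules.c, fib_semantics.c and ip_gre.c below). The removed NLA_PUT_*() macros hid a "goto nla_put_failure" inside the macro body; the nla_put_*() functions instead return nonzero when the skb has no room, and the caller branches explicitly. A sketch of the new idiom with a made-up fill function:

	static int example_fill(struct sk_buff *skb, __be32 addr, u32 prio)
	{
		/* each nla_put_*() returns 0 on success, nonzero if the
		 * attribute does not fit; || stops at the first failure */
		if (nla_put_be32(skb, IFA_ADDRESS, addr) ||
		    nla_put_u32(skb, RTA_PRIORITY, prio))
			goto nla_put_failure;
		return 0;

	nla_put_failure:
		return -EMSGSIZE;
	}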
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index cbe3a68507cf..3854411fa37c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -136,13 +136,13 @@ static void fib_flush(struct net *net)
  * Find address type as if only "dev" was present in the system. If
  * on_dev is NULL then all interfaces are taken into consideration.
  */
-static inline unsigned __inet_dev_addr_type(struct net *net,
-					    const struct net_device *dev,
-					    __be32 addr)
+static inline unsigned int __inet_dev_addr_type(struct net *net,
+						const struct net_device *dev,
+						__be32 addr)
 {
 	struct flowi4		fl4 = { .daddr = addr };
 	struct fib_result	res;
-	unsigned ret = RTN_BROADCAST;
+	unsigned int ret = RTN_BROADCAST;
 	struct fib_table *local_table;
 
 	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
@@ -740,7 +740,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
 #define BRD_OK		2
 #define BRD0_OK		4
 #define BRD1_OK		8
-	unsigned ok = 0;
+	unsigned int ok = 0;
 	int subnet = 0;		/* Primary network */
 	int gone = 1;		/* Address is missing */
 	int same_prefsrc = 0;	/* Another primary with same IP */
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 799fc790b3cf..2d043f71ef70 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -221,15 +221,15 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 	frh->src_len = rule4->src_len;
 	frh->tos = rule4->tos;
 
-	if (rule4->dst_len)
-		NLA_PUT_BE32(skb, FRA_DST, rule4->dst);
-
-	if (rule4->src_len)
-		NLA_PUT_BE32(skb, FRA_SRC, rule4->src);
-
+	if ((rule4->dst_len &&
+	     nla_put_be32(skb, FRA_DST, rule4->dst)) ||
+	    (rule4->src_len &&
+	     nla_put_be32(skb, FRA_SRC, rule4->src)))
+		goto nla_put_failure;
 #ifdef CONFIG_IP_ROUTE_CLASSID
-	if (rule4->tclassid)
-		NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid);
+	if (rule4->tclassid &&
+	    nla_put_u32(skb, FRA_FLOW, rule4->tclassid))
+		goto nla_put_failure;
 #endif
 	return 0;
 
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 5063fa38ac7b..a8bdf7405433 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -931,33 +931,36 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 		rtm->rtm_table = tb_id;
 	else
 		rtm->rtm_table = RT_TABLE_COMPAT;
-	NLA_PUT_U32(skb, RTA_TABLE, tb_id);
+	if (nla_put_u32(skb, RTA_TABLE, tb_id))
+		goto nla_put_failure;
 	rtm->rtm_type = type;
 	rtm->rtm_flags = fi->fib_flags;
 	rtm->rtm_scope = fi->fib_scope;
 	rtm->rtm_protocol = fi->fib_protocol;
 
-	if (rtm->rtm_dst_len)
-		NLA_PUT_BE32(skb, RTA_DST, dst);
-
-	if (fi->fib_priority)
-		NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
-
+	if (rtm->rtm_dst_len &&
+	    nla_put_be32(skb, RTA_DST, dst))
+		goto nla_put_failure;
+	if (fi->fib_priority &&
+	    nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority))
+		goto nla_put_failure;
 	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
 		goto nla_put_failure;
 
-	if (fi->fib_prefsrc)
-		NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
-
+	if (fi->fib_prefsrc &&
+	    nla_put_be32(skb, RTA_PREFSRC, fi->fib_prefsrc))
+		goto nla_put_failure;
 	if (fi->fib_nhs == 1) {
-		if (fi->fib_nh->nh_gw)
-			NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
-
-		if (fi->fib_nh->nh_oif)
-			NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
+		if (fi->fib_nh->nh_gw &&
+		    nla_put_be32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw))
+			goto nla_put_failure;
+		if (fi->fib_nh->nh_oif &&
+		    nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif))
+			goto nla_put_failure;
 #ifdef CONFIG_IP_ROUTE_CLASSID
-		if (fi->fib_nh[0].nh_tclassid)
-			NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
+		if (fi->fib_nh[0].nh_tclassid &&
+		    nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
+			goto nla_put_failure;
 #endif
 	}
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -978,11 +981,13 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 			rtnh->rtnh_hops = nh->nh_weight - 1;
 			rtnh->rtnh_ifindex = nh->nh_oif;
 
-			if (nh->nh_gw)
-				NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
+			if (nh->nh_gw &&
+			    nla_put_be32(skb, RTA_GATEWAY, nh->nh_gw))
+				goto nla_put_failure;
 #ifdef CONFIG_IP_ROUTE_CLASSID
-			if (nh->nh_tclassid)
-				NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
+			if (nh->nh_tclassid &&
+			    nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
+				goto nla_put_failure;
 #endif
 			/* length of rtnetlink header + attributes */
 			rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 2cb2bf845641..c75efbdc71cb 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -713,11 +713,10 @@ static void icmp_unreach(struct sk_buff *skb)
 
 	if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses &&
 	    inet_addr_type(net, iph->daddr) == RTN_BROADCAST) {
-		if (net_ratelimit())
-			pr_warn("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n",
-				&ip_hdr(skb)->saddr,
-				icmph->type, icmph->code,
-				&iph->daddr, skb->dev->name);
+		net_warn_ratelimited("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n",
+				     &ip_hdr(skb)->saddr,
+				     icmph->type, icmph->code,
+				     &iph->daddr, skb->dev->name);
 		goto out;
 	}
 
@@ -906,8 +905,7 @@ out_err:
 static void icmp_address(struct sk_buff *skb)
 {
 #if 0
-	if (net_ratelimit())
-		printk(KERN_DEBUG "a guy asks for address mask. Who is it?\n");
+	net_dbg_ratelimited("a guy asks for address mask. Who is it?\n");
 #endif
 }
 
@@ -943,10 +941,10 @@ static void icmp_address_reply(struct sk_buff *skb)
 			    inet_ifa_match(ip_hdr(skb)->saddr, ifa))
 				break;
 		}
-		if (!ifa && net_ratelimit()) {
-			pr_info("Wrong address mask %pI4 from %s/%pI4\n",
-				mp, dev->name, &ip_hdr(skb)->saddr);
-		}
+		if (!ifa)
+			net_info_ratelimited("Wrong address mask %pI4 from %s/%pI4\n",
+					     mp,
+					     dev->name, &ip_hdr(skb)->saddr);
 	}
 }
 
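
Note: the net_<level>_ratelimited() helpers used throughout these patches fold the open-coded "if (net_ratelimit())" guard into the call itself. Roughly the shape of the macro (a sketch, not the exact definition in <linux/net.h>):

	#define net_warn_ratelimited(fmt, ...)			\
		do {						\
			if (net_ratelimit())			\
				pr_warn(fmt, ##__VA_ARGS__);	\
		} while (0)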
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 5dfecfd7d5e9..6699f23e6f55 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -344,10 +344,10 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 	pip->protocol = IPPROTO_IGMP;
 	pip->tot_len  = 0;	/* filled in later */
 	ip_select_ident(pip, &rt->dst, NULL);
-	((u8*)&pip[1])[0] = IPOPT_RA;
-	((u8*)&pip[1])[1] = 4;
-	((u8*)&pip[1])[2] = 0;
-	((u8*)&pip[1])[3] = 0;
+	((u8 *)&pip[1])[0] = IPOPT_RA;
+	((u8 *)&pip[1])[1] = 4;
+	((u8 *)&pip[1])[2] = 0;
+	((u8 *)&pip[1])[3] = 0;
 
 	skb->transport_header = skb->network_header + sizeof(struct iphdr) + 4;
 	skb_put(skb, sizeof(*pig));
@@ -688,10 +688,10 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 	iph->saddr    = fl4.saddr;
 	iph->protocol = IPPROTO_IGMP;
 	ip_select_ident(iph, &rt->dst, NULL);
-	((u8*)&iph[1])[0] = IPOPT_RA;
-	((u8*)&iph[1])[1] = 4;
-	((u8*)&iph[1])[2] = 0;
-	((u8*)&iph[1])[3] = 0;
+	((u8 *)&iph[1])[0] = IPOPT_RA;
+	((u8 *)&iph[1])[1] = 4;
+	((u8 *)&iph[1])[2] = 0;
+	((u8 *)&iph[1])[3] = 0;
 
 	ih = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
 	ih->type = type;
@@ -774,7 +774,7 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs)
 			if (psf->sf_count[MCAST_INCLUDE] ||
 			    pmc->sfcount[MCAST_EXCLUDE] !=
 			    psf->sf_count[MCAST_EXCLUDE])
-				continue;
+				break;
 			if (srcs[i] == psf->sf_inaddr) {
 				scount++;
 				break;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 19d66cefd7d3..95e61596e605 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -42,7 +42,8 @@ EXPORT_SYMBOL(sysctl_local_reserved_ports);
 
 void inet_get_local_port_range(int *low, int *high)
 {
-	unsigned seq;
+	unsigned int seq;
+
 	do {
 		seq = read_seqbegin(&sysctl_local_ports.lock);
 
@@ -53,7 +54,7 @@ void inet_get_local_port_range(int *low, int *high)
 EXPORT_SYMBOL(inet_get_local_port_range);
 
 int inet_csk_bind_conflict(const struct sock *sk,
-			   const struct inet_bind_bucket *tb)
+			   const struct inet_bind_bucket *tb, bool relax)
 {
 	struct sock *sk2;
 	struct hlist_node *node;
@@ -79,6 +80,14 @@ int inet_csk_bind_conflict(const struct sock *sk,
 			    sk2_rcv_saddr == sk_rcv_saddr(sk))
 				break;
 		}
+		if (!relax && reuse && sk2->sk_reuse &&
+		    sk2->sk_state != TCP_LISTEN) {
+			const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
+
+			if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
+			    sk2_rcv_saddr == sk_rcv_saddr(sk))
+				break;
+		}
 		}
 	}
 	return node != NULL;
@@ -122,12 +131,13 @@ again:
 				    (tb->num_owners < smallest_size || smallest_size == -1)) {
 					smallest_size = tb->num_owners;
 					smallest_rover = rover;
-					if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) {
+					if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 &&
+					    !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
 						snum = smallest_rover;
 						goto tb_found;
 					}
 				}
-				if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) {
+				if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
 					snum = rover;
 					goto tb_found;
 				}
@@ -172,18 +182,22 @@ have_snum:
 		goto tb_not_found;
 tb_found:
 	if (!hlist_empty(&tb->owners)) {
+		if (sk->sk_reuse == SK_FORCE_REUSE)
+			goto success;
+
 		if (tb->fastreuse > 0 &&
 		    sk->sk_reuse && sk->sk_state != TCP_LISTEN &&
 		    smallest_size == -1) {
 			goto success;
 		} else {
 			ret = 1;
-			if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) {
+			if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) {
 				if (sk->sk_reuse && sk->sk_state != TCP_LISTEN &&
 				    smallest_size != -1 && --attempts >= 0) {
 					spin_unlock(&head->lock);
 					goto again;
 				}
+
 				goto fail_unlock;
 			}
 		}
@@ -514,7 +528,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 
 	/* Normally all the openreqs are young and become mature
 	 * (i.e. converted to established socket) for first timeout.
-	 * If synack was not acknowledged for 3 seconds, it means
+	 * If synack was not acknowledged for 1 second, it means
 	 * one of the following things: synack was lost, ack was lost,
 	 * rtt is high or nobody planned to ack (i.e. synflood).
	 * When server is a bit loaded, queue is populated with old
@@ -555,8 +569,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 				syn_ack_recalc(req, thresh, max_retries,
 					       queue->rskq_defer_accept,
 					       &expire, &resend);
-				if (req->rsk_ops->syn_ack_timeout)
-					req->rsk_ops->syn_ack_timeout(parent, req);
+				req->rsk_ops->syn_ack_timeout(parent, req);
 				if (!expire &&
 				    (!resend ||
 				     !req->rsk_ops->rtx_syn_ack(parent, req, NULL) ||
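
Note: inet_get_local_port_range() reads the port-range pair under a seqlock, so a writer updating both ends can never be observed halfway through; the hunk above is cut off before the retry test, but the standard reader loop has this shape (a sketch, under the assumption the range lives in sysctl_local_ports.range[]):

	void get_port_range_sketch(int *low, int *high)
	{
		unsigned int seq;

		do {
			seq = read_seqbegin(&sysctl_local_ports.lock);
			*low  = sysctl_local_ports.range[0];
			*high = sysctl_local_ports.range[1];
		} while (read_seqretry(&sysctl_local_ports.lock, seq));
	}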
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 8f8db724bfaf..46d1e7199a8c 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -999,12 +999,12 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
 	return inet_diag_get_exact(skb, h, (struct inet_diag_req_v2 *)NLMSG_DATA(h));
 }
 
-static struct sock_diag_handler inet_diag_handler = {
+static const struct sock_diag_handler inet_diag_handler = {
 	.family = AF_INET,
 	.dump = inet_diag_handler_dump,
 };
 
-static struct sock_diag_handler inet6_diag_handler = {
+static const struct sock_diag_handler inet6_diag_handler = {
 	.family = AF_INET6,
 	.dump = inet_diag_handler_dump,
 };
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 984ec656b03b..7880af970208 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -217,7 +217,7 @@ begin:
 }
 EXPORT_SYMBOL_GPL(__inet_lookup_listener);
 
-struct sock * __inet_lookup_established(struct net *net,
+struct sock *__inet_lookup_established(struct net *net,
 				  struct inet_hashinfo *hashinfo,
 				  const __be32 saddr, const __be16 sport,
 				  const __be32 daddr, const u16 hnum,
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 89168c6351ff..2784db3155fb 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -89,8 +89,8 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
 
 #ifdef SOCK_REFCNT_DEBUG
 	if (atomic_read(&tw->tw_refcnt) != 1) {
-		printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n",
-		       tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt));
+		pr_debug("%s timewait_sock %p refcnt=%d\n",
+			 tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt));
 	}
 #endif
 	while (refcnt) {
@@ -263,7 +263,7 @@ rescan:
 void inet_twdr_hangman(unsigned long data)
 {
 	struct inet_timewait_death_row *twdr;
-	int unsigned need_timer;
+	unsigned int need_timer;
 
 	twdr = (struct inet_timewait_death_row *)data;
 	spin_lock(&twdr->death_lock);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 29a07b6c7168..e5c44fc586ab 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -41,7 +41,7 @@
 
 static int ip_forward_finish(struct sk_buff *skb)
 {
-	struct ip_options * opt	= &(IPCB(skb)->opt);
+	struct ip_options *opt	= &(IPCB(skb)->opt);
 
 	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
 
@@ -55,7 +55,7 @@ int ip_forward(struct sk_buff *skb)
 {
 	struct iphdr *iph;	/* Our header */
 	struct rtable *rt;	/* Route we use */
-	struct ip_options * opt	= &(IPCB(skb)->opt);
+	struct ip_options *opt	= &(IPCB(skb)->opt);
 
 	if (skb_warn_if_lro(skb))
 		goto drop;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 3727e234c884..9f9bd139335f 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -569,7 +569,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 		skb_morph(head, qp->q.fragments);
 		head->next = qp->q.fragments->next;
 
-		kfree_skb(qp->q.fragments);
+		consume_skb(qp->q.fragments);
 		qp->q.fragments = head;
 	}
 
@@ -644,8 +644,7 @@ out_nomem:
 	err = -ENOMEM;
 	goto out_fail;
 out_oversize:
-	if (net_ratelimit())
-		pr_info("Oversized IP packet from %pI4\n", &qp->saddr);
+	net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->saddr);
 out_fail:
 	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
 	return err;
@@ -782,7 +781,7 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net)
 		table[2].data = &net->ipv4.frags.timeout;
 	}
 
-	hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table);
+	hdr = register_net_sysctl(net, "net/ipv4", table);
 	if (hdr == NULL)
 		goto err_reg;
 
@@ -807,7 +806,7 @@ static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net)
 
 static void ip4_frags_ctl_register(void)
 {
-	register_net_sysctl_rotable(net_ipv4_ctl_path, ip4_frags_ctl_table);
+	register_net_sysctl(&init_net, "net/ipv4", ip4_frags_ctl_table);
 }
 #else
 static inline int ip4_frags_ns_ctl_register(struct net *net)
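
Note: register_net_sysctl() (also used in devinet.c above) takes a plain "dir/subdir" string instead of a struct ctl_path array, which is what lets the static net_ipv4_ctl_path/net_ipv4_path tables be deleted. A sketch of a per-netns registration with an assumed knob name:

	static int example_value;

	static struct ctl_table example_table[] = {
		{
			.procname	= "example_knob",
			.data		= &example_value,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{ }
	};

	static int __net_init example_register(struct net *net)
	{
		/* one string replaces the old { "net" }, { "ipv4" } path array */
		return register_net_sysctl(net, "net/ipv4", example_table) ?
			0 : -ENOMEM;
	}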
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index b57532d4742c..f49047b79609 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -169,37 +169,56 @@ struct ipgre_net {
 
 /* often modified stats are per cpu, other are shared (netdev->stats) */
 struct pcpu_tstats {
-	unsigned long	rx_packets;
-	unsigned long	rx_bytes;
-	unsigned long	tx_packets;
-	unsigned long	tx_bytes;
-} __attribute__((aligned(4*sizeof(unsigned long))));
+	u64	rx_packets;
+	u64	rx_bytes;
+	u64	tx_packets;
+	u64	tx_bytes;
+	struct u64_stats_sync	syncp;
+};
 
-static struct net_device_stats *ipgre_get_stats(struct net_device *dev)
+static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
+						   struct rtnl_link_stats64 *tot)
 {
-	struct pcpu_tstats sum = { 0 };
 	int i;
 
 	for_each_possible_cpu(i) {
 		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
+		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+		unsigned int start;
 
-		sum.rx_packets += tstats->rx_packets;
-		sum.rx_bytes   += tstats->rx_bytes;
-		sum.tx_packets += tstats->tx_packets;
-		sum.tx_bytes   += tstats->tx_bytes;
+		do {
+			start = u64_stats_fetch_begin_bh(&tstats->syncp);
+			rx_packets = tstats->rx_packets;
+			tx_packets = tstats->tx_packets;
+			rx_bytes = tstats->rx_bytes;
+			tx_bytes = tstats->tx_bytes;
+		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
+
+		tot->rx_packets += rx_packets;
+		tot->tx_packets += tx_packets;
+		tot->rx_bytes   += rx_bytes;
+		tot->tx_bytes   += tx_bytes;
 	}
-	dev->stats.rx_packets = sum.rx_packets;
-	dev->stats.rx_bytes   = sum.rx_bytes;
-	dev->stats.tx_packets = sum.tx_packets;
-	dev->stats.tx_bytes   = sum.tx_bytes;
-	return &dev->stats;
+
+	tot->multicast = dev->stats.multicast;
+	tot->rx_crc_errors = dev->stats.rx_crc_errors;
+	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
+	tot->rx_length_errors = dev->stats.rx_length_errors;
+	tot->rx_errors = dev->stats.rx_errors;
+	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
+	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
+	tot->tx_dropped = dev->stats.tx_dropped;
+	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
+	tot->tx_errors = dev->stats.tx_errors;
+
+	return tot;
 }
 
 /* Given src, dst and key, find appropriate for input tunnel. */
 
-static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
-					      __be32 remote, __be32 local,
-					      __be32 key, __be16 gre_proto)
+static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
+					     __be32 remote, __be32 local,
+					     __be32 key, __be16 gre_proto)
 {
 	struct net *net = dev_net(dev);
 	int link = dev->ifindex;
@@ -464,7 +483,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
  */
 
 	const struct iphdr *iph = (const struct iphdr *)skb->data;
-	__be16	      *p = (__be16*)(skb->data+(iph->ihl<<2));
+	__be16	      *p = (__be16 *)(skb->data+(iph->ihl<<2));
 	int grehlen = (iph->ihl<<2) + 4;
 	const int type = icmp_hdr(skb)->type;
 	const int code = icmp_hdr(skb)->code;
@@ -574,7 +593,7 @@ static int ipgre_rcv(struct sk_buff *skb)
 
 	iph = ip_hdr(skb);
 	h = skb->data;
-	flags = *(__be16*)h;
+	flags = *(__be16 *)h;
 
 	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
 		/*  - Version must be 0.
@@ -598,11 +617,11 @@ static int ipgre_rcv(struct sk_buff *skb)
 			offset += 4;
 		}
 		if (flags&GRE_KEY) {
-			key = *(__be32*)(h + offset);
+			key = *(__be32 *)(h + offset);
 			offset += 4;
 		}
 		if (flags&GRE_SEQ) {
-			seqno = ntohl(*(__be32*)(h + offset));
+			seqno = ntohl(*(__be32 *)(h + offset));
 			offset += 4;
 		}
 	}
@@ -672,8 +691,10 @@ static int ipgre_rcv(struct sk_buff *skb)
 		}
 
 		tstats = this_cpu_ptr(tunnel->dev->tstats);
+		u64_stats_update_begin(&tstats->syncp);
 		tstats->rx_packets++;
 		tstats->rx_bytes += skb->len;
+		u64_stats_update_end(&tstats->syncp);
 
 		__skb_tunnel_rx(skb, tunnel->dev);
 
@@ -900,7 +921,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			htons(ETH_P_TEB) : skb->protocol;
 
 	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
-		__be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
+		__be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4);
 
 		if (tunnel->parms.o_flags&GRE_SEQ) {
 			++tunnel->o_seqno;
@@ -913,7 +934,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 		}
 		if (tunnel->parms.o_flags&GRE_CSUM) {
 			*ptr = 0;
-			*(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
+			*(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr));
 		}
 	}
 
@@ -1169,7 +1190,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
 {
 	struct ip_tunnel *t = netdev_priv(dev);
 	struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
-	__be16 *p = (__be16*)(iph+1);
+	__be16 *p = (__be16 *)(iph+1);
 
 	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
 	p[0]		= t->parms.o_flags;
@@ -1253,7 +1274,7 @@ static const struct net_device_ops ipgre_netdev_ops = {
 	.ndo_start_xmit		= ipgre_tunnel_xmit,
 	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
 	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
-	.ndo_get_stats		= ipgre_get_stats,
+	.ndo_get_stats64	= ipgre_get_stats64,
 };
 
 static void ipgre_dev_free(struct net_device *dev)
@@ -1507,7 +1528,7 @@ static const struct net_device_ops ipgre_tap_netdev_ops = {
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
-	.ndo_get_stats		= ipgre_get_stats,
+	.ndo_get_stats64	= ipgre_get_stats64,
 };
 
 static void ipgre_tap_setup(struct net_device *dev)
@@ -1654,17 +1675,18 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	struct ip_tunnel *t = netdev_priv(dev);
 	struct ip_tunnel_parm *p = &t->parms;
 
-	NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
-	NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
-	NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
-	NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
-	NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
-	NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
-	NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
-	NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
-	NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
-	NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
-
+	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
+	    nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
+	    nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
+	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
+	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
+	    nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
+	    nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
+	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
+	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
+	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
+		       !!(p->iph.frag_off & htons(IP_DF))))
+		goto nla_put_failure;
 	return 0;
 
 nla_put_failure:
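
Note: moving the tunnel counters from unsigned long to u64 guarded by u64_stats_sync is what makes the new ndo_get_stats64 correct on 32-bit SMP: the reader loop shown in the hunk retries until it observes a consistent snapshot, and every writer must bracket its updates, as in the ipgre_rcv() change above. The writer side in isolation (a sketch using the same struct as the diff):

	static void tunnel_rx_account(struct pcpu_tstats *tstats, unsigned int len)
	{
		u64_stats_update_begin(&tstats->syncp);	/* readers retry while held */
		tstats->rx_packets++;
		tstats->rx_bytes += len;
		u64_stats_update_end(&tstats->syncp);
	}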
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 26eccc5bab1c..8590144ca330 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -210,9 +210,8 @@ static int ip_local_deliver_finish(struct sk_buff *skb)
 			int ret;
 
 			if (!net_eq(net, &init_net) && !ipprot->netns_ok) {
-				if (net_ratelimit())
-					printk("%s: proto %d isn't netns-ready\n",
-						__func__, protocol);
+				net_info_ratelimited("%s: proto %d isn't netns-ready\n",
+						     __func__, protocol);
 				kfree_skb(skb);
 				goto out;
 			}
@@ -298,10 +297,10 @@ static inline bool ip_rcv_options(struct sk_buff *skb)
 
 	if (in_dev) {
 		if (!IN_DEV_SOURCE_ROUTE(in_dev)) {
-			if (IN_DEV_LOG_MARTIANS(in_dev) &&
-			    net_ratelimit())
-				pr_info("source route option %pI4 -> %pI4\n",
-					&iph->saddr, &iph->daddr);
+			if (IN_DEV_LOG_MARTIANS(in_dev))
+				net_info_ratelimited("source route option %pI4 -> %pI4\n",
+						     &iph->saddr,
+						     &iph->daddr);
 			goto drop;
 		}
 	}
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index a0d0d9d9b870..708b99494e23 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -210,10 +210,10 @@ int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb)
  *       Simple and stupid 8), but the most efficient way.
  */
 
-void ip_options_fragment(struct sk_buff * skb)
+void ip_options_fragment(struct sk_buff *skb)
 {
 	unsigned char *optptr = skb_network_header(skb) + sizeof(struct iphdr);
-	struct ip_options * opt = &(IPCB(skb)->opt);
+	struct ip_options *opt = &(IPCB(skb)->opt);
 	int  l = opt->optlen;
 	int  optlen;
 
@@ -248,13 +248,13 @@ void ip_options_fragment(struct sk_buff * skb)
  */
 
 int ip_options_compile(struct net *net,
-		       struct ip_options * opt, struct sk_buff * skb)
+		       struct ip_options *opt, struct sk_buff *skb)
 {
 	int l;
-	unsigned char * iph;
-	unsigned char * optptr;
+	unsigned char *iph;
+	unsigned char *optptr;
 	int optlen;
-	unsigned char * pp_ptr = NULL;
+	unsigned char *pp_ptr = NULL;
 	struct rtable *rt = NULL;
 
 	if (skb != NULL) {
@@ -413,7 +413,7 @@ int ip_options_compile(struct net *net,
 					opt->is_changed = 1;
 				}
 			} else {
-				unsigned overflow = optptr[3]>>4;
+				unsigned int overflow = optptr[3]>>4;
 				if (overflow == 15) {
 					pp_ptr = optptr + 3;
 					goto error;
@@ -473,20 +473,20 @@ EXPORT_SYMBOL(ip_options_compile);
  *	Undo all the changes done by ip_options_compile().
  */
 
-void ip_options_undo(struct ip_options * opt)
+void ip_options_undo(struct ip_options *opt)
 {
 	if (opt->srr) {
-		unsigned char * optptr = opt->__data+opt->srr-sizeof(struct iphdr);
+		unsigned char *optptr = opt->__data+opt->srr-sizeof(struct iphdr);
 		memmove(optptr+7, optptr+3, optptr[1]-7);
 		memcpy(optptr+3, &opt->faddr, 4);
 	}
 	if (opt->rr_needaddr) {
-		unsigned char * optptr = opt->__data+opt->rr-sizeof(struct iphdr);
+		unsigned char *optptr = opt->__data+opt->rr-sizeof(struct iphdr);
 		optptr[2] -= 4;
 		memset(&optptr[optptr[2]-1], 0, 4);
 	}
 	if (opt->ts) {
-		unsigned char * optptr = opt->__data+opt->ts-sizeof(struct iphdr);
+		unsigned char *optptr = opt->__data+opt->ts-sizeof(struct iphdr);
 		if (opt->ts_needtime) {
 			optptr[2] -= 4;
 			memset(&optptr[optptr[2]-1], 0, 4);
@@ -549,8 +549,8 @@ int ip_options_get(struct net *net, struct ip_options_rcu **optp,
 
 void ip_forward_options(struct sk_buff *skb)
 {
-	struct   ip_options * opt	= &(IPCB(skb)->opt);
-	unsigned char * optptr;
+	struct   ip_options *opt	= &(IPCB(skb)->opt);
+	unsigned char *optptr;
 	struct rtable *rt = skb_rtable(skb);
 	unsigned char *raw = skb_network_header(skb);
 
@@ -578,8 +578,10 @@ void ip_forward_options(struct sk_buff *skb)
 			ip_hdr(skb)->daddr = opt->nexthop;
 			ip_rt_get_source(&optptr[srrptr-1], skb, rt);
 			optptr[2] = srrptr+4;
-		} else if (net_ratelimit())
-			pr_crit("%s(): Argh! Destination lost!\n", __func__);
+		} else {
+			net_crit_ratelimited("%s(): Argh! Destination lost!\n",
					     __func__);
+		}
 		if (opt->ts_needaddr) {
 			optptr = raw + opt->ts;
 			ip_rt_get_source(&optptr[optptr[2]-9], skb, rt);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 4910176d24ed..451f97c42eb4 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -214,8 +214,8 @@ static inline int ip_finish_output2(struct sk_buff *skb)
 	}
 	rcu_read_unlock();
 
-	if (net_ratelimit())
-		printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
+	net_dbg_ratelimited("%s: No header cache and no neighbour!\n",
+			    __func__);
 	kfree_skb(skb);
 	return -EINVAL;
 }
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 2fd0fba77124..0d11f234d615 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -90,7 +90,7 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
 static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
 {
 	unsigned char optbuf[sizeof(struct ip_options) + 40];
-	struct ip_options * opt = (struct ip_options *)optbuf;
+	struct ip_options *opt = (struct ip_options *)optbuf;
 
 	if (IPCB(skb)->opt.optlen == 0)
 		return;
@@ -147,7 +147,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
 void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
 {
 	struct inet_sock *inet = inet_sk(skb->sk);
-	unsigned flags = inet->cmsg_flags;
+	unsigned int flags = inet->cmsg_flags;
 
 	/* Ordered by supposed usage frequency */
 	if (flags & 1)
@@ -673,10 +673,15 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 			break;
 		} else {
 			memset(&mreq, 0, sizeof(mreq));
-			if (optlen >= sizeof(struct in_addr) &&
-			    copy_from_user(&mreq.imr_address, optval,
-					   sizeof(struct in_addr)))
+			if (optlen >= sizeof(struct ip_mreq)) {
+				if (copy_from_user(&mreq, optval,
+						   sizeof(struct ip_mreq)))
 					break;
+			} else if (optlen >= sizeof(struct in_addr)) {
+				if (copy_from_user(&mreq.imr_address, optval,
+						   sizeof(struct in_addr)))
+					break;
+			}
 		}
 
 		if (!mreq.imr_ifindex) {
@@ -1094,7 +1099,7 @@ EXPORT_SYMBOL(compat_ip_setsockopt);
  */
 
 static int do_ip_getsockopt(struct sock *sk, int level, int optname,
-			    char __user *optval, int __user *optlen, unsigned flags)
+			    char __user *optval, int __user *optlen, unsigned int flags)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	int val;
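
Note: the IP_MULTICAST_IF change copies a full struct ip_mreq (group address plus interface address) when the caller passed one, instead of always truncating to a bare struct in_addr; which layout the caller used is inferred from optlen. A hypothetical userspace caller using the shortest accepted form:

	#include <arpa/inet.h>
	#include <sys/socket.h>

	/* pick the outgoing multicast interface by its local address */
	static int set_mcast_if(int sock, const char *local)
	{
		struct in_addr addr;

		if (inet_pton(AF_INET, local, &addr) != 1)
			return -1;
		return setsockopt(sock, IPPROTO_IP, IP_MULTICAST_IF,
				  &addr, sizeof(addr));
	}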
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 92ac7e7363a0..430015010e57 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -808,8 +808,6 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
808 b->op = BOOTP_REQUEST; 808 b->op = BOOTP_REQUEST;
809 if (dev->type < 256) /* check for false types */ 809 if (dev->type < 256) /* check for false types */
810 b->htype = dev->type; 810 b->htype = dev->type;
811 else if (dev->type == ARPHRD_IEEE802_TR) /* fix for token ring */
812 b->htype = ARPHRD_IEEE802;
813 else if (dev->type == ARPHRD_FDDI) 811 else if (dev->type == ARPHRD_FDDI)
814 b->htype = ARPHRD_ETHER; 812 b->htype = ARPHRD_ETHER;
815 else { 813 else {
@@ -955,8 +953,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
955 953
956 /* Fragments are not supported */ 954 /* Fragments are not supported */
957 if (ip_is_fragment(h)) { 955 if (ip_is_fragment(h)) {
958 if (net_ratelimit()) 956 net_err_ratelimited("DHCP/BOOTP: Ignoring fragmented reply\n");
959 pr_err("DHCP/BOOTP: Ignoring fragmented reply\n");
960 goto drop; 957 goto drop;
961 } 958 }
962 959
@@ -1004,16 +1001,14 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
1004 /* Is it a reply to our BOOTP request? */ 1001 /* Is it a reply to our BOOTP request? */
1005 if (b->op != BOOTP_REPLY || 1002 if (b->op != BOOTP_REPLY ||
1006 b->xid != d->xid) { 1003 b->xid != d->xid) {
1007 if (net_ratelimit()) 1004 net_err_ratelimited("DHCP/BOOTP: Reply not for us, op[%x] xid[%x]\n",
1008 pr_err("DHCP/BOOTP: Reply not for us, op[%x] xid[%x]\n", 1005 b->op, b->xid);
1009 b->op, b->xid);
1010 goto drop_unlock; 1006 goto drop_unlock;
1011 } 1007 }
1012 1008
1013 /* Is it a reply for the device we are configuring? */ 1009 /* Is it a reply for the device we are configuring? */
1014 if (b->xid != ic_dev_xid) { 1010 if (b->xid != ic_dev_xid) {
1015 if (net_ratelimit()) 1011 net_err_ratelimited("DHCP/BOOTP: Ignoring delayed packet\n");
1016 pr_err("DHCP/BOOTP: Ignoring delayed packet\n");
1017 goto drop_unlock; 1012 goto drop_unlock;
1018 } 1013 }
1019 1014
@@ -1198,7 +1193,7 @@ static int __init ic_dynamic(void)
1198 d = ic_first_dev; 1193 d = ic_first_dev;
1199 retries = CONF_SEND_RETRIES; 1194 retries = CONF_SEND_RETRIES;
1200 get_random_bytes(&timeout, sizeof(timeout)); 1195 get_random_bytes(&timeout, sizeof(timeout));
1201 timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM); 1196 timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned int) CONF_TIMEOUT_RANDOM);
1202 for (;;) { 1197 for (;;) {
1203 /* Track the device we are configuring */ 1198 /* Track the device we are configuring */
1204 ic_dev_xid = d->xid; 1199 ic_dev_xid = d->xid;
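
The ipconfig.c changes above follow a conversion repeated throughout this series: two-line `if (net_ratelimit()) pr_err(...)` constructions collapse into single net_err_ratelimited() / net_warn_ratelimited() calls. The real helpers live in include/linux/net.h; as a hedged sketch, they have roughly this shape:

    #define net_ratelimited_function(function, ...)		\
    do {							\
    	if (net_ratelimit())					\
    		function(__VA_ARGS__);				\
    } while (0)

    #define net_err_ratelimited(fmt, ...)			\
    	net_ratelimited_function(pr_err, fmt, ##__VA_ARGS__)
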
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index ae1413e3f2f8..2d0f99bf61b3 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -144,33 +144,48 @@ static void ipip_dev_free(struct net_device *dev);
144 144
145/* often modified stats are per cpu, other are shared (netdev->stats) */ 145/* often modified stats are per cpu, other are shared (netdev->stats) */
146struct pcpu_tstats { 146struct pcpu_tstats {
147 unsigned long rx_packets; 147 u64 rx_packets;
148 unsigned long rx_bytes; 148 u64 rx_bytes;
149 unsigned long tx_packets; 149 u64 tx_packets;
150 unsigned long tx_bytes; 150 u64 tx_bytes;
151} __attribute__((aligned(4*sizeof(unsigned long)))); 151 struct u64_stats_sync syncp;
152};
152 153
153static struct net_device_stats *ipip_get_stats(struct net_device *dev) 154static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
155 struct rtnl_link_stats64 *tot)
154{ 156{
155 struct pcpu_tstats sum = { 0 };
156 int i; 157 int i;
157 158
158 for_each_possible_cpu(i) { 159 for_each_possible_cpu(i) {
159 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); 160 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
160 161 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
161 sum.rx_packets += tstats->rx_packets; 162 unsigned int start;
162 sum.rx_bytes += tstats->rx_bytes; 163
163 sum.tx_packets += tstats->tx_packets; 164 do {
164 sum.tx_bytes += tstats->tx_bytes; 165 start = u64_stats_fetch_begin_bh(&tstats->syncp);
166 rx_packets = tstats->rx_packets;
167 tx_packets = tstats->tx_packets;
168 rx_bytes = tstats->rx_bytes;
169 tx_bytes = tstats->tx_bytes;
170 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
171
172 tot->rx_packets += rx_packets;
173 tot->tx_packets += tx_packets;
174 tot->rx_bytes += rx_bytes;
175 tot->tx_bytes += tx_bytes;
165 } 176 }
166 dev->stats.rx_packets = sum.rx_packets; 177
167 dev->stats.rx_bytes = sum.rx_bytes; 178 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
168 dev->stats.tx_packets = sum.tx_packets; 179 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
169 dev->stats.tx_bytes = sum.tx_bytes; 180 tot->tx_dropped = dev->stats.tx_dropped;
170 return &dev->stats; 181 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
182 tot->tx_errors = dev->stats.tx_errors;
183 tot->collisions = dev->stats.collisions;
184
185 return tot;
171} 186}
172 187
173static struct ip_tunnel * ipip_tunnel_lookup(struct net *net, 188static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
174 __be32 remote, __be32 local) 189 __be32 remote, __be32 local)
175{ 190{
176 unsigned int h0 = HASH(remote); 191 unsigned int h0 = HASH(remote);
@@ -245,7 +260,7 @@ static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
245 rcu_assign_pointer(*tp, t); 260 rcu_assign_pointer(*tp, t);
246} 261}
247 262
248static struct ip_tunnel * ipip_tunnel_locate(struct net *net, 263static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
249 struct ip_tunnel_parm *parms, int create) 264 struct ip_tunnel_parm *parms, int create)
250{ 265{
251 __be32 remote = parms->iph.daddr; 266 __be32 remote = parms->iph.daddr;
@@ -404,8 +419,10 @@ static int ipip_rcv(struct sk_buff *skb)
404 skb->pkt_type = PACKET_HOST; 419 skb->pkt_type = PACKET_HOST;
405 420
406 tstats = this_cpu_ptr(tunnel->dev->tstats); 421 tstats = this_cpu_ptr(tunnel->dev->tstats);
422 u64_stats_update_begin(&tstats->syncp);
407 tstats->rx_packets++; 423 tstats->rx_packets++;
408 tstats->rx_bytes += skb->len; 424 tstats->rx_bytes += skb->len;
425 u64_stats_update_end(&tstats->syncp);
409 426
410 __skb_tunnel_rx(skb, tunnel->dev); 427 __skb_tunnel_rx(skb, tunnel->dev);
411 428
@@ -730,7 +747,7 @@ static const struct net_device_ops ipip_netdev_ops = {
730 .ndo_start_xmit = ipip_tunnel_xmit, 747 .ndo_start_xmit = ipip_tunnel_xmit,
731 .ndo_do_ioctl = ipip_tunnel_ioctl, 748 .ndo_do_ioctl = ipip_tunnel_ioctl,
732 .ndo_change_mtu = ipip_tunnel_change_mtu, 749 .ndo_change_mtu = ipip_tunnel_change_mtu,
733 .ndo_get_stats = ipip_get_stats, 750 .ndo_get_stats64 = ipip_get_stats64,
734}; 751};
735 752
736static void ipip_dev_free(struct net_device *dev) 753static void ipip_dev_free(struct net_device *dev)
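
In the ipip.c hunk the per-cpu tunnel counters widen from unsigned long to u64. On 32-bit SMP a u64 store is two word stores, so a concurrent reader could observe a torn value; struct u64_stats_sync wraps a seqcount that costs nothing on 64-bit builds and lets 32-bit readers detect and retry a racing update. ipip_get_stats64() above is the reader side; a self-contained sketch of the same pairing:

    /* Hedged sketch: snapshot one CPU's rx_packets consistently. */
    static u64 snapshot_rx_packets(const struct pcpu_tstats *tstats)
    {
    	unsigned int start;
    	u64 packets;

    	do {
    		start = u64_stats_fetch_begin_bh(&tstats->syncp);
    		packets = tstats->rx_packets;
    	} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));

    	return packets;
    }
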
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 960fbfc3e976..a9e519ad6db5 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -949,8 +949,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
949 ret = sock_queue_rcv_skb(mroute_sk, skb); 949 ret = sock_queue_rcv_skb(mroute_sk, skb);
950 rcu_read_unlock(); 950 rcu_read_unlock();
951 if (ret < 0) { 951 if (ret < 0) {
952 if (net_ratelimit()) 952 net_warn_ratelimited("mroute: pending queue full, dropping entries\n");
953 pr_warn("mroute: pending queue full, dropping entries\n");
954 kfree_skb(skb); 953 kfree_skb(skb);
955 } 954 }
956 955
@@ -2119,15 +2118,16 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2119 rtm->rtm_src_len = 32; 2118 rtm->rtm_src_len = 32;
2120 rtm->rtm_tos = 0; 2119 rtm->rtm_tos = 0;
2121 rtm->rtm_table = mrt->id; 2120 rtm->rtm_table = mrt->id;
2122 NLA_PUT_U32(skb, RTA_TABLE, mrt->id); 2121 if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2122 goto nla_put_failure;
2123 rtm->rtm_type = RTN_MULTICAST; 2123 rtm->rtm_type = RTN_MULTICAST;
2124 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2124 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2125 rtm->rtm_protocol = RTPROT_UNSPEC; 2125 rtm->rtm_protocol = RTPROT_UNSPEC;
2126 rtm->rtm_flags = 0; 2126 rtm->rtm_flags = 0;
2127 2127
2128 NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin); 2128 if (nla_put_be32(skb, RTA_SRC, c->mfc_origin) ||
2129 NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp); 2129 nla_put_be32(skb, RTA_DST, c->mfc_mcastgrp))
2130 2130 goto nla_put_failure;
2131 if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0) 2131 if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
2132 goto nla_put_failure; 2132 goto nla_put_failure;
2133 2133
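
The NLA_PUT_U32()/NLA_PUT_BE32() conversions in ipmr.c (and in the conntrack and route.c hunks below) are mechanical but worth a note: the removed macros hid a `goto nla_put_failure` inside their expansion, so spelling out `if (nla_put_*()) goto nla_put_failure;` makes the control flow visible and lets adjacent puts be ||-chained as done above. A hedged reconstruction of the old macro's shape, not a quote of the removed header:

    #define NLA_PUT_U32(skb, attrtype, value)		\
    do {						\
    	if (nla_put_u32(skb, attrtype, value))		\
    		goto nla_put_failure;			\
    } while (0)
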
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 4f47e064e262..ed1b36783192 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -12,7 +12,7 @@
12#include <net/netfilter/nf_queue.h> 12#include <net/netfilter/nf_queue.h>
13 13
14/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ 14/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
15int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) 15int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type)
16{ 16{
17 struct net *net = dev_net(skb_dst(skb)->dev); 17 struct net *net = dev_net(skb_dst(skb)->dev);
18 const struct iphdr *iph = ip_hdr(skb); 18 const struct iphdr *iph = ip_hdr(skb);
@@ -237,13 +237,3 @@ static void ipv4_netfilter_fini(void)
237 237
238module_init(ipv4_netfilter_init); 238module_init(ipv4_netfilter_init);
239module_exit(ipv4_netfilter_fini); 239module_exit(ipv4_netfilter_fini);
240
241#ifdef CONFIG_SYSCTL
242struct ctl_path nf_net_ipv4_netfilter_sysctl_path[] = {
243 { .procname = "net", },
244 { .procname = "ipv4", },
245 { .procname = "netfilter", },
246 { }
247};
248EXPORT_SYMBOL_GPL(nf_net_ipv4_netfilter_sysctl_path);
249#endif /* CONFIG_SYSCTL */
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 240b68469a7a..c20674dc9452 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -66,6 +66,3 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
66 66
67# just filtering instance of ARP tables for now 67# just filtering instance of ARP tables for now
68obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o 68obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
69
70obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o
71
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index fd7a3f68917f..97e61eadf580 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -221,9 +221,8 @@ static inline int arp_checkentry(const struct arpt_arp *arp)
221static unsigned int 221static unsigned int
222arpt_error(struct sk_buff *skb, const struct xt_action_param *par) 222arpt_error(struct sk_buff *skb, const struct xt_action_param *par)
223{ 223{
224 if (net_ratelimit()) 224 net_err_ratelimited("arp_tables: error: '%s'\n",
225 pr_err("arp_tables: error: '%s'\n", 225 (const char *)par->targinfo);
226 (const char *)par->targinfo);
227 226
228 return NF_DROP; 227 return NF_DROP;
229} 228}
@@ -303,7 +302,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
303 if (v < 0) { 302 if (v < 0) {
304 /* Pop from stack? */ 303 /* Pop from stack? */
305 if (v != XT_RETURN) { 304 if (v != XT_RETURN) {
306 verdict = (unsigned)(-v) - 1; 305 verdict = (unsigned int)(-v) - 1;
307 break; 306 break;
308 } 307 }
309 e = back; 308 e = back;
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
deleted file mode 100644
index 94d45e1f8882..000000000000
--- a/net/ipv4/netfilter/ip_queue.c
+++ /dev/null
@@ -1,639 +0,0 @@
1/*
2 * This is a module which is used for queueing IPv4 packets and
3 * communicating with userspace via netlink.
4 *
5 * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
6 * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12#include <linux/module.h>
13#include <linux/skbuff.h>
14#include <linux/init.h>
15#include <linux/ip.h>
16#include <linux/notifier.h>
17#include <linux/netdevice.h>
18#include <linux/netfilter.h>
19#include <linux/netfilter_ipv4/ip_queue.h>
20#include <linux/netfilter_ipv4/ip_tables.h>
21#include <linux/netlink.h>
22#include <linux/spinlock.h>
23#include <linux/sysctl.h>
24#include <linux/proc_fs.h>
25#include <linux/seq_file.h>
26#include <linux/security.h>
27#include <linux/net.h>
28#include <linux/mutex.h>
29#include <linux/slab.h>
30#include <net/net_namespace.h>
31#include <net/sock.h>
32#include <net/route.h>
33#include <net/netfilter/nf_queue.h>
34#include <net/ip.h>
35
36#define IPQ_QMAX_DEFAULT 1024
37#define IPQ_PROC_FS_NAME "ip_queue"
38#define NET_IPQ_QMAX 2088
39#define NET_IPQ_QMAX_NAME "ip_queue_maxlen"
40
41typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
42
43static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
44static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
45static DEFINE_SPINLOCK(queue_lock);
46static int peer_pid __read_mostly;
47static unsigned int copy_range __read_mostly;
48static unsigned int queue_total;
49static unsigned int queue_dropped = 0;
50static unsigned int queue_user_dropped = 0;
51static struct sock *ipqnl __read_mostly;
52static LIST_HEAD(queue_list);
53static DEFINE_MUTEX(ipqnl_mutex);
54
55static inline void
56__ipq_enqueue_entry(struct nf_queue_entry *entry)
57{
58 list_add_tail(&entry->list, &queue_list);
59 queue_total++;
60}
61
62static inline int
63__ipq_set_mode(unsigned char mode, unsigned int range)
64{
65 int status = 0;
66
67 switch(mode) {
68 case IPQ_COPY_NONE:
69 case IPQ_COPY_META:
70 copy_mode = mode;
71 copy_range = 0;
72 break;
73
74 case IPQ_COPY_PACKET:
75 if (range > 0xFFFF)
76 range = 0xFFFF;
77 copy_range = range;
78 copy_mode = mode;
79 break;
80
81 default:
82 status = -EINVAL;
83
84 }
85 return status;
86}
87
88static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);
89
90static inline void
91__ipq_reset(void)
92{
93 peer_pid = 0;
94 net_disable_timestamp();
95 __ipq_set_mode(IPQ_COPY_NONE, 0);
96 __ipq_flush(NULL, 0);
97}
98
99static struct nf_queue_entry *
100ipq_find_dequeue_entry(unsigned long id)
101{
102 struct nf_queue_entry *entry = NULL, *i;
103
104 spin_lock_bh(&queue_lock);
105
106 list_for_each_entry(i, &queue_list, list) {
107 if ((unsigned long)i == id) {
108 entry = i;
109 break;
110 }
111 }
112
113 if (entry) {
114 list_del(&entry->list);
115 queue_total--;
116 }
117
118 spin_unlock_bh(&queue_lock);
119 return entry;
120}
121
122static void
123__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
124{
125 struct nf_queue_entry *entry, *next;
126
127 list_for_each_entry_safe(entry, next, &queue_list, list) {
128 if (!cmpfn || cmpfn(entry, data)) {
129 list_del(&entry->list);
130 queue_total--;
131 nf_reinject(entry, NF_DROP);
132 }
133 }
134}
135
136static void
137ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
138{
139 spin_lock_bh(&queue_lock);
140 __ipq_flush(cmpfn, data);
141 spin_unlock_bh(&queue_lock);
142}
143
144static struct sk_buff *
145ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
146{
147 sk_buff_data_t old_tail;
148 size_t size = 0;
149 size_t data_len = 0;
150 struct sk_buff *skb;
151 struct ipq_packet_msg *pmsg;
152 struct nlmsghdr *nlh;
153 struct timeval tv;
154
155 switch (ACCESS_ONCE(copy_mode)) {
156 case IPQ_COPY_META:
157 case IPQ_COPY_NONE:
158 size = NLMSG_SPACE(sizeof(*pmsg));
159 break;
160
161 case IPQ_COPY_PACKET:
162 if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&
163 (*errp = skb_checksum_help(entry->skb)))
164 return NULL;
165
166 data_len = ACCESS_ONCE(copy_range);
167 if (data_len == 0 || data_len > entry->skb->len)
168 data_len = entry->skb->len;
169
170 size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
171 break;
172
173 default:
174 *errp = -EINVAL;
175 return NULL;
176 }
177
178 skb = alloc_skb(size, GFP_ATOMIC);
179 if (!skb)
180 goto nlmsg_failure;
181
182 old_tail = skb->tail;
183 nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
184 pmsg = NLMSG_DATA(nlh);
185 memset(pmsg, 0, sizeof(*pmsg));
186
187 pmsg->packet_id = (unsigned long )entry;
188 pmsg->data_len = data_len;
189 tv = ktime_to_timeval(entry->skb->tstamp);
190 pmsg->timestamp_sec = tv.tv_sec;
191 pmsg->timestamp_usec = tv.tv_usec;
192 pmsg->mark = entry->skb->mark;
193 pmsg->hook = entry->hook;
194 pmsg->hw_protocol = entry->skb->protocol;
195
196 if (entry->indev)
197 strcpy(pmsg->indev_name, entry->indev->name);
198 else
199 pmsg->indev_name[0] = '\0';
200
201 if (entry->outdev)
202 strcpy(pmsg->outdev_name, entry->outdev->name);
203 else
204 pmsg->outdev_name[0] = '\0';
205
206 if (entry->indev && entry->skb->dev &&
207 entry->skb->mac_header != entry->skb->network_header) {
208 pmsg->hw_type = entry->skb->dev->type;
209 pmsg->hw_addrlen = dev_parse_header(entry->skb,
210 pmsg->hw_addr);
211 }
212
213 if (data_len)
214 if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
215 BUG();
216
217 nlh->nlmsg_len = skb->tail - old_tail;
218 return skb;
219
220nlmsg_failure:
221 kfree_skb(skb);
222 *errp = -EINVAL;
223 printk(KERN_ERR "ip_queue: error creating packet message\n");
224 return NULL;
225}
226
227static int
228ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
229{
230 int status = -EINVAL;
231 struct sk_buff *nskb;
232
233 if (copy_mode == IPQ_COPY_NONE)
234 return -EAGAIN;
235
236 nskb = ipq_build_packet_message(entry, &status);
237 if (nskb == NULL)
238 return status;
239
240 spin_lock_bh(&queue_lock);
241
242 if (!peer_pid)
243 goto err_out_free_nskb;
244
245 if (queue_total >= queue_maxlen) {
246 queue_dropped++;
247 status = -ENOSPC;
248 if (net_ratelimit())
249 printk (KERN_WARNING "ip_queue: full at %d entries, "
250 "dropping packets(s). Dropped: %d\n", queue_total,
251 queue_dropped);
252 goto err_out_free_nskb;
253 }
254
255 /* netlink_unicast will either free the nskb or attach it to a socket */
256 status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
257 if (status < 0) {
258 queue_user_dropped++;
259 goto err_out_unlock;
260 }
261
262 __ipq_enqueue_entry(entry);
263
264 spin_unlock_bh(&queue_lock);
265 return status;
266
267err_out_free_nskb:
268 kfree_skb(nskb);
269
270err_out_unlock:
271 spin_unlock_bh(&queue_lock);
272 return status;
273}
274
275static int
276ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
277{
278 int diff;
279 struct iphdr *user_iph = (struct iphdr *)v->payload;
280 struct sk_buff *nskb;
281
282 if (v->data_len < sizeof(*user_iph))
283 return 0;
284 diff = v->data_len - e->skb->len;
285 if (diff < 0) {
286 if (pskb_trim(e->skb, v->data_len))
287 return -ENOMEM;
288 } else if (diff > 0) {
289 if (v->data_len > 0xFFFF)
290 return -EINVAL;
291 if (diff > skb_tailroom(e->skb)) {
292 nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
293 diff, GFP_ATOMIC);
294 if (!nskb) {
295 printk(KERN_WARNING "ip_queue: error "
296 "in mangle, dropping packet\n");
297 return -ENOMEM;
298 }
299 kfree_skb(e->skb);
300 e->skb = nskb;
301 }
302 skb_put(e->skb, diff);
303 }
304 if (!skb_make_writable(e->skb, v->data_len))
305 return -ENOMEM;
306 skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
307 e->skb->ip_summed = CHECKSUM_NONE;
308
309 return 0;
310}
311
312static int
313ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
314{
315 struct nf_queue_entry *entry;
316
317 if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN)
318 return -EINVAL;
319
320 entry = ipq_find_dequeue_entry(vmsg->id);
321 if (entry == NULL)
322 return -ENOENT;
323 else {
324 int verdict = vmsg->value;
325
326 if (vmsg->data_len && vmsg->data_len == len)
327 if (ipq_mangle_ipv4(vmsg, entry) < 0)
328 verdict = NF_DROP;
329
330 nf_reinject(entry, verdict);
331 return 0;
332 }
333}
334
335static int
336ipq_set_mode(unsigned char mode, unsigned int range)
337{
338 int status;
339
340 spin_lock_bh(&queue_lock);
341 status = __ipq_set_mode(mode, range);
342 spin_unlock_bh(&queue_lock);
343 return status;
344}
345
346static int
347ipq_receive_peer(struct ipq_peer_msg *pmsg,
348 unsigned char type, unsigned int len)
349{
350 int status = 0;
351
352 if (len < sizeof(*pmsg))
353 return -EINVAL;
354
355 switch (type) {
356 case IPQM_MODE:
357 status = ipq_set_mode(pmsg->msg.mode.value,
358 pmsg->msg.mode.range);
359 break;
360
361 case IPQM_VERDICT:
362 status = ipq_set_verdict(&pmsg->msg.verdict,
363 len - sizeof(*pmsg));
364 break;
365 default:
366 status = -EINVAL;
367 }
368 return status;
369}
370
371static int
372dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
373{
374 if (entry->indev)
375 if (entry->indev->ifindex == ifindex)
376 return 1;
377 if (entry->outdev)
378 if (entry->outdev->ifindex == ifindex)
379 return 1;
380#ifdef CONFIG_BRIDGE_NETFILTER
381 if (entry->skb->nf_bridge) {
382 if (entry->skb->nf_bridge->physindev &&
383 entry->skb->nf_bridge->physindev->ifindex == ifindex)
384 return 1;
385 if (entry->skb->nf_bridge->physoutdev &&
386 entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
387 return 1;
388 }
389#endif
390 return 0;
391}
392
393static void
394ipq_dev_drop(int ifindex)
395{
396 ipq_flush(dev_cmp, ifindex);
397}
398
399#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
400
401static inline void
402__ipq_rcv_skb(struct sk_buff *skb)
403{
404 int status, type, pid, flags;
405 unsigned int nlmsglen, skblen;
406 struct nlmsghdr *nlh;
407 bool enable_timestamp = false;
408
409 skblen = skb->len;
410 if (skblen < sizeof(*nlh))
411 return;
412
413 nlh = nlmsg_hdr(skb);
414 nlmsglen = nlh->nlmsg_len;
415 if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
416 return;
417
418 pid = nlh->nlmsg_pid;
419 flags = nlh->nlmsg_flags;
420
421 if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
422 RCV_SKB_FAIL(-EINVAL);
423
424 if (flags & MSG_TRUNC)
425 RCV_SKB_FAIL(-ECOMM);
426
427 type = nlh->nlmsg_type;
428 if (type < NLMSG_NOOP || type >= IPQM_MAX)
429 RCV_SKB_FAIL(-EINVAL);
430
431 if (type <= IPQM_BASE)
432 return;
433
434 if (!capable(CAP_NET_ADMIN))
435 RCV_SKB_FAIL(-EPERM);
436
437 spin_lock_bh(&queue_lock);
438
439 if (peer_pid) {
440 if (peer_pid != pid) {
441 spin_unlock_bh(&queue_lock);
442 RCV_SKB_FAIL(-EBUSY);
443 }
444 } else {
445 enable_timestamp = true;
446 peer_pid = pid;
447 }
448
449 spin_unlock_bh(&queue_lock);
450 if (enable_timestamp)
451 net_enable_timestamp();
452 status = ipq_receive_peer(NLMSG_DATA(nlh), type,
453 nlmsglen - NLMSG_LENGTH(0));
454 if (status < 0)
455 RCV_SKB_FAIL(status);
456
457 if (flags & NLM_F_ACK)
458 netlink_ack(skb, nlh, 0);
459}
460
461static void
462ipq_rcv_skb(struct sk_buff *skb)
463{
464 mutex_lock(&ipqnl_mutex);
465 __ipq_rcv_skb(skb);
466 mutex_unlock(&ipqnl_mutex);
467}
468
469static int
470ipq_rcv_dev_event(struct notifier_block *this,
471 unsigned long event, void *ptr)
472{
473 struct net_device *dev = ptr;
474
475 if (!net_eq(dev_net(dev), &init_net))
476 return NOTIFY_DONE;
477
478 /* Drop any packets associated with the downed device */
479 if (event == NETDEV_DOWN)
480 ipq_dev_drop(dev->ifindex);
481 return NOTIFY_DONE;
482}
483
484static struct notifier_block ipq_dev_notifier = {
485 .notifier_call = ipq_rcv_dev_event,
486};
487
488static int
489ipq_rcv_nl_event(struct notifier_block *this,
490 unsigned long event, void *ptr)
491{
492 struct netlink_notify *n = ptr;
493
494 if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) {
495 spin_lock_bh(&queue_lock);
496 if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))
497 __ipq_reset();
498 spin_unlock_bh(&queue_lock);
499 }
500 return NOTIFY_DONE;
501}
502
503static struct notifier_block ipq_nl_notifier = {
504 .notifier_call = ipq_rcv_nl_event,
505};
506
507#ifdef CONFIG_SYSCTL
508static struct ctl_table_header *ipq_sysctl_header;
509
510static ctl_table ipq_table[] = {
511 {
512 .procname = NET_IPQ_QMAX_NAME,
513 .data = &queue_maxlen,
514 .maxlen = sizeof(queue_maxlen),
515 .mode = 0644,
516 .proc_handler = proc_dointvec
517 },
518 { }
519};
520#endif
521
522#ifdef CONFIG_PROC_FS
523static int ip_queue_show(struct seq_file *m, void *v)
524{
525 spin_lock_bh(&queue_lock);
526
527 seq_printf(m,
528 "Peer PID : %d\n"
529 "Copy mode : %hu\n"
530 "Copy range : %u\n"
531 "Queue length : %u\n"
532 "Queue max. length : %u\n"
533 "Queue dropped : %u\n"
534 "Netlink dropped : %u\n",
535 peer_pid,
536 copy_mode,
537 copy_range,
538 queue_total,
539 queue_maxlen,
540 queue_dropped,
541 queue_user_dropped);
542
543 spin_unlock_bh(&queue_lock);
544 return 0;
545}
546
547static int ip_queue_open(struct inode *inode, struct file *file)
548{
549 return single_open(file, ip_queue_show, NULL);
550}
551
552static const struct file_operations ip_queue_proc_fops = {
553 .open = ip_queue_open,
554 .read = seq_read,
555 .llseek = seq_lseek,
556 .release = single_release,
557 .owner = THIS_MODULE,
558};
559#endif
560
561static const struct nf_queue_handler nfqh = {
562 .name = "ip_queue",
563 .outfn = &ipq_enqueue_packet,
564};
565
566static int __init ip_queue_init(void)
567{
568 int status = -ENOMEM;
569 struct proc_dir_entry *proc __maybe_unused;
570
571 netlink_register_notifier(&ipq_nl_notifier);
572 ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,
573 ipq_rcv_skb, NULL, THIS_MODULE);
574 if (ipqnl == NULL) {
575 printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
576 goto cleanup_netlink_notifier;
577 }
578
579#ifdef CONFIG_PROC_FS
580 proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net,
581 &ip_queue_proc_fops);
582 if (!proc) {
583 printk(KERN_ERR "ip_queue: failed to create proc entry\n");
584 goto cleanup_ipqnl;
585 }
586#endif
587 register_netdevice_notifier(&ipq_dev_notifier);
588#ifdef CONFIG_SYSCTL
589 ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table);
590#endif
591 status = nf_register_queue_handler(NFPROTO_IPV4, &nfqh);
592 if (status < 0) {
593 printk(KERN_ERR "ip_queue: failed to register queue handler\n");
594 goto cleanup_sysctl;
595 }
596 return status;
597
598cleanup_sysctl:
599#ifdef CONFIG_SYSCTL
600 unregister_sysctl_table(ipq_sysctl_header);
601#endif
602 unregister_netdevice_notifier(&ipq_dev_notifier);
603 proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
604cleanup_ipqnl: __maybe_unused
605 netlink_kernel_release(ipqnl);
606 mutex_lock(&ipqnl_mutex);
607 mutex_unlock(&ipqnl_mutex);
608
609cleanup_netlink_notifier:
610 netlink_unregister_notifier(&ipq_nl_notifier);
611 return status;
612}
613
614static void __exit ip_queue_fini(void)
615{
616 nf_unregister_queue_handlers(&nfqh);
617
618 ipq_flush(NULL, 0);
619
620#ifdef CONFIG_SYSCTL
621 unregister_sysctl_table(ipq_sysctl_header);
622#endif
623 unregister_netdevice_notifier(&ipq_dev_notifier);
624 proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
625
626 netlink_kernel_release(ipqnl);
627 mutex_lock(&ipqnl_mutex);
628 mutex_unlock(&ipqnl_mutex);
629
630 netlink_unregister_notifier(&ipq_nl_notifier);
631}
632
633MODULE_DESCRIPTION("IPv4 packet queue handler");
634MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
635MODULE_LICENSE("GPL");
636MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_FIREWALL);
637
638module_init(ip_queue_init);
639module_exit(ip_queue_fini);
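
This removes the legacy ip_queue module outright, together with its sysctl and proc plumbing. Its job, queueing packets to a userspace peer over netlink and reinjecting them with the peer's verdict, is the same job the newer nfnetlink_queue subsystem performs, so a userspace consumer would typically migrate to libnetfilter_queue. A hedged sketch of that library's verdict callback (all names below are libnetfilter_queue API, nothing from this diff):

    #include <stdint.h>
    #include <arpa/inet.h>
    #include <linux/netfilter.h>	/* NF_ACCEPT */
    #include <libnetfilter_queue/libnetfilter_queue.h>

    /* Accept every queued packet; a real peer would inspect it first. */
    static int verdict_cb(struct nfq_q_handle *qh, struct nfgenmsg *nfmsg,
    		      struct nfq_data *nfa, void *data)
    {
    	uint32_t id = 0;
    	struct nfqnl_msg_packet_hdr *ph = nfq_get_msg_packet_hdr(nfa);

    	if (ph)
    		id = ntohl(ph->packet_id);
    	return nfq_set_verdict(qh, id, NF_ACCEPT, 0, NULL);
    }
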
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 24e556e83a3b..170b1fdd6b72 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -153,8 +153,7 @@ ip_checkentry(const struct ipt_ip *ip)
153static unsigned int 153static unsigned int
154ipt_error(struct sk_buff *skb, const struct xt_action_param *par) 154ipt_error(struct sk_buff *skb, const struct xt_action_param *par)
155{ 155{
156 if (net_ratelimit()) 156 net_info_ratelimited("error: `%s'\n", (const char *)par->targinfo);
157 pr_info("error: `%s'\n", (const char *)par->targinfo);
158 157
159 return NF_DROP; 158 return NF_DROP;
160} 159}
@@ -377,7 +376,7 @@ ipt_do_table(struct sk_buff *skb,
377 if (v < 0) { 376 if (v < 0) {
378 /* Pop from stack? */ 377 /* Pop from stack? */
379 if (v != XT_RETURN) { 378 if (v != XT_RETURN) {
380 verdict = (unsigned)(-v) - 1; 379 verdict = (unsigned int)(-v) - 1;
381 break; 380 break;
382 } 381 }
383 if (*stackptr <= origptr) { 382 if (*stackptr <= origptr) {
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index a639967eb727..fe5daea5214d 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -246,8 +246,7 @@ clusterip_hashfn(const struct sk_buff *skb,
246 dport = ports[1]; 246 dport = ports[1];
247 } 247 }
248 } else { 248 } else {
249 if (net_ratelimit()) 249 net_info_ratelimited("unknown protocol %u\n", iph->protocol);
250 pr_info("unknown protocol %u\n", iph->protocol);
251 } 250 }
252 251
253 switch (config->hash_mode) { 252 switch (config->hash_mode) {
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index cf73cc70ed2d..91747d4ebc26 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -311,8 +311,9 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
311static int ipv4_tuple_to_nlattr(struct sk_buff *skb, 311static int ipv4_tuple_to_nlattr(struct sk_buff *skb,
312 const struct nf_conntrack_tuple *tuple) 312 const struct nf_conntrack_tuple *tuple)
313{ 313{
314 NLA_PUT_BE32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip); 314 if (nla_put_be32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) ||
315 NLA_PUT_BE32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip); 315 nla_put_be32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip))
316 goto nla_put_failure;
316 return 0; 317 return 0;
317 318
318nla_put_failure: 319nla_put_failure:
@@ -364,7 +365,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
364 .nla_policy = ipv4_nla_policy, 365 .nla_policy = ipv4_nla_policy,
365#endif 366#endif
366#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) 367#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
367 .ctl_table_path = nf_net_ipv4_netfilter_sysctl_path, 368 .ctl_table_path = "net/ipv4/netfilter",
368 .ctl_table = ip_ct_sysctl_table, 369 .ctl_table = ip_ct_sysctl_table,
369#endif 370#endif
370 .me = THIS_MODULE, 371 .me = THIS_MODULE,
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 7cbe9cb261c2..0847e373d33c 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -228,10 +228,10 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
228static int icmp_tuple_to_nlattr(struct sk_buff *skb, 228static int icmp_tuple_to_nlattr(struct sk_buff *skb,
229 const struct nf_conntrack_tuple *t) 229 const struct nf_conntrack_tuple *t)
230{ 230{
231 NLA_PUT_BE16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id); 231 if (nla_put_be16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id) ||
232 NLA_PUT_U8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type); 232 nla_put_u8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type) ||
233 NLA_PUT_U8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code); 233 nla_put_u8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code))
234 234 goto nla_put_failure;
235 return 0; 235 return 0;
236 236
237nla_put_failure: 237nla_put_failure:
@@ -293,8 +293,8 @@ icmp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
293{ 293{
294 const unsigned int *timeout = data; 294 const unsigned int *timeout = data;
295 295
296 NLA_PUT_BE32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ)); 296 if (nla_put_be32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ)))
297 297 goto nla_put_failure;
298 return 0; 298 return 0;
299 299
300nla_put_failure: 300nla_put_failure:
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 82536701e3a3..cad29c121318 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -42,9 +42,7 @@ static int set_addr(struct sk_buff *skb,
42 if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, 42 if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
43 addroff, sizeof(buf), 43 addroff, sizeof(buf),
44 (char *) &buf, sizeof(buf))) { 44 (char *) &buf, sizeof(buf))) {
45 if (net_ratelimit()) 45 net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_tcp_packet error\n");
46 pr_notice("nf_nat_h323: nf_nat_mangle_tcp_packet"
47 " error\n");
48 return -1; 46 return -1;
49 } 47 }
50 48
@@ -58,9 +56,7 @@ static int set_addr(struct sk_buff *skb,
58 if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, 56 if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
59 addroff, sizeof(buf), 57 addroff, sizeof(buf),
60 (char *) &buf, sizeof(buf))) { 58 (char *) &buf, sizeof(buf))) {
61 if (net_ratelimit()) 59 net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_udp_packet error\n");
62 pr_notice("nf_nat_h323: nf_nat_mangle_udp_packet"
63 " error\n");
64 return -1; 60 return -1;
65 } 61 }
66 /* nf_nat_mangle_udp_packet uses skb_make_writable() to copy 62 /* nf_nat_mangle_udp_packet uses skb_make_writable() to copy
@@ -214,8 +210,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
214 210
215 /* Run out of expectations */ 211 /* Run out of expectations */
216 if (i >= H323_RTP_CHANNEL_MAX) { 212 if (i >= H323_RTP_CHANNEL_MAX) {
217 if (net_ratelimit()) 213 net_notice_ratelimited("nf_nat_h323: out of expectations\n");
218 pr_notice("nf_nat_h323: out of expectations\n");
219 return 0; 214 return 0;
220 } 215 }
221 216
@@ -244,8 +239,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
244 } 239 }
245 240
246 if (nated_port == 0) { /* No port available */ 241 if (nated_port == 0) { /* No port available */
247 if (net_ratelimit()) 242 net_notice_ratelimited("nf_nat_h323: out of RTP ports\n");
248 pr_notice("nf_nat_h323: out of RTP ports\n");
249 return 0; 243 return 0;
250 } 244 }
251 245
@@ -308,8 +302,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,
308 } 302 }
309 303
310 if (nated_port == 0) { /* No port available */ 304 if (nated_port == 0) { /* No port available */
311 if (net_ratelimit()) 305 net_notice_ratelimited("nf_nat_h323: out of TCP ports\n");
312 pr_notice("nf_nat_h323: out of TCP ports\n");
313 return 0; 306 return 0;
314 } 307 }
315 308
@@ -365,8 +358,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
365 } 358 }
366 359
367 if (nated_port == 0) { /* No port available */ 360 if (nated_port == 0) { /* No port available */
368 if (net_ratelimit()) 361 net_notice_ratelimited("nf_nat_q931: out of TCP ports\n");
369 pr_notice("nf_nat_q931: out of TCP ports\n");
370 return 0; 362 return 0;
371 } 363 }
372 364
@@ -456,8 +448,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
456 } 448 }
457 449
458 if (nated_port == 0) { /* No port available */ 450 if (nated_port == 0) { /* No port available */
459 if (net_ratelimit()) 451 net_notice_ratelimited("nf_nat_ras: out of TCP ports\n");
460 pr_notice("nf_nat_ras: out of TCP ports\n");
461 return 0; 452 return 0;
462 } 453 }
463 454
@@ -545,8 +536,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
545 } 536 }
546 537
547 if (nated_port == 0) { /* No port available */ 538 if (nated_port == 0) { /* No port available */
548 if (net_ratelimit()) 539 net_notice_ratelimited("nf_nat_q931: out of TCP ports\n");
549 pr_notice("nf_nat_q931: out of TCP ports\n");
550 return 0; 540 return 0;
551 } 541 }
552 542
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index 57932c43960e..ea4a23813d26 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -283,7 +283,7 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff,
283 __be32 newip; 283 __be32 newip;
284 u_int16_t port; 284 u_int16_t port;
285 char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; 285 char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
286 unsigned buflen; 286 unsigned int buflen;
287 287
288 /* Connection will come from reply */ 288 /* Connection will come from reply */
289 if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip) 289 if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip)
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 2133c30a4a5f..746edec8b86e 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1206,8 +1206,7 @@ static int snmp_translate(struct nf_conn *ct,
1206 1206
1207 if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr), 1207 if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr),
1208 paylen, &map, &udph->check)) { 1208 paylen, &map, &udph->check)) {
1209 if (net_ratelimit()) 1209 net_warn_ratelimited("bsalg: parser failed\n");
1210 printk(KERN_WARNING "bsalg: parser failed\n");
1211 return NF_DROP; 1210 return NF_DROP;
1212 } 1211 }
1213 return NF_ACCEPT; 1212 return NF_ACCEPT;
@@ -1241,9 +1240,8 @@ static int help(struct sk_buff *skb, unsigned int protoff,
1241 * can mess around with the payload. 1240 * can mess around with the payload.
1242 */ 1241 */
1243 if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) { 1242 if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) {
1244 if (net_ratelimit()) 1243 net_warn_ratelimited("SNMP: dropping malformed packet src=%pI4 dst=%pI4\n",
1245 printk(KERN_WARNING "SNMP: dropping malformed packet src=%pI4 dst=%pI4\n", 1244 &iph->saddr, &iph->daddr);
1246 &iph->saddr, &iph->daddr);
1247 return NF_DROP; 1245 return NF_DROP;
1248 } 1246 }
1249 1247
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 50009c787bcd..6e930c7174dd 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -51,15 +51,16 @@ static struct ping_table ping_table;
51 51
52static u16 ping_port_rover; 52static u16 ping_port_rover;
53 53
54static inline int ping_hashfn(struct net *net, unsigned num, unsigned mask) 54static inline int ping_hashfn(struct net *net, unsigned int num, unsigned int mask)
55{ 55{
56 int res = (num + net_hash_mix(net)) & mask; 56 int res = (num + net_hash_mix(net)) & mask;
57
57 pr_debug("hash(%d) = %d\n", num, res); 58 pr_debug("hash(%d) = %d\n", num, res);
58 return res; 59 return res;
59} 60}
60 61
61static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table, 62static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table,
62 struct net *net, unsigned num) 63 struct net *net, unsigned int num)
63{ 64{
64 return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)]; 65 return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)];
65} 66}
@@ -188,7 +189,8 @@ static void inet_get_ping_group_range_net(struct net *net, gid_t *low,
188 gid_t *high) 189 gid_t *high)
189{ 190{
190 gid_t *data = net->ipv4.sysctl_ping_group_range; 191 gid_t *data = net->ipv4.sysctl_ping_group_range;
191 unsigned seq; 192 unsigned int seq;
193
192 do { 194 do {
193 seq = read_seqbegin(&sysctl_local_ports.lock); 195 seq = read_seqbegin(&sysctl_local_ports.lock);
194 196
@@ -410,7 +412,7 @@ struct pingfakehdr {
410 __wsum wcheck; 412 __wsum wcheck;
411}; 413};
412 414
413static int ping_getfrag(void *from, char * to, 415static int ping_getfrag(void *from, char *to,
414 int offset, int fraglen, int odd, struct sk_buff *skb) 416 int offset, int fraglen, int odd, struct sk_buff *skb)
415{ 417{
416 struct pingfakehdr *pfh = (struct pingfakehdr *)from; 418 struct pingfakehdr *pfh = (struct pingfakehdr *)from;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index bbd604c68e68..4032b818f3e4 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -288,7 +288,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
288 read_unlock(&raw_v4_hashinfo.lock); 288 read_unlock(&raw_v4_hashinfo.lock);
289} 289}
290 290
291static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb) 291static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
292{ 292{
293 /* Charge it to the socket. */ 293 /* Charge it to the socket. */
294 294
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 167ea10b521a..76e5880cdb07 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -109,6 +109,7 @@
109#include <net/rtnetlink.h> 109#include <net/rtnetlink.h>
110#ifdef CONFIG_SYSCTL 110#ifdef CONFIG_SYSCTL
111#include <linux/sysctl.h> 111#include <linux/sysctl.h>
112#include <linux/kmemleak.h>
112#endif 113#endif
113#include <net/secure_seq.h> 114#include <net/secure_seq.h>
114 115
@@ -229,7 +230,7 @@ const __u8 ip_tos2prio[16] = {
229 TC_PRIO_INTERACTIVE_BULK, 230 TC_PRIO_INTERACTIVE_BULK,
230 ECN_OR_COST(INTERACTIVE_BULK) 231 ECN_OR_COST(INTERACTIVE_BULK)
231}; 232};
232 233EXPORT_SYMBOL(ip_tos2prio);
233 234
234/* 235/*
235 * Route cache. 236 * Route cache.
@@ -296,7 +297,7 @@ static inline void rt_hash_lock_init(void)
296#endif 297#endif
297 298
298static struct rt_hash_bucket *rt_hash_table __read_mostly; 299static struct rt_hash_bucket *rt_hash_table __read_mostly;
299static unsigned rt_hash_mask __read_mostly; 300static unsigned int rt_hash_mask __read_mostly;
300static unsigned int rt_hash_log __read_mostly; 301static unsigned int rt_hash_log __read_mostly;
301 302
302static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); 303static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
@@ -959,8 +960,7 @@ void rt_cache_flush_batch(struct net *net)
959 960
960static void rt_emergency_hash_rebuild(struct net *net) 961static void rt_emergency_hash_rebuild(struct net *net)
961{ 962{
962 if (net_ratelimit()) 963 net_warn_ratelimited("Route hash chain too long!\n");
963 pr_warn("Route hash chain too long!\n");
964 rt_cache_invalidate(net); 964 rt_cache_invalidate(net);
965} 965}
966 966
@@ -1083,8 +1083,7 @@ static int rt_garbage_collect(struct dst_ops *ops)
1083 goto out; 1083 goto out;
1084 if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size) 1084 if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size)
1085 goto out; 1085 goto out;
1086 if (net_ratelimit()) 1086 net_warn_ratelimited("dst cache overflow\n");
1087 pr_warn("dst cache overflow\n");
1088 RT_CACHE_STAT_INC(gc_dst_overflow); 1087 RT_CACHE_STAT_INC(gc_dst_overflow);
1089 return 1; 1088 return 1;
1090 1089
@@ -1143,7 +1142,7 @@ static int rt_bind_neighbour(struct rtable *rt)
1143 return 0; 1142 return 0;
1144} 1143}
1145 1144
1146static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt, 1145static struct rtable *rt_intern_hash(unsigned int hash, struct rtable *rt,
1147 struct sk_buff *skb, int ifindex) 1146 struct sk_buff *skb, int ifindex)
1148{ 1147{
1149 struct rtable *rth, *cand; 1148 struct rtable *rth, *cand;
@@ -1181,8 +1180,7 @@ restart:
1181 if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { 1180 if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) {
1182 int err = rt_bind_neighbour(rt); 1181 int err = rt_bind_neighbour(rt);
1183 if (err) { 1182 if (err) {
1184 if (net_ratelimit()) 1183 net_warn_ratelimited("Neighbour table failure & not caching routes\n");
1185 pr_warn("Neighbour table failure & not caching routes\n");
1186 ip_rt_put(rt); 1184 ip_rt_put(rt);
1187 return ERR_PTR(err); 1185 return ERR_PTR(err);
1188 } 1186 }
@@ -1298,8 +1296,7 @@ restart:
1298 goto restart; 1296 goto restart;
1299 } 1297 }
1300 1298
1301 if (net_ratelimit()) 1299 net_warn_ratelimited("Neighbour table overflow\n");
1302 pr_warn("Neighbour table overflow\n");
1303 rt_drop(rt); 1300 rt_drop(rt);
1304 return ERR_PTR(-ENOBUFS); 1301 return ERR_PTR(-ENOBUFS);
1305 } 1302 }
@@ -1377,14 +1374,13 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
1377 return; 1374 return;
1378 } 1375 }
1379 } else if (!rt) 1376 } else if (!rt)
1380 printk(KERN_DEBUG "rt_bind_peer(0) @%p\n", 1377 pr_debug("rt_bind_peer(0) @%p\n", __builtin_return_address(0));
1381 __builtin_return_address(0));
1382 1378
1383 ip_select_fb_ident(iph); 1379 ip_select_fb_ident(iph);
1384} 1380}
1385EXPORT_SYMBOL(__ip_select_ident); 1381EXPORT_SYMBOL(__ip_select_ident);
1386 1382
1387static void rt_del(unsigned hash, struct rtable *rt) 1383static void rt_del(unsigned int hash, struct rtable *rt)
1388{ 1384{
1389 struct rtable __rcu **rthp; 1385 struct rtable __rcu **rthp;
1390 struct rtable *aux; 1386 struct rtable *aux;
@@ -1502,11 +1498,11 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1502 1498
1503reject_redirect: 1499reject_redirect:
1504#ifdef CONFIG_IP_ROUTE_VERBOSE 1500#ifdef CONFIG_IP_ROUTE_VERBOSE
1505 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) 1501 if (IN_DEV_LOG_MARTIANS(in_dev))
1506 pr_info("Redirect from %pI4 on %s about %pI4 ignored\n" 1502 net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
1507 " Advised path = %pI4 -> %pI4\n", 1503 " Advised path = %pI4 -> %pI4\n",
1508 &old_gw, dev->name, &new_gw, 1504 &old_gw, dev->name, &new_gw,
1509 &saddr, &daddr); 1505 &saddr, &daddr);
1510#endif 1506#endif
1511 ; 1507 ;
1512} 1508}
@@ -1538,7 +1534,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1538 ip_rt_put(rt); 1534 ip_rt_put(rt);
1539 ret = NULL; 1535 ret = NULL;
1540 } else if (rt->rt_flags & RTCF_REDIRECTED) { 1536 } else if (rt->rt_flags & RTCF_REDIRECTED) {
1541 unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, 1537 unsigned int hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
1542 rt->rt_oif, 1538 rt->rt_oif,
1543 rt_genid(dev_net(dst->dev))); 1539 rt_genid(dev_net(dst->dev)));
1544 rt_del(hash, rt); 1540 rt_del(hash, rt);
@@ -1616,11 +1612,10 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1616 ++peer->rate_tokens; 1612 ++peer->rate_tokens;
1617#ifdef CONFIG_IP_ROUTE_VERBOSE 1613#ifdef CONFIG_IP_ROUTE_VERBOSE
1618 if (log_martians && 1614 if (log_martians &&
1619 peer->rate_tokens == ip_rt_redirect_number && 1615 peer->rate_tokens == ip_rt_redirect_number)
1620 net_ratelimit()) 1616 net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
1621 pr_warn("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", 1617 &ip_hdr(skb)->saddr, rt->rt_iif,
1622 &ip_hdr(skb)->saddr, rt->rt_iif, 1618 &rt->rt_dst, &rt->rt_gateway);
1623 &rt->rt_dst, &rt->rt_gateway);
1624#endif 1619#endif
1625 } 1620 }
1626} 1621}
@@ -1843,9 +1838,9 @@ static void ipv4_link_failure(struct sk_buff *skb)
1843 1838
1844static int ip_rt_bug(struct sk_buff *skb) 1839static int ip_rt_bug(struct sk_buff *skb)
1845{ 1840{
1846 printk(KERN_DEBUG "ip_rt_bug: %pI4 -> %pI4, %s\n", 1841 pr_debug("%s: %pI4 -> %pI4, %s\n",
1847 &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, 1842 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
1848 skb->dev ? skb->dev->name : "?"); 1843 skb->dev ? skb->dev->name : "?");
1849 kfree_skb(skb); 1844 kfree_skb(skb);
1850 WARN_ON(1); 1845 WARN_ON(1);
1851 return 0; 1846 return 0;
@@ -2134,8 +2129,7 @@ static int __mkroute_input(struct sk_buff *skb,
2134 /* get a working reference to the output device */ 2129 /* get a working reference to the output device */
2135 out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); 2130 out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
2136 if (out_dev == NULL) { 2131 if (out_dev == NULL) {
2137 if (net_ratelimit()) 2132 net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
2138 pr_crit("Bug in ip_route_input_slow(). Please report.\n");
2139 return -EINVAL; 2133 return -EINVAL;
2140 } 2134 }
2141 2135
@@ -2215,9 +2209,9 @@ static int ip_mkroute_input(struct sk_buff *skb,
2215 struct in_device *in_dev, 2209 struct in_device *in_dev,
2216 __be32 daddr, __be32 saddr, u32 tos) 2210 __be32 daddr, __be32 saddr, u32 tos)
2217{ 2211{
2218 struct rtable* rth = NULL; 2212 struct rtable *rth = NULL;
2219 int err; 2213 int err;
2220 unsigned hash; 2214 unsigned int hash;
2221 2215
2222#ifdef CONFIG_IP_ROUTE_MULTIPATH 2216#ifdef CONFIG_IP_ROUTE_MULTIPATH
2223 if (res->fi && res->fi->fib_nhs > 1) 2217 if (res->fi && res->fi->fib_nhs > 1)
@@ -2255,13 +2249,13 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2255 struct fib_result res; 2249 struct fib_result res;
2256 struct in_device *in_dev = __in_dev_get_rcu(dev); 2250 struct in_device *in_dev = __in_dev_get_rcu(dev);
2257 struct flowi4 fl4; 2251 struct flowi4 fl4;
2258 unsigned flags = 0; 2252 unsigned int flags = 0;
2259 u32 itag = 0; 2253 u32 itag = 0;
2260 struct rtable * rth; 2254 struct rtable *rth;
2261 unsigned hash; 2255 unsigned int hash;
2262 __be32 spec_dst; 2256 __be32 spec_dst;
2263 int err = -EINVAL; 2257 int err = -EINVAL;
2264 struct net * net = dev_net(dev); 2258 struct net *net = dev_net(dev);
2265 2259
2266 /* IP on this device is disabled. */ 2260 /* IP on this device is disabled. */
2267 2261
@@ -2406,9 +2400,9 @@ no_route:
2406martian_destination: 2400martian_destination:
2407 RT_CACHE_STAT_INC(in_martian_dst); 2401 RT_CACHE_STAT_INC(in_martian_dst);
2408#ifdef CONFIG_IP_ROUTE_VERBOSE 2402#ifdef CONFIG_IP_ROUTE_VERBOSE
2409 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) 2403 if (IN_DEV_LOG_MARTIANS(in_dev))
2410 pr_warn("martian destination %pI4 from %pI4, dev %s\n", 2404 net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
2411 &daddr, &saddr, dev->name); 2405 &daddr, &saddr, dev->name);
2412#endif 2406#endif
2413 2407
2414e_hostunreach: 2408e_hostunreach:
@@ -2433,8 +2427,8 @@ martian_source_keep_err:
2433int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, 2427int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2434 u8 tos, struct net_device *dev, bool noref) 2428 u8 tos, struct net_device *dev, bool noref)
2435{ 2429{
2436 struct rtable * rth; 2430 struct rtable *rth;
2437 unsigned hash; 2431 unsigned int hash;
2438 int iif = dev->ifindex; 2432 int iif = dev->ifindex;
2439 struct net *net; 2433 struct net *net;
2440 int res; 2434 int res;
@@ -2972,7 +2966,8 @@ static int rt_fill_info(struct net *net,
2972 r->rtm_src_len = 0; 2966 r->rtm_src_len = 0;
2973 r->rtm_tos = rt->rt_key_tos; 2967 r->rtm_tos = rt->rt_key_tos;
2974 r->rtm_table = RT_TABLE_MAIN; 2968 r->rtm_table = RT_TABLE_MAIN;
2975 NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); 2969 if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN))
2970 goto nla_put_failure;
2976 r->rtm_type = rt->rt_type; 2971 r->rtm_type = rt->rt_type;
2977 r->rtm_scope = RT_SCOPE_UNIVERSE; 2972 r->rtm_scope = RT_SCOPE_UNIVERSE;
2978 r->rtm_protocol = RTPROT_UNSPEC; 2973 r->rtm_protocol = RTPROT_UNSPEC;
@@ -2980,31 +2975,38 @@ static int rt_fill_info(struct net *net,
2980 if (rt->rt_flags & RTCF_NOTIFY) 2975 if (rt->rt_flags & RTCF_NOTIFY)
2981 r->rtm_flags |= RTM_F_NOTIFY; 2976 r->rtm_flags |= RTM_F_NOTIFY;
2982 2977
2983 NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst); 2978 if (nla_put_be32(skb, RTA_DST, rt->rt_dst))
2984 2979 goto nla_put_failure;
2985 if (rt->rt_key_src) { 2980 if (rt->rt_key_src) {
2986 r->rtm_src_len = 32; 2981 r->rtm_src_len = 32;
2987 NLA_PUT_BE32(skb, RTA_SRC, rt->rt_key_src); 2982 if (nla_put_be32(skb, RTA_SRC, rt->rt_key_src))
2983 goto nla_put_failure;
2988 } 2984 }
2989 if (rt->dst.dev) 2985 if (rt->dst.dev &&
2990 NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); 2986 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2987 goto nla_put_failure;
2991#ifdef CONFIG_IP_ROUTE_CLASSID 2988#ifdef CONFIG_IP_ROUTE_CLASSID
2992 if (rt->dst.tclassid) 2989 if (rt->dst.tclassid &&
2993 NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); 2990 nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
2991 goto nla_put_failure;
2994#endif 2992#endif
2995 if (rt_is_input_route(rt)) 2993 if (rt_is_input_route(rt)) {
2996 NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); 2994 if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_spec_dst))
2997 else if (rt->rt_src != rt->rt_key_src) 2995 goto nla_put_failure;
2998 NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src); 2996 } else if (rt->rt_src != rt->rt_key_src) {
2999 2997 if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_src))
3000 if (rt->rt_dst != rt->rt_gateway) 2998 goto nla_put_failure;
3001 NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); 2999 }
3000 if (rt->rt_dst != rt->rt_gateway &&
3001 nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway))
3002 goto nla_put_failure;
3002 3003
3003 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) 3004 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
3004 goto nla_put_failure; 3005 goto nla_put_failure;
3005 3006
3006 if (rt->rt_mark) 3007 if (rt->rt_mark &&
3007 NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark); 3008 nla_put_be32(skb, RTA_MARK, rt->rt_mark))
3009 goto nla_put_failure;
3008 3010
3009 error = rt->dst.error; 3011 error = rt->dst.error;
3010 if (peer) { 3012 if (peer) {
@@ -3045,7 +3047,8 @@ static int rt_fill_info(struct net *net,
3045 } 3047 }
3046 } else 3048 } else
3047#endif 3049#endif
3048 NLA_PUT_U32(skb, RTA_IIF, rt->rt_iif); 3050 if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
3051 goto nla_put_failure;
3049 } 3052 }
3050 3053
3051 if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, 3054 if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage,
@@ -3059,7 +3062,7 @@ nla_put_failure:
3059 return -EMSGSIZE; 3062 return -EMSGSIZE;
3060} 3063}
3061 3064
3062static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) 3065static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
3063{ 3066{
3064 struct net *net = sock_net(in_skb->sk); 3067 struct net *net = sock_net(in_skb->sk);
3065 struct rtmsg *rtm; 3068 struct rtmsg *rtm;
@@ -3334,23 +3337,6 @@ static ctl_table ipv4_route_table[] = {
3334 { } 3337 { }
3335}; 3338};
3336 3339
3337static struct ctl_table empty[1];
3338
3339static struct ctl_table ipv4_skeleton[] =
3340{
3341 { .procname = "route",
3342 .mode = 0555, .child = ipv4_route_table},
3343 { .procname = "neigh",
3344 .mode = 0555, .child = empty},
3345 { }
3346};
3347
3348static __net_initdata struct ctl_path ipv4_path[] = {
3349 { .procname = "net", },
3350 { .procname = "ipv4", },
3351 { },
3352};
3353
3354static struct ctl_table ipv4_route_flush_table[] = { 3340static struct ctl_table ipv4_route_flush_table[] = {
3355 { 3341 {
3356 .procname = "flush", 3342 .procname = "flush",
@@ -3361,13 +3347,6 @@ static struct ctl_table ipv4_route_flush_table[] = {
3361 { }, 3347 { },
3362}; 3348};
3363 3349
3364static __net_initdata struct ctl_path ipv4_route_path[] = {
3365 { .procname = "net", },
3366 { .procname = "ipv4", },
3367 { .procname = "route", },
3368 { },
3369};
3370
3371static __net_init int sysctl_route_net_init(struct net *net) 3350static __net_init int sysctl_route_net_init(struct net *net)
3372{ 3351{
3373 struct ctl_table *tbl; 3352 struct ctl_table *tbl;
@@ -3380,8 +3359,7 @@ static __net_init int sysctl_route_net_init(struct net *net)
3380 } 3359 }
3381 tbl[0].extra1 = net; 3360 tbl[0].extra1 = net;
3382 3361
3383 net->ipv4.route_hdr = 3362 net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
3384 register_net_sysctl_table(net, ipv4_route_path, tbl);
3385 if (net->ipv4.route_hdr == NULL) 3363 if (net->ipv4.route_hdr == NULL)
3386 goto err_reg; 3364 goto err_reg;
3387 return 0; 3365 return 0;
@@ -3505,6 +3483,6 @@ int __init ip_rt_init(void)
3505 */ 3483 */
3506void __init ip_static_sysctl_init(void) 3484void __init ip_static_sysctl_init(void)
3507{ 3485{
3508 register_sysctl_paths(ipv4_path, ipv4_skeleton); 3486 register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
3509} 3487}
3510#endif 3488#endif
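
The route.c sysctl changes are the template for the rest of this series: the NULL-terminated ctl_path arrays walked by register_sysctl_paths()/register_net_sysctl_table() are replaced by a single string handed to register_net_sysctl(). Both forms are visible verbatim in the hunks above; side by side:

    /* Before: a path array walked entry by entry */
    static __net_initdata struct ctl_path ipv4_route_path[] = {
    	{ .procname = "net", },
    	{ .procname = "ipv4", },
    	{ .procname = "route", },
    	{ },
    };
    net->ipv4.route_hdr = register_net_sysctl_table(net, ipv4_route_path, tbl);

    /* After: the same mount point as one literal */
    net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);

The exported nf_net_ipv4_netfilter_sysctl_path array dropped from netfilter.c earlier existed only to feed the old interface, which is why its one consumer in nf_conntrack_l3proto_ipv4.c now carries the literal "net/ipv4/netfilter" instead.
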
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 7a7724da9bff..ef32956ed655 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -27,6 +27,7 @@
27#include <net/tcp_memcontrol.h> 27#include <net/tcp_memcontrol.h>
28 28
29static int zero; 29static int zero;
30static int two = 2;
30static int tcp_retr1_max = 255; 31static int tcp_retr1_max = 255;
31static int ip_local_port_range_min[] = { 1, 1 }; 32static int ip_local_port_range_min[] = { 1, 1 };
32static int ip_local_port_range_max[] = { 65535, 65535 }; 33static int ip_local_port_range_max[] = { 65535, 65535 };
@@ -78,7 +79,7 @@ static int ipv4_local_port_range(ctl_table *table, int write,
78static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high) 79static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high)
79{ 80{
80 gid_t *data = table->data; 81 gid_t *data = table->data;
81 unsigned seq; 82 unsigned int seq;
82 do { 83 do {
83 seq = read_seqbegin(&sysctl_local_ports.lock); 84 seq = read_seqbegin(&sysctl_local_ports.lock);
84 85
@@ -677,6 +678,15 @@ static struct ctl_table ipv4_table[] = {
677 .proc_handler = proc_dointvec 678 .proc_handler = proc_dointvec
678 }, 679 },
679 { 680 {
681 .procname = "tcp_early_retrans",
682 .data = &sysctl_tcp_early_retrans,
683 .maxlen = sizeof(int),
684 .mode = 0644,
685 .proc_handler = proc_dointvec_minmax,
686 .extra1 = &zero,
687 .extra2 = &two,
688 },
689 {
680 .procname = "udp_mem", 690 .procname = "udp_mem",
681 .data = &sysctl_udp_mem, 691 .data = &sysctl_udp_mem,
682 .maxlen = sizeof(sysctl_udp_mem), 692 .maxlen = sizeof(sysctl_udp_mem),
@@ -768,13 +778,6 @@ static struct ctl_table ipv4_net_table[] = {
768 { } 778 { }
769}; 779};
770 780
771struct ctl_path net_ipv4_ctl_path[] = {
772 { .procname = "net", },
773 { .procname = "ipv4", },
774 { },
775};
776EXPORT_SYMBOL_GPL(net_ipv4_ctl_path);
777
778static __net_init int ipv4_sysctl_init_net(struct net *net) 781static __net_init int ipv4_sysctl_init_net(struct net *net)
779{ 782{
780 struct ctl_table *table; 783 struct ctl_table *table;
@@ -815,8 +818,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
815 818
816 tcp_init_mem(net); 819 tcp_init_mem(net);
817 820
818 net->ipv4.ipv4_hdr = register_net_sysctl_table(net, 821 net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
819 net_ipv4_ctl_path, table);
820 if (net->ipv4.ipv4_hdr == NULL) 822 if (net->ipv4.ipv4_hdr == NULL)
821 goto err_reg; 823 goto err_reg;
822 824
@@ -857,12 +859,12 @@ static __init int sysctl_ipv4_init(void)
857 if (!i->procname) 859 if (!i->procname)
858 return -EINVAL; 860 return -EINVAL;
859 861
860 hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table); 862 hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table);
861 if (hdr == NULL) 863 if (hdr == NULL)
862 return -ENOMEM; 864 return -ENOMEM;
863 865
864 if (register_pernet_subsys(&ipv4_sysctl_ops)) { 866 if (register_pernet_subsys(&ipv4_sysctl_ops)) {
865 unregister_sysctl_table(hdr); 867 unregister_net_sysctl_table(hdr);
866 return -ENOMEM; 868 return -ENOMEM;
867 } 869 }
868 870
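
The new tcp_early_retrans entry is bounded: proc_dointvec_minmax() rejects writes outside [*extra1, *extra2], so with &zero and &two the knob accepts only 0, 1 or 2. A runnable userspace sketch that reads it back; the /proc path follows directly from the table name and the "net/ipv4" registration above:

    #include <stdio.h>

    int main(void)
    {
    	int val = -1;
    	FILE *f = fopen("/proc/sys/net/ipv4/tcp_early_retrans", "r");

    	if (!f)
    		return 1;
    	if (fscanf(f, "%d", &val) == 1)
    		printf("tcp_early_retrans = %d (accepted range 0..2)\n", val);
    	fclose(f);
    	return 0;
    }
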
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1272a88c2a63..feb2e25091b1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -363,6 +363,71 @@ static int retrans_to_secs(u8 retrans, int timeout, int rto_max)
363 return period; 363 return period;
364} 364}
365 365
366/* Address-family independent initialization for a tcp_sock.
367 *
368 * NOTE: A lot of things set to zero explicitly by call to
369 * sk_alloc() so need not be done here.
370 */
371void tcp_init_sock(struct sock *sk)
372{
373 struct inet_connection_sock *icsk = inet_csk(sk);
374 struct tcp_sock *tp = tcp_sk(sk);
375
376 skb_queue_head_init(&tp->out_of_order_queue);
377 tcp_init_xmit_timers(sk);
378 tcp_prequeue_init(tp);
379
380 icsk->icsk_rto = TCP_TIMEOUT_INIT;
381 tp->mdev = TCP_TIMEOUT_INIT;
382
383 /* So many TCP implementations out there (incorrectly) count the
384 * initial SYN frame in their delayed-ACK and congestion control
385 * algorithms that we must have the following bandaid to talk
386 * efficiently to them. -DaveM
387 */
388 tp->snd_cwnd = TCP_INIT_CWND;
389
390 /* See draft-stevens-tcpca-spec-01 for discussion of the
391 * initialization of these values.
392 */
393 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
394 tp->snd_cwnd_clamp = ~0;
395 tp->mss_cache = TCP_MSS_DEFAULT;
396
397 tp->reordering = sysctl_tcp_reordering;
398 tcp_enable_early_retrans(tp);
399 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
400
401 sk->sk_state = TCP_CLOSE;
402
403 sk->sk_write_space = sk_stream_write_space;
404 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
405
406 icsk->icsk_sync_mss = tcp_sync_mss;
407
408 /* TCP Cookie Transactions */
409 if (sysctl_tcp_cookie_size > 0) {
410 /* Default, cookies without s_data_payload. */
411 tp->cookie_values =
412 kzalloc(sizeof(*tp->cookie_values),
413 sk->sk_allocation);
414 if (tp->cookie_values != NULL)
415 kref_init(&tp->cookie_values->kref);
416 }
417 /* Presumed zeroed, in order of appearance:
418 * cookie_in_always, cookie_out_never,
419 * s_data_constant, s_data_in, s_data_out
420 */
421 sk->sk_sndbuf = sysctl_tcp_wmem[1];
422 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
423
424 local_bh_disable();
425 sock_update_memcg(sk);
426 sk_sockets_allocated_inc(sk);
427 local_bh_enable();
428}
429EXPORT_SYMBOL(tcp_init_sock);
430
366/* 431/*
367 * Wait for a TCP event. 432 * Wait for a TCP event.
368 * 433 *
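tcp_init_sock() pulls everything address-family independent out of tcp_v4_init_sock() (see the tcp_ipv4.c hunk further down) while also gaining a tcp_enable_early_retrans() call. Presumably the IPv6 side shrinks the same way; a hypothetical sketch of that counterpart, mirroring the v4 hunk, would be:

/* Hypothetical IPv6 counterpart after this refactor (not in this diff). */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif
	return 0;
}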
@@ -784,9 +849,10 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
784 while (psize > 0) { 849 while (psize > 0) {
785 struct sk_buff *skb = tcp_write_queue_tail(sk); 850 struct sk_buff *skb = tcp_write_queue_tail(sk);
786 struct page *page = pages[poffset / PAGE_SIZE]; 851 struct page *page = pages[poffset / PAGE_SIZE];
787 int copy, i, can_coalesce; 852 int copy, i;
788 int offset = poffset % PAGE_SIZE; 853 int offset = poffset % PAGE_SIZE;
789 int size = min_t(size_t, psize, PAGE_SIZE - offset); 854 int size = min_t(size_t, psize, PAGE_SIZE - offset);
855 bool can_coalesce;
790 856
791 if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) { 857 if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) {
792new_segment: 858new_segment:
@@ -919,7 +985,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
919 struct tcp_sock *tp = tcp_sk(sk); 985 struct tcp_sock *tp = tcp_sk(sk);
920 struct sk_buff *skb; 986 struct sk_buff *skb;
921 int iovlen, flags, err, copied; 987 int iovlen, flags, err, copied;
922 int mss_now, size_goal; 988 int mss_now = 0, size_goal;
923 bool sg; 989 bool sg;
924 long timeo; 990 long timeo;
925 991
@@ -933,6 +999,19 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
933 if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) 999 if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
934 goto out_err; 1000 goto out_err;
935 1001
1002 if (unlikely(tp->repair)) {
1003 if (tp->repair_queue == TCP_RECV_QUEUE) {
1004 copied = tcp_send_rcvq(sk, msg, size);
1005 goto out;
1006 }
1007
1008 err = -EINVAL;
1009 if (tp->repair_queue == TCP_NO_QUEUE)
1010 goto out_err;
1011
1012 /* 'common' sending to sendq */
1013 }
1014
936 /* This should be in poll */ 1015 /* This should be in poll */
937 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); 1016 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
938 1017
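Under repair, sendmsg() doubles as a restore primitive: with repair_queue set to TCP_RECV_QUEUE the payload is injected straight into the receive queue via tcp_send_rcvq() (added in the tcp_input.c part of this series), while TCP_SEND_QUEUE takes the normal queuing path but, per the hunks below, never pushes segments onto the wire. A restore-side sketch; the TCP_* constants carry the values this series adds to include/linux/tcp.h:

/* Sketch: refill one queue of a socket already in repair mode. */
#include <netinet/in.h>
#include <sys/socket.h>

#ifndef TCP_REPAIR
#define TCP_REPAIR		19	/* values added by this series */
#define TCP_REPAIR_QUEUE	20
#define TCP_RECV_QUEUE		1
#define TCP_SEND_QUEUE		2
#endif

static int restore_queue(int sk, int which, const void *buf, size_t len)
{
	if (setsockopt(sk, IPPROTO_TCP, TCP_REPAIR_QUEUE, &which, sizeof(which)))
		return -1;
	/* TCP_RECV_QUEUE lands in sk_receive_queue; TCP_SEND_QUEUE is
	 * queued for (re)transmission but not pushed out here. */
	return send(sk, buf, len, 0) == (ssize_t)len ? 0 : -1;
}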
@@ -1089,7 +1168,7 @@ new_segment:
1089 if ((seglen -= copy) == 0 && iovlen == 0) 1168 if ((seglen -= copy) == 0 && iovlen == 0)
1090 goto out; 1169 goto out;
1091 1170
1092 if (skb->len < max || (flags & MSG_OOB)) 1171 if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
1093 continue; 1172 continue;
1094 1173
1095 if (forced_push(tp)) { 1174 if (forced_push(tp)) {
@@ -1102,7 +1181,7 @@ new_segment:
1102wait_for_sndbuf: 1181wait_for_sndbuf:
1103 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 1182 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1104wait_for_memory: 1183wait_for_memory:
1105 if (copied) 1184 if (copied && likely(!tp->repair))
1106 tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); 1185 tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
1107 1186
1108 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) 1187 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
@@ -1113,7 +1192,7 @@ wait_for_memory:
1113 } 1192 }
1114 1193
1115out: 1194out:
1116 if (copied) 1195 if (copied && likely(!tp->repair))
1117 tcp_push(sk, flags, mss_now, tp->nonagle); 1196 tcp_push(sk, flags, mss_now, tp->nonagle);
1118 release_sock(sk); 1197 release_sock(sk);
1119 return copied; 1198 return copied;
@@ -1187,6 +1266,24 @@ static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags)
1187 return -EAGAIN; 1266 return -EAGAIN;
1188} 1267}
1189 1268
1269static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
1270{
1271 struct sk_buff *skb;
1272 int copied = 0, err = 0;
1273
1274 /* XXX -- need to support SO_PEEK_OFF */
1275
1276 skb_queue_walk(&sk->sk_write_queue, skb) {
1277 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, skb->len);
1278 if (err)
1279 break;
1280
1281 copied += skb->len;
1282 }
1283
1284 return err ?: copied;
1285}
1286
1190/* Clean up the receive buffer for full frames taken by the user, 1287/* Clean up the receive buffer for full frames taken by the user,
1191 * then send an ACK if necessary. COPIED is the number of bytes 1288 * then send an ACK if necessary. COPIED is the number of bytes
1192 * tcp_recvmsg has given to the user so far, it speeds up the 1289 * tcp_recvmsg has given to the user so far, it speeds up the
@@ -1432,6 +1529,21 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1432 if (flags & MSG_OOB) 1529 if (flags & MSG_OOB)
1433 goto recv_urg; 1530 goto recv_urg;
1434 1531
1532 if (unlikely(tp->repair)) {
1533 err = -EPERM;
1534 if (!(flags & MSG_PEEK))
1535 goto out;
1536
1537 if (tp->repair_queue == TCP_SEND_QUEUE)
1538 goto recv_sndq;
1539
1540 err = -EINVAL;
1541 if (tp->repair_queue == TCP_NO_QUEUE)
1542 goto out;
1543
1544 /* 'common' recv queue MSG_PEEK-ing */
1545 }
1546
1435 seq = &tp->copied_seq; 1547 seq = &tp->copied_seq;
1436 if (flags & MSG_PEEK) { 1548 if (flags & MSG_PEEK) {
1437 peek_seq = tp->copied_seq; 1549 peek_seq = tp->copied_seq;
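The receive side mirrors that: recvmsg() on a repaired socket insists on MSG_PEEK, since checkpointing must not consume data, and pointing repair_queue at TCP_SEND_QUEUE routes the read through tcp_peek_sndq() above, which copies out every skb still sitting in the write queue. A dump-side sketch, with the same includes and constants as the restore sketch above:

/* Sketch: peek the not-yet-acked send-queue bytes of a repaired socket. */
static ssize_t dump_send_queue(int sk, void *buf, size_t len)
{
	int q = TCP_SEND_QUEUE;

	if (setsockopt(sk, IPPROTO_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q)))
		return -1;
	/* MSG_PEEK is mandatory in repair mode; MSG_DONTWAIT keeps an
	 * empty queue from blocking the dumper. */
	return recv(sk, buf, len, MSG_PEEK | MSG_DONTWAIT);
}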
@@ -1633,9 +1745,9 @@ do_prequeue:
1633 } 1745 }
1634 if ((flags & MSG_PEEK) && 1746 if ((flags & MSG_PEEK) &&
1635 (peek_seq - copied - urg_hole != tp->copied_seq)) { 1747 (peek_seq - copied - urg_hole != tp->copied_seq)) {
1636 if (net_ratelimit()) 1748 net_dbg_ratelimited("TCP(%s:%d): Application bug, race in MSG_PEEK\n",
1637 printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n", 1749 current->comm,
1638 current->comm, task_pid_nr(current)); 1750 task_pid_nr(current));
1639 peek_seq = tp->copied_seq; 1751 peek_seq = tp->copied_seq;
1640 } 1752 }
1641 continue; 1753 continue;
@@ -1783,6 +1895,10 @@ out:
1783recv_urg: 1895recv_urg:
1784 err = tcp_recv_urg(sk, msg, len, flags); 1896 err = tcp_recv_urg(sk, msg, len, flags);
1785 goto out; 1897 goto out;
1898
1899recv_sndq:
1900 err = tcp_peek_sndq(sk, msg, len);
1901 goto out;
1786} 1902}
1787EXPORT_SYMBOL(tcp_recvmsg); 1903EXPORT_SYMBOL(tcp_recvmsg);
1788 1904
@@ -1886,10 +2002,10 @@ bool tcp_check_oom(struct sock *sk, int shift)
1886 too_many_orphans = tcp_too_many_orphans(sk, shift); 2002 too_many_orphans = tcp_too_many_orphans(sk, shift);
1887 out_of_socket_memory = tcp_out_of_memory(sk); 2003 out_of_socket_memory = tcp_out_of_memory(sk);
1888 2004
1889 if (too_many_orphans && net_ratelimit()) 2005 if (too_many_orphans)
1890 pr_info("too many orphaned sockets\n"); 2006 net_info_ratelimited("too many orphaned sockets\n");
1891 if (out_of_socket_memory && net_ratelimit()) 2007 if (out_of_socket_memory)
1892 pr_info("out of memory -- consider tuning tcp_mem\n"); 2008 net_info_ratelimited("out of memory -- consider tuning tcp_mem\n");
1893 return too_many_orphans || out_of_socket_memory; 2009 return too_many_orphans || out_of_socket_memory;
1894} 2010}
1895 2011
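This is part of a series-wide conversion of open-coded net_ratelimit() checks to the net_<level>_ratelimited() helpers (the same change appears in tcp_input.c, tcp_ipv4.c and tcp_output.c below). Assuming the helpers follow the existing pr_<level> pattern, the shape is roughly:

/* Approximate shape of the helper; see include/linux/net.h for the
 * authoritative definition. */
#define net_info_ratelimited(fmt, ...)			\
	do {						\
		if (net_ratelimit())			\
			pr_info(fmt, ##__VA_ARGS__);	\
	} while (0)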
@@ -1935,7 +2051,9 @@ void tcp_close(struct sock *sk, long timeout)
1935 * advertise a zero window, then kill -9 the FTP client, wheee... 2051 * advertise a zero window, then kill -9 the FTP client, wheee...
1936 * Note: timeout is always zero in such a case. 2052 * Note: timeout is always zero in such a case.
1937 */ 2053 */
1938 if (data_was_unread) { 2054 if (unlikely(tcp_sk(sk)->repair)) {
2055 sk->sk_prot->disconnect(sk, 0);
2056 } else if (data_was_unread) {
1939 /* Unread data was tossed, zap the connection. */ 2057 /* Unread data was tossed, zap the connection. */
1940 NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); 2058 NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
1941 tcp_set_state(sk, TCP_CLOSE); 2059 tcp_set_state(sk, TCP_CLOSE);
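Note the ordering: the repair check beats both the unread-data RST path and the regular FIN state machine, so closing a socket in repair mode tears the local endpoint down through disconnect() without putting anything on the wire; that is exactly what a checkpointer needs after dumping. Sketch, constants as in the restore sketch above:

/* Sketch: destroy the local endpoint silently (no FIN or RST sent). */
#include <unistd.h>

static int silent_close(int sk)
{
	int on = 1;

	if (setsockopt(sk, IPPROTO_TCP, TCP_REPAIR, &on, sizeof(on)))
		return -1;	/* requires CAP_NET_ADMIN */
	return close(sk);
}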
@@ -2074,6 +2192,8 @@ int tcp_disconnect(struct sock *sk, int flags)
2074 /* ABORT function of RFC793 */ 2192 /* ABORT function of RFC793 */
2075 if (old_state == TCP_LISTEN) { 2193 if (old_state == TCP_LISTEN) {
2076 inet_csk_listen_stop(sk); 2194 inet_csk_listen_stop(sk);
2195 } else if (unlikely(tp->repair)) {
2196 sk->sk_err = ECONNABORTED;
2077 } else if (tcp_need_reset(old_state) || 2197 } else if (tcp_need_reset(old_state) ||
2078 (tp->snd_nxt != tp->write_seq && 2198 (tp->snd_nxt != tp->write_seq &&
2079 (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { 2199 (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
@@ -2125,6 +2245,54 @@ int tcp_disconnect(struct sock *sk, int flags)
2125} 2245}
2126EXPORT_SYMBOL(tcp_disconnect); 2246EXPORT_SYMBOL(tcp_disconnect);
2127 2247
2248static inline int tcp_can_repair_sock(struct sock *sk)
2249{
2250 return capable(CAP_NET_ADMIN) &&
2251 ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED));
2252}
2253
2254static int tcp_repair_options_est(struct tcp_sock *tp,
2255 struct tcp_repair_opt __user *optbuf, unsigned int len)
2256{
2257 struct tcp_repair_opt opt;
2258
2259 while (len >= sizeof(opt)) {
2260 if (copy_from_user(&opt, optbuf, sizeof(opt)))
2261 return -EFAULT;
2262
2263 optbuf++;
2264 len -= sizeof(opt);
2265
2266 switch (opt.opt_code) {
2267 case TCPOPT_MSS:
2268 tp->rx_opt.mss_clamp = opt.opt_val;
2269 break;
2270 case TCPOPT_WINDOW:
2271 if (opt.opt_val > 14)
2272 return -EFBIG;
2273
2274 tp->rx_opt.snd_wscale = opt.opt_val;
2275 break;
2276 case TCPOPT_SACK_PERM:
2277 if (opt.opt_val != 0)
2278 return -EINVAL;
2279
2280 tp->rx_opt.sack_ok |= TCP_SACK_SEEN;
2281 if (sysctl_tcp_fack)
2282 tcp_enable_fack(tp);
2283 break;
2284 case TCPOPT_TIMESTAMP:
2285 if (opt.opt_val != 0)
2286 return -EINVAL;
2287
2288 tp->rx_opt.tstamp_ok = 1;
2289 break;
2290 }
2291 }
2292
2293 return 0;
2294}
2295
2128/* 2296/*
2129 * Socket option code for TCP. 2297 * Socket option code for TCP.
2130 */ 2298 */
@@ -2295,6 +2463,55 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2295 err = -EINVAL; 2463 err = -EINVAL;
2296 else 2464 else
2297 tp->thin_dupack = val; 2465 tp->thin_dupack = val;
2466 if (tp->thin_dupack)
2467 tcp_disable_early_retrans(tp);
2468 break;
2469
2470 case TCP_REPAIR:
2471 if (!tcp_can_repair_sock(sk))
2472 err = -EPERM;
2473 else if (val == 1) {
2474 tp->repair = 1;
2475 sk->sk_reuse = SK_FORCE_REUSE;
2476 tp->repair_queue = TCP_NO_QUEUE;
2477 } else if (val == 0) {
2478 tp->repair = 0;
2479 sk->sk_reuse = SK_NO_REUSE;
2480 tcp_send_window_probe(sk);
2481 } else
2482 err = -EINVAL;
2483
2484 break;
2485
2486 case TCP_REPAIR_QUEUE:
2487 if (!tp->repair)
2488 err = -EPERM;
2489 else if (val < TCP_QUEUES_NR)
2490 tp->repair_queue = val;
2491 else
2492 err = -EINVAL;
2493 break;
2494
2495 case TCP_QUEUE_SEQ:
2496 if (sk->sk_state != TCP_CLOSE)
2497 err = -EPERM;
2498 else if (tp->repair_queue == TCP_SEND_QUEUE)
2499 tp->write_seq = val;
2500 else if (tp->repair_queue == TCP_RECV_QUEUE)
2501 tp->rcv_nxt = val;
2502 else
2503 err = -EINVAL;
2504 break;
2505
2506 case TCP_REPAIR_OPTIONS:
2507 if (!tp->repair)
2508 err = -EINVAL;
2509 else if (sk->sk_state == TCP_ESTABLISHED)
2510 err = tcp_repair_options_est(tp,
2511 (struct tcp_repair_opt __user *)optval,
2512 optlen);
2513 else
2514 err = -EPERM;
2298 break; 2515 break;
2299 2516
2300 case TCP_CORK: 2517 case TCP_CORK:
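TCP_QUEUE_SEQ reads or writes the counter of whichever queue TCP_REPAIR_QUEUE currently selects (write_seq for the send queue, rcv_nxt for the receive queue), and writing is refused once the socket leaves TCP_CLOSE. That pins down the restore order: seed sequence numbers first, then connect, then refill queues. Sketch, constants and includes as above:

/* Sketch: seed both sequence counters before the repair-mode connect(). */
#ifndef TCP_QUEUE_SEQ
#define TCP_QUEUE_SEQ	21
#endif

static int restore_seqs(int sk, uint32_t snd, uint32_t rcv)
{
	int q = TCP_SEND_QUEUE;

	if (setsockopt(sk, IPPROTO_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q)) ||
	    setsockopt(sk, IPPROTO_TCP, TCP_QUEUE_SEQ, &snd, sizeof(snd)))
		return -1;

	q = TCP_RECV_QUEUE;
	if (setsockopt(sk, IPPROTO_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q)) ||
	    setsockopt(sk, IPPROTO_TCP, TCP_QUEUE_SEQ, &rcv, sizeof(rcv)))
		return -1;
	return 0;
}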
@@ -2530,6 +2747,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2530 val = tp->mss_cache; 2747 val = tp->mss_cache;
2531 if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) 2748 if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
2532 val = tp->rx_opt.user_mss; 2749 val = tp->rx_opt.user_mss;
2750 if (tp->repair)
2751 val = tp->rx_opt.mss_clamp;
2533 break; 2752 break;
2534 case TCP_NODELAY: 2753 case TCP_NODELAY:
2535 val = !!(tp->nonagle&TCP_NAGLE_OFF); 2754 val = !!(tp->nonagle&TCP_NAGLE_OFF);
@@ -2632,6 +2851,26 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2632 val = tp->thin_dupack; 2851 val = tp->thin_dupack;
2633 break; 2852 break;
2634 2853
2854 case TCP_REPAIR:
2855 val = tp->repair;
2856 break;
2857
2858 case TCP_REPAIR_QUEUE:
2859 if (tp->repair)
2860 val = tp->repair_queue;
2861 else
2862 return -EINVAL;
2863 break;
2864
2865 case TCP_QUEUE_SEQ:
2866 if (tp->repair_queue == TCP_SEND_QUEUE)
2867 val = tp->write_seq;
2868 else if (tp->repair_queue == TCP_RECV_QUEUE)
2869 val = tp->rcv_nxt;
2870 else
2871 return -EINVAL;
2872 break;
2873
2635 case TCP_USER_TIMEOUT: 2874 case TCP_USER_TIMEOUT:
2636 val = jiffies_to_msecs(icsk->icsk_user_timeout); 2875 val = jiffies_to_msecs(icsk->icsk_user_timeout);
2637 break; 2876 break;
@@ -2675,7 +2914,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
2675{ 2914{
2676 struct sk_buff *segs = ERR_PTR(-EINVAL); 2915 struct sk_buff *segs = ERR_PTR(-EINVAL);
2677 struct tcphdr *th; 2916 struct tcphdr *th;
2678 unsigned thlen; 2917 unsigned int thlen;
2679 unsigned int seq; 2918 unsigned int seq;
2680 __be32 delta; 2919 __be32 delta;
2681 unsigned int oldlen; 2920 unsigned int oldlen;
@@ -3033,9 +3272,9 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
3033 struct scatterlist sg; 3272 struct scatterlist sg;
3034 const struct tcphdr *tp = tcp_hdr(skb); 3273 const struct tcphdr *tp = tcp_hdr(skb);
3035 struct hash_desc *desc = &hp->md5_desc; 3274 struct hash_desc *desc = &hp->md5_desc;
3036 unsigned i; 3275 unsigned int i;
3037 const unsigned head_data_len = skb_headlen(skb) > header_len ? 3276 const unsigned int head_data_len = skb_headlen(skb) > header_len ?
3038 skb_headlen(skb) - header_len : 0; 3277 skb_headlen(skb) - header_len : 0;
3039 const struct skb_shared_info *shi = skb_shinfo(skb); 3278 const struct skb_shared_info *shi = skb_shinfo(skb);
3040 struct sk_buff *frag_iter; 3279 struct sk_buff *frag_iter;
3041 3280
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 257b61789eeb..eb97787be757 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -99,6 +99,7 @@ int sysctl_tcp_thin_dupack __read_mostly;
99 99
100int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; 100int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
101int sysctl_tcp_abc __read_mostly; 101int sysctl_tcp_abc __read_mostly;
102int sysctl_tcp_early_retrans __read_mostly = 2;
102 103
103#define FLAG_DATA 0x01 /* Incoming frame contained data. */ 104#define FLAG_DATA 0x01 /* Incoming frame contained data. */
104#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ 105#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
@@ -175,7 +176,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
175static void tcp_incr_quickack(struct sock *sk) 176static void tcp_incr_quickack(struct sock *sk)
176{ 177{
177 struct inet_connection_sock *icsk = inet_csk(sk); 178 struct inet_connection_sock *icsk = inet_csk(sk);
178 unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss); 179 unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
179 180
180 if (quickacks == 0) 181 if (quickacks == 0)
181 quickacks = 2; 182 quickacks = 2;
@@ -906,6 +907,7 @@ static void tcp_init_metrics(struct sock *sk)
906 if (dst_metric(dst, RTAX_REORDERING) && 907 if (dst_metric(dst, RTAX_REORDERING) &&
907 tp->reordering != dst_metric(dst, RTAX_REORDERING)) { 908 tp->reordering != dst_metric(dst, RTAX_REORDERING)) {
908 tcp_disable_fack(tp); 909 tcp_disable_fack(tp);
910 tcp_disable_early_retrans(tp);
909 tp->reordering = dst_metric(dst, RTAX_REORDERING); 911 tp->reordering = dst_metric(dst, RTAX_REORDERING);
910 } 912 }
911 913
@@ -937,7 +939,7 @@ static void tcp_init_metrics(struct sock *sk)
937 tcp_set_rto(sk); 939 tcp_set_rto(sk);
938reset: 940reset:
939 if (tp->srtt == 0) { 941 if (tp->srtt == 0) {
940 /* RFC2988bis: We've failed to get a valid RTT sample from 942 /* RFC6298: 5.7 We've failed to get a valid RTT sample from
941 * 3WHS. This is most likely due to retransmission, 943 * 3WHS. This is most likely due to retransmission,
942 * including spurious one. Reset the RTO back to 3secs 944 * including spurious one. Reset the RTO back to 3secs
943 * from the more aggressive 1sec to avoid more spurious 945 * from the more aggressive 1sec to avoid more spurious
@@ -947,7 +949,7 @@ reset:
947 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; 949 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
948 } 950 }
949 /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been 951 /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
950 * retransmitted. In light of RFC2988bis' more aggressive 1sec 952 * retransmitted. In light of RFC6298 more aggressive 1sec
951 * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK 953 * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK
952 * retransmission has occurred. 954 * retransmission has occurred.
953 */ 955 */
@@ -979,15 +981,18 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
979 981
980 NET_INC_STATS_BH(sock_net(sk), mib_idx); 982 NET_INC_STATS_BH(sock_net(sk), mib_idx);
981#if FASTRETRANS_DEBUG > 1 983#if FASTRETRANS_DEBUG > 1
982 printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n", 984 pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
983 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, 985 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
984 tp->reordering, 986 tp->reordering,
985 tp->fackets_out, 987 tp->fackets_out,
986 tp->sacked_out, 988 tp->sacked_out,
987 tp->undo_marker ? tp->undo_retrans : 0); 989 tp->undo_marker ? tp->undo_retrans : 0);
988#endif 990#endif
989 tcp_disable_fack(tp); 991 tcp_disable_fack(tp);
990 } 992 }
993
994 if (metric > 0)
995 tcp_disable_early_retrans(tp);
991} 996}
992 997
993/* This must be called before lost_out is incremented */ 998/* This must be called before lost_out is incremented */
@@ -2339,6 +2344,27 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
2339 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; 2344 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
2340} 2345}
2341 2346
2347static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
2348{
2349 struct tcp_sock *tp = tcp_sk(sk);
2350 unsigned long delay;
2351
2352 /* Delay early retransmit and entering fast recovery for
2353 * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples
2354 * available, or RTO is scheduled to fire first.
2355 */
2356 if (sysctl_tcp_early_retrans < 2 || (flag & FLAG_ECE) || !tp->srtt)
2357 return false;
2358
2359 delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2));
2360 if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
2361 return false;
2362
2363 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, TCP_RTO_MAX);
2364 tp->early_retrans_delayed = 1;
2365 return true;
2366}
2367
2342static inline int tcp_skb_timedout(const struct sock *sk, 2368static inline int tcp_skb_timedout(const struct sock *sk,
2343 const struct sk_buff *skb) 2369 const struct sk_buff *skb)
2344{ 2370{
@@ -2446,7 +2472,7 @@ static inline int tcp_head_timedout(const struct sock *sk)
2446 * Main question: may we further continue forward transmission 2472 * Main question: may we further continue forward transmission
2447 * with the same cwnd? 2473 * with the same cwnd?
2448 */ 2474 */
2449static int tcp_time_to_recover(struct sock *sk) 2475static int tcp_time_to_recover(struct sock *sk, int flag)
2450{ 2476{
2451 struct tcp_sock *tp = tcp_sk(sk); 2477 struct tcp_sock *tp = tcp_sk(sk);
2452 __u32 packets_out; 2478 __u32 packets_out;
@@ -2492,6 +2518,16 @@ static int tcp_time_to_recover(struct sock *sk)
2492 tcp_is_sack(tp) && !tcp_send_head(sk)) 2518 tcp_is_sack(tp) && !tcp_send_head(sk))
2493 return 1; 2519 return 1;
2494 2520
2521 /* Trick#6: TCP early retransmit, per RFC5827. To avoid spurious
2522 * retransmissions due to small network reorderings, we implement
2523 * Mitigation A.3 in the RFC and delay the retransmission for a short
2524 * interval if appropriate.
2525 */
2526 if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out &&
2527 (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) &&
2528 !tcp_may_send_now(sk))
2529 return !tcp_pause_early_retransmit(sk, flag);
2530
2495 return 0; 2531 return 0;
2496} 2532}
2497 2533
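The trigger is deliberately narrow: nothing retransmitted yet, at least one segment SACKed, every outstanding segment except one SACKed, fewer than four in flight, and no fresh data that could be sent instead; in that regime too few dupACKs can ever arrive to fire classic fast retransmit. Restated as a standalone predicate over the tcp_sock counters of the same names:

/* RFC 5827 early-retransmit candidacy, restated from the hunk above. */
static int er_candidate(int do_early_retrans, unsigned int retrans_out,
			unsigned int sacked_out, unsigned int packets_out,
			int may_send_now)
{
	return do_early_retrans && !retrans_out && sacked_out &&
	       packets_out == sacked_out + 1 && packets_out < 4 &&
	       !may_send_now;
}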
@@ -2680,22 +2716,22 @@ static void DBGUNDO(struct sock *sk, const char *msg)
2680 struct inet_sock *inet = inet_sk(sk); 2716 struct inet_sock *inet = inet_sk(sk);
2681 2717
2682 if (sk->sk_family == AF_INET) { 2718 if (sk->sk_family == AF_INET) {
2683 printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", 2719 pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
2684 msg, 2720 msg,
2685 &inet->inet_daddr, ntohs(inet->inet_dport), 2721 &inet->inet_daddr, ntohs(inet->inet_dport),
2686 tp->snd_cwnd, tcp_left_out(tp), 2722 tp->snd_cwnd, tcp_left_out(tp),
2687 tp->snd_ssthresh, tp->prior_ssthresh, 2723 tp->snd_ssthresh, tp->prior_ssthresh,
2688 tp->packets_out); 2724 tp->packets_out);
2689 } 2725 }
2690#if IS_ENABLED(CONFIG_IPV6) 2726#if IS_ENABLED(CONFIG_IPV6)
2691 else if (sk->sk_family == AF_INET6) { 2727 else if (sk->sk_family == AF_INET6) {
2692 struct ipv6_pinfo *np = inet6_sk(sk); 2728 struct ipv6_pinfo *np = inet6_sk(sk);
2693 printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", 2729 pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
2694 msg, 2730 msg,
2695 &np->daddr, ntohs(inet->inet_dport), 2731 &np->daddr, ntohs(inet->inet_dport),
2696 tp->snd_cwnd, tcp_left_out(tp), 2732 tp->snd_cwnd, tcp_left_out(tp),
2697 tp->snd_ssthresh, tp->prior_ssthresh, 2733 tp->snd_ssthresh, tp->prior_ssthresh,
2698 tp->packets_out); 2734 tp->packets_out);
2699 } 2735 }
2700#endif 2736#endif
2701} 2737}
@@ -3025,6 +3061,38 @@ static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
3025 tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; 3061 tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
3026} 3062}
3027 3063
3064static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
3065{
3066 struct tcp_sock *tp = tcp_sk(sk);
3067 int mib_idx;
3068
3069 if (tcp_is_reno(tp))
3070 mib_idx = LINUX_MIB_TCPRENORECOVERY;
3071 else
3072 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
3073
3074 NET_INC_STATS_BH(sock_net(sk), mib_idx);
3075
3076 tp->high_seq = tp->snd_nxt;
3077 tp->prior_ssthresh = 0;
3078 tp->undo_marker = tp->snd_una;
3079 tp->undo_retrans = tp->retrans_out;
3080
3081 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
3082 if (!ece_ack)
3083 tp->prior_ssthresh = tcp_current_ssthresh(sk);
3084 tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
3085 TCP_ECN_queue_cwr(tp);
3086 }
3087
3088 tp->bytes_acked = 0;
3089 tp->snd_cwnd_cnt = 0;
3090 tp->prior_cwnd = tp->snd_cwnd;
3091 tp->prr_delivered = 0;
3092 tp->prr_out = 0;
3093 tcp_set_ca_state(sk, TCP_CA_Recovery);
3094}
3095
3028/* Process an event, which can update packets-in-flight not trivially. 3096/* Process an event, which can update packets-in-flight not trivially.
3029 * Main goal of this function is to calculate new estimate for left_out, 3097 * Main goal of this function is to calculate new estimate for left_out,
3030 * taking into account both packets sitting in receiver's buffer and 3098 * taking into account both packets sitting in receiver's buffer and
@@ -3044,7 +3112,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
3044 struct tcp_sock *tp = tcp_sk(sk); 3112 struct tcp_sock *tp = tcp_sk(sk);
3045 int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && 3113 int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
3046 (tcp_fackets_out(tp) > tp->reordering)); 3114 (tcp_fackets_out(tp) > tp->reordering));
3047 int fast_rexmit = 0, mib_idx; 3115 int fast_rexmit = 0;
3048 3116
3049 if (WARN_ON(!tp->packets_out && tp->sacked_out)) 3117 if (WARN_ON(!tp->packets_out && tp->sacked_out))
3050 tp->sacked_out = 0; 3118 tp->sacked_out = 0;
@@ -3128,7 +3196,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
3128 if (icsk->icsk_ca_state <= TCP_CA_Disorder) 3196 if (icsk->icsk_ca_state <= TCP_CA_Disorder)
3129 tcp_try_undo_dsack(sk); 3197 tcp_try_undo_dsack(sk);
3130 3198
3131 if (!tcp_time_to_recover(sk)) { 3199 if (!tcp_time_to_recover(sk, flag)) {
3132 tcp_try_to_open(sk, flag); 3200 tcp_try_to_open(sk, flag);
3133 return; 3201 return;
3134 } 3202 }
@@ -3145,32 +3213,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
3145 } 3213 }
3146 3214
3147 /* Otherwise enter Recovery state */ 3215 /* Otherwise enter Recovery state */
3148 3216 tcp_enter_recovery(sk, (flag & FLAG_ECE));
3149 if (tcp_is_reno(tp))
3150 mib_idx = LINUX_MIB_TCPRENORECOVERY;
3151 else
3152 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
3153
3154 NET_INC_STATS_BH(sock_net(sk), mib_idx);
3155
3156 tp->high_seq = tp->snd_nxt;
3157 tp->prior_ssthresh = 0;
3158 tp->undo_marker = tp->snd_una;
3159 tp->undo_retrans = tp->retrans_out;
3160
3161 if (icsk->icsk_ca_state < TCP_CA_CWR) {
3162 if (!(flag & FLAG_ECE))
3163 tp->prior_ssthresh = tcp_current_ssthresh(sk);
3164 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
3165 TCP_ECN_queue_cwr(tp);
3166 }
3167
3168 tp->bytes_acked = 0;
3169 tp->snd_cwnd_cnt = 0;
3170 tp->prior_cwnd = tp->snd_cwnd;
3171 tp->prr_delivered = 0;
3172 tp->prr_out = 0;
3173 tcp_set_ca_state(sk, TCP_CA_Recovery);
3174 fast_rexmit = 1; 3217 fast_rexmit = 1;
3175 } 3218 }
3176 3219
@@ -3252,16 +3295,47 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
3252/* Restart timer after forward progress on connection. 3295/* Restart timer after forward progress on connection.
3253 * RFC2988 recommends to restart timer to now+rto. 3296 * RFC2988 recommends to restart timer to now+rto.
3254 */ 3297 */
3255static void tcp_rearm_rto(struct sock *sk) 3298void tcp_rearm_rto(struct sock *sk)
3256{ 3299{
3257 const struct tcp_sock *tp = tcp_sk(sk); 3300 struct tcp_sock *tp = tcp_sk(sk);
3258 3301
3259 if (!tp->packets_out) { 3302 if (!tp->packets_out) {
3260 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); 3303 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
3261 } else { 3304 } else {
3262 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 3305 u32 rto = inet_csk(sk)->icsk_rto;
3263 inet_csk(sk)->icsk_rto, TCP_RTO_MAX); 3306 /* Offset the time elapsed after installing regular RTO */
3307 if (tp->early_retrans_delayed) {
3308 struct sk_buff *skb = tcp_write_queue_head(sk);
3309 const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto;
3310 s32 delta = (s32)(rto_time_stamp - tcp_time_stamp);
3311 /* delta may not be positive if the socket is locked
3312 * when the delayed ER timer fires and is rescheduled.
3313 */
3314 if (delta > 0)
3315 rto = delta;
3316 }
3317 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
3318 TCP_RTO_MAX);
3264 } 3319 }
3320 tp->early_retrans_delayed = 0;
3321}
3322
3323/* This function is called when the delayed ER timer fires. TCP enters
3324 * fast recovery and performs fast-retransmit.
3325 */
3326void tcp_resume_early_retransmit(struct sock *sk)
3327{
3328 struct tcp_sock *tp = tcp_sk(sk);
3329
3330 tcp_rearm_rto(sk);
3331
3332 /* Stop if ER is disabled after the delayed ER timer is scheduled */
3333 if (!tp->do_early_retrans)
3334 return;
3335
3336 tcp_enter_recovery(sk, false);
3337 tcp_update_scoreboard(sk, 1);
3338 tcp_xmit_retransmit_queue(sk);
3265} 3339}
3266 3340
3267/* If we get here, the whole TSO packet has not been acked. */ 3341/* If we get here, the whole TSO packet has not been acked. */
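Because the ER pause reuses ICSK_TIME_RETRANS, tcp_rearm_rto() must re-arm the real RTO for the time remaining rather than a full period: head-skb send time plus RTO, minus now, with the signed cast tolerating a timer that fired while the socket was locked. The offset arithmetic in isolation:

/* tcp_rearm_rto() offset arithmetic, in abstract clock ticks. */
#include <stdio.h>

static unsigned int remaining_rto(unsigned int sent_at, unsigned int rto,
				  unsigned int now)
{
	int delta = (int)(sent_at + rto - now);	/* wrap-safe, like the s32 cast */

	return delta > 0 ? (unsigned int)delta : rto;
}

int main(void)
{
	/* Head sent at t=100, RTO=300, now t=150: fire in 250, not 300. */
	printf("%u\n", remaining_rto(100, 300, 150));
	return 0;
}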
@@ -3437,18 +3511,18 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3437 if (!tp->packets_out && tcp_is_sack(tp)) { 3511 if (!tp->packets_out && tcp_is_sack(tp)) {
3438 icsk = inet_csk(sk); 3512 icsk = inet_csk(sk);
3439 if (tp->lost_out) { 3513 if (tp->lost_out) {
3440 printk(KERN_DEBUG "Leak l=%u %d\n", 3514 pr_debug("Leak l=%u %d\n",
3441 tp->lost_out, icsk->icsk_ca_state); 3515 tp->lost_out, icsk->icsk_ca_state);
3442 tp->lost_out = 0; 3516 tp->lost_out = 0;
3443 } 3517 }
3444 if (tp->sacked_out) { 3518 if (tp->sacked_out) {
3445 printk(KERN_DEBUG "Leak s=%u %d\n", 3519 pr_debug("Leak s=%u %d\n",
3446 tp->sacked_out, icsk->icsk_ca_state); 3520 tp->sacked_out, icsk->icsk_ca_state);
3447 tp->sacked_out = 0; 3521 tp->sacked_out = 0;
3448 } 3522 }
3449 if (tp->retrans_out) { 3523 if (tp->retrans_out) {
3450 printk(KERN_DEBUG "Leak r=%u %d\n", 3524 pr_debug("Leak r=%u %d\n",
3451 tp->retrans_out, icsk->icsk_ca_state); 3525 tp->retrans_out, icsk->icsk_ca_state);
3452 tp->retrans_out = 0; 3526 tp->retrans_out = 0;
3453 } 3527 }
3454 } 3528 }
@@ -3710,6 +3784,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3710 if (after(ack, tp->snd_nxt)) 3784 if (after(ack, tp->snd_nxt))
3711 goto invalid_ack; 3785 goto invalid_ack;
3712 3786
3787 if (tp->early_retrans_delayed)
3788 tcp_rearm_rto(sk);
3789
3713 if (after(ack, prior_snd_una)) 3790 if (after(ack, prior_snd_una))
3714 flag |= FLAG_SND_UNA_ADVANCED; 3791 flag |= FLAG_SND_UNA_ADVANCED;
3715 3792
@@ -3875,10 +3952,9 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o
3875 __u8 snd_wscale = *(__u8 *)ptr; 3952 __u8 snd_wscale = *(__u8 *)ptr;
3876 opt_rx->wscale_ok = 1; 3953 opt_rx->wscale_ok = 1;
3877 if (snd_wscale > 14) { 3954 if (snd_wscale > 14) {
3878 if (net_ratelimit()) 3955 net_info_ratelimited("%s: Illegal window scaling value %d >14 received\n",
3879 pr_info("%s: Illegal window scaling value %d >14 received\n", 3956 __func__,
3880 __func__, 3957 snd_wscale);
3881 snd_wscale);
3882 snd_wscale = 14; 3958 snd_wscale = 14;
3883 } 3959 }
3884 opt_rx->snd_wscale = snd_wscale; 3960 opt_rx->snd_wscale = snd_wscale;
@@ -4434,7 +4510,7 @@ static void tcp_ofo_queue(struct sock *sk)
4434static int tcp_prune_ofo_queue(struct sock *sk); 4510static int tcp_prune_ofo_queue(struct sock *sk);
4435static int tcp_prune_queue(struct sock *sk); 4511static int tcp_prune_queue(struct sock *sk);
4436 4512
4437static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) 4513static int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
4438{ 4514{
4439 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 4515 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
4440 !sk_rmem_schedule(sk, size)) { 4516 !sk_rmem_schedule(sk, size)) {
@@ -4453,6 +4529,102 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
4453 return 0; 4529 return 0;
4454} 4530}
4455 4531
4532/**
4533 * tcp_try_coalesce - try to merge skb to prior one
4534 * @sk: socket
4535 * @to: prior buffer
4536 * @from: buffer to add in queue
4537 * @fragstolen: pointer to boolean
4538 *
4539 * Before queueing skb @from after @to, try to merge them
4540 * to reduce overall memory use and queue lengths, if cost is small.
4541 * Packets in ofo or receive queues can stay a long time.
4542 * Better try to coalesce them right now to avoid future collapses.
4543 * Returns true if caller should free @from instead of queueing it
4544 */
4545static bool tcp_try_coalesce(struct sock *sk,
4546 struct sk_buff *to,
4547 struct sk_buff *from,
4548 bool *fragstolen)
4549{
4550 int i, delta, len = from->len;
4551
4552 *fragstolen = false;
4553
4554 if (tcp_hdr(from)->fin || skb_cloned(to))
4555 return false;
4556
4557 if (len <= skb_tailroom(to)) {
4558 BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
4559 goto merge;
4560 }
4561
4562 if (skb_has_frag_list(to) || skb_has_frag_list(from))
4563 return false;
4564
4565 if (skb_headlen(from) != 0) {
4566 struct page *page;
4567 unsigned int offset;
4568
4569 if (skb_shinfo(to)->nr_frags +
4570 skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
4571 return false;
4572
4573 if (skb_head_is_locked(from))
4574 return false;
4575
4576 delta = from->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
4577
4578 page = virt_to_head_page(from->head);
4579 offset = from->data - (unsigned char *)page_address(page);
4580
4581 skb_fill_page_desc(to, skb_shinfo(to)->nr_frags,
4582 page, offset, skb_headlen(from));
4583 *fragstolen = true;
4584 } else {
4585 if (skb_shinfo(to)->nr_frags +
4586 skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS)
4587 return false;
4588
4589 delta = from->truesize -
4590 SKB_TRUESIZE(skb_end_pointer(from) - from->head);
4591 }
4592
4593 WARN_ON_ONCE(delta < len);
4594
4595 memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags,
4596 skb_shinfo(from)->frags,
4597 skb_shinfo(from)->nr_frags * sizeof(skb_frag_t));
4598 skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags;
4599
4600 if (!skb_cloned(from))
4601 skb_shinfo(from)->nr_frags = 0;
4602
4603 /* if the skb is cloned this does nothing since we set nr_frags to 0 */
4604 for (i = 0; i < skb_shinfo(from)->nr_frags; i++)
4605 skb_frag_ref(from, i);
4606
4607 to->truesize += delta;
4608 atomic_add(delta, &sk->sk_rmem_alloc);
4609 sk_mem_charge(sk, delta);
4610 to->len += len;
4611 to->data_len += len;
4612
4613merge:
4614 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
4615 TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
4616 TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
4617 return true;
4618}
4619
4620static void kfree_skb_partial(struct sk_buff *skb, bool head_stolen)
4621{
4622 if (head_stolen)
4623 kmem_cache_free(skbuff_head_cache, skb);
4624 else
4625 __kfree_skb(skb);
4626}
4627
4456static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) 4628static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4457{ 4629{
4458 struct tcp_sock *tp = tcp_sk(sk); 4630 struct tcp_sock *tp = tcp_sk(sk);
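tcp_try_coalesce() has two merge strategies: memcpy the payload into the previous skb's tailroom when it fits, otherwise graft the source's linear head (as a page fragment) and its frags onto the target, moving the truesize delta so socket memory accounting stays exact. The cheap first case, reduced to flat buffers:

/* The "fits in tailroom" fast path of the coalesce logic, reduced
 * to flat buffers. */
#include <string.h>

struct buf {
	unsigned char	data[2048];
	size_t		len;	/* bytes used; tailroom = sizeof(data) - len */
};

static int try_coalesce(struct buf *to, const void *from, size_t len)
{
	if (len > sizeof(to->data) - to->len)
		return 0;			/* caller queues separately */
	memcpy(to->data + to->len, from, len);	/* skb_put() + copy */
	to->len += len;
	return 1;				/* caller frees the source */
}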
@@ -4491,23 +4663,13 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4491 end_seq = TCP_SKB_CB(skb)->end_seq; 4663 end_seq = TCP_SKB_CB(skb)->end_seq;
4492 4664
4493 if (seq == TCP_SKB_CB(skb1)->end_seq) { 4665 if (seq == TCP_SKB_CB(skb1)->end_seq) {
4494 /* Packets in ofo can stay in queue a long time. 4666 bool fragstolen;
4495 * Better try to coalesce them right now 4667
4496 * to avoid future tcp_collapse_ofo_queue(), 4668 if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
4497 * probably the most expensive function in tcp stack.
4498 */
4499 if (skb->len <= skb_tailroom(skb1) && !tcp_hdr(skb)->fin) {
4500 NET_INC_STATS_BH(sock_net(sk),
4501 LINUX_MIB_TCPRCVCOALESCE);
4502 BUG_ON(skb_copy_bits(skb, 0,
4503 skb_put(skb1, skb->len),
4504 skb->len));
4505 TCP_SKB_CB(skb1)->end_seq = end_seq;
4506 TCP_SKB_CB(skb1)->ack_seq = TCP_SKB_CB(skb)->ack_seq;
4507 __kfree_skb(skb);
4508 skb = NULL;
4509 } else {
4510 __skb_queue_after(&tp->out_of_order_queue, skb1, skb); 4669 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4670 } else {
4671 kfree_skb_partial(skb, fragstolen);
4672 skb = NULL;
4511 } 4673 }
4512 4674
4513 if (!tp->rx_opt.num_sacks || 4675 if (!tp->rx_opt.num_sacks ||
@@ -4583,12 +4745,65 @@ end:
4583 skb_set_owner_r(skb, sk); 4745 skb_set_owner_r(skb, sk);
4584} 4746}
4585 4747
4748static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
4749 bool *fragstolen)
4750{
4751 int eaten;
4752 struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
4753
4754 __skb_pull(skb, hdrlen);
4755 eaten = (tail &&
4756 tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
4757 tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4758 if (!eaten) {
4759 __skb_queue_tail(&sk->sk_receive_queue, skb);
4760 skb_set_owner_r(skb, sk);
4761 }
4762 return eaten;
4763}
4764
4765int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
4766{
4767 struct sk_buff *skb;
4768 struct tcphdr *th;
4769 bool fragstolen;
4770
4771 if (tcp_try_rmem_schedule(sk, size + sizeof(*th)))
4772 goto err;
4773
4774 skb = alloc_skb(size + sizeof(*th), sk->sk_allocation);
4775 if (!skb)
4776 goto err;
4777
4778 th = (struct tcphdr *)skb_put(skb, sizeof(*th));
4779 skb_reset_transport_header(skb);
4780 memset(th, 0, sizeof(*th));
4781
4782 if (memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size))
4783 goto err_free;
4784
4785 TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
4786 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
4787 TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
4788
4789 if (tcp_queue_rcv(sk, skb, sizeof(*th), &fragstolen)) {
4790 WARN_ON_ONCE(fragstolen); /* should not happen */
4791 __kfree_skb(skb);
4792 }
4793 return size;
4794
4795err_free:
4796 kfree_skb(skb);
4797err:
4798 return -ENOMEM;
4799}
4586 4800
4587static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) 4801static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4588{ 4802{
4589 const struct tcphdr *th = tcp_hdr(skb); 4803 const struct tcphdr *th = tcp_hdr(skb);
4590 struct tcp_sock *tp = tcp_sk(sk); 4804 struct tcp_sock *tp = tcp_sk(sk);
4591 int eaten = -1; 4805 int eaten = -1;
4806 bool fragstolen = false;
4592 4807
4593 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) 4808 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
4594 goto drop; 4809 goto drop;
@@ -4633,8 +4848,7 @@ queue_and_out:
4633 tcp_try_rmem_schedule(sk, skb->truesize)) 4848 tcp_try_rmem_schedule(sk, skb->truesize))
4634 goto drop; 4849 goto drop;
4635 4850
4636 skb_set_owner_r(skb, sk); 4851 eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
4637 __skb_queue_tail(&sk->sk_receive_queue, skb);
4638 } 4852 }
4639 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 4853 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4640 if (skb->len) 4854 if (skb->len)
@@ -4658,7 +4872,7 @@ queue_and_out:
4658 tcp_fast_path_check(sk); 4872 tcp_fast_path_check(sk);
4659 4873
4660 if (eaten > 0) 4874 if (eaten > 0)
4661 __kfree_skb(skb); 4875 kfree_skb_partial(skb, fragstolen);
4662 else if (!sock_flag(sk, SOCK_DEAD)) 4876 else if (!sock_flag(sk, SOCK_DEAD))
4663 sk->sk_data_ready(sk, 0); 4877 sk->sk_data_ready(sk, 0);
4664 return; 4878 return;
@@ -5437,6 +5651,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5437 } else { 5651 } else {
5438 int eaten = 0; 5652 int eaten = 0;
5439 int copied_early = 0; 5653 int copied_early = 0;
5654 bool fragstolen = false;
5440 5655
5441 if (tp->copied_seq == tp->rcv_nxt && 5656 if (tp->copied_seq == tp->rcv_nxt &&
5442 len - tcp_header_len <= tp->ucopy.len) { 5657 len - tcp_header_len <= tp->ucopy.len) {
@@ -5494,10 +5709,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5494 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS); 5709 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS);
5495 5710
5496 /* Bulk data transfer: receiver */ 5711 /* Bulk data transfer: receiver */
5497 __skb_pull(skb, tcp_header_len); 5712 eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
5498 __skb_queue_tail(&sk->sk_receive_queue, skb); 5713 &fragstolen);
5499 skb_set_owner_r(skb, sk);
5500 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
5501 } 5714 }
5502 5715
5503 tcp_event_data_recv(sk, skb); 5716 tcp_event_data_recv(sk, skb);
@@ -5519,7 +5732,7 @@ no_ack:
5519 else 5732 else
5520#endif 5733#endif
5521 if (eaten) 5734 if (eaten)
5522 __kfree_skb(skb); 5735 kfree_skb_partial(skb, fragstolen);
5523 else 5736 else
5524 sk->sk_data_ready(sk, 0); 5737 sk->sk_data_ready(sk, 0);
5525 return 0; 5738 return 0;
@@ -5563,6 +5776,44 @@ discard:
5563} 5776}
5564EXPORT_SYMBOL(tcp_rcv_established); 5777EXPORT_SYMBOL(tcp_rcv_established);
5565 5778
5779void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
5780{
5781 struct tcp_sock *tp = tcp_sk(sk);
5782 struct inet_connection_sock *icsk = inet_csk(sk);
5783
5784 tcp_set_state(sk, TCP_ESTABLISHED);
5785
5786 if (skb != NULL)
5787 security_inet_conn_established(sk, skb);
5788
5789 /* Make sure socket is routed, for correct metrics. */
5790 icsk->icsk_af_ops->rebuild_header(sk);
5791
5792 tcp_init_metrics(sk);
5793
5794 tcp_init_congestion_control(sk);
5795
5796 /* Prevent spurious tcp_cwnd_restart() on first data
5797 * packet.
5798 */
5799 tp->lsndtime = tcp_time_stamp;
5800
5801 tcp_init_buffer_space(sk);
5802
5803 if (sock_flag(sk, SOCK_KEEPOPEN))
5804 inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
5805
5806 if (!tp->rx_opt.snd_wscale)
5807 __tcp_fast_path_on(tp, tp->snd_wnd);
5808 else
5809 tp->pred_flags = 0;
5810
5811 if (!sock_flag(sk, SOCK_DEAD)) {
5812 sk->sk_state_change(sk);
5813 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
5814 }
5815}
5816
5566static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, 5817static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5567 const struct tcphdr *th, unsigned int len) 5818 const struct tcphdr *th, unsigned int len)
5568{ 5819{
@@ -5695,36 +5946,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5695 } 5946 }
5696 5947
5697 smp_mb(); 5948 smp_mb();
5698 tcp_set_state(sk, TCP_ESTABLISHED);
5699 5949
5700 security_inet_conn_established(sk, skb); 5950 tcp_finish_connect(sk, skb);
5701
5702 /* Make sure socket is routed, for correct metrics. */
5703 icsk->icsk_af_ops->rebuild_header(sk);
5704
5705 tcp_init_metrics(sk);
5706
5707 tcp_init_congestion_control(sk);
5708
5709 /* Prevent spurious tcp_cwnd_restart() on first data
5710 * packet.
5711 */
5712 tp->lsndtime = tcp_time_stamp;
5713
5714 tcp_init_buffer_space(sk);
5715
5716 if (sock_flag(sk, SOCK_KEEPOPEN))
5717 inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
5718
5719 if (!tp->rx_opt.snd_wscale)
5720 __tcp_fast_path_on(tp, tp->snd_wnd);
5721 else
5722 tp->pred_flags = 0;
5723
5724 if (!sock_flag(sk, SOCK_DEAD)) {
5725 sk->sk_state_change(sk);
5726 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
5727 }
5728 5951
5729 if (sk->sk_write_pending || 5952 if (sk->sk_write_pending ||
5730 icsk->icsk_accept_queue.rskq_defer_accept || 5953 icsk->icsk_accept_queue.rskq_defer_accept ||
@@ -5738,8 +5961,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5738 */ 5961 */
5739 inet_csk_schedule_ack(sk); 5962 inet_csk_schedule_ack(sk);
5740 icsk->icsk_ack.lrcvtime = tcp_time_stamp; 5963 icsk->icsk_ack.lrcvtime = tcp_time_stamp;
5741 icsk->icsk_ack.ato = TCP_ATO_MIN;
5742 tcp_incr_quickack(sk);
5743 tcp_enter_quickack_mode(sk); 5964 tcp_enter_quickack_mode(sk);
5744 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5965 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
5745 TCP_DELACK_MAX, TCP_RTO_MAX); 5966 TCP_DELACK_MAX, TCP_RTO_MAX);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0cb86ceb652f..2e76ffb66d7c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -138,6 +138,14 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
138} 138}
139EXPORT_SYMBOL_GPL(tcp_twsk_unique); 139EXPORT_SYMBOL_GPL(tcp_twsk_unique);
140 140
141static int tcp_repair_connect(struct sock *sk)
142{
143 tcp_connect_init(sk);
144 tcp_finish_connect(sk, NULL);
145
146 return 0;
147}
148
141/* This will initiate an outgoing connection. */ 149/* This will initiate an outgoing connection. */
142int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 150int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
143{ 151{
@@ -196,7 +204,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
196 /* Reset inherited state */ 204 /* Reset inherited state */
197 tp->rx_opt.ts_recent = 0; 205 tp->rx_opt.ts_recent = 0;
198 tp->rx_opt.ts_recent_stamp = 0; 206 tp->rx_opt.ts_recent_stamp = 0;
199 tp->write_seq = 0; 207 if (likely(!tp->repair))
208 tp->write_seq = 0;
200 } 209 }
201 210
202 if (tcp_death_row.sysctl_tw_recycle && 211 if (tcp_death_row.sysctl_tw_recycle &&
@@ -247,7 +256,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
247 sk->sk_gso_type = SKB_GSO_TCPV4; 256 sk->sk_gso_type = SKB_GSO_TCPV4;
248 sk_setup_caps(sk, &rt->dst); 257 sk_setup_caps(sk, &rt->dst);
249 258
250 if (!tp->write_seq) 259 if (!tp->write_seq && likely(!tp->repair))
251 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, 260 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
252 inet->inet_daddr, 261 inet->inet_daddr,
253 inet->inet_sport, 262 inet->inet_sport,
@@ -255,7 +264,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
255 264
256 inet->inet_id = tp->write_seq ^ jiffies; 265 inet->inet_id = tp->write_seq ^ jiffies;
257 266
258 err = tcp_connect(sk); 267 if (likely(!tp->repair))
268 err = tcp_connect(sk);
269 else
270 err = tcp_repair_connect(sk);
271
259 rt = NULL; 272 rt = NULL;
260 if (err) 273 if (err)
261 goto failure; 274 goto failure;
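With repair set, connect() skips the handshake entirely: tcp_repair_connect() runs tcp_connect_init() and then tcp_finish_connect(sk, NULL) (the helper factored out in tcp_input.c above), so the call returns immediately with the socket in ESTABLISHED, using whatever sequence numbers were seeded while it was still closed. Composing the earlier sketches gives the full restore order; struct dump is a hypothetical checkpoint image:

/* Sketch: end-to-end restore of one connection, composing the
 * restore_seqs()/restore_options()/restore_queue() sketches above. */
#include <netinet/in.h>

struct dump {				/* hypothetical checkpoint image */
	uint32_t	snd_nxt, rcv_nxt, mss, wscale;
	const void	*sndq, *rcvq;
	size_t		sndq_len, rcvq_len;
};

static int restore_connection(int sk, const struct sockaddr_in *peer,
			      const struct dump *d)
{
	int on = 1, off = 0;

	if (setsockopt(sk, IPPROTO_TCP, TCP_REPAIR, &on, sizeof(on)) ||
	    restore_seqs(sk, d->snd_nxt, d->rcv_nxt))
		return -1;
	if (connect(sk, (const struct sockaddr *)peer, sizeof(*peer)))
		return -1;		/* no SYN; returns in ESTABLISHED */
	if (restore_options(sk, d->mss, d->wscale) ||
	    restore_queue(sk, TCP_SEND_QUEUE, d->sndq, d->sndq_len) ||
	    restore_queue(sk, TCP_RECV_QUEUE, d->rcvq, d->rcvq_len))
		return -1;
	return setsockopt(sk, IPPROTO_TCP, TCP_REPAIR, &off, sizeof(off));
}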
@@ -1226,12 +1239,11 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1226 NULL, NULL, skb); 1239 NULL, NULL, skb);
1227 1240
1228 if (genhash || memcmp(hash_location, newhash, 16) != 0) { 1241 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1229 if (net_ratelimit()) { 1242 net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1230 pr_info("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", 1243 &iph->saddr, ntohs(th->source),
1231 &iph->saddr, ntohs(th->source), 1244 &iph->daddr, ntohs(th->dest),
1232 &iph->daddr, ntohs(th->dest), 1245 genhash ? " tcp_v4_calc_md5_hash failed"
1233 genhash ? " tcp_v4_calc_md5_hash failed" : ""); 1246 : "");
1234 }
1235 return 1; 1247 return 1;
1236 } 1248 }
1237 return 0; 1249 return 0;
@@ -1355,7 +1367,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1355 goto drop_and_free; 1367 goto drop_and_free;
1356 1368
1357 if (!want_cookie || tmp_opt.tstamp_ok) 1369 if (!want_cookie || tmp_opt.tstamp_ok)
1358 TCP_ECN_create_request(req, tcp_hdr(skb)); 1370 TCP_ECN_create_request(req, skb);
1359 1371
1360 if (want_cookie) { 1372 if (want_cookie) {
1361 isn = cookie_v4_init_sequence(sk, skb, &req->mss); 1373 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
@@ -1739,7 +1751,8 @@ process:
1739 if (!tcp_prequeue(sk, skb)) 1751 if (!tcp_prequeue(sk, skb))
1740 ret = tcp_v4_do_rcv(sk, skb); 1752 ret = tcp_v4_do_rcv(sk, skb);
1741 } 1753 }
1742 } else if (unlikely(sk_add_backlog(sk, skb))) { 1754 } else if (unlikely(sk_add_backlog(sk, skb,
1755 sk->sk_rcvbuf + sk->sk_sndbuf))) {
1743 bh_unlock_sock(sk); 1756 bh_unlock_sock(sk);
1744 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); 1757 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1745 goto discard_and_relse; 1758 goto discard_and_relse;
@@ -1875,64 +1888,15 @@ static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1875static int tcp_v4_init_sock(struct sock *sk) 1888static int tcp_v4_init_sock(struct sock *sk)
1876{ 1889{
1877 struct inet_connection_sock *icsk = inet_csk(sk); 1890 struct inet_connection_sock *icsk = inet_csk(sk);
1878 struct tcp_sock *tp = tcp_sk(sk);
1879
1880 skb_queue_head_init(&tp->out_of_order_queue);
1881 tcp_init_xmit_timers(sk);
1882 tcp_prequeue_init(tp);
1883
1884 icsk->icsk_rto = TCP_TIMEOUT_INIT;
1885 tp->mdev = TCP_TIMEOUT_INIT;
1886 1891
1887 /* So many TCP implementations out there (incorrectly) count the 1892 tcp_init_sock(sk);
1888 * initial SYN frame in their delayed-ACK and congestion control
1889 * algorithms that we must have the following bandaid to talk
1890 * efficiently to them. -DaveM
1891 */
1892 tp->snd_cwnd = TCP_INIT_CWND;
1893
1894 /* See draft-stevens-tcpca-spec-01 for discussion of the
1895 * initialization of these values.
1896 */
1897 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
1898 tp->snd_cwnd_clamp = ~0;
1899 tp->mss_cache = TCP_MSS_DEFAULT;
1900
1901 tp->reordering = sysctl_tcp_reordering;
1902 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1903
1904 sk->sk_state = TCP_CLOSE;
1905
1906 sk->sk_write_space = sk_stream_write_space;
1907 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1908 1893
1909 icsk->icsk_af_ops = &ipv4_specific; 1894 icsk->icsk_af_ops = &ipv4_specific;
1910 icsk->icsk_sync_mss = tcp_sync_mss; 1895
1911#ifdef CONFIG_TCP_MD5SIG 1896#ifdef CONFIG_TCP_MD5SIG
1912 tp->af_specific = &tcp_sock_ipv4_specific; 1897 tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
1913#endif 1898#endif
1914 1899
1915 /* TCP Cookie Transactions */
1916 if (sysctl_tcp_cookie_size > 0) {
1917 /* Default, cookies without s_data_payload. */
1918 tp->cookie_values =
1919 kzalloc(sizeof(*tp->cookie_values),
1920 sk->sk_allocation);
1921 if (tp->cookie_values != NULL)
1922 kref_init(&tp->cookie_values->kref);
1923 }
1924 /* Presumed zeroed, in order of appearance:
1925 * cookie_in_always, cookie_out_never,
1926 * s_data_constant, s_data_in, s_data_out
1927 */
1928 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1929 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1930
1931 local_bh_disable();
1932 sock_update_memcg(sk);
1933 sk_sockets_allocated_inc(sk);
1934 local_bh_enable();
1935
1936 return 0; 1900 return 0;
1937} 1901}
1938 1902
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 3cabafb5cdd1..6f6a91832826 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -482,6 +482,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
482 newtp->sacked_out = 0; 482 newtp->sacked_out = 0;
483 newtp->fackets_out = 0; 483 newtp->fackets_out = 0;
484 newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; 484 newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
485 tcp_enable_early_retrans(newtp);
485 486
486 /* So many TCP implementations out there (incorrectly) count the 487 /* So many TCP implementations out there (incorrectly) count the
487 * initial SYN frame in their delayed-ACK and congestion control 488 * initial SYN frame in their delayed-ACK and congestion control
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7ac6423117ad..1a630825c45b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -34,6 +34,8 @@
34 * 34 *
35 */ 35 */
36 36
37#define pr_fmt(fmt) "TCP: " fmt
38
37#include <net/tcp.h> 39#include <net/tcp.h>
38 40
39#include <linux/compiler.h> 41#include <linux/compiler.h>
@@ -78,9 +80,8 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
78 tp->frto_counter = 3; 80 tp->frto_counter = 3;
79 81
80 tp->packets_out += tcp_skb_pcount(skb); 82 tp->packets_out += tcp_skb_pcount(skb);
81 if (!prior_packets) 83 if (!prior_packets || tp->early_retrans_delayed)
82 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 84 tcp_rearm_rto(sk);
83 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
84} 85}
85 86
86/* SND.NXT, if window was not shrunk. 87/* SND.NXT, if window was not shrunk.
@@ -563,13 +564,13 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
563/* Compute TCP options for SYN packets. This is not the final 564/* Compute TCP options for SYN packets. This is not the final
564 * network wire format yet. 565 * network wire format yet.
565 */ 566 */
566static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, 567static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
567 struct tcp_out_options *opts, 568 struct tcp_out_options *opts,
568 struct tcp_md5sig_key **md5) 569 struct tcp_md5sig_key **md5)
569{ 570{
570 struct tcp_sock *tp = tcp_sk(sk); 571 struct tcp_sock *tp = tcp_sk(sk);
571 struct tcp_cookie_values *cvp = tp->cookie_values; 572 struct tcp_cookie_values *cvp = tp->cookie_values;
572 unsigned remaining = MAX_TCP_OPTION_SPACE; 573 unsigned int remaining = MAX_TCP_OPTION_SPACE;
573 u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? 574 u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ?
574 tcp_cookie_size_check(cvp->cookie_desired) : 575 tcp_cookie_size_check(cvp->cookie_desired) :
575 0; 576 0;
@@ -663,15 +664,15 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
663} 664}
664 665
665/* Set up TCP options for SYN-ACKs. */ 666/* Set up TCP options for SYN-ACKs. */
666static unsigned tcp_synack_options(struct sock *sk, 667static unsigned int tcp_synack_options(struct sock *sk,
667 struct request_sock *req, 668 struct request_sock *req,
668 unsigned mss, struct sk_buff *skb, 669 unsigned int mss, struct sk_buff *skb,
669 struct tcp_out_options *opts, 670 struct tcp_out_options *opts,
670 struct tcp_md5sig_key **md5, 671 struct tcp_md5sig_key **md5,
671 struct tcp_extend_values *xvp) 672 struct tcp_extend_values *xvp)
672{ 673{
673 struct inet_request_sock *ireq = inet_rsk(req); 674 struct inet_request_sock *ireq = inet_rsk(req);
674 unsigned remaining = MAX_TCP_OPTION_SPACE; 675 unsigned int remaining = MAX_TCP_OPTION_SPACE;
675 u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? 676 u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ?
676 xvp->cookie_plus : 677 xvp->cookie_plus :
677 0; 678 0;
@@ -742,13 +743,13 @@ static unsigned tcp_synack_options(struct sock *sk,
742/* Compute TCP options for ESTABLISHED sockets. This is not the 743/* Compute TCP options for ESTABLISHED sockets. This is not the
743 * final wire format yet. 744 * final wire format yet.
744 */ 745 */
745static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, 746static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb,
746 struct tcp_out_options *opts, 747 struct tcp_out_options *opts,
747 struct tcp_md5sig_key **md5) 748 struct tcp_md5sig_key **md5)
748{ 749{
749 struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; 750 struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
750 struct tcp_sock *tp = tcp_sk(sk); 751 struct tcp_sock *tp = tcp_sk(sk);
751 unsigned size = 0; 752 unsigned int size = 0;
752 unsigned int eff_sacks; 753 unsigned int eff_sacks;
753 754
754#ifdef CONFIG_TCP_MD5SIG 755#ifdef CONFIG_TCP_MD5SIG
@@ -770,9 +771,9 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
770 771
771 eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack; 772 eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
772 if (unlikely(eff_sacks)) { 773 if (unlikely(eff_sacks)) {
773 const unsigned remaining = MAX_TCP_OPTION_SPACE - size; 774 const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
774 opts->num_sack_blocks = 775 opts->num_sack_blocks =
775 min_t(unsigned, eff_sacks, 776 min_t(unsigned int, eff_sacks,
776 (remaining - TCPOLEN_SACK_BASE_ALIGNED) / 777 (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
777 TCPOLEN_SACK_PERBLOCK); 778 TCPOLEN_SACK_PERBLOCK);
778 size += TCPOLEN_SACK_BASE_ALIGNED + 779 size += TCPOLEN_SACK_BASE_ALIGNED +
@@ -801,7 +802,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
801 struct tcp_sock *tp; 802 struct tcp_sock *tp;
802 struct tcp_skb_cb *tcb; 803 struct tcp_skb_cb *tcb;
803 struct tcp_out_options opts; 804 struct tcp_out_options opts;
804 unsigned tcp_options_size, tcp_header_size; 805 unsigned int tcp_options_size, tcp_header_size;
805 struct tcp_md5sig_key *md5; 806 struct tcp_md5sig_key *md5;
806 struct tcphdr *th; 807 struct tcphdr *th;
807 int err; 808 int err;
@@ -1150,7 +1151,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
1150} 1151}
1151 1152
1152/* Calculate MSS. Not accounting for SACKs here. */ 1153/* Calculate MSS. Not accounting for SACKs here. */
1153int tcp_mtu_to_mss(const struct sock *sk, int pmtu) 1154int tcp_mtu_to_mss(struct sock *sk, int pmtu)
1154{ 1155{
1155 const struct tcp_sock *tp = tcp_sk(sk); 1156 const struct tcp_sock *tp = tcp_sk(sk);
1156 const struct inet_connection_sock *icsk = inet_csk(sk); 1157 const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -1161,6 +1162,14 @@ int tcp_mtu_to_mss(const struct sock *sk, int pmtu)
1161 */ 1162 */
1162 mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr); 1163 mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr);
1163 1164
1165 /* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */
1166 if (icsk->icsk_af_ops->net_frag_header_len) {
1167 const struct dst_entry *dst = __sk_dst_get(sk);
1168
1169 if (dst && dst_allfrag(dst))
1170 mss_now -= icsk->icsk_af_ops->net_frag_header_len;
1171 }
1172
1164 /* Clamp it (mss_clamp does not include tcp options) */ 1173 /* Clamp it (mss_clamp does not include tcp options) */
1165 if (mss_now > tp->rx_opt.mss_clamp) 1174 if (mss_now > tp->rx_opt.mss_clamp)
1166 mss_now = tp->rx_opt.mss_clamp; 1175 mss_now = tp->rx_opt.mss_clamp;
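When the route has RTAX_FEATURE_ALLFRAG set, every IPv6 packet carries an 8-byte fragment header, so the usable MSS shrinks by net_frag_header_len and tcp_mss_to_mtu() (next hunk) grows by the same amount, keeping the two functions inverse of each other. The arithmetic with concrete numbers:

/* MSS/MTU accounting when an IPv6 fragment header is always present. */
#include <stdio.h>

#define IPV6_HDRLEN	40
#define TCP_HDRLEN	20
#define FRAG_HDRLEN	8	/* net_frag_header_len for IPv6 */

int main(void)
{
	int pmtu = 1500;
	int mss  = pmtu - IPV6_HDRLEN - TCP_HDRLEN - FRAG_HDRLEN;	/* 1432 */
	int mtu  = mss + TCP_HDRLEN + IPV6_HDRLEN + FRAG_HDRLEN;	/* 1500 */

	printf("mss=%d mtu=%d\n", mss, mtu);
	return 0;
}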
@@ -1179,7 +1188,7 @@ int tcp_mtu_to_mss(const struct sock *sk, int pmtu)
1179} 1188}
1180 1189
1181/* Inverse of above */ 1190/* Inverse of above */
1182int tcp_mss_to_mtu(const struct sock *sk, int mss) 1191int tcp_mss_to_mtu(struct sock *sk, int mss)
1183{ 1192{
1184 const struct tcp_sock *tp = tcp_sk(sk); 1193 const struct tcp_sock *tp = tcp_sk(sk);
1185 const struct inet_connection_sock *icsk = inet_csk(sk); 1194 const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -1190,6 +1199,13 @@ int tcp_mss_to_mtu(const struct sock *sk, int mss)
1190 icsk->icsk_ext_hdr_len + 1199 icsk->icsk_ext_hdr_len +
1191 icsk->icsk_af_ops->net_header_len; 1200 icsk->icsk_af_ops->net_header_len;
1192 1201
1202 /* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */
1203 if (icsk->icsk_af_ops->net_frag_header_len) {
1204 const struct dst_entry *dst = __sk_dst_get(sk);
1205
1206 if (dst && dst_allfrag(dst))
1207 mtu += icsk->icsk_af_ops->net_frag_header_len;
1208 }
1193 return mtu; 1209 return mtu;
1194} 1210}
1195 1211
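
Continuing the sketch above: the same 8 bytes are added back here so the two helpers stay exact inverses, which the MTU-probing code relies on.

	static int mss_to_mtu(int mss, bool allfrag)
	{
		int mtu = mss + 40 + 20;

		if (allfrag)
			mtu += 8;
		return mtu;
	}

	/* mss_to_mtu(mtu_to_mss(1500, true), true) == 1500 */
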
@@ -1259,7 +1275,7 @@ unsigned int tcp_current_mss(struct sock *sk)
1259 const struct tcp_sock *tp = tcp_sk(sk); 1275 const struct tcp_sock *tp = tcp_sk(sk);
1260 const struct dst_entry *dst = __sk_dst_get(sk); 1276 const struct dst_entry *dst = __sk_dst_get(sk);
1261 u32 mss_now; 1277 u32 mss_now;
1262 unsigned header_len; 1278 unsigned int header_len;
1263 struct tcp_out_options opts; 1279 struct tcp_out_options opts;
1264 struct tcp_md5sig_key *md5; 1280 struct tcp_md5sig_key *md5;
1265 1281
@@ -1390,7 +1406,7 @@ static inline int tcp_minshall_check(const struct tcp_sock *tp)
1390 */ 1406 */
1391static inline int tcp_nagle_check(const struct tcp_sock *tp, 1407static inline int tcp_nagle_check(const struct tcp_sock *tp,
1392 const struct sk_buff *skb, 1408 const struct sk_buff *skb,
1393 unsigned mss_now, int nonagle) 1409 unsigned int mss_now, int nonagle)
1394{ 1410{
1395 return skb->len < mss_now && 1411 return skb->len < mss_now &&
1396 ((nonagle & TCP_NAGLE_CORK) || 1412 ((nonagle & TCP_NAGLE_CORK) ||
@@ -2167,8 +2183,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2167 2183
2168#if FASTRETRANS_DEBUG > 0 2184#if FASTRETRANS_DEBUG > 0
2169 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { 2185 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2170 if (net_ratelimit()) 2186 net_dbg_ratelimited("retrans_out leaked\n");
2171 printk(KERN_DEBUG "retrans_out leaked.\n");
2172 } 2187 }
2173#endif 2188#endif
2174 if (!tp->retrans_out) 2189 if (!tp->retrans_out)
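
net_dbg_ratelimited() folds the ratelimit test and the KERN_DEBUG printk into one call; roughly (a sketch of the helper's expansion, not a verbatim copy of include/linux/net.h):

	#define net_dbg_ratelimited(fmt, ...)			\
		do {						\
			if (net_ratelimit())			\
				pr_debug(fmt, ##__VA_ARGS__);	\
		} while (0)

Since pr_debug() compiles out entirely unless DEBUG or dynamic debug is enabled, the converted sites can end up cheaper than the old explicit printk.
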
@@ -2402,7 +2417,7 @@ int tcp_send_synack(struct sock *sk)
2402 2417
2403 skb = tcp_write_queue_head(sk); 2418 skb = tcp_write_queue_head(sk);
2404 if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { 2419 if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
2405 printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); 2420 pr_debug("%s: wrong queue state\n", __func__);
2406 return -EFAULT; 2421 return -EFAULT;
2407 } 2422 }
2408 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) { 2423 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
@@ -2562,7 +2577,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2562EXPORT_SYMBOL(tcp_make_synack); 2577EXPORT_SYMBOL(tcp_make_synack);
2563 2578
2564/* Do all connect socket setups that can be done AF independent. */ 2579/* Do all connect socket setups that can be done AF independent. */
2565static void tcp_connect_init(struct sock *sk) 2580void tcp_connect_init(struct sock *sk)
2566{ 2581{
2567 const struct dst_entry *dst = __sk_dst_get(sk); 2582 const struct dst_entry *dst = __sk_dst_get(sk);
2568 struct tcp_sock *tp = tcp_sk(sk); 2583 struct tcp_sock *tp = tcp_sk(sk);
@@ -2617,9 +2632,12 @@ static void tcp_connect_init(struct sock *sk)
2617 tp->snd_una = tp->write_seq; 2632 tp->snd_una = tp->write_seq;
2618 tp->snd_sml = tp->write_seq; 2633 tp->snd_sml = tp->write_seq;
2619 tp->snd_up = tp->write_seq; 2634 tp->snd_up = tp->write_seq;
2620 tp->rcv_nxt = 0; 2635 tp->snd_nxt = tp->write_seq;
2621 tp->rcv_wup = 0; 2636
2622 tp->copied_seq = 0; 2637 if (likely(!tp->repair))
2638 tp->rcv_nxt = 0;
2639 tp->rcv_wup = tp->rcv_nxt;
2640 tp->copied_seq = tp->rcv_nxt;
2623 2641
2624 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; 2642 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
2625 inet_csk(sk)->icsk_retransmits = 0; 2643 inet_csk(sk)->icsk_retransmits = 0;
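
With tp->repair set, the receive-side state is no longer forced to zero, so values loaded beforehand survive connection setup. A hedged user-space sketch of how the repair path is driven, assuming the TCP_REPAIR/TCP_REPAIR_QUEUE/TCP_QUEUE_SEQ socket options from the same patch series (constants guarded in case the installed headers predate them):

	#include <netinet/in.h>
	#include <netinet/tcp.h>
	#include <sys/socket.h>

	#ifndef TCP_REPAIR
	#define TCP_REPAIR		19
	#define TCP_REPAIR_QUEUE	20
	#define TCP_QUEUE_SEQ		21
	#define TCP_RECV_QUEUE		1
	#endif

	static int restore_rcv_seq(int fd, unsigned int rcv_nxt)
	{
		int on = 1, q = TCP_RECV_QUEUE;

		if (setsockopt(fd, IPPROTO_TCP, TCP_REPAIR, &on, sizeof(on)))
			return -1;
		if (setsockopt(fd, IPPROTO_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q)))
			return -1;
		/* tcp_connect_init() now leaves this value alone */
		return setsockopt(fd, IPPROTO_TCP, TCP_QUEUE_SEQ,
				  &rcv_nxt, sizeof(rcv_nxt));
	}
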
@@ -2642,7 +2660,6 @@ int tcp_connect(struct sock *sk)
2642 /* Reserve space for headers. */ 2660 /* Reserve space for headers. */
2643 skb_reserve(buff, MAX_TCP_HEADER); 2661 skb_reserve(buff, MAX_TCP_HEADER);
2644 2662
2645 tp->snd_nxt = tp->write_seq;
2646 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); 2663 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
2647 TCP_ECN_send_syn(sk, buff); 2664 TCP_ECN_send_syn(sk, buff);
2648 2665
@@ -2791,6 +2808,15 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
2791 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); 2808 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
2792} 2809}
2793 2810
2811void tcp_send_window_probe(struct sock *sk)
2812{
2813 if (sk->sk_state == TCP_ESTABLISHED) {
2814 tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
2815 tcp_sk(sk)->snd_nxt = tcp_sk(sk)->write_seq;
2816 tcp_xmit_probe_skb(sk, 0);
2817 }
2818}
2819
2794/* Initiate keepalive or window probe from timer. */ 2820/* Initiate keepalive or window probe from timer. */
2795int tcp_write_wakeup(struct sock *sk) 2821int tcp_write_wakeup(struct sock *sk)
2796{ 2822{
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index a981cdc0a6e9..4526fe68e60e 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -91,7 +91,7 @@ static inline int tcp_probe_avail(void)
91 * Note: arguments must match tcp_rcv_established()! 91 * Note: arguments must match tcp_rcv_established()!
92 */ 92 */
93static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, 93static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
94 struct tcphdr *th, unsigned len) 94 struct tcphdr *th, unsigned int len)
95{ 95{
96 const struct tcp_sock *tp = tcp_sk(sk); 96 const struct tcp_sock *tp = tcp_sk(sk);
97 const struct inet_sock *inet = inet_sk(sk); 97 const struct inet_sock *inet = inet_sk(sk);
@@ -138,7 +138,7 @@ static struct jprobe tcp_jprobe = {
138 .entry = jtcp_rcv_established, 138 .entry = jtcp_rcv_established,
139}; 139};
140 140
141static int tcpprobe_open(struct inode * inode, struct file * file) 141static int tcpprobe_open(struct inode *inode, struct file *file)
142{ 142{
143 /* Reset (empty) log */ 143 /* Reset (empty) log */
144 spin_lock_bh(&tcp_probe.lock); 144 spin_lock_bh(&tcp_probe.lock);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 34d4a02c2f16..e911e6c523ec 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -319,6 +319,11 @@ void tcp_retransmit_timer(struct sock *sk)
319 struct tcp_sock *tp = tcp_sk(sk); 319 struct tcp_sock *tp = tcp_sk(sk);
320 struct inet_connection_sock *icsk = inet_csk(sk); 320 struct inet_connection_sock *icsk = inet_csk(sk);
321 321
322 if (tp->early_retrans_delayed) {
323 tcp_resume_early_retransmit(sk);
324 return;
325 }
326
322 if (!tp->packets_out) 327 if (!tp->packets_out)
323 goto out; 328 goto out;
324 329
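
If the early-retransmit timer (added elsewhere in this series, per RFC 5827) is what fired, the handler defers to tcp_resume_early_retransmit() instead of running the normal RTO path. The deferral interval is roughly a quarter of the smoothed RTT; a sketch of that arithmetic, assuming srtt is kept in the usual 8*RTT fixed-point form (the exact lower bound may differ in this tree):

	#include <stdio.h>

	int main(void)
	{
		unsigned int srtt = 40 * 8;	/* 40 ms RTT, stored as 8*RTT */
		unsigned int delay = srtt >> 5;	/* srtt/32 == RTT/4 == 10 ms */

		if (delay < 2)			/* assumed floor */
			delay = 2;
		printf("early-retransmit delay: %u ms\n", delay);
		return 0;
	}
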
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index fe141052a1be..279fd0846302 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -107,6 +107,7 @@
107#include <net/checksum.h> 107#include <net/checksum.h>
108#include <net/xfrm.h> 108#include <net/xfrm.h>
109#include <trace/events/udp.h> 109#include <trace/events/udp.h>
110#include <linux/static_key.h>
110#include "udp_impl.h" 111#include "udp_impl.h"
111 112
112struct udp_table udp_table __read_mostly; 113struct udp_table udp_table __read_mostly;
@@ -206,7 +207,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
206 207
207 if (!snum) { 208 if (!snum) {
208 int low, high, remaining; 209 int low, high, remaining;
209 unsigned rand; 210 unsigned int rand;
210 unsigned short first, last; 211 unsigned short first, last;
211 DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); 212 DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN);
212 213
@@ -846,7 +847,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
846 * Get and verify the address. 847 * Get and verify the address.
847 */ 848 */
848 if (msg->msg_name) { 849 if (msg->msg_name) {
849 struct sockaddr_in * usin = (struct sockaddr_in *)msg->msg_name; 850 struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name;
850 if (msg->msg_namelen < sizeof(*usin)) 851 if (msg->msg_namelen < sizeof(*usin))
851 return -EINVAL; 852 return -EINVAL;
852 if (usin->sin_family != AF_INET) { 853 if (usin->sin_family != AF_INET) {
@@ -1379,6 +1380,14 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1379 1380
1380} 1381}
1381 1382
1383static struct static_key udp_encap_needed __read_mostly;
1384void udp_encap_enable(void)
1385{
1386 if (!static_key_enabled(&udp_encap_needed))
1387 static_key_slow_inc(&udp_encap_needed);
1388}
1389EXPORT_SYMBOL(udp_encap_enable);
1390
1382/* returns: 1391/* returns:
1383 * -1: error 1392 * -1: error
1384 * 0: success 1393 * 0: success
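
The static key keeps the encapsulation test out of the common UDP receive path: static_key_false() compiles to a patchable no-op branch that static_key_slow_inc() flips at runtime, so sockets that never use encapsulation pay nothing. The general shape of the pattern (a sketch; include/linux/static_key.h has the real API):

	#include <linux/static_key.h>

	static struct static_key some_feature __read_mostly; /* false by default */

	void some_feature_enable(void)
	{
		if (!static_key_enabled(&some_feature))
			static_key_slow_inc(&some_feature);
	}

	void hot_path(void)
	{
		if (static_key_false(&some_feature)) {
			/* rarely-taken slow path */
		}
	}
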
@@ -1400,7 +1409,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1400 goto drop; 1409 goto drop;
1401 nf_reset(skb); 1410 nf_reset(skb);
1402 1411
1403 if (up->encap_type) { 1412 if (static_key_false(&udp_encap_needed) && up->encap_type) {
1404 int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); 1413 int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
1405 1414
1406 /* 1415 /*
@@ -1470,7 +1479,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1470 goto drop; 1479 goto drop;
1471 1480
1472 1481
1473 if (sk_rcvqueues_full(sk, skb)) 1482 if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf))
1474 goto drop; 1483 goto drop;
1475 1484
1476 rc = 0; 1485 rc = 0;
@@ -1479,7 +1488,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1479 bh_lock_sock(sk); 1488 bh_lock_sock(sk);
1480 if (!sock_owned_by_user(sk)) 1489 if (!sock_owned_by_user(sk))
1481 rc = __udp_queue_rcv_skb(sk, skb); 1490 rc = __udp_queue_rcv_skb(sk, skb);
1482 else if (sk_add_backlog(sk, skb)) { 1491 else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
1483 bh_unlock_sock(sk); 1492 bh_unlock_sock(sk);
1484 goto drop; 1493 goto drop;
1485 } 1494 }
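
Both helpers now take the limit as an explicit argument (sk->sk_rcvbuf here) rather than assuming it, letting each protocol choose its own bound. The check itself is simple accounting; a sketch of its shape (from memory, not a verbatim copy of include/net/sock.h):

	/* receive queue plus backlog must not exceed the caller's limit */
	static int rcvqueues_full(unsigned int backlog_len,
				  unsigned int rmem_alloc, unsigned int limit)
	{
		return backlog_len + rmem_alloc > limit;
	}
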
@@ -1760,6 +1769,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
1760 /* FALLTHROUGH */ 1769 /* FALLTHROUGH */
1761 case UDP_ENCAP_L2TPINUDP: 1770 case UDP_ENCAP_L2TPINUDP:
1762 up->encap_type = val; 1771 up->encap_type = val;
1772 udp_encap_enable();
1763 break; 1773 break;
1764 default: 1774 default:
1765 err = -ENOPROTOOPT; 1775 err = -ENOPROTOOPT;
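
From user space this path is reached through setsockopt(), which now also enables the static key the first time any socket requests encapsulation. A minimal sketch for an L2TP-in-UDP socket, with the option constants guarded in case the installed headers lack them:

	#include <netinet/in.h>
	#include <sys/socket.h>

	#ifndef UDP_ENCAP
	#define UDP_ENCAP		100
	#define UDP_ENCAP_L2TPINUDP	3
	#endif

	int enable_l2tp_encap(int fd)
	{
		int val = UDP_ENCAP_L2TPINUDP;

		/* reaches udp_lib_setsockopt() -> udp_encap_enable() */
		return setsockopt(fd, IPPROTO_UDP, UDP_ENCAP, &val, sizeof(val));
	}
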
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index aaad650d47d9..5a681e298b90 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -25,7 +25,7 @@ extern int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
25 size_t len, int noblock, int flags, int *addr_len); 25 size_t len, int noblock, int flags, int *addr_len);
26extern int udp_sendpage(struct sock *sk, struct page *page, int offset, 26extern int udp_sendpage(struct sock *sk, struct page *page, int offset,
27 size_t size, int flags); 27 size_t size, int flags);
28extern int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb); 28extern int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
29extern void udp_destroy_sock(struct sock *sk); 29extern void udp_destroy_sock(struct sock *sk);
30 30
31#ifdef CONFIG_PROC_FS 31#ifdef CONFIG_PROC_FS
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index a0b4c5da8d43..0d3426cb5c4f 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -152,7 +152,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
152 152
153 case IPPROTO_AH: 153 case IPPROTO_AH:
154 if (pskb_may_pull(skb, xprth + 8 - skb->data)) { 154 if (pskb_may_pull(skb, xprth + 8 - skb->data)) {
155 __be32 *ah_hdr = (__be32*)xprth; 155 __be32 *ah_hdr = (__be32 *)xprth;
156 156
157 fl4->fl4_ipsec_spi = ah_hdr[1]; 157 fl4->fl4_ipsec_spi = ah_hdr[1];
158 } 158 }
@@ -298,8 +298,8 @@ void __init xfrm4_init(int rt_max_size)
298 xfrm4_state_init(); 298 xfrm4_state_init();
299 xfrm4_policy_init(); 299 xfrm4_policy_init();
300#ifdef CONFIG_SYSCTL 300#ifdef CONFIG_SYSCTL
301 sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path, 301 sysctl_hdr = register_net_sysctl(&init_net, "net/ipv4",
302 xfrm4_policy_table); 302 xfrm4_policy_table);
303#endif 303#endif
304} 304}
305 305
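
register_net_sysctl() takes the mount point as a plain string instead of the old ctl_path array. A sketch of the new registration style, with my_table and the "my_knob" entry purely hypothetical:

	#include <linux/errno.h>
	#include <linux/init.h>
	#include <linux/sysctl.h>
	#include <net/net_namespace.h>

	static int placeholder_value;

	static struct ctl_table my_table[] = {
		{
			.procname	= "my_knob",
			.data		= &placeholder_value,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{ }
	};

	static struct ctl_table_header *hdr;

	static int __init my_sysctl_init(void)
	{
		/* string path replaces the old net_ipv4_ctl_path array */
		hdr = register_net_sysctl(&init_net, "net/ipv4", my_table);
		return hdr ? 0 : -ENOMEM;
	}
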