Diffstat (limited to 'net/ipv4')
49 files changed, 1088 insertions, 1293 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index d183262943d9..20f1cb5c8aba 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -262,8 +262,8 @@ config ARPD
 	bool "IP: ARP daemon support"
 	---help---
 	  The kernel maintains an internal cache which maps IP addresses to
-	  hardware addresses on the local network, so that Ethernet/Token Ring/
-	  etc. frames are sent to the proper address on the physical networking
+	  hardware addresses on the local network, so that Ethernet
+	  frames are sent to the proper address on the physical networking
 	  layer. Normally, kernel uses the ARP protocol to resolve these
 	  mappings.
 
@@ -312,7 +312,7 @@ config SYN_COOKIES
 
 config INET_AH
 	tristate "IP: AH transformation"
-	select XFRM
+	select XFRM_ALGO
 	select CRYPTO
 	select CRYPTO_HMAC
 	select CRYPTO_MD5
@@ -324,7 +324,7 @@ config INET_AH
 
 config INET_ESP
 	tristate "IP: ESP transformation"
-	select XFRM
+	select XFRM_ALGO
 	select CRYPTO
 	select CRYPTO_AUTHENC
 	select CRYPTO_HMAC
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 10e3751466b5..c8f7aee587d1 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -350,7 +350,7 @@ lookup_protocol:
 	err = 0;
 	sk->sk_no_check = answer_no_check;
 	if (INET_PROTOSW_REUSE & answer_flags)
-		sk->sk_reuse = 1;
+		sk->sk_reuse = SK_CAN_REUSE;
 
 	inet = inet_sk(sk);
 	inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
@@ -541,7 +541,7 @@ out:
 }
 EXPORT_SYMBOL(inet_bind);
 
-int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
+int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
 		       int addr_len, int flags)
 {
 	struct sock *sk = sock->sk;
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index fd508b526014..e8f2617ecd47 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -77,7 +77,7 @@ static inline struct scatterlist *ah_req_sg(struct crypto_ahash *ahash,
 
 static int ip_clear_mutable_options(const struct iphdr *iph, __be32 *daddr)
 {
-	unsigned char * optptr = (unsigned char*)(iph+1);
+	unsigned char *optptr = (unsigned char *)(iph+1);
 	int l = iph->ihl*4 - sizeof(struct iphdr);
 	int optlen;
 
@@ -406,8 +406,8 @@ static void ah4_err(struct sk_buff *skb, u32 info)
 			      ah->spi, IPPROTO_AH, AF_INET);
 	if (!x)
 		return;
-	printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n",
-	       ntohl(ah->spi), ntohl(iph->daddr));
+	pr_debug("pmtu discovery on SA AH/%08x/%08x\n",
+		 ntohl(ah->spi), ntohl(iph->daddr));
 	xfrm_state_put(x);
 }
 
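The printk(KERN_DEBUG ...) to pr_debug() conversions here and below are not purely cosmetic: pr_debug() compiles to nothing unless DEBUG is set, and with CONFIG_DYNAMIC_DEBUG each call site can be toggled at runtime. A hypothetical helper mirroring the ah4_err() message above, just to show the shape of a converted call site:

	#include <linux/printk.h>

	/* Illustrative only; enable the real call site at runtime with:
	 *   echo 'file ah4.c +p' > /sys/kernel/debug/dynamic_debug/control
	 */
	static void report_pmtu_event(__be32 spi, __be32 daddr)
	{
		pr_debug("pmtu discovery on SA AH/%08x/%08x\n",
			 ntohl(spi), ntohl(daddr));
	}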
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 18d9b81ecb1a..cda37be02f8d 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -73,6 +73,8 @@
  *	Jesper D. Brouer:	Proxy ARP PVLAN RFC 3069 support.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/string.h>
@@ -89,7 +91,6 @@
 #include <linux/etherdevice.h>
 #include <linux/fddidevice.h>
 #include <linux/if_arp.h>
-#include <linux/trdevice.h>
 #include <linux/skbuff.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
@@ -193,9 +194,6 @@ int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir)
 	case ARPHRD_IEEE802:
 		ip_eth_mc_map(addr, haddr);
 		return 0;
-	case ARPHRD_IEEE802_TR:
-		ip_tr_mc_map(addr, haddr);
-		return 0;
 	case ARPHRD_INFINIBAND:
 		ip_ib_mc_map(addr, dev->broadcast, haddr);
 		return 0;
@@ -364,8 +362,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
 	probes -= neigh->parms->ucast_probes;
 	if (probes < 0) {
 		if (!(neigh->nud_state & NUD_VALID))
-			printk(KERN_DEBUG
-			       "trying to ucast probe in NUD_INVALID\n");
+			pr_debug("trying to ucast probe in NUD_INVALID\n");
 		dst_ha = neigh->ha;
 		read_lock_bh(&neigh->lock);
 	} else {
@@ -452,7 +449,7 @@ static int arp_set_predefined(int addr_hint, unsigned char *haddr,
 {
 	switch (addr_hint) {
 	case RTN_LOCAL:
-		printk(KERN_DEBUG "ARP: arp called for own IP address\n");
+		pr_debug("arp called for own IP address\n");
 		memcpy(haddr, dev->dev_addr, dev->addr_len);
 		return 1;
 	case RTN_MULTICAST:
@@ -473,7 +470,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
 	struct neighbour *n;
 
 	if (!skb_dst(skb)) {
-		printk(KERN_DEBUG "arp_find is called with dst==NULL\n");
+		pr_debug("arp_find is called with dst==NULL\n");
 		kfree_skb(skb);
 		return 1;
 	}
@@ -648,12 +645,6 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
 		arp->ar_pro = htons(ETH_P_IP);
 		break;
 #endif
-#if IS_ENABLED(CONFIG_TR)
-	case ARPHRD_IEEE802_TR:
-		arp->ar_hrd = htons(ARPHRD_IEEE802);
-		arp->ar_pro = htons(ETH_P_IP);
-		break;
-#endif
 	}
 
 	arp->ar_hln = dev->addr_len;
@@ -751,11 +742,10 @@ static int arp_process(struct sk_buff *skb)
 			goto out;
 		break;
 	case ARPHRD_ETHER:
-	case ARPHRD_IEEE802_TR:
 	case ARPHRD_FDDI:
 	case ARPHRD_IEEE802:
 		/*
-		 * ETHERNET, Token Ring and Fibre Channel (which are IEEE 802
+		 * ETHERNET, and Fibre Channel (which are IEEE 802
 		 * devices, according to RFC 2625) devices will accept ARP
 		 * hardware types of either 1 (Ethernet) or 6 (IEEE 802.2).
 		 * This is the case also of FDDI, where the RFC 1390 says that
@@ -1059,7 +1049,7 @@ static int arp_req_set(struct net *net, struct arpreq *r,
 	neigh = __neigh_lookup_errno(&arp_tbl, &ip, dev);
 	err = PTR_ERR(neigh);
 	if (!IS_ERR(neigh)) {
-		unsigned state = NUD_STALE;
+		unsigned int state = NUD_STALE;
 		if (r->arp_flags & ATF_PERM)
 			state = NUD_PERMANENT;
 		err = neigh_update(neigh, (r->arp_flags & ATF_COM) ?
@@ -1071,7 +1061,7 @@ static int arp_req_set(struct net *net, struct arpreq *r,
 	return err;
 }
 
-static unsigned arp_state_to_flags(struct neighbour *neigh)
+static unsigned int arp_state_to_flags(struct neighbour *neigh)
 {
 	if (neigh->nud_state&NUD_PERMANENT)
 		return ATF_PERM | ATF_COM;
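arp.c also gains a pr_fmt definition ahead of its includes, which is why the converted messages can drop the hand-written "ARP: " prefix: pr_fmt() is pasted in front of the format string of every later pr_*() call in the file. A minimal sketch of the mechanism, with a hypothetical module name:

	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt	/* must precede the includes */

	#include <linux/kernel.h>
	#include <linux/module.h>

	static int __init demo_init(void)
	{
		pr_info("loaded\n");	/* logs "demo: loaded" for a module named demo */
		return 0;
	}
	module_init(demo_init);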
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 6e447ff94dfa..10e15a144e95 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -217,8 +217,7 @@ void in_dev_finish_destroy(struct in_device *idev)
 	WARN_ON(idev->ifa_list);
 	WARN_ON(idev->mc_list);
 #ifdef NET_REFCNT_DEBUG
-	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
-	       idev, dev ? dev->name : "NIL");
+	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
 #endif
 	dev_put(dev);
 	if (!idev->dead)
@@ -1125,7 +1124,7 @@ skip:
 	}
 }
 
-static inline bool inetdev_valid_mtu(unsigned mtu)
+static inline bool inetdev_valid_mtu(unsigned int mtu)
 {
 	return mtu >= 68;
 }
@@ -1174,7 +1173,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 
 	switch (event) {
 	case NETDEV_REGISTER:
-		printk(KERN_DEBUG "inetdev_event: bug\n");
+		pr_debug("%s: bug\n", __func__);
 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
 		break;
 	case NETDEV_UP:
@@ -1266,17 +1265,15 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
 	ifm->ifa_scope = ifa->ifa_scope;
 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
 
-	if (ifa->ifa_address)
-		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
-
-	if (ifa->ifa_local)
-		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
-
-	if (ifa->ifa_broadcast)
-		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
-
-	if (ifa->ifa_label[0])
-		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
+	if ((ifa->ifa_address &&
+	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
+	    (ifa->ifa_local &&
+	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
+	    (ifa->ifa_broadcast &&
+	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
+	    (ifa->ifa_label[0] &&
+	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)))
+		goto nla_put_failure;
 
 	return nlmsg_end(skb, nlh);
 
@@ -1587,7 +1584,6 @@ static int ipv4_doint_and_flush(ctl_table *ctl, int write,
 static struct devinet_sysctl_table {
 	struct ctl_table_header *sysctl_header;
 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
-	char *dev_name;
 } devinet_sysctl = {
 	.devinet_vars = {
 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
@@ -1629,16 +1625,7 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
 {
 	int i;
 	struct devinet_sysctl_table *t;
-
-#define DEVINET_CTL_PATH_DEV 3
-
-	struct ctl_path devinet_ctl_path[] = {
-		{ .procname = "net", },
-		{ .procname = "ipv4", },
-		{ .procname = "conf", },
-		{ /* to be set */ },
-		{ },
-	};
+	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
 
 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
 	if (!t)
@@ -1650,27 +1637,15 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
 		t->devinet_vars[i].extra2 = net;
 	}
 
-	/*
-	 * Make a copy of dev_name, because '.procname' is regarded as const
-	 * by sysctl and we wouldn't want anyone to change it under our feet
-	 * (see SIOCSIFNAME).
-	 */
-	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
-	if (!t->dev_name)
-		goto free;
-
-	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
+	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
 
-	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
-					t->devinet_vars);
+	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
 	if (!t->sysctl_header)
-		goto free_procname;
+		goto free;
 
 	p->sysctl = t;
 	return 0;
 
-free_procname:
-	kfree(t->dev_name);
 free:
 	kfree(t);
 out:
@@ -1686,7 +1661,6 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
 
 	cnf->sysctl = NULL;
 	unregister_net_sysctl_table(t->sysctl_header);
-	kfree(t->dev_name);
 	kfree(t);
 }
 
@@ -1716,12 +1690,6 @@ static struct ctl_table ctl_forward_entry[] = {
 	},
 	{ },
 };
-
-static __net_initdata struct ctl_path net_ipv4_path[] = {
-	{ .procname = "net", },
-	{ .procname = "ipv4", },
-	{ },
-};
 #endif
 
 static __net_init int devinet_init_net(struct net *net)
@@ -1767,7 +1735,7 @@ static __net_init int devinet_init_net(struct net *net)
 		goto err_reg_dflt;
 
 	err = -ENOMEM;
-	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
+	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
 	if (forw_hdr == NULL)
 		goto err_reg_ctl;
 	net->ipv4.forw_hdr = forw_hdr;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index cbe3a68507cf..3854411fa37c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -136,13 +136,13 @@ static void fib_flush(struct net *net)
  * Find address type as if only "dev" was present in the system. If
  * on_dev is NULL then all interfaces are taken into consideration.
  */
-static inline unsigned __inet_dev_addr_type(struct net *net,
+static inline unsigned int __inet_dev_addr_type(struct net *net,
 					    const struct net_device *dev,
 					    __be32 addr)
 {
 	struct flowi4 fl4 = { .daddr = addr };
 	struct fib_result res;
-	unsigned ret = RTN_BROADCAST;
+	unsigned int ret = RTN_BROADCAST;
 	struct fib_table *local_table;
 
 	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
@@ -740,7 +740,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
 #define BRD_OK		2
 #define BRD0_OK		4
 #define BRD1_OK		8
-	unsigned ok = 0;
+	unsigned int ok = 0;
 	int subnet = 0;		/* Primary network */
 	int gone = 1;		/* Address is missing */
 	int same_prefsrc = 0;	/* Another primary with same IP */
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 799fc790b3cf..2d043f71ef70 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -221,15 +221,15 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 	frh->src_len = rule4->src_len;
 	frh->tos = rule4->tos;
 
-	if (rule4->dst_len)
-		NLA_PUT_BE32(skb, FRA_DST, rule4->dst);
-
-	if (rule4->src_len)
-		NLA_PUT_BE32(skb, FRA_SRC, rule4->src);
-
+	if ((rule4->dst_len &&
+	     nla_put_be32(skb, FRA_DST, rule4->dst)) ||
+	    (rule4->src_len &&
+	     nla_put_be32(skb, FRA_SRC, rule4->src)))
+		goto nla_put_failure;
 #ifdef CONFIG_IP_ROUTE_CLASSID
-	if (rule4->tclassid)
-		NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid);
+	if (rule4->tclassid &&
+	    nla_put_u32(skb, FRA_FLOW, rule4->tclassid))
+		goto nla_put_failure;
 #endif
 	return 0;
 
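This hunk, like the devinet.c one above and the fib_semantics.c and ip_gre.c ones below, is part of a tree-wide removal of the NLA_PUT*() macros, which hid a goto behind a function-like macro. Roughly, from the old include/net/netlink.h (quoted from memory, so treat the exact text as an assumption):

	/* Old style: the jump to nla_put_failure is invisible at the call site.
	 *
	 *	#define NLA_PUT(skb, attrtype, attrlen, data) \
	 *		do { \
	 *			if (unlikely(nla_put(skb, attrtype, attrlen, data) < 0)) \
	 *				goto nla_put_failure; \
	 *		} while (0)
	 *
	 * New style: plain nla_put_*() calls, nonzero on failure, explicit goto.
	 * Hypothetical fill function for illustration:
	 */
	static int example_fill(struct sk_buff *skb, __be32 dst, u32 prio)
	{
		if (nla_put_be32(skb, RTA_DST, dst) ||
		    nla_put_u32(skb, RTA_PRIORITY, prio))
			goto nla_put_failure;
		return 0;

	nla_put_failure:
		return -EMSGSIZE;
	}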
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 5063fa38ac7b..a8bdf7405433 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -931,33 +931,36 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 		rtm->rtm_table = tb_id;
 	else
 		rtm->rtm_table = RT_TABLE_COMPAT;
-	NLA_PUT_U32(skb, RTA_TABLE, tb_id);
+	if (nla_put_u32(skb, RTA_TABLE, tb_id))
+		goto nla_put_failure;
 	rtm->rtm_type = type;
 	rtm->rtm_flags = fi->fib_flags;
 	rtm->rtm_scope = fi->fib_scope;
 	rtm->rtm_protocol = fi->fib_protocol;
 
-	if (rtm->rtm_dst_len)
-		NLA_PUT_BE32(skb, RTA_DST, dst);
-
-	if (fi->fib_priority)
-		NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
-
+	if (rtm->rtm_dst_len &&
+	    nla_put_be32(skb, RTA_DST, dst))
+		goto nla_put_failure;
+	if (fi->fib_priority &&
+	    nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority))
+		goto nla_put_failure;
 	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
 		goto nla_put_failure;
 
-	if (fi->fib_prefsrc)
-		NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
-
+	if (fi->fib_prefsrc &&
+	    nla_put_be32(skb, RTA_PREFSRC, fi->fib_prefsrc))
+		goto nla_put_failure;
 	if (fi->fib_nhs == 1) {
-		if (fi->fib_nh->nh_gw)
-			NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
-
-		if (fi->fib_nh->nh_oif)
-			NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
+		if (fi->fib_nh->nh_gw &&
+		    nla_put_be32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw))
+			goto nla_put_failure;
+		if (fi->fib_nh->nh_oif &&
+		    nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif))
+			goto nla_put_failure;
 #ifdef CONFIG_IP_ROUTE_CLASSID
-		if (fi->fib_nh[0].nh_tclassid)
-			NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
+		if (fi->fib_nh[0].nh_tclassid &&
+		    nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
+			goto nla_put_failure;
 #endif
 	}
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -978,11 +981,13 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 		rtnh->rtnh_hops = nh->nh_weight - 1;
 		rtnh->rtnh_ifindex = nh->nh_oif;
 
-		if (nh->nh_gw)
-			NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
+		if (nh->nh_gw &&
+		    nla_put_be32(skb, RTA_GATEWAY, nh->nh_gw))
+			goto nla_put_failure;
 #ifdef CONFIG_IP_ROUTE_CLASSID
-		if (nh->nh_tclassid)
-			NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
+		if (nh->nh_tclassid &&
+		    nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
+			goto nla_put_failure;
 #endif
 		/* length of rtnetlink header + attributes */
 		rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 2cb2bf845641..c75efbdc71cb 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -713,11 +713,10 @@ static void icmp_unreach(struct sk_buff *skb)
 
 	if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses &&
 	    inet_addr_type(net, iph->daddr) == RTN_BROADCAST) {
-		if (net_ratelimit())
-			pr_warn("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n",
-				&ip_hdr(skb)->saddr,
-				icmph->type, icmph->code,
-				&iph->daddr, skb->dev->name);
+		net_warn_ratelimited("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n",
+				     &ip_hdr(skb)->saddr,
+				     icmph->type, icmph->code,
+				     &iph->daddr, skb->dev->name);
 		goto out;
 	}
 
@@ -906,8 +905,7 @@ out_err:
 static void icmp_address(struct sk_buff *skb)
 {
 #if 0
-	if (net_ratelimit())
-		printk(KERN_DEBUG "a guy asks for address mask. Who is it?\n");
+	net_dbg_ratelimited("a guy asks for address mask. Who is it?\n");
 #endif
 }
 
@@ -943,10 +941,10 @@ static void icmp_address_reply(struct sk_buff *skb)
 			    inet_ifa_match(ip_hdr(skb)->saddr, ifa))
 				break;
 		}
-		if (!ifa && net_ratelimit()) {
-			pr_info("Wrong address mask %pI4 from %s/%pI4\n",
-				mp, dev->name, &ip_hdr(skb)->saddr);
-		}
+		if (!ifa)
+			net_info_ratelimited("Wrong address mask %pI4 from %s/%pI4\n",
+					     mp,
+					     dev->name, &ip_hdr(skb)->saddr);
 	}
 }
 
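The net_warn_ratelimited()/net_info_ratelimited() calls introduced here collapse the old two-step "if (net_ratelimit()) pr_*(...)" pattern into one statement. The helpers are thin wrappers, one per log level; their assumed shape (from include/linux/net.h in this series, quoted from memory):

	/*
	 *	#define net_ratelimited_function(function, ...)	\
	 *	do {						\
	 *		if (net_ratelimit())			\
	 *			function(__VA_ARGS__);		\
	 *	} while (0)
	 *
	 *	#define net_info_ratelimited(fmt, ...)		\
	 *		net_ratelimited_function(pr_info, fmt, ##__VA_ARGS__)
	 */
	static void report_bad_mask(const struct sk_buff *skb, const __be32 *mp)
	{
		/* one statement replaces the old if/printk pair */
		net_info_ratelimited("Wrong address mask %pI4 from %pI4\n",
				     mp, &ip_hdr(skb)->saddr);
	}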
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 5dfecfd7d5e9..6699f23e6f55 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -344,10 +344,10 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 	pip->protocol = IPPROTO_IGMP;
 	pip->tot_len  = 0;	/* filled in later */
 	ip_select_ident(pip, &rt->dst, NULL);
-	((u8*)&pip[1])[0] = IPOPT_RA;
-	((u8*)&pip[1])[1] = 4;
-	((u8*)&pip[1])[2] = 0;
-	((u8*)&pip[1])[3] = 0;
+	((u8 *)&pip[1])[0] = IPOPT_RA;
+	((u8 *)&pip[1])[1] = 4;
+	((u8 *)&pip[1])[2] = 0;
+	((u8 *)&pip[1])[3] = 0;
 
 	skb->transport_header = skb->network_header + sizeof(struct iphdr) + 4;
 	skb_put(skb, sizeof(*pig));
@@ -688,10 +688,10 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 	iph->saddr    = fl4.saddr;
 	iph->protocol = IPPROTO_IGMP;
 	ip_select_ident(iph, &rt->dst, NULL);
-	((u8*)&iph[1])[0] = IPOPT_RA;
-	((u8*)&iph[1])[1] = 4;
-	((u8*)&iph[1])[2] = 0;
-	((u8*)&iph[1])[3] = 0;
+	((u8 *)&iph[1])[0] = IPOPT_RA;
+	((u8 *)&iph[1])[1] = 4;
+	((u8 *)&iph[1])[2] = 0;
+	((u8 *)&iph[1])[3] = 0;
 
 	ih = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
 	ih->type = type;
@@ -774,7 +774,7 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs)
 			if (psf->sf_count[MCAST_INCLUDE] ||
 			    pmc->sfcount[MCAST_EXCLUDE] !=
 			    psf->sf_count[MCAST_EXCLUDE])
-				continue;
+				break;
 			if (srcs[i] == psf->sf_inaddr) {
 				scount++;
 				break;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 19d66cefd7d3..95e61596e605 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -42,7 +42,8 @@ EXPORT_SYMBOL(sysctl_local_reserved_ports);
 
 void inet_get_local_port_range(int *low, int *high)
 {
-	unsigned seq;
+	unsigned int seq;
+
 	do {
 		seq = read_seqbegin(&sysctl_local_ports.lock);
 
@@ -53,7 +54,7 @@ void inet_get_local_port_range(int *low, int *high)
 EXPORT_SYMBOL(inet_get_local_port_range);
 
 int inet_csk_bind_conflict(const struct sock *sk,
-			   const struct inet_bind_bucket *tb)
+			   const struct inet_bind_bucket *tb, bool relax)
 {
 	struct sock *sk2;
 	struct hlist_node *node;
@@ -79,6 +80,14 @@ int inet_csk_bind_conflict(const struct sock *sk,
 				    sk2_rcv_saddr == sk_rcv_saddr(sk))
 					break;
 			}
+			if (!relax && reuse && sk2->sk_reuse &&
+			    sk2->sk_state != TCP_LISTEN) {
+				const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
+
+				if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
+				    sk2_rcv_saddr == sk_rcv_saddr(sk))
+					break;
+			}
 		}
 	}
 	return node != NULL;
@@ -122,12 +131,13 @@ again:
 				    (tb->num_owners < smallest_size || smallest_size == -1)) {
 					smallest_size = tb->num_owners;
 					smallest_rover = rover;
-					if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) {
+					if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 &&
+					    !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
 						snum = smallest_rover;
 						goto tb_found;
 					}
 				}
-				if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) {
+				if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
 					snum = rover;
 					goto tb_found;
 				}
@@ -172,18 +182,22 @@ have_snum:
 		goto tb_not_found;
 tb_found:
 	if (!hlist_empty(&tb->owners)) {
+		if (sk->sk_reuse == SK_FORCE_REUSE)
+			goto success;
+
 		if (tb->fastreuse > 0 &&
 		    sk->sk_reuse && sk->sk_state != TCP_LISTEN &&
 		    smallest_size == -1) {
 			goto success;
 		} else {
 			ret = 1;
-			if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) {
+			if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) {
 				if (sk->sk_reuse && sk->sk_state != TCP_LISTEN &&
 				    smallest_size != -1 && --attempts >= 0) {
 					spin_unlock(&head->lock);
 					goto again;
 				}
+
 				goto fail_unlock;
 			}
 		}
@@ -514,7 +528,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 
 	/* Normally all the openreqs are young and become mature
 	 * (i.e. converted to established socket) for first timeout.
-	 * If synack was not acknowledged for 3 seconds, it means
+	 * If synack was not acknowledged for 1 second, it means
 	 * one of the following things: synack was lost, ack was lost,
 	 * rtt is high or nobody planned to ack (i.e. synflood).
 	 * When server is a bit loaded, queue is populated with old
@@ -555,8 +569,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 				syn_ack_recalc(req, thresh, max_retries,
 					       queue->rskq_defer_accept,
 					       &expire, &resend);
-				if (req->rsk_ops->syn_ack_timeout)
-					req->rsk_ops->syn_ack_timeout(parent, req);
+				req->rsk_ops->syn_ack_timeout(parent, req);
 				if (!expire &&
 				    (!resend ||
 				     !req->rsk_ops->rtx_syn_ack(parent, req, NULL) ||
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 8f8db724bfaf..46d1e7199a8c 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -999,12 +999,12 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
 	return inet_diag_get_exact(skb, h, (struct inet_diag_req_v2 *)NLMSG_DATA(h));
 }
 
-static struct sock_diag_handler inet_diag_handler = {
+static const struct sock_diag_handler inet_diag_handler = {
 	.family = AF_INET,
 	.dump = inet_diag_handler_dump,
 };
 
-static struct sock_diag_handler inet6_diag_handler = {
+static const struct sock_diag_handler inet6_diag_handler = {
 	.family = AF_INET6,
 	.dump = inet_diag_handler_dump,
 };
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 984ec656b03b..7880af970208 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -217,7 +217,7 @@ begin:
 }
 EXPORT_SYMBOL_GPL(__inet_lookup_listener);
 
-struct sock * __inet_lookup_established(struct net *net,
+struct sock *__inet_lookup_established(struct net *net,
 				  struct inet_hashinfo *hashinfo,
 				  const __be32 saddr, const __be16 sport,
 				  const __be32 daddr, const u16 hnum,
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 89168c6351ff..2784db3155fb 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -89,8 +89,8 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
 
 #ifdef SOCK_REFCNT_DEBUG
 	if (atomic_read(&tw->tw_refcnt) != 1) {
-		printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n",
-		       tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt));
+		pr_debug("%s timewait_sock %p refcnt=%d\n",
+			 tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt));
 	}
 #endif
 	while (refcnt) {
@@ -263,7 +263,7 @@ rescan:
 void inet_twdr_hangman(unsigned long data)
 {
 	struct inet_timewait_death_row *twdr;
-	int unsigned need_timer;
+	unsigned int need_timer;
 
 	twdr = (struct inet_timewait_death_row *)data;
 	spin_lock(&twdr->death_lock);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 29a07b6c7168..e5c44fc586ab 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -41,7 +41,7 @@
 
 static int ip_forward_finish(struct sk_buff *skb)
 {
-	struct ip_options * opt	= &(IPCB(skb)->opt);
+	struct ip_options *opt	= &(IPCB(skb)->opt);
 
 	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
 
@@ -55,7 +55,7 @@ int ip_forward(struct sk_buff *skb)
 {
 	struct iphdr *iph;	/* Our header */
 	struct rtable *rt;	/* Route we use */
-	struct ip_options * opt	= &(IPCB(skb)->opt);
+	struct ip_options *opt	= &(IPCB(skb)->opt);
 
 	if (skb_warn_if_lro(skb))
 		goto drop;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 3727e234c884..9f9bd139335f 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -569,7 +569,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 		skb_morph(head, qp->q.fragments);
 		head->next = qp->q.fragments->next;
 
-		kfree_skb(qp->q.fragments);
+		consume_skb(qp->q.fragments);
 		qp->q.fragments = head;
 	}
 
@@ -644,8 +644,7 @@ out_nomem:
 	err = -ENOMEM;
 	goto out_fail;
out_oversize:
-	if (net_ratelimit())
-		pr_info("Oversized IP packet from %pI4\n", &qp->saddr);
+	net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->saddr);
out_fail:
 	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
 	return err;
@@ -782,7 +781,7 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net)
 		table[2].data = &net->ipv4.frags.timeout;
 	}
 
-	hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table);
+	hdr = register_net_sysctl(net, "net/ipv4", table);
 	if (hdr == NULL)
 		goto err_reg;
 
@@ -807,7 +806,7 @@ static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net)
 
 static void ip4_frags_ctl_register(void)
 {
-	register_net_sysctl_rotable(net_ipv4_ctl_path, ip4_frags_ctl_table);
+	register_net_sysctl(&init_net, "net/ipv4", ip4_frags_ctl_table);
 }
 #else
 static inline int ip4_frags_ns_ctl_register(struct net *net)
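The kfree_skb() to consume_skb() switch in ip_frag_reasm() matters only for observability: both free the buffer, but kfree_skb() is traced as a packet drop (and shows up in tools like dropwatch), while consume_skb() marks a normally consumed packet. A sketch of the convention the hunk relies on:

	#include <linux/skbuff.h>

	static void example_finish_skb(struct sk_buff *skb, bool dropped)
	{
		if (dropped)
			kfree_skb(skb);		/* visible to drop monitors */
		else
			consume_skb(skb);	/* normal end of life, not a drop */
	}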
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index b57532d4742c..f49047b79609 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -169,37 +169,56 @@ struct ipgre_net {
 
 /* often modified stats are per cpu, other are shared (netdev->stats) */
 struct pcpu_tstats {
-	unsigned long	rx_packets;
-	unsigned long	rx_bytes;
-	unsigned long	tx_packets;
-	unsigned long	tx_bytes;
-} __attribute__((aligned(4*sizeof(unsigned long))));
+	u64	rx_packets;
+	u64	rx_bytes;
+	u64	tx_packets;
+	u64	tx_bytes;
+	struct u64_stats_sync	syncp;
+};
 
-static struct net_device_stats *ipgre_get_stats(struct net_device *dev)
+static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
+						   struct rtnl_link_stats64 *tot)
 {
-	struct pcpu_tstats sum = { 0 };
 	int i;
 
 	for_each_possible_cpu(i) {
 		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
-
-		sum.rx_packets += tstats->rx_packets;
-		sum.rx_bytes   += tstats->rx_bytes;
-		sum.tx_packets += tstats->tx_packets;
-		sum.tx_bytes   += tstats->tx_bytes;
+		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+		unsigned int start;
+
+		do {
+			start = u64_stats_fetch_begin_bh(&tstats->syncp);
+			rx_packets = tstats->rx_packets;
+			tx_packets = tstats->tx_packets;
+			rx_bytes = tstats->rx_bytes;
+			tx_bytes = tstats->tx_bytes;
+		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
+
+		tot->rx_packets += rx_packets;
+		tot->tx_packets += tx_packets;
+		tot->rx_bytes += rx_bytes;
+		tot->tx_bytes += tx_bytes;
 	}
-	dev->stats.rx_packets = sum.rx_packets;
-	dev->stats.rx_bytes   = sum.rx_bytes;
-	dev->stats.tx_packets = sum.tx_packets;
-	dev->stats.tx_bytes   = sum.tx_bytes;
-	return &dev->stats;
+
+	tot->multicast = dev->stats.multicast;
+	tot->rx_crc_errors = dev->stats.rx_crc_errors;
+	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
+	tot->rx_length_errors = dev->stats.rx_length_errors;
+	tot->rx_errors = dev->stats.rx_errors;
+	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
+	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
+	tot->tx_dropped = dev->stats.tx_dropped;
+	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
+	tot->tx_errors = dev->stats.tx_errors;
+
+	return tot;
 }
 
 /* Given src, dst and key, find appropriate for input tunnel. */
 
-static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
+static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
 					      __be32 remote, __be32 local,
 					      __be32 key, __be16 gre_proto)
 {
 	struct net *net = dev_net(dev);
 	int link = dev->ifindex;
@@ -464,7 +483,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
  */
 
 	const struct iphdr *iph = (const struct iphdr *)skb->data;
-	__be16	     *p = (__be16*)(skb->data+(iph->ihl<<2));
+	__be16	     *p = (__be16 *)(skb->data+(iph->ihl<<2));
 	int grehlen = (iph->ihl<<2) + 4;
 	const int type = icmp_hdr(skb)->type;
 	const int code = icmp_hdr(skb)->code;
@@ -574,7 +593,7 @@ static int ipgre_rcv(struct sk_buff *skb)
 
 	iph = ip_hdr(skb);
 	h = skb->data;
-	flags = *(__be16*)h;
+	flags = *(__be16 *)h;
 
 	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
 		/* - Version must be 0.
@@ -598,11 +617,11 @@ static int ipgre_rcv(struct sk_buff *skb)
 			offset += 4;
 		}
 		if (flags&GRE_KEY) {
-			key = *(__be32*)(h + offset);
+			key = *(__be32 *)(h + offset);
 			offset += 4;
 		}
 		if (flags&GRE_SEQ) {
-			seqno = ntohl(*(__be32*)(h + offset));
+			seqno = ntohl(*(__be32 *)(h + offset));
 			offset += 4;
 		}
 	}
@@ -672,8 +691,10 @@ static int ipgre_rcv(struct sk_buff *skb)
 		}
 
 		tstats = this_cpu_ptr(tunnel->dev->tstats);
+		u64_stats_update_begin(&tstats->syncp);
 		tstats->rx_packets++;
 		tstats->rx_bytes += skb->len;
+		u64_stats_update_end(&tstats->syncp);
 
 		__skb_tunnel_rx(skb, tunnel->dev);
 
@@ -900,7 +921,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			htons(ETH_P_TEB) : skb->protocol;
 
 	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
-		__be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
+		__be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4);
 
 		if (tunnel->parms.o_flags&GRE_SEQ) {
 			++tunnel->o_seqno;
@@ -913,7 +934,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 		}
 		if (tunnel->parms.o_flags&GRE_CSUM) {
 			*ptr = 0;
-			*(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
+			*(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr));
 		}
 	}
 
@@ -1169,7 +1190,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
 {
 	struct ip_tunnel *t = netdev_priv(dev);
 	struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
-	__be16 *p = (__be16*)(iph+1);
+	__be16 *p = (__be16 *)(iph+1);
 
 	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
 	p[0]		= t->parms.o_flags;
@@ -1253,7 +1274,7 @@ static const struct net_device_ops ipgre_netdev_ops = {
 	.ndo_start_xmit		= ipgre_tunnel_xmit,
 	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
 	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
-	.ndo_get_stats		= ipgre_get_stats,
+	.ndo_get_stats64	= ipgre_get_stats64,
 };
 
 static void ipgre_dev_free(struct net_device *dev)
@@ -1507,7 +1528,7 @@ static const struct net_device_ops ipgre_tap_netdev_ops = {
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
-	.ndo_get_stats		= ipgre_get_stats,
+	.ndo_get_stats64	= ipgre_get_stats64,
 };
 
 static void ipgre_tap_setup(struct net_device *dev)
@@ -1654,17 +1675,18 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	struct ip_tunnel *t = netdev_priv(dev);
 	struct ip_tunnel_parm *p = &t->parms;
 
-	NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
-	NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
-	NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
-	NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
-	NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
-	NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
-	NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
-	NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
-	NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
-	NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
-
+	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
+	    nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
+	    nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
+	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
+	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
+	    nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
+	    nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
+	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
+	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
+	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
+		       !!(p->iph.frag_off & htons(IP_DF))))
+		goto nla_put_failure;
 	return 0;
 
 nla_put_failure:
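The ip_gre statistics rework depends on the u64_stats_sync primitive: on 64-bit kernels it compiles away entirely, while on 32-bit SMP a seqcount protects readers from torn 64-bit loads, which is what the fetch/retry loop in ipgre_get_stats64() above implements. A condensed sketch of the writer/reader pairing, with illustrative names:

	#include <linux/u64_stats_sync.h>

	struct demo_tstats {
		u64			packets;
		u64			bytes;
		struct u64_stats_sync	syncp;
	};

	static void demo_rx(struct demo_tstats *s, unsigned int len)
	{
		u64_stats_update_begin(&s->syncp);	/* writer, cf. ipgre_rcv() */
		s->packets++;
		s->bytes += len;
		u64_stats_update_end(&s->syncp);
	}

	static void demo_fetch(const struct demo_tstats *s, u64 *packets, u64 *bytes)
	{
		unsigned int start;

		do {	/* reader retries if a writer ran concurrently */
			start = u64_stats_fetch_begin_bh(&s->syncp);
			*packets = s->packets;
			*bytes = s->bytes;
		} while (u64_stats_fetch_retry_bh(&s->syncp, start));
	}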
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 26eccc5bab1c..8590144ca330 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -210,9 +210,8 @@ static int ip_local_deliver_finish(struct sk_buff *skb)
 			int ret;
 
 			if (!net_eq(net, &init_net) && !ipprot->netns_ok) {
-				if (net_ratelimit())
-					printk("%s: proto %d isn't netns-ready\n",
-						__func__, protocol);
+				net_info_ratelimited("%s: proto %d isn't netns-ready\n",
+						     __func__, protocol);
 				kfree_skb(skb);
 				goto out;
 			}
@@ -298,10 +297,10 @@ static inline bool ip_rcv_options(struct sk_buff *skb)
 
 		if (in_dev) {
 			if (!IN_DEV_SOURCE_ROUTE(in_dev)) {
-				if (IN_DEV_LOG_MARTIANS(in_dev) &&
-				    net_ratelimit())
-					pr_info("source route option %pI4 -> %pI4\n",
-						&iph->saddr, &iph->daddr);
+				if (IN_DEV_LOG_MARTIANS(in_dev))
+					net_info_ratelimited("source route option %pI4 -> %pI4\n",
+							     &iph->saddr,
+							     &iph->daddr);
 				goto drop;
 			}
 		}
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index a0d0d9d9b870..708b99494e23 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -210,10 +210,10 @@ int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb)
  *	Simple and stupid 8), but the most efficient way.
  */
 
-void ip_options_fragment(struct sk_buff * skb)
+void ip_options_fragment(struct sk_buff *skb)
 {
 	unsigned char *optptr = skb_network_header(skb) + sizeof(struct iphdr);
-	struct ip_options * opt = &(IPCB(skb)->opt);
+	struct ip_options *opt = &(IPCB(skb)->opt);
 	int l = opt->optlen;
 	int optlen;
 
@@ -248,13 +248,13 @@ void ip_options_fragment(struct sk_buff *skb)
  */
 
 int ip_options_compile(struct net *net,
-		       struct ip_options * opt, struct sk_buff * skb)
+		       struct ip_options *opt, struct sk_buff *skb)
 {
 	int l;
-	unsigned char * iph;
-	unsigned char * optptr;
+	unsigned char *iph;
+	unsigned char *optptr;
 	int optlen;
-	unsigned char * pp_ptr = NULL;
+	unsigned char *pp_ptr = NULL;
 	struct rtable *rt = NULL;
 
 	if (skb != NULL) {
@@ -413,7 +413,7 @@ int ip_options_compile(struct net *net,
 				opt->is_changed = 1;
 			}
 		} else {
-			unsigned overflow = optptr[3]>>4;
+			unsigned int overflow = optptr[3]>>4;
 			if (overflow == 15) {
 				pp_ptr = optptr + 3;
 				goto error;
@@ -473,20 +473,20 @@ EXPORT_SYMBOL(ip_options_compile);
  *	Undo all the changes done by ip_options_compile().
  */
 
-void ip_options_undo(struct ip_options * opt)
+void ip_options_undo(struct ip_options *opt)
 {
 	if (opt->srr) {
-		unsigned char * optptr = opt->__data+opt->srr-sizeof(struct iphdr);
+		unsigned char *optptr = opt->__data+opt->srr-sizeof(struct iphdr);
 		memmove(optptr+7, optptr+3, optptr[1]-7);
 		memcpy(optptr+3, &opt->faddr, 4);
 	}
 	if (opt->rr_needaddr) {
-		unsigned char * optptr = opt->__data+opt->rr-sizeof(struct iphdr);
+		unsigned char *optptr = opt->__data+opt->rr-sizeof(struct iphdr);
 		optptr[2] -= 4;
 		memset(&optptr[optptr[2]-1], 0, 4);
 	}
 	if (opt->ts) {
-		unsigned char * optptr = opt->__data+opt->ts-sizeof(struct iphdr);
+		unsigned char *optptr = opt->__data+opt->ts-sizeof(struct iphdr);
 		if (opt->ts_needtime) {
 			optptr[2] -= 4;
 			memset(&optptr[optptr[2]-1], 0, 4);
@@ -549,8 +549,8 @@ int ip_options_get(struct net *net, struct ip_options_rcu **optp,
 
 void ip_forward_options(struct sk_buff *skb)
 {
-	struct ip_options * opt	= &(IPCB(skb)->opt);
-	unsigned char * optptr;
+	struct ip_options *opt	= &(IPCB(skb)->opt);
+	unsigned char *optptr;
 	struct rtable *rt = skb_rtable(skb);
 	unsigned char *raw = skb_network_header(skb);
 
@@ -578,8 +578,10 @@ void ip_forward_options(struct sk_buff *skb)
 			ip_hdr(skb)->daddr = opt->nexthop;
 			ip_rt_get_source(&optptr[srrptr-1], skb, rt);
 			optptr[2] = srrptr+4;
-		} else if (net_ratelimit())
-			pr_crit("%s(): Argh! Destination lost!\n", __func__);
+		} else {
+			net_crit_ratelimited("%s(): Argh! Destination lost!\n",
+					     __func__);
+		}
 		if (opt->ts_needaddr) {
 			optptr = raw + opt->ts;
 			ip_rt_get_source(&optptr[optptr[2]-9], skb, rt);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 4910176d24ed..451f97c42eb4 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -214,8 +214,8 @@ static inline int ip_finish_output2(struct sk_buff *skb)
 	}
 	rcu_read_unlock();
 
-	if (net_ratelimit())
-		printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
+	net_dbg_ratelimited("%s: No header cache and no neighbour!\n",
+			    __func__);
 	kfree_skb(skb);
 	return -EINVAL;
 }
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 2fd0fba77124..0d11f234d615 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c | |||
@@ -90,7 +90,7 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb) | |||
90 | static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) | 90 | static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) |
91 | { | 91 | { |
92 | unsigned char optbuf[sizeof(struct ip_options) + 40]; | 92 | unsigned char optbuf[sizeof(struct ip_options) + 40]; |
93 | struct ip_options * opt = (struct ip_options *)optbuf; | 93 | struct ip_options *opt = (struct ip_options *)optbuf; |
94 | 94 | ||
95 | if (IPCB(skb)->opt.optlen == 0) | 95 | if (IPCB(skb)->opt.optlen == 0) |
96 | return; | 96 | return; |
@@ -147,7 +147,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) | |||
147 | void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) | 147 | void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) |
148 | { | 148 | { |
149 | struct inet_sock *inet = inet_sk(skb->sk); | 149 | struct inet_sock *inet = inet_sk(skb->sk); |
150 | unsigned flags = inet->cmsg_flags; | 150 | unsigned int flags = inet->cmsg_flags; |
151 | 151 | ||
152 | /* Ordered by supposed usage frequency */ | 152 | /* Ordered by supposed usage frequency */ |
153 | if (flags & 1) | 153 | if (flags & 1) |
@@ -673,10 +673,15 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
673 | break; | 673 | break; |
674 | } else { | 674 | } else { |
675 | memset(&mreq, 0, sizeof(mreq)); | 675 | memset(&mreq, 0, sizeof(mreq)); |
676 | if (optlen >= sizeof(struct in_addr) && | 676 | if (optlen >= sizeof(struct ip_mreq)) { |
677 | copy_from_user(&mreq.imr_address, optval, | 677 | if (copy_from_user(&mreq, optval, |
678 | sizeof(struct in_addr))) | 678 | sizeof(struct ip_mreq))) |
679 | break; | 679 | break; |
680 | } else if (optlen >= sizeof(struct in_addr)) { | ||
681 | if (copy_from_user(&mreq.imr_address, optval, | ||
682 | sizeof(struct in_addr))) | ||
683 | break; | ||
684 | } | ||
680 | } | 685 | } |
681 | 686 | ||
682 | if (!mreq.imr_ifindex) { | 687 | if (!mreq.imr_ifindex) { |
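Note: the IP_MULTICAST_IF hunk above is a behavior fix, not just restyling: the old code always copied a bare struct in_addr from the start of optval, so a caller passing a full struct ip_mreq had its imr_multiaddr misread as the interface address. Both userspace forms now land in the right fields; a usage sketch (fd is assumed to be an open UDP socket, addresses illustrative):

        #include <arpa/inet.h>
        #include <netinet/in.h>
        #include <sys/socket.h>

        /* Form 1: bare interface address */
        struct in_addr addr = { .s_addr = inet_addr("192.0.2.1") };
        setsockopt(fd, IPPROTO_IP, IP_MULTICAST_IF, &addr, sizeof(addr));

        /* Form 2: full ip_mreq; imr_interface is what matters here */
        struct ip_mreq mreq = {
                .imr_multiaddr.s_addr = inet_addr("239.1.1.1"),
                .imr_interface.s_addr = inet_addr("192.0.2.1"),
        };
        setsockopt(fd, IPPROTO_IP, IP_MULTICAST_IF, &mreq, sizeof(mreq));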
@@ -1094,7 +1099,7 @@ EXPORT_SYMBOL(compat_ip_setsockopt); | |||
1094 | */ | 1099 | */ |
1095 | 1100 | ||
1096 | static int do_ip_getsockopt(struct sock *sk, int level, int optname, | 1101 | static int do_ip_getsockopt(struct sock *sk, int level, int optname, |
1097 | char __user *optval, int __user *optlen, unsigned flags) | 1102 | char __user *optval, int __user *optlen, unsigned int flags) |
1098 | { | 1103 | { |
1099 | struct inet_sock *inet = inet_sk(sk); | 1104 | struct inet_sock *inet = inet_sk(sk); |
1100 | int val; | 1105 | int val; |
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 92ac7e7363a0..430015010e57 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c | |||
@@ -808,8 +808,6 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d | |||
808 | b->op = BOOTP_REQUEST; | 808 | b->op = BOOTP_REQUEST; |
809 | if (dev->type < 256) /* check for false types */ | 809 | if (dev->type < 256) /* check for false types */ |
810 | b->htype = dev->type; | 810 | b->htype = dev->type; |
811 | else if (dev->type == ARPHRD_IEEE802_TR) /* fix for token ring */ | ||
812 | b->htype = ARPHRD_IEEE802; | ||
813 | else if (dev->type == ARPHRD_FDDI) | 811 | else if (dev->type == ARPHRD_FDDI) |
814 | b->htype = ARPHRD_ETHER; | 812 | b->htype = ARPHRD_ETHER; |
815 | else { | 813 | else { |
@@ -955,8 +953,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str | |||
955 | 953 | ||
956 | /* Fragments are not supported */ | 954 | /* Fragments are not supported */ |
957 | if (ip_is_fragment(h)) { | 955 | if (ip_is_fragment(h)) { |
958 | if (net_ratelimit()) | 956 | net_err_ratelimited("DHCP/BOOTP: Ignoring fragmented reply\n"); |
959 | pr_err("DHCP/BOOTP: Ignoring fragmented reply\n"); | ||
960 | goto drop; | 957 | goto drop; |
961 | } | 958 | } |
962 | 959 | ||
@@ -1004,16 +1001,14 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str | |||
1004 | /* Is it a reply to our BOOTP request? */ | 1001 | /* Is it a reply to our BOOTP request? */ |
1005 | if (b->op != BOOTP_REPLY || | 1002 | if (b->op != BOOTP_REPLY || |
1006 | b->xid != d->xid) { | 1003 | b->xid != d->xid) { |
1007 | if (net_ratelimit()) | 1004 | net_err_ratelimited("DHCP/BOOTP: Reply not for us, op[%x] xid[%x]\n", |
1008 | pr_err("DHCP/BOOTP: Reply not for us, op[%x] xid[%x]\n", | 1005 | b->op, b->xid); |
1009 | b->op, b->xid); | ||
1010 | goto drop_unlock; | 1006 | goto drop_unlock; |
1011 | } | 1007 | } |
1012 | 1008 | ||
1013 | /* Is it a reply for the device we are configuring? */ | 1009 | /* Is it a reply for the device we are configuring? */ |
1014 | if (b->xid != ic_dev_xid) { | 1010 | if (b->xid != ic_dev_xid) { |
1015 | if (net_ratelimit()) | 1011 | net_err_ratelimited("DHCP/BOOTP: Ignoring delayed packet\n"); |
1016 | pr_err("DHCP/BOOTP: Ignoring delayed packet\n"); | ||
1017 | goto drop_unlock; | 1012 | goto drop_unlock; |
1018 | } | 1013 | } |
1019 | 1014 | ||
@@ -1198,7 +1193,7 @@ static int __init ic_dynamic(void) | |||
1198 | d = ic_first_dev; | 1193 | d = ic_first_dev; |
1199 | retries = CONF_SEND_RETRIES; | 1194 | retries = CONF_SEND_RETRIES; |
1200 | get_random_bytes(&timeout, sizeof(timeout)); | 1195 | get_random_bytes(&timeout, sizeof(timeout)); |
1201 | timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM); | 1196 | timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned int) CONF_TIMEOUT_RANDOM); |
1202 | for (;;) { | 1197 | for (;;) { |
1203 | /* Track the device we are configuring */ | 1198 | /* Track the device we are configuring */ |
1204 | ic_dev_xid = d->xid; | 1199 | ic_dev_xid = d->xid; |
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index ae1413e3f2f8..2d0f99bf61b3 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c | |||
@@ -144,33 +144,48 @@ static void ipip_dev_free(struct net_device *dev); | |||
144 | 144 | ||
145 | /* often modified stats are per cpu, other are shared (netdev->stats) */ | 145 | /* often modified stats are per cpu, other are shared (netdev->stats) */ |
146 | struct pcpu_tstats { | 146 | struct pcpu_tstats { |
147 | unsigned long rx_packets; | 147 | u64 rx_packets; |
148 | unsigned long rx_bytes; | 148 | u64 rx_bytes; |
149 | unsigned long tx_packets; | 149 | u64 tx_packets; |
150 | unsigned long tx_bytes; | 150 | u64 tx_bytes; |
151 | } __attribute__((aligned(4*sizeof(unsigned long)))); | 151 | struct u64_stats_sync syncp; |
152 | }; | ||
152 | 153 | ||
153 | static struct net_device_stats *ipip_get_stats(struct net_device *dev) | 154 | static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev, |
155 | struct rtnl_link_stats64 *tot) | ||
154 | { | 156 | { |
155 | struct pcpu_tstats sum = { 0 }; | ||
156 | int i; | 157 | int i; |
157 | 158 | ||
158 | for_each_possible_cpu(i) { | 159 | for_each_possible_cpu(i) { |
159 | const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); | 160 | const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); |
160 | 161 | u64 rx_packets, rx_bytes, tx_packets, tx_bytes; | |
161 | sum.rx_packets += tstats->rx_packets; | 162 | unsigned int start; |
162 | sum.rx_bytes += tstats->rx_bytes; | 163 | |
163 | sum.tx_packets += tstats->tx_packets; | 164 | do { |
164 | sum.tx_bytes += tstats->tx_bytes; | 165 | start = u64_stats_fetch_begin_bh(&tstats->syncp); |
166 | rx_packets = tstats->rx_packets; | ||
167 | tx_packets = tstats->tx_packets; | ||
168 | rx_bytes = tstats->rx_bytes; | ||
169 | tx_bytes = tstats->tx_bytes; | ||
170 | } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); | ||
171 | |||
172 | tot->rx_packets += rx_packets; | ||
173 | tot->tx_packets += tx_packets; | ||
174 | tot->rx_bytes += rx_bytes; | ||
175 | tot->tx_bytes += tx_bytes; | ||
165 | } | 176 | } |
166 | dev->stats.rx_packets = sum.rx_packets; | 177 | |
167 | dev->stats.rx_bytes = sum.rx_bytes; | 178 | tot->tx_fifo_errors = dev->stats.tx_fifo_errors; |
168 | dev->stats.tx_packets = sum.tx_packets; | 179 | tot->tx_carrier_errors = dev->stats.tx_carrier_errors; |
169 | dev->stats.tx_bytes = sum.tx_bytes; | 180 | tot->tx_dropped = dev->stats.tx_dropped; |
170 | return &dev->stats; | 181 | tot->tx_aborted_errors = dev->stats.tx_aborted_errors; |
182 | tot->tx_errors = dev->stats.tx_errors; | ||
183 | tot->collisions = dev->stats.collisions; | ||
184 | |||
185 | return tot; | ||
171 | } | 186 | } |
172 | 187 | ||
173 | static struct ip_tunnel * ipip_tunnel_lookup(struct net *net, | 188 | static struct ip_tunnel *ipip_tunnel_lookup(struct net *net, |
174 | __be32 remote, __be32 local) | 189 | __be32 remote, __be32 local) |
175 | { | 190 | { |
176 | unsigned int h0 = HASH(remote); | 191 | unsigned int h0 = HASH(remote); |
@@ -245,7 +260,7 @@ static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) | |||
245 | rcu_assign_pointer(*tp, t); | 260 | rcu_assign_pointer(*tp, t); |
246 | } | 261 | } |
247 | 262 | ||
248 | static struct ip_tunnel * ipip_tunnel_locate(struct net *net, | 263 | static struct ip_tunnel *ipip_tunnel_locate(struct net *net, |
249 | struct ip_tunnel_parm *parms, int create) | 264 | struct ip_tunnel_parm *parms, int create) |
250 | { | 265 | { |
251 | __be32 remote = parms->iph.daddr; | 266 | __be32 remote = parms->iph.daddr; |
@@ -404,8 +419,10 @@ static int ipip_rcv(struct sk_buff *skb) | |||
404 | skb->pkt_type = PACKET_HOST; | 419 | skb->pkt_type = PACKET_HOST; |
405 | 420 | ||
406 | tstats = this_cpu_ptr(tunnel->dev->tstats); | 421 | tstats = this_cpu_ptr(tunnel->dev->tstats); |
422 | u64_stats_update_begin(&tstats->syncp); | ||
407 | tstats->rx_packets++; | 423 | tstats->rx_packets++; |
408 | tstats->rx_bytes += skb->len; | 424 | tstats->rx_bytes += skb->len; |
425 | u64_stats_update_end(&tstats->syncp); | ||
409 | 426 | ||
410 | __skb_tunnel_rx(skb, tunnel->dev); | 427 | __skb_tunnel_rx(skb, tunnel->dev); |
411 | 428 | ||
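Note: the ipip changes above move the per-cpu tunnel counters from unsigned long to u64 guarded by a u64_stats_sync seqcount, so 32-bit kernels can report 64-bit counters without torn reads; on 64-bit builds the sync helpers compile to nothing. The pattern in isolation, as a sketch:

        struct pcpu_tstats {
                u64 rx_packets;
                struct u64_stats_sync syncp;
        };

        /* writer, e.g. on the rx softirq path */
        u64_stats_update_begin(&tstats->syncp);
        tstats->rx_packets++;
        u64_stats_update_end(&tstats->syncp);

        /* reader: retry the snapshot if a writer raced with it */
        unsigned int start;
        u64 rx_packets;

        do {
                start = u64_stats_fetch_begin_bh(&tstats->syncp);
                rx_packets = tstats->rx_packets;
        } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));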
@@ -730,7 +747,7 @@ static const struct net_device_ops ipip_netdev_ops = { | |||
730 | .ndo_start_xmit = ipip_tunnel_xmit, | 747 | .ndo_start_xmit = ipip_tunnel_xmit, |
731 | .ndo_do_ioctl = ipip_tunnel_ioctl, | 748 | .ndo_do_ioctl = ipip_tunnel_ioctl, |
732 | .ndo_change_mtu = ipip_tunnel_change_mtu, | 749 | .ndo_change_mtu = ipip_tunnel_change_mtu, |
733 | .ndo_get_stats = ipip_get_stats, | 750 | .ndo_get_stats64 = ipip_get_stats64, |
734 | }; | 751 | }; |
735 | 752 | ||
736 | static void ipip_dev_free(struct net_device *dev) | 753 | static void ipip_dev_free(struct net_device *dev) |
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 960fbfc3e976..a9e519ad6db5 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c | |||
@@ -949,8 +949,7 @@ static int ipmr_cache_report(struct mr_table *mrt, | |||
949 | ret = sock_queue_rcv_skb(mroute_sk, skb); | 949 | ret = sock_queue_rcv_skb(mroute_sk, skb); |
950 | rcu_read_unlock(); | 950 | rcu_read_unlock(); |
951 | if (ret < 0) { | 951 | if (ret < 0) { |
952 | if (net_ratelimit()) | 952 | net_warn_ratelimited("mroute: pending queue full, dropping entries\n"); |
953 | pr_warn("mroute: pending queue full, dropping entries\n"); | ||
954 | kfree_skb(skb); | 953 | kfree_skb(skb); |
955 | } | 954 | } |
956 | 955 | ||
@@ -2119,15 +2118,16 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, | |||
2119 | rtm->rtm_src_len = 32; | 2118 | rtm->rtm_src_len = 32; |
2120 | rtm->rtm_tos = 0; | 2119 | rtm->rtm_tos = 0; |
2121 | rtm->rtm_table = mrt->id; | 2120 | rtm->rtm_table = mrt->id; |
2122 | NLA_PUT_U32(skb, RTA_TABLE, mrt->id); | 2121 | if (nla_put_u32(skb, RTA_TABLE, mrt->id)) |
2122 | goto nla_put_failure; | ||
2123 | rtm->rtm_type = RTN_MULTICAST; | 2123 | rtm->rtm_type = RTN_MULTICAST; |
2124 | rtm->rtm_scope = RT_SCOPE_UNIVERSE; | 2124 | rtm->rtm_scope = RT_SCOPE_UNIVERSE; |
2125 | rtm->rtm_protocol = RTPROT_UNSPEC; | 2125 | rtm->rtm_protocol = RTPROT_UNSPEC; |
2126 | rtm->rtm_flags = 0; | 2126 | rtm->rtm_flags = 0; |
2127 | 2127 | ||
2128 | NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin); | 2128 | if (nla_put_be32(skb, RTA_SRC, c->mfc_origin) || |
2129 | NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp); | 2129 | nla_put_be32(skb, RTA_DST, c->mfc_mcastgrp)) |
2130 | 2130 | goto nla_put_failure; | |
2131 | if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0) | 2131 | if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0) |
2132 | goto nla_put_failure; | 2132 | goto nla_put_failure; |
2133 | 2133 | ||
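Note: the NLA_PUT* macros being removed here hid a goto in their expansion, roughly (a sketch of the old include/net/netlink.h definition):

        #define NLA_PUT(skb, attrtype, attrlen, data)                 \
                do {                                                  \
                        if (unlikely(nla_put(skb, attrtype,           \
                                             attrlen, data) < 0))     \
                                goto nla_put_failure;                 \
                } while (0)

The open-coded if (nla_put_*(...)) goto nla_put_failure; replacement behaves identically but makes the jump visible at the call site.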
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 4f47e064e262..ed1b36783192 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c | |||
@@ -12,7 +12,7 @@ | |||
12 | #include <net/netfilter/nf_queue.h> | 12 | #include <net/netfilter/nf_queue.h> |
13 | 13 | ||
14 | /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ | 14 | /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ |
15 | int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) | 15 | int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type) |
16 | { | 16 | { |
17 | struct net *net = dev_net(skb_dst(skb)->dev); | 17 | struct net *net = dev_net(skb_dst(skb)->dev); |
18 | const struct iphdr *iph = ip_hdr(skb); | 18 | const struct iphdr *iph = ip_hdr(skb); |
@@ -237,13 +237,3 @@ static void ipv4_netfilter_fini(void) | |||
237 | 237 | ||
238 | module_init(ipv4_netfilter_init); | 238 | module_init(ipv4_netfilter_init); |
239 | module_exit(ipv4_netfilter_fini); | 239 | module_exit(ipv4_netfilter_fini); |
240 | |||
241 | #ifdef CONFIG_SYSCTL | ||
242 | struct ctl_path nf_net_ipv4_netfilter_sysctl_path[] = { | ||
243 | { .procname = "net", }, | ||
244 | { .procname = "ipv4", }, | ||
245 | { .procname = "netfilter", }, | ||
246 | { } | ||
247 | }; | ||
248 | EXPORT_SYMBOL_GPL(nf_net_ipv4_netfilter_sysctl_path); | ||
249 | #endif /* CONFIG_SYSCTL */ | ||
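Note: with this exported ctl_path array gone, the sysctl directory is named by a plain string instead; its one remaining consumer is updated accordingly in the nf_conntrack_l3proto_ipv4 hunk further down (.ctl_table_path = "net/ipv4/netfilter"). For reference, string-path registration looks roughly like this (a sketch; assumes the register_net_sysctl() API of this era):

        hdr = register_net_sysctl(&init_net, "net/ipv4/netfilter", table);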
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 240b68469a7a..c20674dc9452 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile | |||
@@ -66,6 +66,3 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o | |||
66 | 66 | ||
67 | # just filtering instance of ARP tables for now | 67 | # just filtering instance of ARP tables for now |
68 | obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o | 68 | obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o |
69 | |||
70 | obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o | ||
71 | |||
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index fd7a3f68917f..97e61eadf580 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
@@ -221,9 +221,8 @@ static inline int arp_checkentry(const struct arpt_arp *arp) | |||
221 | static unsigned int | 221 | static unsigned int |
222 | arpt_error(struct sk_buff *skb, const struct xt_action_param *par) | 222 | arpt_error(struct sk_buff *skb, const struct xt_action_param *par) |
223 | { | 223 | { |
224 | if (net_ratelimit()) | 224 | net_err_ratelimited("arp_tables: error: '%s'\n", |
225 | pr_err("arp_tables: error: '%s'\n", | 225 | (const char *)par->targinfo); |
226 | (const char *)par->targinfo); | ||
227 | 226 | ||
228 | return NF_DROP; | 227 | return NF_DROP; |
229 | } | 228 | } |
@@ -303,7 +302,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
303 | if (v < 0) { | 302 | if (v < 0) { |
304 | /* Pop from stack? */ | 303 | /* Pop from stack? */ |
305 | if (v != XT_RETURN) { | 304 | if (v != XT_RETURN) { |
306 | verdict = (unsigned)(-v) - 1; | 305 | verdict = (unsigned int)(-v) - 1; |
307 | break; | 306 | break; |
308 | } | 307 | } |
309 | e = back; | 308 | e = back; |
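Note: the (unsigned int)(-v) - 1 expression above decodes the x_tables verdict encoding, in which standard verdicts are stored as negative values so that non-negative ones can serve as jump offsets into the table. In sketch form:

        /* from x_tables: XT_RETURN is itself an encoded verdict */
        #define XT_RETURN (-NF_REPEAT - 1)

        /* v >= 0            : jump offset into the table
         * v < 0, != XT_RETURN: verdict = (unsigned int)(-v) - 1,
         *                      e.g. v == -1 decodes to NF_DROP (0) */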
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c deleted file mode 100644 index 94d45e1f8882..000000000000 --- a/net/ipv4/netfilter/ip_queue.c +++ /dev/null | |||
@@ -1,639 +0,0 @@ | |||
1 | /* | ||
2 | * This is a module which is used for queueing IPv4 packets and | ||
3 | * communicating with userspace via netlink. | ||
4 | * | ||
5 | * (C) 2000-2002 James Morris <jmorris@intercode.com.au> | ||
6 | * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | #include <linux/module.h> | ||
13 | #include <linux/skbuff.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/ip.h> | ||
16 | #include <linux/notifier.h> | ||
17 | #include <linux/netdevice.h> | ||
18 | #include <linux/netfilter.h> | ||
19 | #include <linux/netfilter_ipv4/ip_queue.h> | ||
20 | #include <linux/netfilter_ipv4/ip_tables.h> | ||
21 | #include <linux/netlink.h> | ||
22 | #include <linux/spinlock.h> | ||
23 | #include <linux/sysctl.h> | ||
24 | #include <linux/proc_fs.h> | ||
25 | #include <linux/seq_file.h> | ||
26 | #include <linux/security.h> | ||
27 | #include <linux/net.h> | ||
28 | #include <linux/mutex.h> | ||
29 | #include <linux/slab.h> | ||
30 | #include <net/net_namespace.h> | ||
31 | #include <net/sock.h> | ||
32 | #include <net/route.h> | ||
33 | #include <net/netfilter/nf_queue.h> | ||
34 | #include <net/ip.h> | ||
35 | |||
36 | #define IPQ_QMAX_DEFAULT 1024 | ||
37 | #define IPQ_PROC_FS_NAME "ip_queue" | ||
38 | #define NET_IPQ_QMAX 2088 | ||
39 | #define NET_IPQ_QMAX_NAME "ip_queue_maxlen" | ||
40 | |||
41 | typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long); | ||
42 | |||
43 | static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; | ||
44 | static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; | ||
45 | static DEFINE_SPINLOCK(queue_lock); | ||
46 | static int peer_pid __read_mostly; | ||
47 | static unsigned int copy_range __read_mostly; | ||
48 | static unsigned int queue_total; | ||
49 | static unsigned int queue_dropped = 0; | ||
50 | static unsigned int queue_user_dropped = 0; | ||
51 | static struct sock *ipqnl __read_mostly; | ||
52 | static LIST_HEAD(queue_list); | ||
53 | static DEFINE_MUTEX(ipqnl_mutex); | ||
54 | |||
55 | static inline void | ||
56 | __ipq_enqueue_entry(struct nf_queue_entry *entry) | ||
57 | { | ||
58 | list_add_tail(&entry->list, &queue_list); | ||
59 | queue_total++; | ||
60 | } | ||
61 | |||
62 | static inline int | ||
63 | __ipq_set_mode(unsigned char mode, unsigned int range) | ||
64 | { | ||
65 | int status = 0; | ||
66 | |||
67 | switch(mode) { | ||
68 | case IPQ_COPY_NONE: | ||
69 | case IPQ_COPY_META: | ||
70 | copy_mode = mode; | ||
71 | copy_range = 0; | ||
72 | break; | ||
73 | |||
74 | case IPQ_COPY_PACKET: | ||
75 | if (range > 0xFFFF) | ||
76 | range = 0xFFFF; | ||
77 | copy_range = range; | ||
78 | copy_mode = mode; | ||
79 | break; | ||
80 | |||
81 | default: | ||
82 | status = -EINVAL; | ||
83 | |||
84 | } | ||
85 | return status; | ||
86 | } | ||
87 | |||
88 | static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data); | ||
89 | |||
90 | static inline void | ||
91 | __ipq_reset(void) | ||
92 | { | ||
93 | peer_pid = 0; | ||
94 | net_disable_timestamp(); | ||
95 | __ipq_set_mode(IPQ_COPY_NONE, 0); | ||
96 | __ipq_flush(NULL, 0); | ||
97 | } | ||
98 | |||
99 | static struct nf_queue_entry * | ||
100 | ipq_find_dequeue_entry(unsigned long id) | ||
101 | { | ||
102 | struct nf_queue_entry *entry = NULL, *i; | ||
103 | |||
104 | spin_lock_bh(&queue_lock); | ||
105 | |||
106 | list_for_each_entry(i, &queue_list, list) { | ||
107 | if ((unsigned long)i == id) { | ||
108 | entry = i; | ||
109 | break; | ||
110 | } | ||
111 | } | ||
112 | |||
113 | if (entry) { | ||
114 | list_del(&entry->list); | ||
115 | queue_total--; | ||
116 | } | ||
117 | |||
118 | spin_unlock_bh(&queue_lock); | ||
119 | return entry; | ||
120 | } | ||
121 | |||
122 | static void | ||
123 | __ipq_flush(ipq_cmpfn cmpfn, unsigned long data) | ||
124 | { | ||
125 | struct nf_queue_entry *entry, *next; | ||
126 | |||
127 | list_for_each_entry_safe(entry, next, &queue_list, list) { | ||
128 | if (!cmpfn || cmpfn(entry, data)) { | ||
129 | list_del(&entry->list); | ||
130 | queue_total--; | ||
131 | nf_reinject(entry, NF_DROP); | ||
132 | } | ||
133 | } | ||
134 | } | ||
135 | |||
136 | static void | ||
137 | ipq_flush(ipq_cmpfn cmpfn, unsigned long data) | ||
138 | { | ||
139 | spin_lock_bh(&queue_lock); | ||
140 | __ipq_flush(cmpfn, data); | ||
141 | spin_unlock_bh(&queue_lock); | ||
142 | } | ||
143 | |||
144 | static struct sk_buff * | ||
145 | ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) | ||
146 | { | ||
147 | sk_buff_data_t old_tail; | ||
148 | size_t size = 0; | ||
149 | size_t data_len = 0; | ||
150 | struct sk_buff *skb; | ||
151 | struct ipq_packet_msg *pmsg; | ||
152 | struct nlmsghdr *nlh; | ||
153 | struct timeval tv; | ||
154 | |||
155 | switch (ACCESS_ONCE(copy_mode)) { | ||
156 | case IPQ_COPY_META: | ||
157 | case IPQ_COPY_NONE: | ||
158 | size = NLMSG_SPACE(sizeof(*pmsg)); | ||
159 | break; | ||
160 | |||
161 | case IPQ_COPY_PACKET: | ||
162 | if (entry->skb->ip_summed == CHECKSUM_PARTIAL && | ||
163 | (*errp = skb_checksum_help(entry->skb))) | ||
164 | return NULL; | ||
165 | |||
166 | data_len = ACCESS_ONCE(copy_range); | ||
167 | if (data_len == 0 || data_len > entry->skb->len) | ||
168 | data_len = entry->skb->len; | ||
169 | |||
170 | size = NLMSG_SPACE(sizeof(*pmsg) + data_len); | ||
171 | break; | ||
172 | |||
173 | default: | ||
174 | *errp = -EINVAL; | ||
175 | return NULL; | ||
176 | } | ||
177 | |||
178 | skb = alloc_skb(size, GFP_ATOMIC); | ||
179 | if (!skb) | ||
180 | goto nlmsg_failure; | ||
181 | |||
182 | old_tail = skb->tail; | ||
183 | nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh)); | ||
184 | pmsg = NLMSG_DATA(nlh); | ||
185 | memset(pmsg, 0, sizeof(*pmsg)); | ||
186 | |||
187 | pmsg->packet_id = (unsigned long )entry; | ||
188 | pmsg->data_len = data_len; | ||
189 | tv = ktime_to_timeval(entry->skb->tstamp); | ||
190 | pmsg->timestamp_sec = tv.tv_sec; | ||
191 | pmsg->timestamp_usec = tv.tv_usec; | ||
192 | pmsg->mark = entry->skb->mark; | ||
193 | pmsg->hook = entry->hook; | ||
194 | pmsg->hw_protocol = entry->skb->protocol; | ||
195 | |||
196 | if (entry->indev) | ||
197 | strcpy(pmsg->indev_name, entry->indev->name); | ||
198 | else | ||
199 | pmsg->indev_name[0] = '\0'; | ||
200 | |||
201 | if (entry->outdev) | ||
202 | strcpy(pmsg->outdev_name, entry->outdev->name); | ||
203 | else | ||
204 | pmsg->outdev_name[0] = '\0'; | ||
205 | |||
206 | if (entry->indev && entry->skb->dev && | ||
207 | entry->skb->mac_header != entry->skb->network_header) { | ||
208 | pmsg->hw_type = entry->skb->dev->type; | ||
209 | pmsg->hw_addrlen = dev_parse_header(entry->skb, | ||
210 | pmsg->hw_addr); | ||
211 | } | ||
212 | |||
213 | if (data_len) | ||
214 | if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len)) | ||
215 | BUG(); | ||
216 | |||
217 | nlh->nlmsg_len = skb->tail - old_tail; | ||
218 | return skb; | ||
219 | |||
220 | nlmsg_failure: | ||
221 | kfree_skb(skb); | ||
222 | *errp = -EINVAL; | ||
223 | printk(KERN_ERR "ip_queue: error creating packet message\n"); | ||
224 | return NULL; | ||
225 | } | ||
226 | |||
227 | static int | ||
228 | ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) | ||
229 | { | ||
230 | int status = -EINVAL; | ||
231 | struct sk_buff *nskb; | ||
232 | |||
233 | if (copy_mode == IPQ_COPY_NONE) | ||
234 | return -EAGAIN; | ||
235 | |||
236 | nskb = ipq_build_packet_message(entry, &status); | ||
237 | if (nskb == NULL) | ||
238 | return status; | ||
239 | |||
240 | spin_lock_bh(&queue_lock); | ||
241 | |||
242 | if (!peer_pid) | ||
243 | goto err_out_free_nskb; | ||
244 | |||
245 | if (queue_total >= queue_maxlen) { | ||
246 | queue_dropped++; | ||
247 | status = -ENOSPC; | ||
248 | if (net_ratelimit()) | ||
249 | printk (KERN_WARNING "ip_queue: full at %d entries, " | ||
250 | "dropping packets(s). Dropped: %d\n", queue_total, | ||
251 | queue_dropped); | ||
252 | goto err_out_free_nskb; | ||
253 | } | ||
254 | |||
255 | /* netlink_unicast will either free the nskb or attach it to a socket */ | ||
256 | status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT); | ||
257 | if (status < 0) { | ||
258 | queue_user_dropped++; | ||
259 | goto err_out_unlock; | ||
260 | } | ||
261 | |||
262 | __ipq_enqueue_entry(entry); | ||
263 | |||
264 | spin_unlock_bh(&queue_lock); | ||
265 | return status; | ||
266 | |||
267 | err_out_free_nskb: | ||
268 | kfree_skb(nskb); | ||
269 | |||
270 | err_out_unlock: | ||
271 | spin_unlock_bh(&queue_lock); | ||
272 | return status; | ||
273 | } | ||
274 | |||
275 | static int | ||
276 | ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct nf_queue_entry *e) | ||
277 | { | ||
278 | int diff; | ||
279 | struct iphdr *user_iph = (struct iphdr *)v->payload; | ||
280 | struct sk_buff *nskb; | ||
281 | |||
282 | if (v->data_len < sizeof(*user_iph)) | ||
283 | return 0; | ||
284 | diff = v->data_len - e->skb->len; | ||
285 | if (diff < 0) { | ||
286 | if (pskb_trim(e->skb, v->data_len)) | ||
287 | return -ENOMEM; | ||
288 | } else if (diff > 0) { | ||
289 | if (v->data_len > 0xFFFF) | ||
290 | return -EINVAL; | ||
291 | if (diff > skb_tailroom(e->skb)) { | ||
292 | nskb = skb_copy_expand(e->skb, skb_headroom(e->skb), | ||
293 | diff, GFP_ATOMIC); | ||
294 | if (!nskb) { | ||
295 | printk(KERN_WARNING "ip_queue: error " | ||
296 | "in mangle, dropping packet\n"); | ||
297 | return -ENOMEM; | ||
298 | } | ||
299 | kfree_skb(e->skb); | ||
300 | e->skb = nskb; | ||
301 | } | ||
302 | skb_put(e->skb, diff); | ||
303 | } | ||
304 | if (!skb_make_writable(e->skb, v->data_len)) | ||
305 | return -ENOMEM; | ||
306 | skb_copy_to_linear_data(e->skb, v->payload, v->data_len); | ||
307 | e->skb->ip_summed = CHECKSUM_NONE; | ||
308 | |||
309 | return 0; | ||
310 | } | ||
311 | |||
312 | static int | ||
313 | ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) | ||
314 | { | ||
315 | struct nf_queue_entry *entry; | ||
316 | |||
317 | if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN) | ||
318 | return -EINVAL; | ||
319 | |||
320 | entry = ipq_find_dequeue_entry(vmsg->id); | ||
321 | if (entry == NULL) | ||
322 | return -ENOENT; | ||
323 | else { | ||
324 | int verdict = vmsg->value; | ||
325 | |||
326 | if (vmsg->data_len && vmsg->data_len == len) | ||
327 | if (ipq_mangle_ipv4(vmsg, entry) < 0) | ||
328 | verdict = NF_DROP; | ||
329 | |||
330 | nf_reinject(entry, verdict); | ||
331 | return 0; | ||
332 | } | ||
333 | } | ||
334 | |||
335 | static int | ||
336 | ipq_set_mode(unsigned char mode, unsigned int range) | ||
337 | { | ||
338 | int status; | ||
339 | |||
340 | spin_lock_bh(&queue_lock); | ||
341 | status = __ipq_set_mode(mode, range); | ||
342 | spin_unlock_bh(&queue_lock); | ||
343 | return status; | ||
344 | } | ||
345 | |||
346 | static int | ||
347 | ipq_receive_peer(struct ipq_peer_msg *pmsg, | ||
348 | unsigned char type, unsigned int len) | ||
349 | { | ||
350 | int status = 0; | ||
351 | |||
352 | if (len < sizeof(*pmsg)) | ||
353 | return -EINVAL; | ||
354 | |||
355 | switch (type) { | ||
356 | case IPQM_MODE: | ||
357 | status = ipq_set_mode(pmsg->msg.mode.value, | ||
358 | pmsg->msg.mode.range); | ||
359 | break; | ||
360 | |||
361 | case IPQM_VERDICT: | ||
362 | status = ipq_set_verdict(&pmsg->msg.verdict, | ||
363 | len - sizeof(*pmsg)); | ||
364 | break; | ||
365 | default: | ||
366 | status = -EINVAL; | ||
367 | } | ||
368 | return status; | ||
369 | } | ||
370 | |||
371 | static int | ||
372 | dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) | ||
373 | { | ||
374 | if (entry->indev) | ||
375 | if (entry->indev->ifindex == ifindex) | ||
376 | return 1; | ||
377 | if (entry->outdev) | ||
378 | if (entry->outdev->ifindex == ifindex) | ||
379 | return 1; | ||
380 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
381 | if (entry->skb->nf_bridge) { | ||
382 | if (entry->skb->nf_bridge->physindev && | ||
383 | entry->skb->nf_bridge->physindev->ifindex == ifindex) | ||
384 | return 1; | ||
385 | if (entry->skb->nf_bridge->physoutdev && | ||
386 | entry->skb->nf_bridge->physoutdev->ifindex == ifindex) | ||
387 | return 1; | ||
388 | } | ||
389 | #endif | ||
390 | return 0; | ||
391 | } | ||
392 | |||
393 | static void | ||
394 | ipq_dev_drop(int ifindex) | ||
395 | { | ||
396 | ipq_flush(dev_cmp, ifindex); | ||
397 | } | ||
398 | |||
399 | #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) | ||
400 | |||
401 | static inline void | ||
402 | __ipq_rcv_skb(struct sk_buff *skb) | ||
403 | { | ||
404 | int status, type, pid, flags; | ||
405 | unsigned int nlmsglen, skblen; | ||
406 | struct nlmsghdr *nlh; | ||
407 | bool enable_timestamp = false; | ||
408 | |||
409 | skblen = skb->len; | ||
410 | if (skblen < sizeof(*nlh)) | ||
411 | return; | ||
412 | |||
413 | nlh = nlmsg_hdr(skb); | ||
414 | nlmsglen = nlh->nlmsg_len; | ||
415 | if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen) | ||
416 | return; | ||
417 | |||
418 | pid = nlh->nlmsg_pid; | ||
419 | flags = nlh->nlmsg_flags; | ||
420 | |||
421 | if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI) | ||
422 | RCV_SKB_FAIL(-EINVAL); | ||
423 | |||
424 | if (flags & MSG_TRUNC) | ||
425 | RCV_SKB_FAIL(-ECOMM); | ||
426 | |||
427 | type = nlh->nlmsg_type; | ||
428 | if (type < NLMSG_NOOP || type >= IPQM_MAX) | ||
429 | RCV_SKB_FAIL(-EINVAL); | ||
430 | |||
431 | if (type <= IPQM_BASE) | ||
432 | return; | ||
433 | |||
434 | if (!capable(CAP_NET_ADMIN)) | ||
435 | RCV_SKB_FAIL(-EPERM); | ||
436 | |||
437 | spin_lock_bh(&queue_lock); | ||
438 | |||
439 | if (peer_pid) { | ||
440 | if (peer_pid != pid) { | ||
441 | spin_unlock_bh(&queue_lock); | ||
442 | RCV_SKB_FAIL(-EBUSY); | ||
443 | } | ||
444 | } else { | ||
445 | enable_timestamp = true; | ||
446 | peer_pid = pid; | ||
447 | } | ||
448 | |||
449 | spin_unlock_bh(&queue_lock); | ||
450 | if (enable_timestamp) | ||
451 | net_enable_timestamp(); | ||
452 | status = ipq_receive_peer(NLMSG_DATA(nlh), type, | ||
453 | nlmsglen - NLMSG_LENGTH(0)); | ||
454 | if (status < 0) | ||
455 | RCV_SKB_FAIL(status); | ||
456 | |||
457 | if (flags & NLM_F_ACK) | ||
458 | netlink_ack(skb, nlh, 0); | ||
459 | } | ||
460 | |||
461 | static void | ||
462 | ipq_rcv_skb(struct sk_buff *skb) | ||
463 | { | ||
464 | mutex_lock(&ipqnl_mutex); | ||
465 | __ipq_rcv_skb(skb); | ||
466 | mutex_unlock(&ipqnl_mutex); | ||
467 | } | ||
468 | |||
469 | static int | ||
470 | ipq_rcv_dev_event(struct notifier_block *this, | ||
471 | unsigned long event, void *ptr) | ||
472 | { | ||
473 | struct net_device *dev = ptr; | ||
474 | |||
475 | if (!net_eq(dev_net(dev), &init_net)) | ||
476 | return NOTIFY_DONE; | ||
477 | |||
478 | /* Drop any packets associated with the downed device */ | ||
479 | if (event == NETDEV_DOWN) | ||
480 | ipq_dev_drop(dev->ifindex); | ||
481 | return NOTIFY_DONE; | ||
482 | } | ||
483 | |||
484 | static struct notifier_block ipq_dev_notifier = { | ||
485 | .notifier_call = ipq_rcv_dev_event, | ||
486 | }; | ||
487 | |||
488 | static int | ||
489 | ipq_rcv_nl_event(struct notifier_block *this, | ||
490 | unsigned long event, void *ptr) | ||
491 | { | ||
492 | struct netlink_notify *n = ptr; | ||
493 | |||
494 | if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) { | ||
495 | spin_lock_bh(&queue_lock); | ||
496 | if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) | ||
497 | __ipq_reset(); | ||
498 | spin_unlock_bh(&queue_lock); | ||
499 | } | ||
500 | return NOTIFY_DONE; | ||
501 | } | ||
502 | |||
503 | static struct notifier_block ipq_nl_notifier = { | ||
504 | .notifier_call = ipq_rcv_nl_event, | ||
505 | }; | ||
506 | |||
507 | #ifdef CONFIG_SYSCTL | ||
508 | static struct ctl_table_header *ipq_sysctl_header; | ||
509 | |||
510 | static ctl_table ipq_table[] = { | ||
511 | { | ||
512 | .procname = NET_IPQ_QMAX_NAME, | ||
513 | .data = &queue_maxlen, | ||
514 | .maxlen = sizeof(queue_maxlen), | ||
515 | .mode = 0644, | ||
516 | .proc_handler = proc_dointvec | ||
517 | }, | ||
518 | { } | ||
519 | }; | ||
520 | #endif | ||
521 | |||
522 | #ifdef CONFIG_PROC_FS | ||
523 | static int ip_queue_show(struct seq_file *m, void *v) | ||
524 | { | ||
525 | spin_lock_bh(&queue_lock); | ||
526 | |||
527 | seq_printf(m, | ||
528 | "Peer PID : %d\n" | ||
529 | "Copy mode : %hu\n" | ||
530 | "Copy range : %u\n" | ||
531 | "Queue length : %u\n" | ||
532 | "Queue max. length : %u\n" | ||
533 | "Queue dropped : %u\n" | ||
534 | "Netlink dropped : %u\n", | ||
535 | peer_pid, | ||
536 | copy_mode, | ||
537 | copy_range, | ||
538 | queue_total, | ||
539 | queue_maxlen, | ||
540 | queue_dropped, | ||
541 | queue_user_dropped); | ||
542 | |||
543 | spin_unlock_bh(&queue_lock); | ||
544 | return 0; | ||
545 | } | ||
546 | |||
547 | static int ip_queue_open(struct inode *inode, struct file *file) | ||
548 | { | ||
549 | return single_open(file, ip_queue_show, NULL); | ||
550 | } | ||
551 | |||
552 | static const struct file_operations ip_queue_proc_fops = { | ||
553 | .open = ip_queue_open, | ||
554 | .read = seq_read, | ||
555 | .llseek = seq_lseek, | ||
556 | .release = single_release, | ||
557 | .owner = THIS_MODULE, | ||
558 | }; | ||
559 | #endif | ||
560 | |||
561 | static const struct nf_queue_handler nfqh = { | ||
562 | .name = "ip_queue", | ||
563 | .outfn = &ipq_enqueue_packet, | ||
564 | }; | ||
565 | |||
566 | static int __init ip_queue_init(void) | ||
567 | { | ||
568 | int status = -ENOMEM; | ||
569 | struct proc_dir_entry *proc __maybe_unused; | ||
570 | |||
571 | netlink_register_notifier(&ipq_nl_notifier); | ||
572 | ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0, | ||
573 | ipq_rcv_skb, NULL, THIS_MODULE); | ||
574 | if (ipqnl == NULL) { | ||
575 | printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); | ||
576 | goto cleanup_netlink_notifier; | ||
577 | } | ||
578 | |||
579 | #ifdef CONFIG_PROC_FS | ||
580 | proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net, | ||
581 | &ip_queue_proc_fops); | ||
582 | if (!proc) { | ||
583 | printk(KERN_ERR "ip_queue: failed to create proc entry\n"); | ||
584 | goto cleanup_ipqnl; | ||
585 | } | ||
586 | #endif | ||
587 | register_netdevice_notifier(&ipq_dev_notifier); | ||
588 | #ifdef CONFIG_SYSCTL | ||
589 | ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table); | ||
590 | #endif | ||
591 | status = nf_register_queue_handler(NFPROTO_IPV4, &nfqh); | ||
592 | if (status < 0) { | ||
593 | printk(KERN_ERR "ip_queue: failed to register queue handler\n"); | ||
594 | goto cleanup_sysctl; | ||
595 | } | ||
596 | return status; | ||
597 | |||
598 | cleanup_sysctl: | ||
599 | #ifdef CONFIG_SYSCTL | ||
600 | unregister_sysctl_table(ipq_sysctl_header); | ||
601 | #endif | ||
602 | unregister_netdevice_notifier(&ipq_dev_notifier); | ||
603 | proc_net_remove(&init_net, IPQ_PROC_FS_NAME); | ||
604 | cleanup_ipqnl: __maybe_unused | ||
605 | netlink_kernel_release(ipqnl); | ||
606 | mutex_lock(&ipqnl_mutex); | ||
607 | mutex_unlock(&ipqnl_mutex); | ||
608 | |||
609 | cleanup_netlink_notifier: | ||
610 | netlink_unregister_notifier(&ipq_nl_notifier); | ||
611 | return status; | ||
612 | } | ||
613 | |||
614 | static void __exit ip_queue_fini(void) | ||
615 | { | ||
616 | nf_unregister_queue_handlers(&nfqh); | ||
617 | |||
618 | ipq_flush(NULL, 0); | ||
619 | |||
620 | #ifdef CONFIG_SYSCTL | ||
621 | unregister_sysctl_table(ipq_sysctl_header); | ||
622 | #endif | ||
623 | unregister_netdevice_notifier(&ipq_dev_notifier); | ||
624 | proc_net_remove(&init_net, IPQ_PROC_FS_NAME); | ||
625 | |||
626 | netlink_kernel_release(ipqnl); | ||
627 | mutex_lock(&ipqnl_mutex); | ||
628 | mutex_unlock(&ipqnl_mutex); | ||
629 | |||
630 | netlink_unregister_notifier(&ipq_nl_notifier); | ||
631 | } | ||
632 | |||
633 | MODULE_DESCRIPTION("IPv4 packet queue handler"); | ||
634 | MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); | ||
635 | MODULE_LICENSE("GPL"); | ||
636 | MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_FIREWALL); | ||
637 | |||
638 | module_init(ip_queue_init); | ||
639 | module_exit(ip_queue_fini); | ||
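Note: ip_queue had long been superseded by nfnetlink_queue before this removal; iptables rules move from -j QUEUE to -j NFQUEUE, and userspace consumers to libnetfilter_queue. A minimal accept-everything callback with that library (assuming its usual nfq_open()/nfq_create_queue() setup around it; error handling elided):

        #include <stdint.h>
        #include <netinet/in.h>                 /* ntohl */
        #include <linux/netfilter.h>            /* NF_ACCEPT */
        #include <libnetfilter_queue/libnetfilter_queue.h>

        static int cb(struct nfq_q_handle *qh, struct nfgenmsg *nfmsg,
                      struct nfq_data *nfa, void *data)
        {
                struct nfqnl_msg_packet_hdr *ph = nfq_get_msg_packet_hdr(nfa);
                uint32_t id = ph ? ntohl(ph->packet_id) : 0;

                return nfq_set_verdict(qh, id, NF_ACCEPT, 0, NULL);
        }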
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 24e556e83a3b..170b1fdd6b72 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
@@ -153,8 +153,7 @@ ip_checkentry(const struct ipt_ip *ip) | |||
153 | static unsigned int | 153 | static unsigned int |
154 | ipt_error(struct sk_buff *skb, const struct xt_action_param *par) | 154 | ipt_error(struct sk_buff *skb, const struct xt_action_param *par) |
155 | { | 155 | { |
156 | if (net_ratelimit()) | 156 | net_info_ratelimited("error: `%s'\n", (const char *)par->targinfo); |
157 | pr_info("error: `%s'\n", (const char *)par->targinfo); | ||
158 | 157 | ||
159 | return NF_DROP; | 158 | return NF_DROP; |
160 | } | 159 | } |
@@ -377,7 +376,7 @@ ipt_do_table(struct sk_buff *skb, | |||
377 | if (v < 0) { | 376 | if (v < 0) { |
378 | /* Pop from stack? */ | 377 | /* Pop from stack? */ |
379 | if (v != XT_RETURN) { | 378 | if (v != XT_RETURN) { |
380 | verdict = (unsigned)(-v) - 1; | 379 | verdict = (unsigned int)(-v) - 1; |
381 | break; | 380 | break; |
382 | } | 381 | } |
383 | if (*stackptr <= origptr) { | 382 | if (*stackptr <= origptr) { |
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index a639967eb727..fe5daea5214d 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c | |||
@@ -246,8 +246,7 @@ clusterip_hashfn(const struct sk_buff *skb, | |||
246 | dport = ports[1]; | 246 | dport = ports[1]; |
247 | } | 247 | } |
248 | } else { | 248 | } else { |
249 | if (net_ratelimit()) | 249 | net_info_ratelimited("unknown protocol %u\n", iph->protocol); |
250 | pr_info("unknown protocol %u\n", iph->protocol); | ||
251 | } | 250 | } |
252 | 251 | ||
253 | switch (config->hash_mode) { | 252 | switch (config->hash_mode) { |
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index cf73cc70ed2d..91747d4ebc26 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | |||
@@ -311,8 +311,9 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) | |||
311 | static int ipv4_tuple_to_nlattr(struct sk_buff *skb, | 311 | static int ipv4_tuple_to_nlattr(struct sk_buff *skb, |
312 | const struct nf_conntrack_tuple *tuple) | 312 | const struct nf_conntrack_tuple *tuple) |
313 | { | 313 | { |
314 | NLA_PUT_BE32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip); | 314 | if (nla_put_be32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) || |
315 | NLA_PUT_BE32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip); | 315 | nla_put_be32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip)) |
316 | goto nla_put_failure; | ||
316 | return 0; | 317 | return 0; |
317 | 318 | ||
318 | nla_put_failure: | 319 | nla_put_failure: |
@@ -364,7 +365,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { | |||
364 | .nla_policy = ipv4_nla_policy, | 365 | .nla_policy = ipv4_nla_policy, |
365 | #endif | 366 | #endif |
366 | #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) | 367 | #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) |
367 | .ctl_table_path = nf_net_ipv4_netfilter_sysctl_path, | 368 | .ctl_table_path = "net/ipv4/netfilter", |
368 | .ctl_table = ip_ct_sysctl_table, | 369 | .ctl_table = ip_ct_sysctl_table, |
369 | #endif | 370 | #endif |
370 | .me = THIS_MODULE, | 371 | .me = THIS_MODULE, |
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 7cbe9cb261c2..0847e373d33c 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c | |||
@@ -228,10 +228,10 @@ icmp_error(struct net *net, struct nf_conn *tmpl, | |||
228 | static int icmp_tuple_to_nlattr(struct sk_buff *skb, | 228 | static int icmp_tuple_to_nlattr(struct sk_buff *skb, |
229 | const struct nf_conntrack_tuple *t) | 229 | const struct nf_conntrack_tuple *t) |
230 | { | 230 | { |
231 | NLA_PUT_BE16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id); | 231 | if (nla_put_be16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id) || |
232 | NLA_PUT_U8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type); | 232 | nla_put_u8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type) || |
233 | NLA_PUT_U8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code); | 233 | nla_put_u8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code)) |
234 | 234 | goto nla_put_failure; | |
235 | return 0; | 235 | return 0; |
236 | 236 | ||
237 | nla_put_failure: | 237 | nla_put_failure: |
@@ -293,8 +293,8 @@ icmp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) | |||
293 | { | 293 | { |
294 | const unsigned int *timeout = data; | 294 | const unsigned int *timeout = data; |
295 | 295 | ||
296 | NLA_PUT_BE32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ)); | 296 | if (nla_put_be32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ))) |
297 | 297 | goto nla_put_failure; | |
298 | return 0; | 298 | return 0; |
299 | 299 | ||
300 | nla_put_failure: | 300 | nla_put_failure: |
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index 82536701e3a3..cad29c121318 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c | |||
@@ -42,9 +42,7 @@ static int set_addr(struct sk_buff *skb, | |||
42 | if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, | 42 | if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, |
43 | addroff, sizeof(buf), | 43 | addroff, sizeof(buf), |
44 | (char *) &buf, sizeof(buf))) { | 44 | (char *) &buf, sizeof(buf))) { |
45 | if (net_ratelimit()) | 45 | net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_tcp_packet error\n"); |
46 | pr_notice("nf_nat_h323: nf_nat_mangle_tcp_packet" | ||
47 | " error\n"); | ||
48 | return -1; | 46 | return -1; |
49 | } | 47 | } |
50 | 48 | ||
@@ -58,9 +56,7 @@ static int set_addr(struct sk_buff *skb, | |||
58 | if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, | 56 | if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, |
59 | addroff, sizeof(buf), | 57 | addroff, sizeof(buf), |
60 | (char *) &buf, sizeof(buf))) { | 58 | (char *) &buf, sizeof(buf))) { |
61 | if (net_ratelimit()) | 59 | net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_udp_packet error\n"); |
62 | pr_notice("nf_nat_h323: nf_nat_mangle_udp_packet" | ||
63 | " error\n"); | ||
64 | return -1; | 60 | return -1; |
65 | } | 61 | } |
66 | /* nf_nat_mangle_udp_packet uses skb_make_writable() to copy | 62 | /* nf_nat_mangle_udp_packet uses skb_make_writable() to copy |
@@ -214,8 +210,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, | |||
214 | 210 | ||
215 | /* Run out of expectations */ | 211 | /* Run out of expectations */ |
216 | if (i >= H323_RTP_CHANNEL_MAX) { | 212 | if (i >= H323_RTP_CHANNEL_MAX) { |
217 | if (net_ratelimit()) | 213 | net_notice_ratelimited("nf_nat_h323: out of expectations\n"); |
218 | pr_notice("nf_nat_h323: out of expectations\n"); | ||
219 | return 0; | 214 | return 0; |
220 | } | 215 | } |
221 | 216 | ||
@@ -244,8 +239,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, | |||
244 | } | 239 | } |
245 | 240 | ||
246 | if (nated_port == 0) { /* No port available */ | 241 | if (nated_port == 0) { /* No port available */ |
247 | if (net_ratelimit()) | 242 | net_notice_ratelimited("nf_nat_h323: out of RTP ports\n"); |
248 | pr_notice("nf_nat_h323: out of RTP ports\n"); | ||
249 | return 0; | 243 | return 0; |
250 | } | 244 | } |
251 | 245 | ||
@@ -308,8 +302,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, | |||
308 | } | 302 | } |
309 | 303 | ||
310 | if (nated_port == 0) { /* No port available */ | 304 | if (nated_port == 0) { /* No port available */ |
311 | if (net_ratelimit()) | 305 | net_notice_ratelimited("nf_nat_h323: out of TCP ports\n"); |
312 | pr_notice("nf_nat_h323: out of TCP ports\n"); | ||
313 | return 0; | 306 | return 0; |
314 | } | 307 | } |
315 | 308 | ||
@@ -365,8 +358,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, | |||
365 | } | 358 | } |
366 | 359 | ||
367 | if (nated_port == 0) { /* No port available */ | 360 | if (nated_port == 0) { /* No port available */ |
368 | if (net_ratelimit()) | 361 | net_notice_ratelimited("nf_nat_q931: out of TCP ports\n"); |
369 | pr_notice("nf_nat_q931: out of TCP ports\n"); | ||
370 | return 0; | 362 | return 0; |
371 | } | 363 | } |
372 | 364 | ||
@@ -456,8 +448,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, | |||
456 | } | 448 | } |
457 | 449 | ||
458 | if (nated_port == 0) { /* No port available */ | 450 | if (nated_port == 0) { /* No port available */ |
459 | if (net_ratelimit()) | 451 | net_notice_ratelimited("nf_nat_ras: out of TCP ports\n"); |
460 | pr_notice("nf_nat_ras: out of TCP ports\n"); | ||
461 | return 0; | 452 | return 0; |
462 | } | 453 | } |
463 | 454 | ||
@@ -545,8 +536,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, | |||
545 | } | 536 | } |
546 | 537 | ||
547 | if (nated_port == 0) { /* No port available */ | 538 | if (nated_port == 0) { /* No port available */ |
548 | if (net_ratelimit()) | 539 | net_notice_ratelimited("nf_nat_q931: out of TCP ports\n"); |
549 | pr_notice("nf_nat_q931: out of TCP ports\n"); | ||
550 | return 0; | 540 | return 0; |
551 | } | 541 | } |
552 | 542 | ||
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 57932c43960e..ea4a23813d26 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c | |||
@@ -283,7 +283,7 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff, | |||
283 | __be32 newip; | 283 | __be32 newip; |
284 | u_int16_t port; | 284 | u_int16_t port; |
285 | char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; | 285 | char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; |
286 | unsigned buflen; | 286 | unsigned int buflen; |
287 | 287 | ||
288 | /* Connection will come from reply */ | 288 | /* Connection will come from reply */ |
289 | if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip) | 289 | if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip) |
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 2133c30a4a5f..746edec8b86e 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c | |||
@@ -1206,8 +1206,7 @@ static int snmp_translate(struct nf_conn *ct, | |||
1206 | 1206 | ||
1207 | if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr), | 1207 | if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr), |
1208 | paylen, &map, &udph->check)) { | 1208 | paylen, &map, &udph->check)) { |
1209 | if (net_ratelimit()) | 1209 | net_warn_ratelimited("bsalg: parser failed\n"); |
1210 | printk(KERN_WARNING "bsalg: parser failed\n"); | ||
1211 | return NF_DROP; | 1210 | return NF_DROP; |
1212 | } | 1211 | } |
1213 | return NF_ACCEPT; | 1212 | return NF_ACCEPT; |
@@ -1241,9 +1240,8 @@ static int help(struct sk_buff *skb, unsigned int protoff, | |||
1241 | * can mess around with the payload. | 1240 | * can mess around with the payload. |
1242 | */ | 1241 | */ |
1243 | if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) { | 1242 | if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) { |
1244 | if (net_ratelimit()) | 1243 | net_warn_ratelimited("SNMP: dropping malformed packet src=%pI4 dst=%pI4\n", |
1245 | printk(KERN_WARNING "SNMP: dropping malformed packet src=%pI4 dst=%pI4\n", | 1244 | &iph->saddr, &iph->daddr); |
1246 | &iph->saddr, &iph->daddr); | ||
1247 | return NF_DROP; | 1245 | return NF_DROP; |
1248 | } | 1246 | } |
1249 | 1247 | ||
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 50009c787bcd..6e930c7174dd 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c | |||
@@ -51,15 +51,16 @@ static struct ping_table ping_table; | |||
51 | 51 | ||
52 | static u16 ping_port_rover; | 52 | static u16 ping_port_rover; |
53 | 53 | ||
54 | static inline int ping_hashfn(struct net *net, unsigned num, unsigned mask) | 54 | static inline int ping_hashfn(struct net *net, unsigned int num, unsigned int mask) |
55 | { | 55 | { |
56 | int res = (num + net_hash_mix(net)) & mask; | 56 | int res = (num + net_hash_mix(net)) & mask; |
57 | |||
57 | pr_debug("hash(%d) = %d\n", num, res); | 58 | pr_debug("hash(%d) = %d\n", num, res); |
58 | return res; | 59 | return res; |
59 | } | 60 | } |
60 | 61 | ||
61 | static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table, | 62 | static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table, |
62 | struct net *net, unsigned num) | 63 | struct net *net, unsigned int num) |
63 | { | 64 | { |
64 | return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)]; | 65 | return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)]; |
65 | } | 66 | } |
@@ -188,7 +189,8 @@ static void inet_get_ping_group_range_net(struct net *net, gid_t *low, | |||
188 | gid_t *high) | 189 | gid_t *high) |
189 | { | 190 | { |
190 | gid_t *data = net->ipv4.sysctl_ping_group_range; | 191 | gid_t *data = net->ipv4.sysctl_ping_group_range; |
191 | unsigned seq; | 192 | unsigned int seq; |
193 | |||
192 | do { | 194 | do { |
193 | seq = read_seqbegin(&sysctl_local_ports.lock); | 195 | seq = read_seqbegin(&sysctl_local_ports.lock); |
194 | 196 | ||
@@ -410,7 +412,7 @@ struct pingfakehdr { | |||
410 | __wsum wcheck; | 412 | __wsum wcheck; |
411 | }; | 413 | }; |
412 | 414 | ||
413 | static int ping_getfrag(void *from, char * to, | 415 | static int ping_getfrag(void *from, char *to, |
414 | int offset, int fraglen, int odd, struct sk_buff *skb) | 416 | int offset, int fraglen, int odd, struct sk_buff *skb) |
415 | { | 417 | { |
416 | struct pingfakehdr *pfh = (struct pingfakehdr *)from; | 418 | struct pingfakehdr *pfh = (struct pingfakehdr *)from; |
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index bbd604c68e68..4032b818f3e4 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -288,7 +288,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) | |||
288 | read_unlock(&raw_v4_hashinfo.lock); | 288 | read_unlock(&raw_v4_hashinfo.lock); |
289 | } | 289 | } |
290 | 290 | ||
291 | static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb) | 291 | static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) |
292 | { | 292 | { |
293 | /* Charge it to the socket. */ | 293 | /* Charge it to the socket. */ |
294 | 294 | ||
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 167ea10b521a..76e5880cdb07 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -109,6 +109,7 @@ | |||
109 | #include <net/rtnetlink.h> | 109 | #include <net/rtnetlink.h> |
110 | #ifdef CONFIG_SYSCTL | 110 | #ifdef CONFIG_SYSCTL |
111 | #include <linux/sysctl.h> | 111 | #include <linux/sysctl.h> |
112 | #include <linux/kmemleak.h> | ||
112 | #endif | 113 | #endif |
113 | #include <net/secure_seq.h> | 114 | #include <net/secure_seq.h> |
114 | 115 | ||
@@ -229,7 +230,7 @@ const __u8 ip_tos2prio[16] = { | |||
229 | TC_PRIO_INTERACTIVE_BULK, | 230 | TC_PRIO_INTERACTIVE_BULK, |
230 | ECN_OR_COST(INTERACTIVE_BULK) | 231 | ECN_OR_COST(INTERACTIVE_BULK) |
231 | }; | 232 | }; |
232 | 233 | EXPORT_SYMBOL(ip_tos2prio); | |
233 | 234 | ||
234 | /* | 235 | /* |
235 | * Route cache. | 236 | * Route cache. |
@@ -296,7 +297,7 @@ static inline void rt_hash_lock_init(void) | |||
296 | #endif | 297 | #endif |
297 | 298 | ||
298 | static struct rt_hash_bucket *rt_hash_table __read_mostly; | 299 | static struct rt_hash_bucket *rt_hash_table __read_mostly; |
299 | static unsigned rt_hash_mask __read_mostly; | 300 | static unsigned int rt_hash_mask __read_mostly; |
300 | static unsigned int rt_hash_log __read_mostly; | 301 | static unsigned int rt_hash_log __read_mostly; |
301 | 302 | ||
302 | static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); | 303 | static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); |
@@ -959,8 +960,7 @@ void rt_cache_flush_batch(struct net *net) | |||
959 | 960 | ||
960 | static void rt_emergency_hash_rebuild(struct net *net) | 961 | static void rt_emergency_hash_rebuild(struct net *net) |
961 | { | 962 | { |
962 | if (net_ratelimit()) | 963 | net_warn_ratelimited("Route hash chain too long!\n"); |
963 | pr_warn("Route hash chain too long!\n"); | ||
964 | rt_cache_invalidate(net); | 964 | rt_cache_invalidate(net); |
965 | } | 965 | } |
966 | 966 | ||
@@ -1083,8 +1083,7 @@ static int rt_garbage_collect(struct dst_ops *ops) | |||
1083 | goto out; | 1083 | goto out; |
1084 | if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size) | 1084 | if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size) |
1085 | goto out; | 1085 | goto out; |
1086 | if (net_ratelimit()) | 1086 | net_warn_ratelimited("dst cache overflow\n"); |
1087 | pr_warn("dst cache overflow\n"); | ||
1088 | RT_CACHE_STAT_INC(gc_dst_overflow); | 1087 | RT_CACHE_STAT_INC(gc_dst_overflow); |
1089 | return 1; | 1088 | return 1; |
1090 | 1089 | ||
@@ -1143,7 +1142,7 @@ static int rt_bind_neighbour(struct rtable *rt) | |||
1143 | return 0; | 1142 | return 0; |
1144 | } | 1143 | } |
1145 | 1144 | ||
1146 | static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt, | 1145 | static struct rtable *rt_intern_hash(unsigned int hash, struct rtable *rt, |
1147 | struct sk_buff *skb, int ifindex) | 1146 | struct sk_buff *skb, int ifindex) |
1148 | { | 1147 | { |
1149 | struct rtable *rth, *cand; | 1148 | struct rtable *rth, *cand; |
@@ -1181,8 +1180,7 @@ restart: | |||
1181 | if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { | 1180 | if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { |
1182 | int err = rt_bind_neighbour(rt); | 1181 | int err = rt_bind_neighbour(rt); |
1183 | if (err) { | 1182 | if (err) { |
1184 | if (net_ratelimit()) | 1183 | net_warn_ratelimited("Neighbour table failure & not caching routes\n"); |
1185 | pr_warn("Neighbour table failure & not caching routes\n"); | ||
1186 | ip_rt_put(rt); | 1184 | ip_rt_put(rt); |
1187 | return ERR_PTR(err); | 1185 | return ERR_PTR(err); |
1188 | } | 1186 | } |
@@ -1298,8 +1296,7 @@ restart: | |||
1298 | goto restart; | 1296 | goto restart; |
1299 | } | 1297 | } |
1300 | 1298 | ||
1301 | if (net_ratelimit()) | 1299 | net_warn_ratelimited("Neighbour table overflow\n"); |
1302 | pr_warn("Neighbour table overflow\n"); | ||
1303 | rt_drop(rt); | 1300 | rt_drop(rt); |
1304 | return ERR_PTR(-ENOBUFS); | 1301 | return ERR_PTR(-ENOBUFS); |
1305 | } | 1302 | } |
@@ -1377,14 +1374,13 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) | |||
1377 | return; | 1374 | return; |
1378 | } | 1375 | } |
1379 | } else if (!rt) | 1376 | } else if (!rt) |
1380 | printk(KERN_DEBUG "rt_bind_peer(0) @%p\n", | 1377 | pr_debug("rt_bind_peer(0) @%p\n", __builtin_return_address(0)); |
1381 | __builtin_return_address(0)); | ||
1382 | 1378 | ||
1383 | ip_select_fb_ident(iph); | 1379 | ip_select_fb_ident(iph); |
1384 | } | 1380 | } |
1385 | EXPORT_SYMBOL(__ip_select_ident); | 1381 | EXPORT_SYMBOL(__ip_select_ident); |
1386 | 1382 | ||
1387 | static void rt_del(unsigned hash, struct rtable *rt) | 1383 | static void rt_del(unsigned int hash, struct rtable *rt) |
1388 | { | 1384 | { |
1389 | struct rtable __rcu **rthp; | 1385 | struct rtable __rcu **rthp; |
1390 | struct rtable *aux; | 1386 | struct rtable *aux; |
@@ -1502,11 +1498,11 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1502 | 1498 | ||
1503 | reject_redirect: | 1499 | reject_redirect: |
1504 | #ifdef CONFIG_IP_ROUTE_VERBOSE | 1500 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
1505 | if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) | 1501 | if (IN_DEV_LOG_MARTIANS(in_dev)) |
1506 | pr_info("Redirect from %pI4 on %s about %pI4 ignored\n" | 1502 | net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n" |
1507 | " Advised path = %pI4 -> %pI4\n", | 1503 | " Advised path = %pI4 -> %pI4\n", |
1508 | &old_gw, dev->name, &new_gw, | 1504 | &old_gw, dev->name, &new_gw, |
1509 | &saddr, &daddr); | 1505 | &saddr, &daddr); |
1510 | #endif | 1506 | #endif |
1511 | ; | 1507 | ; |
1512 | } | 1508 | } |
@@ -1538,7 +1534,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) | |||
1538 | ip_rt_put(rt); | 1534 | ip_rt_put(rt); |
1539 | ret = NULL; | 1535 | ret = NULL; |
1540 | } else if (rt->rt_flags & RTCF_REDIRECTED) { | 1536 | } else if (rt->rt_flags & RTCF_REDIRECTED) { |
1541 | unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, | 1537 | unsigned int hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, |
1542 | rt->rt_oif, | 1538 | rt->rt_oif, |
1543 | rt_genid(dev_net(dst->dev))); | 1539 | rt_genid(dev_net(dst->dev))); |
1544 | rt_del(hash, rt); | 1540 | rt_del(hash, rt); |
@@ -1616,11 +1612,10 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
1616 | ++peer->rate_tokens; | 1612 | ++peer->rate_tokens; |
1617 | #ifdef CONFIG_IP_ROUTE_VERBOSE | 1613 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
1618 | if (log_martians && | 1614 | if (log_martians && |
1619 | peer->rate_tokens == ip_rt_redirect_number && | 1615 | peer->rate_tokens == ip_rt_redirect_number) |
1620 | net_ratelimit()) | 1616 | net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", |
1621 | pr_warn("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", | 1617 | &ip_hdr(skb)->saddr, rt->rt_iif, |
1622 | &ip_hdr(skb)->saddr, rt->rt_iif, | 1618 | &rt->rt_dst, &rt->rt_gateway); |
1623 | &rt->rt_dst, &rt->rt_gateway); | ||
1624 | #endif | 1619 | #endif |
1625 | } | 1620 | } |
1626 | } | 1621 | } |
@@ -1843,9 +1838,9 @@ static void ipv4_link_failure(struct sk_buff *skb) | |||
1843 | 1838 | ||
1844 | static int ip_rt_bug(struct sk_buff *skb) | 1839 | static int ip_rt_bug(struct sk_buff *skb) |
1845 | { | 1840 | { |
1846 | printk(KERN_DEBUG "ip_rt_bug: %pI4 -> %pI4, %s\n", | 1841 | pr_debug("%s: %pI4 -> %pI4, %s\n", |
1847 | &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, | 1842 | __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, |
1848 | skb->dev ? skb->dev->name : "?"); | 1843 | skb->dev ? skb->dev->name : "?"); |
1849 | kfree_skb(skb); | 1844 | kfree_skb(skb); |
1850 | WARN_ON(1); | 1845 | WARN_ON(1); |
1851 | return 0; | 1846 | return 0; |
@@ -2134,8 +2129,7 @@ static int __mkroute_input(struct sk_buff *skb, | |||
2134 | /* get a working reference to the output device */ | 2129 | /* get a working reference to the output device */ |
2135 | out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); | 2130 | out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); |
2136 | if (out_dev == NULL) { | 2131 | if (out_dev == NULL) { |
2137 | if (net_ratelimit()) | 2132 | net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n"); |
2138 | pr_crit("Bug in ip_route_input_slow(). Please report.\n"); | ||
2139 | return -EINVAL; | 2133 | return -EINVAL; |
2140 | } | 2134 | } |
2141 | 2135 | ||
@@ -2215,9 +2209,9 @@ static int ip_mkroute_input(struct sk_buff *skb, | |||
2215 | struct in_device *in_dev, | 2209 | struct in_device *in_dev, |
2216 | __be32 daddr, __be32 saddr, u32 tos) | 2210 | __be32 daddr, __be32 saddr, u32 tos) |
2217 | { | 2211 | { |
2218 | struct rtable* rth = NULL; | 2212 | struct rtable *rth = NULL; |
2219 | int err; | 2213 | int err; |
2220 | unsigned hash; | 2214 | unsigned int hash; |
2221 | 2215 | ||
2222 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 2216 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
2223 | if (res->fi && res->fi->fib_nhs > 1) | 2217 | if (res->fi && res->fi->fib_nhs > 1) |
@@ -2255,13 +2249,13 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2255 | struct fib_result res; | 2249 | struct fib_result res; |
2256 | struct in_device *in_dev = __in_dev_get_rcu(dev); | 2250 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
2257 | struct flowi4 fl4; | 2251 | struct flowi4 fl4; |
2258 | unsigned flags = 0; | 2252 | unsigned int flags = 0; |
2259 | u32 itag = 0; | 2253 | u32 itag = 0; |
2260 | struct rtable * rth; | 2254 | struct rtable *rth; |
2261 | unsigned hash; | 2255 | unsigned int hash; |
2262 | __be32 spec_dst; | 2256 | __be32 spec_dst; |
2263 | int err = -EINVAL; | 2257 | int err = -EINVAL; |
2264 | struct net * net = dev_net(dev); | 2258 | struct net *net = dev_net(dev); |
2265 | 2259 | ||
2266 | /* IP on this device is disabled. */ | 2260 | /* IP on this device is disabled. */ |
2267 | 2261 | ||
@@ -2406,9 +2400,9 @@ no_route: | |||
2406 | martian_destination: | 2400 | martian_destination: |
2407 | RT_CACHE_STAT_INC(in_martian_dst); | 2401 | RT_CACHE_STAT_INC(in_martian_dst); |
2408 | #ifdef CONFIG_IP_ROUTE_VERBOSE | 2402 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
2409 | if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) | 2403 | if (IN_DEV_LOG_MARTIANS(in_dev)) |
2410 | pr_warn("martian destination %pI4 from %pI4, dev %s\n", | 2404 | net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n", |
2411 | &daddr, &saddr, dev->name); | 2405 | &daddr, &saddr, dev->name); |
2412 | #endif | 2406 | #endif |
2413 | 2407 | ||
2414 | e_hostunreach: | 2408 | e_hostunreach: |
@@ -2433,8 +2427,8 @@ martian_source_keep_err: | |||
2433 | int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, | 2427 | int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, |
2434 | u8 tos, struct net_device *dev, bool noref) | 2428 | u8 tos, struct net_device *dev, bool noref) |
2435 | { | 2429 | { |
2436 | struct rtable * rth; | 2430 | struct rtable *rth; |
2437 | unsigned hash; | 2431 | unsigned int hash; |
2438 | int iif = dev->ifindex; | 2432 | int iif = dev->ifindex; |
2439 | struct net *net; | 2433 | struct net *net; |
2440 | int res; | 2434 | int res; |
@@ -2972,7 +2966,8 @@ static int rt_fill_info(struct net *net, | |||
2972 | r->rtm_src_len = 0; | 2966 | r->rtm_src_len = 0; |
2973 | r->rtm_tos = rt->rt_key_tos; | 2967 | r->rtm_tos = rt->rt_key_tos; |
2974 | r->rtm_table = RT_TABLE_MAIN; | 2968 | r->rtm_table = RT_TABLE_MAIN; |
2975 | NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); | 2969 | if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN)) |
2970 | goto nla_put_failure; | ||
2976 | r->rtm_type = rt->rt_type; | 2971 | r->rtm_type = rt->rt_type; |
2977 | r->rtm_scope = RT_SCOPE_UNIVERSE; | 2972 | r->rtm_scope = RT_SCOPE_UNIVERSE; |
2978 | r->rtm_protocol = RTPROT_UNSPEC; | 2973 | r->rtm_protocol = RTPROT_UNSPEC; |
@@ -2980,31 +2975,38 @@ static int rt_fill_info(struct net *net, | |||
2980 | if (rt->rt_flags & RTCF_NOTIFY) | 2975 | if (rt->rt_flags & RTCF_NOTIFY) |
2981 | r->rtm_flags |= RTM_F_NOTIFY; | 2976 | r->rtm_flags |= RTM_F_NOTIFY; |
2982 | 2977 | ||
2983 | NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst); | 2978 | if (nla_put_be32(skb, RTA_DST, rt->rt_dst)) |
2984 | 2979 | goto nla_put_failure; | |
2985 | if (rt->rt_key_src) { | 2980 | if (rt->rt_key_src) { |
2986 | r->rtm_src_len = 32; | 2981 | r->rtm_src_len = 32; |
2987 | NLA_PUT_BE32(skb, RTA_SRC, rt->rt_key_src); | 2982 | if (nla_put_be32(skb, RTA_SRC, rt->rt_key_src)) |
2983 | goto nla_put_failure; | ||
2988 | } | 2984 | } |
2989 | if (rt->dst.dev) | 2985 | if (rt->dst.dev && |
2990 | NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); | 2986 | nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) |
2987 | goto nla_put_failure; | ||
2991 | #ifdef CONFIG_IP_ROUTE_CLASSID | 2988 | #ifdef CONFIG_IP_ROUTE_CLASSID |
2992 | if (rt->dst.tclassid) | 2989 | if (rt->dst.tclassid && |
2993 | NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); | 2990 | nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid)) |
2991 | goto nla_put_failure; | ||
2994 | #endif | 2992 | #endif |
2995 | if (rt_is_input_route(rt)) | 2993 | if (rt_is_input_route(rt)) { |
2996 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); | 2994 | if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_spec_dst)) |
2997 | else if (rt->rt_src != rt->rt_key_src) | 2995 | goto nla_put_failure; |
2998 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src); | 2996 | } else if (rt->rt_src != rt->rt_key_src) { |
2999 | 2997 | if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_src)) | |
3000 | if (rt->rt_dst != rt->rt_gateway) | 2998 | goto nla_put_failure; |
3001 | NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); | 2999 | } |
3000 | if (rt->rt_dst != rt->rt_gateway && | ||
3001 | nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway)) | ||
3002 | goto nla_put_failure; | ||
3002 | 3003 | ||
3003 | if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) | 3004 | if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) |
3004 | goto nla_put_failure; | 3005 | goto nla_put_failure; |
3005 | 3006 | ||
3006 | if (rt->rt_mark) | 3007 | if (rt->rt_mark && |
3007 | NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark); | 3008 | nla_put_be32(skb, RTA_MARK, rt->rt_mark)) |
3009 | goto nla_put_failure; | ||
3008 | 3010 | ||
3009 | error = rt->dst.error; | 3011 | error = rt->dst.error; |
3010 | if (peer) { | 3012 | if (peer) { |
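Context for the conversion above: the removed NLA_PUT_* macros hid a goto inside the macro body, roughly as sketched below (patterned on the old include/net/netlink.h; details assumed), which is why each call site now carries an explicit "if (...) goto nla_put_failure;".

        /* Roughly what the old macros expanded to: */
        #define NLA_PUT(skb, attrtype, attrlen, data)                     \
        do {                                                              \
                if (unlikely(nla_put(skb, attrtype, attrlen, data) < 0))  \
                        goto nla_put_failure;                             \
        } while (0)

        #define NLA_PUT_TYPE(skb, type, attrtype, value)                  \
        do {                                                              \
                type __tmp = value;                                       \
                NLA_PUT(skb, attrtype, sizeof(type), &__tmp);             \
        } while (0)

Making the jump visible at the call site is the point of the churn; the macros can then be deleted tree-wide.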
@@ -3045,7 +3047,8 @@ static int rt_fill_info(struct net *net, | |||
3045 | } | 3047 | } |
3046 | } else | 3048 | } else |
3047 | #endif | 3049 | #endif |
3048 | NLA_PUT_U32(skb, RTA_IIF, rt->rt_iif); | 3050 | if (nla_put_u32(skb, RTA_IIF, rt->rt_iif)) |
3051 | goto nla_put_failure; | ||
3049 | } | 3052 | } |
3050 | 3053 | ||
3051 | if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, | 3054 | if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, |
@@ -3059,7 +3062,7 @@ nla_put_failure: | |||
3059 | return -EMSGSIZE; | 3062 | return -EMSGSIZE; |
3060 | } | 3063 | } |
3061 | 3064 | ||
3062 | static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) | 3065 | static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) |
3063 | { | 3066 | { |
3064 | struct net *net = sock_net(in_skb->sk); | 3067 | struct net *net = sock_net(in_skb->sk); |
3065 | struct rtmsg *rtm; | 3068 | struct rtmsg *rtm; |
@@ -3334,23 +3337,6 @@ static ctl_table ipv4_route_table[] = { | |||
3334 | { } | 3337 | { } |
3335 | }; | 3338 | }; |
3336 | 3339 | ||
3337 | static struct ctl_table empty[1]; | ||
3338 | |||
3339 | static struct ctl_table ipv4_skeleton[] = | ||
3340 | { | ||
3341 | { .procname = "route", | ||
3342 | .mode = 0555, .child = ipv4_route_table}, | ||
3343 | { .procname = "neigh", | ||
3344 | .mode = 0555, .child = empty}, | ||
3345 | { } | ||
3346 | }; | ||
3347 | |||
3348 | static __net_initdata struct ctl_path ipv4_path[] = { | ||
3349 | { .procname = "net", }, | ||
3350 | { .procname = "ipv4", }, | ||
3351 | { }, | ||
3352 | }; | ||
3353 | |||
3354 | static struct ctl_table ipv4_route_flush_table[] = { | 3340 | static struct ctl_table ipv4_route_flush_table[] = { |
3355 | { | 3341 | { |
3356 | .procname = "flush", | 3342 | .procname = "flush", |
@@ -3361,13 +3347,6 @@ static struct ctl_table ipv4_route_flush_table[] = { | |||
3361 | { }, | 3347 | { }, |
3362 | }; | 3348 | }; |
3363 | 3349 | ||
3364 | static __net_initdata struct ctl_path ipv4_route_path[] = { | ||
3365 | { .procname = "net", }, | ||
3366 | { .procname = "ipv4", }, | ||
3367 | { .procname = "route", }, | ||
3368 | { }, | ||
3369 | }; | ||
3370 | |||
3371 | static __net_init int sysctl_route_net_init(struct net *net) | 3350 | static __net_init int sysctl_route_net_init(struct net *net) |
3372 | { | 3351 | { |
3373 | struct ctl_table *tbl; | 3352 | struct ctl_table *tbl; |
@@ -3380,8 +3359,7 @@ static __net_init int sysctl_route_net_init(struct net *net) | |||
3380 | } | 3359 | } |
3381 | tbl[0].extra1 = net; | 3360 | tbl[0].extra1 = net; |
3382 | 3361 | ||
3383 | net->ipv4.route_hdr = | 3362 | net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl); |
3384 | register_net_sysctl_table(net, ipv4_route_path, tbl); | ||
3385 | if (net->ipv4.route_hdr == NULL) | 3363 | if (net->ipv4.route_hdr == NULL) |
3386 | goto err_reg; | 3364 | goto err_reg; |
3387 | return 0; | 3365 | return 0; |
@@ -3505,6 +3483,6 @@ int __init ip_rt_init(void) | |||
3505 | */ | 3483 | */ |
3506 | void __init ip_static_sysctl_init(void) | 3484 | void __init ip_static_sysctl_init(void) |
3507 | { | 3485 | { |
3508 | register_sysctl_paths(ipv4_path, ipv4_skeleton); | 3486 | register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table); |
3509 | } | 3487 | } |
3510 | #endif | 3488 | #endif |
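The deleted ctl_path arrays existed only to spell out the "net/ipv4" and "net/ipv4/route" directories one component at a time; register_net_sysctl(), as used above, takes the path as a plain string. A minimal per-net registration under the new API, mirroring the converted code:

        /* Sketch: string-path sysctl registration (mirrors the hunk above). */
        static __net_init int example_route_sysctl_init(struct net *net)
        {
                struct ctl_table_header *hdr;

                hdr = register_net_sysctl(net, "net/ipv4/route",
                                          ipv4_route_flush_table);
                if (hdr == NULL)
                        return -ENOMEM;
                net->ipv4.route_hdr = hdr;
                return 0;
        }

Teardown is unchanged: unregister_net_sysctl_table(net->ipv4.route_hdr).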
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 7a7724da9bff..ef32956ed655 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <net/tcp_memcontrol.h> | 27 | #include <net/tcp_memcontrol.h> |
28 | 28 | ||
29 | static int zero; | 29 | static int zero; |
30 | static int two = 2; | ||
30 | static int tcp_retr1_max = 255; | 31 | static int tcp_retr1_max = 255; |
31 | static int ip_local_port_range_min[] = { 1, 1 }; | 32 | static int ip_local_port_range_min[] = { 1, 1 }; |
32 | static int ip_local_port_range_max[] = { 65535, 65535 }; | 33 | static int ip_local_port_range_max[] = { 65535, 65535 }; |
@@ -78,7 +79,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, | |||
78 | static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high) | 79 | static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high) |
79 | { | 80 | { |
80 | gid_t *data = table->data; | 81 | gid_t *data = table->data; |
81 | unsigned seq; | 82 | unsigned int seq; |
82 | do { | 83 | do { |
83 | seq = read_seqbegin(&sysctl_local_ports.lock); | 84 | seq = read_seqbegin(&sysctl_local_ports.lock); |
84 | 85 | ||
@@ -677,6 +678,15 @@ static struct ctl_table ipv4_table[] = { | |||
677 | .proc_handler = proc_dointvec | 678 | .proc_handler = proc_dointvec |
678 | }, | 679 | }, |
679 | { | 680 | { |
681 | .procname = "tcp_early_retrans", | ||
682 | .data = &sysctl_tcp_early_retrans, | ||
683 | .maxlen = sizeof(int), | ||
684 | .mode = 0644, | ||
685 | .proc_handler = proc_dointvec_minmax, | ||
686 | .extra1 = &zero, | ||
687 | .extra2 = &two, | ||
688 | }, | ||
689 | { | ||
680 | .procname = "udp_mem", | 690 | .procname = "udp_mem", |
681 | .data = &sysctl_udp_mem, | 691 | .data = &sysctl_udp_mem, |
682 | .maxlen = sizeof(sysctl_udp_mem), | 692 | .maxlen = sizeof(sysctl_udp_mem), |
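The new tcp_early_retrans entry is clamped to [0, 2] by proc_dointvec_minmax via the &zero/&two bounds added above. Judging from tcp_pause_early_retransmit() later in this diff, 0 disables early retransmit, 1 enables it immediately, and 2 (the default set in tcp_input.c) adds a short pause before retransmitting. A hypothetical userspace probe of the knob:

        /* Hypothetical probe; value meanings inferred from this diff. */
        #include <stdio.h>

        int main(void)
        {
                FILE *f = fopen("/proc/sys/net/ipv4/tcp_early_retrans", "r");
                int val;

                if (f && fscanf(f, "%d", &val) == 1)
                        printf("tcp_early_retrans = %d (0=off, 1=on, 2=on+delay)\n",
                               val);
                if (f)
                        fclose(f);
                return 0;
        }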
@@ -768,13 +778,6 @@ static struct ctl_table ipv4_net_table[] = { | |||
768 | { } | 778 | { } |
769 | }; | 779 | }; |
770 | 780 | ||
771 | struct ctl_path net_ipv4_ctl_path[] = { | ||
772 | { .procname = "net", }, | ||
773 | { .procname = "ipv4", }, | ||
774 | { }, | ||
775 | }; | ||
776 | EXPORT_SYMBOL_GPL(net_ipv4_ctl_path); | ||
777 | |||
778 | static __net_init int ipv4_sysctl_init_net(struct net *net) | 781 | static __net_init int ipv4_sysctl_init_net(struct net *net) |
779 | { | 782 | { |
780 | struct ctl_table *table; | 783 | struct ctl_table *table; |
@@ -815,8 +818,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) | |||
815 | 818 | ||
816 | tcp_init_mem(net); | 819 | tcp_init_mem(net); |
817 | 820 | ||
818 | net->ipv4.ipv4_hdr = register_net_sysctl_table(net, | 821 | net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); |
819 | net_ipv4_ctl_path, table); | ||
820 | if (net->ipv4.ipv4_hdr == NULL) | 822 | if (net->ipv4.ipv4_hdr == NULL) |
821 | goto err_reg; | 823 | goto err_reg; |
822 | 824 | ||
@@ -857,12 +859,12 @@ static __init int sysctl_ipv4_init(void) | |||
857 | if (!i->procname) | 859 | if (!i->procname) |
858 | return -EINVAL; | 860 | return -EINVAL; |
859 | 861 | ||
860 | hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table); | 862 | hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table); |
861 | if (hdr == NULL) | 863 | if (hdr == NULL) |
862 | return -ENOMEM; | 864 | return -ENOMEM; |
863 | 865 | ||
864 | if (register_pernet_subsys(&ipv4_sysctl_ops)) { | 866 | if (register_pernet_subsys(&ipv4_sysctl_ops)) { |
865 | unregister_sysctl_table(hdr); | 867 | unregister_net_sysctl_table(hdr); |
866 | return -ENOMEM; | 868 | return -ENOMEM; |
867 | } | 869 | } |
868 | 870 | ||
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1272a88c2a63..feb2e25091b1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -363,6 +363,71 @@ static int retrans_to_secs(u8 retrans, int timeout, int rto_max) | |||
363 | return period; | 363 | return period; |
364 | } | 364 | } |
365 | 365 | ||
366 | /* Address-family independent initialization for a tcp_sock. | ||
367 | * | ||
368 | * NOTE: a lot of things are set to zero explicitly by the call | ||
369 | * to sk_alloc(), so they need not be done here. | ||
370 | */ | ||
371 | void tcp_init_sock(struct sock *sk) | ||
372 | { | ||
373 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
374 | struct tcp_sock *tp = tcp_sk(sk); | ||
375 | |||
376 | skb_queue_head_init(&tp->out_of_order_queue); | ||
377 | tcp_init_xmit_timers(sk); | ||
378 | tcp_prequeue_init(tp); | ||
379 | |||
380 | icsk->icsk_rto = TCP_TIMEOUT_INIT; | ||
381 | tp->mdev = TCP_TIMEOUT_INIT; | ||
382 | |||
383 | /* So many TCP implementations out there (incorrectly) count the | ||
384 | * initial SYN frame in their delayed-ACK and congestion control | ||
385 | * algorithms that we must have the following bandaid to talk | ||
386 | * efficiently to them. -DaveM | ||
387 | */ | ||
388 | tp->snd_cwnd = TCP_INIT_CWND; | ||
389 | |||
390 | /* See draft-stevens-tcpca-spec-01 for discussion of the | ||
391 | * initialization of these values. | ||
392 | */ | ||
393 | tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; | ||
394 | tp->snd_cwnd_clamp = ~0; | ||
395 | tp->mss_cache = TCP_MSS_DEFAULT; | ||
396 | |||
397 | tp->reordering = sysctl_tcp_reordering; | ||
398 | tcp_enable_early_retrans(tp); | ||
399 | icsk->icsk_ca_ops = &tcp_init_congestion_ops; | ||
400 | |||
401 | sk->sk_state = TCP_CLOSE; | ||
402 | |||
403 | sk->sk_write_space = sk_stream_write_space; | ||
404 | sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); | ||
405 | |||
406 | icsk->icsk_sync_mss = tcp_sync_mss; | ||
407 | |||
408 | /* TCP Cookie Transactions */ | ||
409 | if (sysctl_tcp_cookie_size > 0) { | ||
410 | /* Default, cookies without s_data_payload. */ | ||
411 | tp->cookie_values = | ||
412 | kzalloc(sizeof(*tp->cookie_values), | ||
413 | sk->sk_allocation); | ||
414 | if (tp->cookie_values != NULL) | ||
415 | kref_init(&tp->cookie_values->kref); | ||
416 | } | ||
417 | /* Presumed zeroed, in order of appearance: | ||
418 | * cookie_in_always, cookie_out_never, | ||
419 | * s_data_constant, s_data_in, s_data_out | ||
420 | */ | ||
421 | sk->sk_sndbuf = sysctl_tcp_wmem[1]; | ||
422 | sk->sk_rcvbuf = sysctl_tcp_rmem[1]; | ||
423 | |||
424 | local_bh_disable(); | ||
425 | sock_update_memcg(sk); | ||
426 | sk_sockets_allocated_inc(sk); | ||
427 | local_bh_enable(); | ||
428 | } | ||
429 | EXPORT_SYMBOL(tcp_init_sock); | ||
430 | |||
366 | /* | 431 | /* |
367 | * Wait for a TCP event. | 432 | * Wait for a TCP event. |
368 | * | 433 | * |
@@ -784,9 +849,10 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse | |||
784 | while (psize > 0) { | 849 | while (psize > 0) { |
785 | struct sk_buff *skb = tcp_write_queue_tail(sk); | 850 | struct sk_buff *skb = tcp_write_queue_tail(sk); |
786 | struct page *page = pages[poffset / PAGE_SIZE]; | 851 | struct page *page = pages[poffset / PAGE_SIZE]; |
787 | int copy, i, can_coalesce; | 852 | int copy, i; |
788 | int offset = poffset % PAGE_SIZE; | 853 | int offset = poffset % PAGE_SIZE; |
789 | int size = min_t(size_t, psize, PAGE_SIZE - offset); | 854 | int size = min_t(size_t, psize, PAGE_SIZE - offset); |
855 | bool can_coalesce; | ||
790 | 856 | ||
791 | if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) { | 857 | if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) { |
792 | new_segment: | 858 | new_segment: |
@@ -919,7 +985,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
919 | struct tcp_sock *tp = tcp_sk(sk); | 985 | struct tcp_sock *tp = tcp_sk(sk); |
920 | struct sk_buff *skb; | 986 | struct sk_buff *skb; |
921 | int iovlen, flags, err, copied; | 987 | int iovlen, flags, err, copied; |
922 | int mss_now, size_goal; | 988 | int mss_now = 0, size_goal; |
923 | bool sg; | 989 | bool sg; |
924 | long timeo; | 990 | long timeo; |
925 | 991 | ||
@@ -933,6 +999,19 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
933 | if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) | 999 | if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) |
934 | goto out_err; | 1000 | goto out_err; |
935 | 1001 | ||
1002 | if (unlikely(tp->repair)) { | ||
1003 | if (tp->repair_queue == TCP_RECV_QUEUE) { | ||
1004 | copied = tcp_send_rcvq(sk, msg, size); | ||
1005 | goto out; | ||
1006 | } | ||
1007 | |||
1008 | err = -EINVAL; | ||
1009 | if (tp->repair_queue == TCP_NO_QUEUE) | ||
1010 | goto out_err; | ||
1011 | |||
1012 | /* 'common' sending to sendq */ | ||
1013 | } | ||
1014 | |||
936 | /* This should be in poll */ | 1015 | /* This should be in poll */ |
937 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); | 1016 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); |
938 | 1017 | ||
@@ -1089,7 +1168,7 @@ new_segment: | |||
1089 | if ((seglen -= copy) == 0 && iovlen == 0) | 1168 | if ((seglen -= copy) == 0 && iovlen == 0) |
1090 | goto out; | 1169 | goto out; |
1091 | 1170 | ||
1092 | if (skb->len < max || (flags & MSG_OOB)) | 1171 | if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair)) |
1093 | continue; | 1172 | continue; |
1094 | 1173 | ||
1095 | if (forced_push(tp)) { | 1174 | if (forced_push(tp)) { |
@@ -1102,7 +1181,7 @@ new_segment: | |||
1102 | wait_for_sndbuf: | 1181 | wait_for_sndbuf: |
1103 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | 1182 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); |
1104 | wait_for_memory: | 1183 | wait_for_memory: |
1105 | if (copied) | 1184 | if (copied && likely(!tp->repair)) |
1106 | tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); | 1185 | tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); |
1107 | 1186 | ||
1108 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) | 1187 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) |
@@ -1113,7 +1192,7 @@ wait_for_memory: | |||
1113 | } | 1192 | } |
1114 | 1193 | ||
1115 | out: | 1194 | out: |
1116 | if (copied) | 1195 | if (copied && likely(!tp->repair)) |
1117 | tcp_push(sk, flags, mss_now, tp->nonagle); | 1196 | tcp_push(sk, flags, mss_now, tp->nonagle); |
1118 | release_sock(sk); | 1197 | release_sock(sk); |
1119 | return copied; | 1198 | return copied; |
@@ -1187,6 +1266,24 @@ static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags) | |||
1187 | return -EAGAIN; | 1266 | return -EAGAIN; |
1188 | } | 1267 | } |
1189 | 1268 | ||
1269 | static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len) | ||
1270 | { | ||
1271 | struct sk_buff *skb; | ||
1272 | int copied = 0, err = 0; | ||
1273 | |||
1274 | /* XXX -- need to support SO_PEEK_OFF */ | ||
1275 | |||
1276 | skb_queue_walk(&sk->sk_write_queue, skb) { | ||
1277 | err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, skb->len); | ||
1278 | if (err) | ||
1279 | break; | ||
1280 | |||
1281 | copied += skb->len; | ||
1282 | } | ||
1283 | |||
1284 | return err ?: copied; | ||
1285 | } | ||
1286 | |||
1190 | /* Clean up the receive buffer for full frames taken by the user, | 1287 | /* Clean up the receive buffer for full frames taken by the user, |
1191 | * then send an ACK if necessary. COPIED is the number of bytes | 1288 | * then send an ACK if necessary. COPIED is the number of bytes |
1192 | * tcp_recvmsg has given to the user so far, it speeds up the | 1289 | * tcp_recvmsg has given to the user so far, it speeds up the |
@@ -1432,6 +1529,21 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
1432 | if (flags & MSG_OOB) | 1529 | if (flags & MSG_OOB) |
1433 | goto recv_urg; | 1530 | goto recv_urg; |
1434 | 1531 | ||
1532 | if (unlikely(tp->repair)) { | ||
1533 | err = -EPERM; | ||
1534 | if (!(flags & MSG_PEEK)) | ||
1535 | goto out; | ||
1536 | |||
1537 | if (tp->repair_queue == TCP_SEND_QUEUE) | ||
1538 | goto recv_sndq; | ||
1539 | |||
1540 | err = -EINVAL; | ||
1541 | if (tp->repair_queue == TCP_NO_QUEUE) | ||
1542 | goto out; | ||
1543 | |||
1544 | /* 'common' recv queue MSG_PEEK-ing */ | ||
1545 | } | ||
1546 | |||
1435 | seq = &tp->copied_seq; | 1547 | seq = &tp->copied_seq; |
1436 | if (flags & MSG_PEEK) { | 1548 | if (flags & MSG_PEEK) { |
1437 | peek_seq = tp->copied_seq; | 1549 | peek_seq = tp->copied_seq; |
@@ -1633,9 +1745,9 @@ do_prequeue: | |||
1633 | } | 1745 | } |
1634 | if ((flags & MSG_PEEK) && | 1746 | if ((flags & MSG_PEEK) && |
1635 | (peek_seq - copied - urg_hole != tp->copied_seq)) { | 1747 | (peek_seq - copied - urg_hole != tp->copied_seq)) { |
1636 | if (net_ratelimit()) | 1748 | net_dbg_ratelimited("TCP(%s:%d): Application bug, race in MSG_PEEK\n", |
1637 | printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n", | 1749 | current->comm, |
1638 | current->comm, task_pid_nr(current)); | 1750 | task_pid_nr(current)); |
1639 | peek_seq = tp->copied_seq; | 1751 | peek_seq = tp->copied_seq; |
1640 | } | 1752 | } |
1641 | continue; | 1753 | continue; |
@@ -1783,6 +1895,10 @@ out: | |||
1783 | recv_urg: | 1895 | recv_urg: |
1784 | err = tcp_recv_urg(sk, msg, len, flags); | 1896 | err = tcp_recv_urg(sk, msg, len, flags); |
1785 | goto out; | 1897 | goto out; |
1898 | |||
1899 | recv_sndq: | ||
1900 | err = tcp_peek_sndq(sk, msg, len); | ||
1901 | goto out; | ||
1786 | } | 1902 | } |
1787 | EXPORT_SYMBOL(tcp_recvmsg); | 1903 | EXPORT_SYMBOL(tcp_recvmsg); |
1788 | 1904 | ||
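The recv_sndq path above gives checkpoint tools a way to read not-yet-acknowledged data out of the write queue: with the socket in repair mode and the queue selector pointed at the send queue, a MSG_PEEK read is served by tcp_peek_sndq() (plain reads return -EPERM). A hypothetical dump step; the TCP_REPAIR_QUEUE/TCP_SEND_QUEUE constants come from this patch series and may not be in distro headers yet:

        /* Hypothetical dump of unacked send-queue data from a repaired socket. */
        #include <netinet/tcp.h>
        #include <sys/socket.h>

        static ssize_t dump_send_queue(int fd, void *buf, size_t len)
        {
                int q = TCP_SEND_QUEUE;

                if (setsockopt(fd, IPPROTO_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q)))
                        return -1;
                return recv(fd, buf, len, MSG_PEEK);  /* tcp_peek_sndq() */
        }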
@@ -1886,10 +2002,10 @@ bool tcp_check_oom(struct sock *sk, int shift) | |||
1886 | too_many_orphans = tcp_too_many_orphans(sk, shift); | 2002 | too_many_orphans = tcp_too_many_orphans(sk, shift); |
1887 | out_of_socket_memory = tcp_out_of_memory(sk); | 2003 | out_of_socket_memory = tcp_out_of_memory(sk); |
1888 | 2004 | ||
1889 | if (too_many_orphans && net_ratelimit()) | 2005 | if (too_many_orphans) |
1890 | pr_info("too many orphaned sockets\n"); | 2006 | net_info_ratelimited("too many orphaned sockets\n"); |
1891 | if (out_of_socket_memory && net_ratelimit()) | 2007 | if (out_of_socket_memory) |
1892 | pr_info("out of memory -- consider tuning tcp_mem\n"); | 2008 | net_info_ratelimited("out of memory -- consider tuning tcp_mem\n"); |
1893 | return too_many_orphans || out_of_socket_memory; | 2009 | return too_many_orphans || out_of_socket_memory; |
1894 | } | 2010 | } |
1895 | 2011 | ||
@@ -1935,7 +2051,9 @@ void tcp_close(struct sock *sk, long timeout) | |||
1935 | * advertise a zero window, then kill -9 the FTP client, wheee... | 2051 | * advertise a zero window, then kill -9 the FTP client, wheee... |
1936 | * Note: timeout is always zero in such a case. | 2052 | * Note: timeout is always zero in such a case. |
1937 | */ | 2053 | */ |
1938 | if (data_was_unread) { | 2054 | if (unlikely(tcp_sk(sk)->repair)) { |
2055 | sk->sk_prot->disconnect(sk, 0); | ||
2056 | } else if (data_was_unread) { | ||
1939 | /* Unread data was tossed, zap the connection. */ | 2057 | /* Unread data was tossed, zap the connection. */ |
1940 | NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); | 2058 | NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); |
1941 | tcp_set_state(sk, TCP_CLOSE); | 2059 | tcp_set_state(sk, TCP_CLOSE); |
@@ -2074,6 +2192,8 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
2074 | /* ABORT function of RFC793 */ | 2192 | /* ABORT function of RFC793 */ |
2075 | if (old_state == TCP_LISTEN) { | 2193 | if (old_state == TCP_LISTEN) { |
2076 | inet_csk_listen_stop(sk); | 2194 | inet_csk_listen_stop(sk); |
2195 | } else if (unlikely(tp->repair)) { | ||
2196 | sk->sk_err = ECONNABORTED; | ||
2077 | } else if (tcp_need_reset(old_state) || | 2197 | } else if (tcp_need_reset(old_state) || |
2078 | (tp->snd_nxt != tp->write_seq && | 2198 | (tp->snd_nxt != tp->write_seq && |
2079 | (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { | 2199 | (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { |
@@ -2125,6 +2245,54 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
2125 | } | 2245 | } |
2126 | EXPORT_SYMBOL(tcp_disconnect); | 2246 | EXPORT_SYMBOL(tcp_disconnect); |
2127 | 2247 | ||
2248 | static inline int tcp_can_repair_sock(struct sock *sk) | ||
2249 | { | ||
2250 | return capable(CAP_NET_ADMIN) && | ||
2251 | ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED)); | ||
2252 | } | ||
2253 | |||
2254 | static int tcp_repair_options_est(struct tcp_sock *tp, | ||
2255 | struct tcp_repair_opt __user *optbuf, unsigned int len) | ||
2256 | { | ||
2257 | struct tcp_repair_opt opt; | ||
2258 | |||
2259 | while (len >= sizeof(opt)) { | ||
2260 | if (copy_from_user(&opt, optbuf, sizeof(opt))) | ||
2261 | return -EFAULT; | ||
2262 | |||
2263 | optbuf++; | ||
2264 | len -= sizeof(opt); | ||
2265 | |||
2266 | switch (opt.opt_code) { | ||
2267 | case TCPOPT_MSS: | ||
2268 | tp->rx_opt.mss_clamp = opt.opt_val; | ||
2269 | break; | ||
2270 | case TCPOPT_WINDOW: | ||
2271 | if (opt.opt_val > 14) | ||
2272 | return -EFBIG; | ||
2273 | |||
2274 | tp->rx_opt.snd_wscale = opt.opt_val; | ||
2275 | break; | ||
2276 | case TCPOPT_SACK_PERM: | ||
2277 | if (opt.opt_val != 0) | ||
2278 | return -EINVAL; | ||
2279 | |||
2280 | tp->rx_opt.sack_ok |= TCP_SACK_SEEN; | ||
2281 | if (sysctl_tcp_fack) | ||
2282 | tcp_enable_fack(tp); | ||
2283 | break; | ||
2284 | case TCPOPT_TIMESTAMP: | ||
2285 | if (opt.opt_val != 0) | ||
2286 | return -EINVAL; | ||
2287 | |||
2288 | tp->rx_opt.tstamp_ok = 1; | ||
2289 | break; | ||
2290 | } | ||
2291 | } | ||
2292 | |||
2293 | return 0; | ||
2294 | } | ||
2295 | |||
2128 | /* | 2296 | /* |
2129 | * Socket option code for TCP. | 2297 | * Socket option code for TCP. |
2130 | */ | 2298 | */ |
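tcp_repair_options_est() above consumes a flat array of {opt_code, opt_val} pairs. A hypothetical restore of negotiated options on an ESTABLISHED repaired socket, matching the four cases it handles; the TCPOPT_* numbers are the on-the-wire TCP option kinds (MSS=2, window scale=3, SACK-permitted=4, timestamps=8), which userspace would normally carry in its own header copy:

        /* Hypothetical option restore via the setsockopt added below. */
        struct tcp_repair_opt opts[] = {
                { TCPOPT_MSS,       1460 },  /* -> tp->rx_opt.mss_clamp    */
                { TCPOPT_WINDOW,       7 },  /* snd_wscale, must be <= 14  */
                { TCPOPT_SACK_PERM,    0 },  /* opt_val must be 0          */
                { TCPOPT_TIMESTAMP,    0 },  /* opt_val must be 0          */
        };

        if (setsockopt(fd, IPPROTO_TCP, TCP_REPAIR_OPTIONS,
                       opts, sizeof(opts)))
                perror("TCP_REPAIR_OPTIONS");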
@@ -2295,6 +2463,55 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
2295 | err = -EINVAL; | 2463 | err = -EINVAL; |
2296 | else | 2464 | else |
2297 | tp->thin_dupack = val; | 2465 | tp->thin_dupack = val; |
2466 | if (tp->thin_dupack) | ||
2467 | tcp_disable_early_retrans(tp); | ||
2468 | break; | ||
2469 | |||
2470 | case TCP_REPAIR: | ||
2471 | if (!tcp_can_repair_sock(sk)) | ||
2472 | err = -EPERM; | ||
2473 | else if (val == 1) { | ||
2474 | tp->repair = 1; | ||
2475 | sk->sk_reuse = SK_FORCE_REUSE; | ||
2476 | tp->repair_queue = TCP_NO_QUEUE; | ||
2477 | } else if (val == 0) { | ||
2478 | tp->repair = 0; | ||
2479 | sk->sk_reuse = SK_NO_REUSE; | ||
2480 | tcp_send_window_probe(sk); | ||
2481 | } else | ||
2482 | err = -EINVAL; | ||
2483 | |||
2484 | break; | ||
2485 | |||
2486 | case TCP_REPAIR_QUEUE: | ||
2487 | if (!tp->repair) | ||
2488 | err = -EPERM; | ||
2489 | else if (val < TCP_QUEUES_NR) | ||
2490 | tp->repair_queue = val; | ||
2491 | else | ||
2492 | err = -EINVAL; | ||
2493 | break; | ||
2494 | |||
2495 | case TCP_QUEUE_SEQ: | ||
2496 | if (sk->sk_state != TCP_CLOSE) | ||
2497 | err = -EPERM; | ||
2498 | else if (tp->repair_queue == TCP_SEND_QUEUE) | ||
2499 | tp->write_seq = val; | ||
2500 | else if (tp->repair_queue == TCP_RECV_QUEUE) | ||
2501 | tp->rcv_nxt = val; | ||
2502 | else | ||
2503 | err = -EINVAL; | ||
2504 | break; | ||
2505 | |||
2506 | case TCP_REPAIR_OPTIONS: | ||
2507 | if (!tp->repair) | ||
2508 | err = -EINVAL; | ||
2509 | else if (sk->sk_state == TCP_ESTABLISHED) | ||
2510 | err = tcp_repair_options_est(tp, | ||
2511 | (struct tcp_repair_opt __user *)optval, | ||
2512 | optlen); | ||
2513 | else | ||
2514 | err = -EPERM; | ||
2298 | break; | 2515 | break; |
2299 | 2516 | ||
2300 | case TCP_CORK: | 2517 | case TCP_CORK: |
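Taken together, the options above implement the kernel side of live-connection checkpoint/restore (the CRIU use case). Note the ordering constraints encoded in the code: TCP_REPAIR needs CAP_NET_ADMIN and a CLOSE or ESTABLISHED socket, and TCP_QUEUE_SEQ is only writable while CLOSED, so sequence numbers must be restored before connect(). A hypothetical restore sequence, assuming the TCP_REPAIR* constants are visible to userspace:

        /* Hypothetical restore of queue sequence numbers on a fresh socket. */
        static int restore_seqs(int fd, unsigned int snd_seq, unsigned int rcv_seq)
        {
                int one = 1, q;

                if (setsockopt(fd, IPPROTO_TCP, TCP_REPAIR, &one, sizeof(one)))
                        return -1;      /* needs CAP_NET_ADMIN */

                q = TCP_SEND_QUEUE;
                if (setsockopt(fd, IPPROTO_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q)) ||
                    setsockopt(fd, IPPROTO_TCP, TCP_QUEUE_SEQ,
                               &snd_seq, sizeof(snd_seq)))
                        return -1;      /* sets tp->write_seq */

                q = TCP_RECV_QUEUE;
                if (setsockopt(fd, IPPROTO_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q)) ||
                    setsockopt(fd, IPPROTO_TCP, TCP_QUEUE_SEQ,
                               &rcv_seq, sizeof(rcv_seq)))
                        return -1;      /* sets tp->rcv_nxt */

                return 0;               /* connect() follows, still in repair */
        }

Flipping TCP_REPAIR back to 0 drops SK_FORCE_REUSE and sends a window probe, per the case above.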
@@ -2530,6 +2747,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level, | |||
2530 | val = tp->mss_cache; | 2747 | val = tp->mss_cache; |
2531 | if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) | 2748 | if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) |
2532 | val = tp->rx_opt.user_mss; | 2749 | val = tp->rx_opt.user_mss; |
2750 | if (tp->repair) | ||
2751 | val = tp->rx_opt.mss_clamp; | ||
2533 | break; | 2752 | break; |
2534 | case TCP_NODELAY: | 2753 | case TCP_NODELAY: |
2535 | val = !!(tp->nonagle&TCP_NAGLE_OFF); | 2754 | val = !!(tp->nonagle&TCP_NAGLE_OFF); |
@@ -2632,6 +2851,26 @@ static int do_tcp_getsockopt(struct sock *sk, int level, | |||
2632 | val = tp->thin_dupack; | 2851 | val = tp->thin_dupack; |
2633 | break; | 2852 | break; |
2634 | 2853 | ||
2854 | case TCP_REPAIR: | ||
2855 | val = tp->repair; | ||
2856 | break; | ||
2857 | |||
2858 | case TCP_REPAIR_QUEUE: | ||
2859 | if (tp->repair) | ||
2860 | val = tp->repair_queue; | ||
2861 | else | ||
2862 | return -EINVAL; | ||
2863 | break; | ||
2864 | |||
2865 | case TCP_QUEUE_SEQ: | ||
2866 | if (tp->repair_queue == TCP_SEND_QUEUE) | ||
2867 | val = tp->write_seq; | ||
2868 | else if (tp->repair_queue == TCP_RECV_QUEUE) | ||
2869 | val = tp->rcv_nxt; | ||
2870 | else | ||
2871 | return -EINVAL; | ||
2872 | break; | ||
2873 | |||
2635 | case TCP_USER_TIMEOUT: | 2874 | case TCP_USER_TIMEOUT: |
2636 | val = jiffies_to_msecs(icsk->icsk_user_timeout); | 2875 | val = jiffies_to_msecs(icsk->icsk_user_timeout); |
2637 | break; | 2876 | break; |
@@ -2675,7 +2914,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, | |||
2675 | { | 2914 | { |
2676 | struct sk_buff *segs = ERR_PTR(-EINVAL); | 2915 | struct sk_buff *segs = ERR_PTR(-EINVAL); |
2677 | struct tcphdr *th; | 2916 | struct tcphdr *th; |
2678 | unsigned thlen; | 2917 | unsigned int thlen; |
2679 | unsigned int seq; | 2918 | unsigned int seq; |
2680 | __be32 delta; | 2919 | __be32 delta; |
2681 | unsigned int oldlen; | 2920 | unsigned int oldlen; |
@@ -3033,9 +3272,9 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, | |||
3033 | struct scatterlist sg; | 3272 | struct scatterlist sg; |
3034 | const struct tcphdr *tp = tcp_hdr(skb); | 3273 | const struct tcphdr *tp = tcp_hdr(skb); |
3035 | struct hash_desc *desc = &hp->md5_desc; | 3274 | struct hash_desc *desc = &hp->md5_desc; |
3036 | unsigned i; | 3275 | unsigned int i; |
3037 | const unsigned head_data_len = skb_headlen(skb) > header_len ? | 3276 | const unsigned int head_data_len = skb_headlen(skb) > header_len ? |
3038 | skb_headlen(skb) - header_len : 0; | 3277 | skb_headlen(skb) - header_len : 0; |
3039 | const struct skb_shared_info *shi = skb_shinfo(skb); | 3278 | const struct skb_shared_info *shi = skb_shinfo(skb); |
3040 | struct sk_buff *frag_iter; | 3279 | struct sk_buff *frag_iter; |
3041 | 3280 | ||
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 257b61789eeb..eb97787be757 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -99,6 +99,7 @@ int sysctl_tcp_thin_dupack __read_mostly; | |||
99 | 99 | ||
100 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; | 100 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; |
101 | int sysctl_tcp_abc __read_mostly; | 101 | int sysctl_tcp_abc __read_mostly; |
102 | int sysctl_tcp_early_retrans __read_mostly = 2; | ||
102 | 103 | ||
103 | #define FLAG_DATA 0x01 /* Incoming frame contained data. */ | 104 | #define FLAG_DATA 0x01 /* Incoming frame contained data. */ |
104 | #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ | 105 | #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ |
@@ -175,7 +176,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) | |||
175 | static void tcp_incr_quickack(struct sock *sk) | 176 | static void tcp_incr_quickack(struct sock *sk) |
176 | { | 177 | { |
177 | struct inet_connection_sock *icsk = inet_csk(sk); | 178 | struct inet_connection_sock *icsk = inet_csk(sk); |
178 | unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss); | 179 | unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss); |
179 | 180 | ||
180 | if (quickacks == 0) | 181 | if (quickacks == 0) |
181 | quickacks = 2; | 182 | quickacks = 2; |
@@ -906,6 +907,7 @@ static void tcp_init_metrics(struct sock *sk) | |||
906 | if (dst_metric(dst, RTAX_REORDERING) && | 907 | if (dst_metric(dst, RTAX_REORDERING) && |
907 | tp->reordering != dst_metric(dst, RTAX_REORDERING)) { | 908 | tp->reordering != dst_metric(dst, RTAX_REORDERING)) { |
908 | tcp_disable_fack(tp); | 909 | tcp_disable_fack(tp); |
910 | tcp_disable_early_retrans(tp); | ||
909 | tp->reordering = dst_metric(dst, RTAX_REORDERING); | 911 | tp->reordering = dst_metric(dst, RTAX_REORDERING); |
910 | } | 912 | } |
911 | 913 | ||
@@ -937,7 +939,7 @@ static void tcp_init_metrics(struct sock *sk) | |||
937 | tcp_set_rto(sk); | 939 | tcp_set_rto(sk); |
938 | reset: | 940 | reset: |
939 | if (tp->srtt == 0) { | 941 | if (tp->srtt == 0) { |
940 | /* RFC2988bis: We've failed to get a valid RTT sample from | 942 | /* RFC6298: 5.7 We've failed to get a valid RTT sample from |
941 | * 3WHS. This is most likely due to retransmission, | 943 | * 3WHS. This is most likely due to retransmission, |
942 | * including spurious one. Reset the RTO back to 3secs | 944 | * including spurious one. Reset the RTO back to 3secs |
943 | * from the more aggressive 1sec to avoid more spurious | 945 | * from the more aggressive 1sec to avoid more spurious |
@@ -947,7 +949,7 @@ reset: | |||
947 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; | 949 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; |
948 | } | 950 | } |
949 | /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been | 951 | /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been |
950 | * retransmitted. In light of RFC2988bis' more aggressive 1sec | 952 | * retransmitted. In light of RFC6298 more aggressive 1sec |
951 | * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK | 953 | * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK |
952 | * retransmission has occurred. | 954 | * retransmission has occurred. |
953 | */ | 955 | */ |
@@ -979,15 +981,18 @@ static void tcp_update_reordering(struct sock *sk, const int metric, | |||
979 | 981 | ||
980 | NET_INC_STATS_BH(sock_net(sk), mib_idx); | 982 | NET_INC_STATS_BH(sock_net(sk), mib_idx); |
981 | #if FASTRETRANS_DEBUG > 1 | 983 | #if FASTRETRANS_DEBUG > 1 |
982 | printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n", | 984 | pr_debug("Disorder%d %d %u f%u s%u rr%d\n", |
983 | tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, | 985 | tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, |
984 | tp->reordering, | 986 | tp->reordering, |
985 | tp->fackets_out, | 987 | tp->fackets_out, |
986 | tp->sacked_out, | 988 | tp->sacked_out, |
987 | tp->undo_marker ? tp->undo_retrans : 0); | 989 | tp->undo_marker ? tp->undo_retrans : 0); |
988 | #endif | 990 | #endif |
989 | tcp_disable_fack(tp); | 991 | tcp_disable_fack(tp); |
990 | } | 992 | } |
993 | |||
994 | if (metric > 0) | ||
995 | tcp_disable_early_retrans(tp); | ||
991 | } | 996 | } |
992 | 997 | ||
993 | /* This must be called before lost_out is incremented */ | 998 | /* This must be called before lost_out is incremented */ |
@@ -2339,6 +2344,27 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) | |||
2339 | return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; | 2344 | return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; |
2340 | } | 2345 | } |
2341 | 2346 | ||
2347 | static bool tcp_pause_early_retransmit(struct sock *sk, int flag) | ||
2348 | { | ||
2349 | struct tcp_sock *tp = tcp_sk(sk); | ||
2350 | unsigned long delay; | ||
2351 | |||
2352 | /* Delay early retransmit and entry into fast recovery for | ||
2353 | * max(RTT/4, 2msec), unless the ACK has the ECE mark, no RTT | ||
2354 | * samples are available, or the RTO is scheduled to fire first. | ||
2355 | */ | ||
2356 | if (sysctl_tcp_early_retrans < 2 || (flag & FLAG_ECE) || !tp->srtt) | ||
2357 | return false; | ||
2358 | |||
2359 | delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2)); | ||
2360 | if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay))) | ||
2361 | return false; | ||
2362 | |||
2363 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, TCP_RTO_MAX); | ||
2364 | tp->early_retrans_delayed = 1; | ||
2365 | return true; | ||
2366 | } | ||
2367 | |||
2342 | static inline int tcp_skb_timedout(const struct sock *sk, | 2368 | static inline int tcp_skb_timedout(const struct sock *sk, |
2343 | const struct sk_buff *skb) | 2369 | const struct sk_buff *skb) |
2344 | { | 2370 | { |
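On the arithmetic in tcp_pause_early_retransmit(): tp->srtt stores the smoothed RTT left-shifted by 3 (i.e. times 8, in jiffies), so tp->srtt >> 5 is RTT/4. A worked instance, assuming HZ=1000 so one jiffy is one millisecond:

        /* Worked example of the delay computed above (assumes HZ=1000):
         * a 40 ms smoothed RTT is stored as 40 << 3 = 320, and
         * delay = max(320 >> 5, 2) = max(10, 2) = 10 jiffies = RTT/4. */
        unsigned long srtt = 40 << 3;   /* smoothed RTT, scaled by 8 */
        unsigned long delay = max_t(unsigned long, srtt >> 5,
                                    msecs_to_jiffies(2));   /* = 10 */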
@@ -2446,7 +2472,7 @@ static inline int tcp_head_timedout(const struct sock *sk) | |||
2446 | * Main question: may we further continue forward transmission | 2472 | * Main question: may we further continue forward transmission |
2447 | * with the same cwnd? | 2473 | * with the same cwnd? |
2448 | */ | 2474 | */ |
2449 | static int tcp_time_to_recover(struct sock *sk) | 2475 | static int tcp_time_to_recover(struct sock *sk, int flag) |
2450 | { | 2476 | { |
2451 | struct tcp_sock *tp = tcp_sk(sk); | 2477 | struct tcp_sock *tp = tcp_sk(sk); |
2452 | __u32 packets_out; | 2478 | __u32 packets_out; |
@@ -2492,6 +2518,16 @@ static int tcp_time_to_recover(struct sock *sk) | |||
2492 | tcp_is_sack(tp) && !tcp_send_head(sk)) | 2518 | tcp_is_sack(tp) && !tcp_send_head(sk)) |
2493 | return 1; | 2519 | return 1; |
2494 | 2520 | ||
2521 | /* Trick#6: TCP early retransmit, per RFC5827. To avoid spurious | ||
2522 | * retransmissions due to small network reorderings, we implement | ||
2523 | * Mitigation A.3 in the RFC and delay the retransmission for a short | ||
2524 | * interval if appropriate. | ||
2525 | */ | ||
2526 | if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out && | ||
2527 | (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) && | ||
2528 | !tcp_may_send_now(sk)) | ||
2529 | return !tcp_pause_early_retransmit(sk, flag); | ||
2530 | |||
2495 | return 0; | 2531 | return 0; |
2496 | } | 2532 | } |
2497 | 2533 | ||
@@ -2680,22 +2716,22 @@ static void DBGUNDO(struct sock *sk, const char *msg) | |||
2680 | struct inet_sock *inet = inet_sk(sk); | 2716 | struct inet_sock *inet = inet_sk(sk); |
2681 | 2717 | ||
2682 | if (sk->sk_family == AF_INET) { | 2718 | if (sk->sk_family == AF_INET) { |
2683 | printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", | 2719 | pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", |
2684 | msg, | 2720 | msg, |
2685 | &inet->inet_daddr, ntohs(inet->inet_dport), | 2721 | &inet->inet_daddr, ntohs(inet->inet_dport), |
2686 | tp->snd_cwnd, tcp_left_out(tp), | 2722 | tp->snd_cwnd, tcp_left_out(tp), |
2687 | tp->snd_ssthresh, tp->prior_ssthresh, | 2723 | tp->snd_ssthresh, tp->prior_ssthresh, |
2688 | tp->packets_out); | 2724 | tp->packets_out); |
2689 | } | 2725 | } |
2690 | #if IS_ENABLED(CONFIG_IPV6) | 2726 | #if IS_ENABLED(CONFIG_IPV6) |
2691 | else if (sk->sk_family == AF_INET6) { | 2727 | else if (sk->sk_family == AF_INET6) { |
2692 | struct ipv6_pinfo *np = inet6_sk(sk); | 2728 | struct ipv6_pinfo *np = inet6_sk(sk); |
2693 | printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", | 2729 | pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", |
2694 | msg, | 2730 | msg, |
2695 | &np->daddr, ntohs(inet->inet_dport), | 2731 | &np->daddr, ntohs(inet->inet_dport), |
2696 | tp->snd_cwnd, tcp_left_out(tp), | 2732 | tp->snd_cwnd, tcp_left_out(tp), |
2697 | tp->snd_ssthresh, tp->prior_ssthresh, | 2733 | tp->snd_ssthresh, tp->prior_ssthresh, |
2698 | tp->packets_out); | 2734 | tp->packets_out); |
2699 | } | 2735 | } |
2700 | #endif | 2736 | #endif |
2701 | } | 2737 | } |
@@ -3025,6 +3061,38 @@ static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked, | |||
3025 | tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; | 3061 | tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; |
3026 | } | 3062 | } |
3027 | 3063 | ||
3064 | static void tcp_enter_recovery(struct sock *sk, bool ece_ack) | ||
3065 | { | ||
3066 | struct tcp_sock *tp = tcp_sk(sk); | ||
3067 | int mib_idx; | ||
3068 | |||
3069 | if (tcp_is_reno(tp)) | ||
3070 | mib_idx = LINUX_MIB_TCPRENORECOVERY; | ||
3071 | else | ||
3072 | mib_idx = LINUX_MIB_TCPSACKRECOVERY; | ||
3073 | |||
3074 | NET_INC_STATS_BH(sock_net(sk), mib_idx); | ||
3075 | |||
3076 | tp->high_seq = tp->snd_nxt; | ||
3077 | tp->prior_ssthresh = 0; | ||
3078 | tp->undo_marker = tp->snd_una; | ||
3079 | tp->undo_retrans = tp->retrans_out; | ||
3080 | |||
3081 | if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { | ||
3082 | if (!ece_ack) | ||
3083 | tp->prior_ssthresh = tcp_current_ssthresh(sk); | ||
3084 | tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); | ||
3085 | TCP_ECN_queue_cwr(tp); | ||
3086 | } | ||
3087 | |||
3088 | tp->bytes_acked = 0; | ||
3089 | tp->snd_cwnd_cnt = 0; | ||
3090 | tp->prior_cwnd = tp->snd_cwnd; | ||
3091 | tp->prr_delivered = 0; | ||
3092 | tp->prr_out = 0; | ||
3093 | tcp_set_ca_state(sk, TCP_CA_Recovery); | ||
3094 | } | ||
3095 | |||
3028 | /* Process an event, which can update packets-in-flight not trivially. | 3096 | /* Process an event, which can update packets-in-flight not trivially. |
3029 | * Main goal of this function is to calculate new estimate for left_out, | 3097 | * Main goal of this function is to calculate new estimate for left_out, |
3030 | * taking into account both packets sitting in receiver's buffer and | 3098 | * taking into account both packets sitting in receiver's buffer and |
@@ -3044,7 +3112,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
3044 | struct tcp_sock *tp = tcp_sk(sk); | 3112 | struct tcp_sock *tp = tcp_sk(sk); |
3045 | int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && | 3113 | int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && |
3046 | (tcp_fackets_out(tp) > tp->reordering)); | 3114 | (tcp_fackets_out(tp) > tp->reordering)); |
3047 | int fast_rexmit = 0, mib_idx; | 3115 | int fast_rexmit = 0; |
3048 | 3116 | ||
3049 | if (WARN_ON(!tp->packets_out && tp->sacked_out)) | 3117 | if (WARN_ON(!tp->packets_out && tp->sacked_out)) |
3050 | tp->sacked_out = 0; | 3118 | tp->sacked_out = 0; |
@@ -3128,7 +3196,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
3128 | if (icsk->icsk_ca_state <= TCP_CA_Disorder) | 3196 | if (icsk->icsk_ca_state <= TCP_CA_Disorder) |
3129 | tcp_try_undo_dsack(sk); | 3197 | tcp_try_undo_dsack(sk); |
3130 | 3198 | ||
3131 | if (!tcp_time_to_recover(sk)) { | 3199 | if (!tcp_time_to_recover(sk, flag)) { |
3132 | tcp_try_to_open(sk, flag); | 3200 | tcp_try_to_open(sk, flag); |
3133 | return; | 3201 | return; |
3134 | } | 3202 | } |
@@ -3145,32 +3213,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
3145 | } | 3213 | } |
3146 | 3214 | ||
3147 | /* Otherwise enter Recovery state */ | 3215 | /* Otherwise enter Recovery state */ |
3148 | 3216 | tcp_enter_recovery(sk, (flag & FLAG_ECE)); | |
3149 | if (tcp_is_reno(tp)) | ||
3150 | mib_idx = LINUX_MIB_TCPRENORECOVERY; | ||
3151 | else | ||
3152 | mib_idx = LINUX_MIB_TCPSACKRECOVERY; | ||
3153 | |||
3154 | NET_INC_STATS_BH(sock_net(sk), mib_idx); | ||
3155 | |||
3156 | tp->high_seq = tp->snd_nxt; | ||
3157 | tp->prior_ssthresh = 0; | ||
3158 | tp->undo_marker = tp->snd_una; | ||
3159 | tp->undo_retrans = tp->retrans_out; | ||
3160 | |||
3161 | if (icsk->icsk_ca_state < TCP_CA_CWR) { | ||
3162 | if (!(flag & FLAG_ECE)) | ||
3163 | tp->prior_ssthresh = tcp_current_ssthresh(sk); | ||
3164 | tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); | ||
3165 | TCP_ECN_queue_cwr(tp); | ||
3166 | } | ||
3167 | |||
3168 | tp->bytes_acked = 0; | ||
3169 | tp->snd_cwnd_cnt = 0; | ||
3170 | tp->prior_cwnd = tp->snd_cwnd; | ||
3171 | tp->prr_delivered = 0; | ||
3172 | tp->prr_out = 0; | ||
3173 | tcp_set_ca_state(sk, TCP_CA_Recovery); | ||
3174 | fast_rexmit = 1; | 3217 | fast_rexmit = 1; |
3175 | } | 3218 | } |
3176 | 3219 | ||
@@ -3252,16 +3295,47 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
3252 | /* Restart timer after forward progress on connection. | 3295 | /* Restart timer after forward progress on connection. |
3253 | * RFC2988 recommends to restart timer to now+rto. | 3296 | * RFC2988 recommends to restart timer to now+rto. |
3254 | */ | 3297 | */ |
3255 | static void tcp_rearm_rto(struct sock *sk) | 3298 | void tcp_rearm_rto(struct sock *sk) |
3256 | { | 3299 | { |
3257 | const struct tcp_sock *tp = tcp_sk(sk); | 3300 | struct tcp_sock *tp = tcp_sk(sk); |
3258 | 3301 | ||
3259 | if (!tp->packets_out) { | 3302 | if (!tp->packets_out) { |
3260 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); | 3303 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); |
3261 | } else { | 3304 | } else { |
3262 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | 3305 | u32 rto = inet_csk(sk)->icsk_rto; |
3263 | inet_csk(sk)->icsk_rto, TCP_RTO_MAX); | 3306 | /* Offset the time elapsed after installing regular RTO */ |
3307 | if (tp->early_retrans_delayed) { | ||
3308 | struct sk_buff *skb = tcp_write_queue_head(sk); | ||
3309 | const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto; | ||
3310 | s32 delta = (s32)(rto_time_stamp - tcp_time_stamp); | ||
3311 | /* delta may not be positive if the socket is locked | ||
3312 | * when the delayed ER timer fires and is rescheduled. | ||
3313 | */ | ||
3314 | if (delta > 0) | ||
3315 | rto = delta; | ||
3316 | } | ||
3317 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, | ||
3318 | TCP_RTO_MAX); | ||
3264 | } | 3319 | } |
3320 | tp->early_retrans_delayed = 0; | ||
3321 | } | ||
3322 | |||
3323 | /* This function is called when the delayed ER timer fires. TCP enters | ||
3324 | * fast recovery and performs fast-retransmit. | ||
3325 | */ | ||
3326 | void tcp_resume_early_retransmit(struct sock *sk) | ||
3327 | { | ||
3328 | struct tcp_sock *tp = tcp_sk(sk); | ||
3329 | |||
3330 | tcp_rearm_rto(sk); | ||
3331 | |||
3332 | /* Stop if ER is disabled after the delayed ER timer is scheduled */ | ||
3333 | if (!tp->do_early_retrans) | ||
3334 | return; | ||
3335 | |||
3336 | tcp_enter_recovery(sk, false); | ||
3337 | tcp_update_scoreboard(sk, 1); | ||
3338 | tcp_xmit_retransmit_queue(sk); | ||
3265 | } | 3339 | } |
3266 | 3340 | ||
3267 | /* If we get here, the whole TSO packet has not been acked. */ | 3341 | /* If we get here, the whole TSO packet has not been acked. */ |
@@ -3437,18 +3511,18 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, | |||
3437 | if (!tp->packets_out && tcp_is_sack(tp)) { | 3511 | if (!tp->packets_out && tcp_is_sack(tp)) { |
3438 | icsk = inet_csk(sk); | 3512 | icsk = inet_csk(sk); |
3439 | if (tp->lost_out) { | 3513 | if (tp->lost_out) { |
3440 | printk(KERN_DEBUG "Leak l=%u %d\n", | 3514 | pr_debug("Leak l=%u %d\n", |
3441 | tp->lost_out, icsk->icsk_ca_state); | 3515 | tp->lost_out, icsk->icsk_ca_state); |
3442 | tp->lost_out = 0; | 3516 | tp->lost_out = 0; |
3443 | } | 3517 | } |
3444 | if (tp->sacked_out) { | 3518 | if (tp->sacked_out) { |
3445 | printk(KERN_DEBUG "Leak s=%u %d\n", | 3519 | pr_debug("Leak s=%u %d\n", |
3446 | tp->sacked_out, icsk->icsk_ca_state); | 3520 | tp->sacked_out, icsk->icsk_ca_state); |
3447 | tp->sacked_out = 0; | 3521 | tp->sacked_out = 0; |
3448 | } | 3522 | } |
3449 | if (tp->retrans_out) { | 3523 | if (tp->retrans_out) { |
3450 | printk(KERN_DEBUG "Leak r=%u %d\n", | 3524 | pr_debug("Leak r=%u %d\n", |
3451 | tp->retrans_out, icsk->icsk_ca_state); | 3525 | tp->retrans_out, icsk->icsk_ca_state); |
3452 | tp->retrans_out = 0; | 3526 | tp->retrans_out = 0; |
3453 | } | 3527 | } |
3454 | } | 3528 | } |
@@ -3710,6 +3784,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
3710 | if (after(ack, tp->snd_nxt)) | 3784 | if (after(ack, tp->snd_nxt)) |
3711 | goto invalid_ack; | 3785 | goto invalid_ack; |
3712 | 3786 | ||
3787 | if (tp->early_retrans_delayed) | ||
3788 | tcp_rearm_rto(sk); | ||
3789 | |||
3713 | if (after(ack, prior_snd_una)) | 3790 | if (after(ack, prior_snd_una)) |
3714 | flag |= FLAG_SND_UNA_ADVANCED; | 3791 | flag |= FLAG_SND_UNA_ADVANCED; |
3715 | 3792 | ||
@@ -3875,10 +3952,9 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o | |||
3875 | __u8 snd_wscale = *(__u8 *)ptr; | 3952 | __u8 snd_wscale = *(__u8 *)ptr; |
3876 | opt_rx->wscale_ok = 1; | 3953 | opt_rx->wscale_ok = 1; |
3877 | if (snd_wscale > 14) { | 3954 | if (snd_wscale > 14) { |
3878 | if (net_ratelimit()) | 3955 | net_info_ratelimited("%s: Illegal window scaling value %d >14 received\n", |
3879 | pr_info("%s: Illegal window scaling value %d >14 received\n", | 3956 | __func__, |
3880 | __func__, | 3957 | snd_wscale); |
3881 | snd_wscale); | ||
3882 | snd_wscale = 14; | 3958 | snd_wscale = 14; |
3883 | } | 3959 | } |
3884 | opt_rx->snd_wscale = snd_wscale; | 3960 | opt_rx->snd_wscale = snd_wscale; |
@@ -4434,7 +4510,7 @@ static void tcp_ofo_queue(struct sock *sk) | |||
4434 | static int tcp_prune_ofo_queue(struct sock *sk); | 4510 | static int tcp_prune_ofo_queue(struct sock *sk); |
4435 | static int tcp_prune_queue(struct sock *sk); | 4511 | static int tcp_prune_queue(struct sock *sk); |
4436 | 4512 | ||
4437 | static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) | 4513 | static int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) |
4438 | { | 4514 | { |
4439 | if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || | 4515 | if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || |
4440 | !sk_rmem_schedule(sk, size)) { | 4516 | !sk_rmem_schedule(sk, size)) { |
@@ -4453,6 +4529,102 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) | |||
4453 | return 0; | 4529 | return 0; |
4454 | } | 4530 | } |
4455 | 4531 | ||
4532 | /** | ||
4533 | * tcp_try_coalesce - try to merge skb to prior one | ||
4534 | * @sk: socket | ||
4535 | * @to: prior buffer | ||
4536 | * @from: buffer to add in queue | ||
4537 | * @fragstolen: pointer to boolean | ||
4538 | * | ||
4539 | * Before queueing skb @from after @to, try to merge them | ||
4540 | * to reduce overall memory use and queue lengths, if cost is small. | ||
4541 | * Packets in ofo or receive queues can stay a long time. | ||
4542 | * Better try to coalesce them right now to avoid future collapses. | ||
4543 | * Returns true if caller should free @from instead of queueing it | ||
4544 | */ | ||
4545 | static bool tcp_try_coalesce(struct sock *sk, | ||
4546 | struct sk_buff *to, | ||
4547 | struct sk_buff *from, | ||
4548 | bool *fragstolen) | ||
4549 | { | ||
4550 | int i, delta, len = from->len; | ||
4551 | |||
4552 | *fragstolen = false; | ||
4553 | |||
4554 | if (tcp_hdr(from)->fin || skb_cloned(to)) | ||
4555 | return false; | ||
4556 | |||
4557 | if (len <= skb_tailroom(to)) { | ||
4558 | BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len)); | ||
4559 | goto merge; | ||
4560 | } | ||
4561 | |||
4562 | if (skb_has_frag_list(to) || skb_has_frag_list(from)) | ||
4563 | return false; | ||
4564 | |||
4565 | if (skb_headlen(from) != 0) { | ||
4566 | struct page *page; | ||
4567 | unsigned int offset; | ||
4568 | |||
4569 | if (skb_shinfo(to)->nr_frags + | ||
4570 | skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) | ||
4571 | return false; | ||
4572 | |||
4573 | if (skb_head_is_locked(from)) | ||
4574 | return false; | ||
4575 | |||
4576 | delta = from->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff)); | ||
4577 | |||
4578 | page = virt_to_head_page(from->head); | ||
4579 | offset = from->data - (unsigned char *)page_address(page); | ||
4580 | |||
4581 | skb_fill_page_desc(to, skb_shinfo(to)->nr_frags, | ||
4582 | page, offset, skb_headlen(from)); | ||
4583 | *fragstolen = true; | ||
4584 | } else { | ||
4585 | if (skb_shinfo(to)->nr_frags + | ||
4586 | skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS) | ||
4587 | return false; | ||
4588 | |||
4589 | delta = from->truesize - | ||
4590 | SKB_TRUESIZE(skb_end_pointer(from) - from->head); | ||
4591 | } | ||
4592 | |||
4593 | WARN_ON_ONCE(delta < len); | ||
4594 | |||
4595 | memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags, | ||
4596 | skb_shinfo(from)->frags, | ||
4597 | skb_shinfo(from)->nr_frags * sizeof(skb_frag_t)); | ||
4598 | skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags; | ||
4599 | |||
4600 | if (!skb_cloned(from)) | ||
4601 | skb_shinfo(from)->nr_frags = 0; | ||
4602 | |||
4603 | /* if the skb is cloned this does nothing since we set nr_frags to 0 */ | ||
4604 | for (i = 0; i < skb_shinfo(from)->nr_frags; i++) | ||
4605 | skb_frag_ref(from, i); | ||
4606 | |||
4607 | to->truesize += delta; | ||
4608 | atomic_add(delta, &sk->sk_rmem_alloc); | ||
4609 | sk_mem_charge(sk, delta); | ||
4610 | to->len += len; | ||
4611 | to->data_len += len; | ||
4612 | |||
4613 | merge: | ||
4614 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE); | ||
4615 | TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq; | ||
4616 | TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq; | ||
4617 | return true; | ||
4618 | } | ||
4619 | |||
4620 | static void kfree_skb_partial(struct sk_buff *skb, bool head_stolen) | ||
4621 | { | ||
4622 | if (head_stolen) | ||
4623 | kmem_cache_free(skbuff_head_cache, skb); | ||
4624 | else | ||
4625 | __kfree_skb(skb); | ||
4626 | } | ||
4627 | |||
4456 | static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) | 4628 | static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) |
4457 | { | 4629 | { |
4458 | struct tcp_sock *tp = tcp_sk(sk); | 4630 | struct tcp_sock *tp = tcp_sk(sk); |
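Every caller of the new helper, the out-of-order hunk below and tcp_queue_rcv() further down, follows the same pattern: attempt the merge, and on success release the source skb with kfree_skb_partial() so a stolen head goes back to skbuff_head_cache. A sketch of that calling pattern:

        /* Calling pattern for the coalescing helper introduced above. */
        bool fragstolen;
        struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);

        if (tail && tcp_try_coalesce(sk, tail, skb, &fragstolen)) {
                kfree_skb_partial(skb, fragstolen); /* data now lives in tail */
        } else {
                __skb_queue_tail(&sk->sk_receive_queue, skb);
                skb_set_owner_r(skb, sk);           /* queue the skb itself */
        }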
@@ -4491,23 +4663,13 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) | |||
4491 | end_seq = TCP_SKB_CB(skb)->end_seq; | 4663 | end_seq = TCP_SKB_CB(skb)->end_seq; |
4492 | 4664 | ||
4493 | if (seq == TCP_SKB_CB(skb1)->end_seq) { | 4665 | if (seq == TCP_SKB_CB(skb1)->end_seq) { |
4494 | /* Packets in ofo can stay in queue a long time. | 4666 | bool fragstolen; |
4495 | * Better try to coalesce them right now | 4667 | |
4496 | * to avoid future tcp_collapse_ofo_queue(), | 4668 | if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { |
4497 | * probably the most expensive function in tcp stack. | ||
4498 | */ | ||
4499 | if (skb->len <= skb_tailroom(skb1) && !tcp_hdr(skb)->fin) { | ||
4500 | NET_INC_STATS_BH(sock_net(sk), | ||
4501 | LINUX_MIB_TCPRCVCOALESCE); | ||
4502 | BUG_ON(skb_copy_bits(skb, 0, | ||
4503 | skb_put(skb1, skb->len), | ||
4504 | skb->len)); | ||
4505 | TCP_SKB_CB(skb1)->end_seq = end_seq; | ||
4506 | TCP_SKB_CB(skb1)->ack_seq = TCP_SKB_CB(skb)->ack_seq; | ||
4507 | __kfree_skb(skb); | ||
4508 | skb = NULL; | ||
4509 | } else { | ||
4510 | __skb_queue_after(&tp->out_of_order_queue, skb1, skb); | 4669 | __skb_queue_after(&tp->out_of_order_queue, skb1, skb); |
4670 | } else { | ||
4671 | kfree_skb_partial(skb, fragstolen); | ||
4672 | skb = NULL; | ||
4511 | } | 4673 | } |
4512 | 4674 | ||
4513 | if (!tp->rx_opt.num_sacks || | 4675 | if (!tp->rx_opt.num_sacks || |
@@ -4583,12 +4745,65 @@ end: | |||
4583 | skb_set_owner_r(skb, sk); | 4745 | skb_set_owner_r(skb, sk); |
4584 | } | 4746 | } |
4585 | 4747 | ||
4748 | static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen, | ||
4749 | bool *fragstolen) | ||
4750 | { | ||
4751 | int eaten; | ||
4752 | struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue); | ||
4753 | |||
4754 | __skb_pull(skb, hdrlen); | ||
4755 | eaten = (tail && | ||
4756 | tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0; | ||
4757 | tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | ||
4758 | if (!eaten) { | ||
4759 | __skb_queue_tail(&sk->sk_receive_queue, skb); | ||
4760 | skb_set_owner_r(skb, sk); | ||
4761 | } | ||
4762 | return eaten; | ||
4763 | } | ||
4764 | |||
4765 | int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) | ||
4766 | { | ||
4767 | struct sk_buff *skb; | ||
4768 | struct tcphdr *th; | ||
4769 | bool fragstolen; | ||
4770 | |||
4771 | if (tcp_try_rmem_schedule(sk, size + sizeof(*th))) | ||
4772 | goto err; | ||
4773 | |||
4774 | skb = alloc_skb(size + sizeof(*th), sk->sk_allocation); | ||
4775 | if (!skb) | ||
4776 | goto err; | ||
4777 | |||
4778 | th = (struct tcphdr *)skb_put(skb, sizeof(*th)); | ||
4779 | skb_reset_transport_header(skb); | ||
4780 | memset(th, 0, sizeof(*th)); | ||
4781 | |||
4782 | if (memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size)) | ||
4783 | goto err_free; | ||
4784 | |||
4785 | TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt; | ||
4786 | TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size; | ||
4787 | TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1; | ||
4788 | |||
4789 | if (tcp_queue_rcv(sk, skb, sizeof(*th), &fragstolen)) { | ||
4790 | WARN_ON_ONCE(fragstolen); /* should not happen */ | ||
4791 | __kfree_skb(skb); | ||
4792 | } | ||
4793 | return size; | ||
4794 | |||
4795 | err_free: | ||
4796 | kfree_skb(skb); | ||
4797 | err: | ||
4798 | return -ENOMEM; | ||
4799 | } | ||
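Editor's note: tcp_send_rcvq() is the kernel half of refilling a receive queue during checkpoint/restore; user space reaches it by sending on a socket that is in repair mode with the receive queue selected. A hedged sketch follows; the fallback defines mirror the uapi values this series adds to <linux/tcp.h> and are assumptions if your headers predate it.

    #include <netinet/in.h>
    #include <stddef.h>
    #include <sys/socket.h>
    #include <sys/types.h>

    #ifndef TCP_REPAIR
    #define TCP_REPAIR       19
    #define TCP_REPAIR_QUEUE 20
    #define TCP_RECV_QUEUE    1
    #endif

    /* Requires CAP_NET_ADMIN; fd must be a TCP socket under restore. */
    static int restore_rcv_queue(int fd, const void *data, size_t len)
    {
        int on = 1, q = TCP_RECV_QUEUE;

        if (setsockopt(fd, IPPROTO_TCP, TCP_REPAIR, &on, sizeof(on)) < 0)
            return -1;
        if (setsockopt(fd, IPPROTO_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q)) < 0)
            return -1;
        /* in repair mode this is queued locally, nothing hits the wire */
        return send(fd, data, len, 0) == (ssize_t)len ? 0 : -1;
    }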
4586 | 4800 | ||
4587 | static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) | 4801 | static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) |
4588 | { | 4802 | { |
4589 | const struct tcphdr *th = tcp_hdr(skb); | 4803 | const struct tcphdr *th = tcp_hdr(skb); |
4590 | struct tcp_sock *tp = tcp_sk(sk); | 4804 | struct tcp_sock *tp = tcp_sk(sk); |
4591 | int eaten = -1; | 4805 | int eaten = -1; |
4806 | bool fragstolen = false; | ||
4592 | 4807 | ||
4593 | if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) | 4808 | if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) |
4594 | goto drop; | 4809 | goto drop; |
@@ -4633,8 +4848,7 @@ queue_and_out: | |||
4633 | tcp_try_rmem_schedule(sk, skb->truesize)) | 4848 | tcp_try_rmem_schedule(sk, skb->truesize)) |
4634 | goto drop; | 4849 | goto drop; |
4635 | 4850 | ||
4636 | skb_set_owner_r(skb, sk); | 4851 | eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); |
4637 | __skb_queue_tail(&sk->sk_receive_queue, skb); | ||
4638 | } | 4852 | } |
4639 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | 4853 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; |
4640 | if (skb->len) | 4854 | if (skb->len) |
@@ -4658,7 +4872,7 @@ queue_and_out: | |||
4658 | tcp_fast_path_check(sk); | 4872 | tcp_fast_path_check(sk); |
4659 | 4873 | ||
4660 | if (eaten > 0) | 4874 | if (eaten > 0) |
4661 | __kfree_skb(skb); | 4875 | kfree_skb_partial(skb, fragstolen); |
4662 | else if (!sock_flag(sk, SOCK_DEAD)) | 4876 | else if (!sock_flag(sk, SOCK_DEAD)) |
4663 | sk->sk_data_ready(sk, 0); | 4877 | sk->sk_data_ready(sk, 0); |
4664 | return; | 4878 | return; |
@@ -5437,6 +5651,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
5437 | } else { | 5651 | } else { |
5438 | int eaten = 0; | 5652 | int eaten = 0; |
5439 | int copied_early = 0; | 5653 | int copied_early = 0; |
5654 | bool fragstolen = false; | ||
5440 | 5655 | ||
5441 | if (tp->copied_seq == tp->rcv_nxt && | 5656 | if (tp->copied_seq == tp->rcv_nxt && |
5442 | len - tcp_header_len <= tp->ucopy.len) { | 5657 | len - tcp_header_len <= tp->ucopy.len) { |
@@ -5494,10 +5709,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
5494 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS); | 5709 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS); |
5495 | 5710 | ||
5496 | /* Bulk data transfer: receiver */ | 5711 | /* Bulk data transfer: receiver */ |
5497 | __skb_pull(skb, tcp_header_len); | 5712 | eaten = tcp_queue_rcv(sk, skb, tcp_header_len, |
5498 | __skb_queue_tail(&sk->sk_receive_queue, skb); | 5713 | &fragstolen); |
5499 | skb_set_owner_r(skb, sk); | ||
5500 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | ||
5501 | } | 5714 | } |
5502 | 5715 | ||
5503 | tcp_event_data_recv(sk, skb); | 5716 | tcp_event_data_recv(sk, skb); |
@@ -5519,7 +5732,7 @@ no_ack: | |||
5519 | else | 5732 | else |
5520 | #endif | 5733 | #endif |
5521 | if (eaten) | 5734 | if (eaten) |
5522 | __kfree_skb(skb); | 5735 | kfree_skb_partial(skb, fragstolen); |
5523 | else | 5736 | else |
5524 | sk->sk_data_ready(sk, 0); | 5737 | sk->sk_data_ready(sk, 0); |
5525 | return 0; | 5738 | return 0; |
@@ -5563,6 +5776,44 @@ discard: | |||
5563 | } | 5776 | } |
5564 | EXPORT_SYMBOL(tcp_rcv_established); | 5777 | EXPORT_SYMBOL(tcp_rcv_established); |
5565 | 5778 | ||
5779 | void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) | ||
5780 | { | ||
5781 | struct tcp_sock *tp = tcp_sk(sk); | ||
5782 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
5783 | |||
5784 | tcp_set_state(sk, TCP_ESTABLISHED); | ||
5785 | |||
5786 | if (skb != NULL) | ||
5787 | security_inet_conn_established(sk, skb); | ||
5788 | |||
5789 | /* Make sure socket is routed, for correct metrics. */ | ||
5790 | icsk->icsk_af_ops->rebuild_header(sk); | ||
5791 | |||
5792 | tcp_init_metrics(sk); | ||
5793 | |||
5794 | tcp_init_congestion_control(sk); | ||
5795 | |||
5796 | /* Prevent spurious tcp_cwnd_restart() on first data | ||
5797 | * packet. | ||
5798 | */ | ||
5799 | tp->lsndtime = tcp_time_stamp; | ||
5800 | |||
5801 | tcp_init_buffer_space(sk); | ||
5802 | |||
5803 | if (sock_flag(sk, SOCK_KEEPOPEN)) | ||
5804 | inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp)); | ||
5805 | |||
5806 | if (!tp->rx_opt.snd_wscale) | ||
5807 | __tcp_fast_path_on(tp, tp->snd_wnd); | ||
5808 | else | ||
5809 | tp->pred_flags = 0; | ||
5810 | |||
5811 | if (!sock_flag(sk, SOCK_DEAD)) { | ||
5812 | sk->sk_state_change(sk); | ||
5813 | sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); | ||
5814 | } | ||
5815 | } | ||
5816 | |||
5566 | static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | 5817 | static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, |
5567 | const struct tcphdr *th, unsigned int len) | 5818 | const struct tcphdr *th, unsigned int len) |
5568 | { | 5819 | { |
@@ -5695,36 +5946,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
5695 | } | 5946 | } |
5696 | 5947 | ||
5697 | smp_mb(); | 5948 | smp_mb(); |
5698 | tcp_set_state(sk, TCP_ESTABLISHED); | ||
5699 | 5949 | ||
5700 | security_inet_conn_established(sk, skb); | 5950 | tcp_finish_connect(sk, skb); |
5701 | |||
5702 | /* Make sure socket is routed, for correct metrics. */ | ||
5703 | icsk->icsk_af_ops->rebuild_header(sk); | ||
5704 | |||
5705 | tcp_init_metrics(sk); | ||
5706 | |||
5707 | tcp_init_congestion_control(sk); | ||
5708 | |||
5709 | /* Prevent spurious tcp_cwnd_restart() on first data | ||
5710 | * packet. | ||
5711 | */ | ||
5712 | tp->lsndtime = tcp_time_stamp; | ||
5713 | |||
5714 | tcp_init_buffer_space(sk); | ||
5715 | |||
5716 | if (sock_flag(sk, SOCK_KEEPOPEN)) | ||
5717 | inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp)); | ||
5718 | |||
5719 | if (!tp->rx_opt.snd_wscale) | ||
5720 | __tcp_fast_path_on(tp, tp->snd_wnd); | ||
5721 | else | ||
5722 | tp->pred_flags = 0; | ||
5723 | |||
5724 | if (!sock_flag(sk, SOCK_DEAD)) { | ||
5725 | sk->sk_state_change(sk); | ||
5726 | sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); | ||
5727 | } | ||
5728 | 5951 | ||
5729 | if (sk->sk_write_pending || | 5952 | if (sk->sk_write_pending || |
5730 | icsk->icsk_accept_queue.rskq_defer_accept || | 5953 | icsk->icsk_accept_queue.rskq_defer_accept || |
@@ -5738,8 +5961,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
5738 | */ | 5961 | */ |
5739 | inet_csk_schedule_ack(sk); | 5962 | inet_csk_schedule_ack(sk); |
5740 | icsk->icsk_ack.lrcvtime = tcp_time_stamp; | 5963 | icsk->icsk_ack.lrcvtime = tcp_time_stamp; |
5741 | icsk->icsk_ack.ato = TCP_ATO_MIN; | ||
5742 | tcp_incr_quickack(sk); | ||
5743 | tcp_enter_quickack_mode(sk); | 5964 | tcp_enter_quickack_mode(sk); |
5744 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, | 5965 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, |
5745 | TCP_DELACK_MAX, TCP_RTO_MAX); | 5966 | TCP_DELACK_MAX, TCP_RTO_MAX); |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0cb86ceb652f..2e76ffb66d7c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -138,6 +138,14 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) | |||
138 | } | 138 | } |
139 | EXPORT_SYMBOL_GPL(tcp_twsk_unique); | 139 | EXPORT_SYMBOL_GPL(tcp_twsk_unique); |
140 | 140 | ||
141 | static int tcp_repair_connect(struct sock *sk) | ||
142 | { | ||
143 | tcp_connect_init(sk); | ||
144 | tcp_finish_connect(sk, NULL); | ||
145 | |||
146 | return 0; | ||
147 | } | ||
148 | |||
141 | /* This will initiate an outgoing connection. */ | 149 | /* This will initiate an outgoing connection. */ |
142 | int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | 150 | int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) |
143 | { | 151 | { |
@@ -196,7 +204,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
196 | /* Reset inherited state */ | 204 | /* Reset inherited state */ |
197 | tp->rx_opt.ts_recent = 0; | 205 | tp->rx_opt.ts_recent = 0; |
198 | tp->rx_opt.ts_recent_stamp = 0; | 206 | tp->rx_opt.ts_recent_stamp = 0; |
199 | tp->write_seq = 0; | 207 | if (likely(!tp->repair)) |
208 | tp->write_seq = 0; | ||
200 | } | 209 | } |
201 | 210 | ||
202 | if (tcp_death_row.sysctl_tw_recycle && | 211 | if (tcp_death_row.sysctl_tw_recycle && |
@@ -247,7 +256,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
247 | sk->sk_gso_type = SKB_GSO_TCPV4; | 256 | sk->sk_gso_type = SKB_GSO_TCPV4; |
248 | sk_setup_caps(sk, &rt->dst); | 257 | sk_setup_caps(sk, &rt->dst); |
249 | 258 | ||
250 | if (!tp->write_seq) | 259 | if (!tp->write_seq && likely(!tp->repair)) |
251 | tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, | 260 | tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, |
252 | inet->inet_daddr, | 261 | inet->inet_daddr, |
253 | inet->inet_sport, | 262 | inet->inet_sport, |
@@ -255,7 +264,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
255 | 264 | ||
256 | inet->inet_id = tp->write_seq ^ jiffies; | 265 | inet->inet_id = tp->write_seq ^ jiffies; |
257 | 266 | ||
258 | err = tcp_connect(sk); | 267 | if (likely(!tp->repair)) |
268 | err = tcp_connect(sk); | ||
269 | else | ||
270 | err = tcp_repair_connect(sk); | ||
271 | |||
259 | rt = NULL; | 272 | rt = NULL; |
260 | if (err) | 273 | if (err) |
261 | goto failure; | 274 | goto failure; |
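Editor's note: with tp->repair set, connect() above skips secure_tcp_sequence_number() and tcp_connect(), taking tcp_repair_connect() straight to ESTABLISHED with no SYN on the wire. A hedged user-space sketch of driving that path, with sequence numbers planted first via TCP_QUEUE_SEQ; the constants mirror this series' uapi additions and are assumptions if absent from your headers.

    #include <netinet/in.h>
    #include <sys/socket.h>

    #ifndef TCP_REPAIR
    #define TCP_REPAIR       19
    #define TCP_REPAIR_QUEUE 20
    #define TCP_QUEUE_SEQ    21
    #define TCP_RECV_QUEUE    1
    #define TCP_SEND_QUEUE    2
    #endif

    static int set_seq(int fd, int queue, unsigned int seq)
    {
        if (setsockopt(fd, IPPROTO_TCP, TCP_REPAIR_QUEUE,
                       &queue, sizeof(queue)) < 0)
            return -1;
        return setsockopt(fd, IPPROTO_TCP, TCP_QUEUE_SEQ,
                          &seq, sizeof(seq));
    }

    static int repair_connect(int fd, const struct sockaddr_in *peer,
                              unsigned int snd_seq, unsigned int rcv_seq)
    {
        int on = 1;

        if (setsockopt(fd, IPPROTO_TCP, TCP_REPAIR, &on, sizeof(on)) < 0)
            return -1;
        if (set_seq(fd, TCP_SEND_QUEUE, snd_seq) < 0 ||  /* tp->write_seq */
            set_seq(fd, TCP_RECV_QUEUE, rcv_seq) < 0)    /* tp->rcv_nxt */
            return -1;

        /* no SYN leaves the host; socket goes straight to ESTABLISHED */
        return connect(fd, (const struct sockaddr *)peer, sizeof(*peer));
    }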
@@ -1226,12 +1239,11 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) | |||
1226 | NULL, NULL, skb); | 1239 | NULL, NULL, skb); |
1227 | 1240 | ||
1228 | if (genhash || memcmp(hash_location, newhash, 16) != 0) { | 1241 | if (genhash || memcmp(hash_location, newhash, 16) != 0) { |
1229 | if (net_ratelimit()) { | 1242 | net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", |
1230 | pr_info("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", | 1243 | &iph->saddr, ntohs(th->source), |
1231 | &iph->saddr, ntohs(th->source), | 1244 | &iph->daddr, ntohs(th->dest), |
1232 | &iph->daddr, ntohs(th->dest), | 1245 | genhash ? " tcp_v4_calc_md5_hash failed" |
1233 | genhash ? " tcp_v4_calc_md5_hash failed" : ""); | 1246 | : ""); |
1234 | } | ||
1235 | return 1; | 1247 | return 1; |
1236 | } | 1248 | } |
1237 | return 0; | 1249 | return 0; |
@@ -1355,7 +1367,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1355 | goto drop_and_free; | 1367 | goto drop_and_free; |
1356 | 1368 | ||
1357 | if (!want_cookie || tmp_opt.tstamp_ok) | 1369 | if (!want_cookie || tmp_opt.tstamp_ok) |
1358 | TCP_ECN_create_request(req, tcp_hdr(skb)); | 1370 | TCP_ECN_create_request(req, skb); |
1359 | 1371 | ||
1360 | if (want_cookie) { | 1372 | if (want_cookie) { |
1361 | isn = cookie_v4_init_sequence(sk, skb, &req->mss); | 1373 | isn = cookie_v4_init_sequence(sk, skb, &req->mss); |
@@ -1739,7 +1751,8 @@ process: | |||
1739 | if (!tcp_prequeue(sk, skb)) | 1751 | if (!tcp_prequeue(sk, skb)) |
1740 | ret = tcp_v4_do_rcv(sk, skb); | 1752 | ret = tcp_v4_do_rcv(sk, skb); |
1741 | } | 1753 | } |
1742 | } else if (unlikely(sk_add_backlog(sk, skb))) { | 1754 | } else if (unlikely(sk_add_backlog(sk, skb, |
1755 | sk->sk_rcvbuf + sk->sk_sndbuf))) { | ||
1743 | bh_unlock_sock(sk); | 1756 | bh_unlock_sock(sk); |
1744 | NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); | 1757 | NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); |
1745 | goto discard_and_relse; | 1758 | goto discard_and_relse; |
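Editor's note: sk_add_backlog() now takes an explicit limit, and TCP passes sk_rcvbuf + sk_sndbuf so bursts arriving while the socket lock is held are not dropped as aggressively as with rcvbuf alone. A hedged model of the bound being enforced; field names are illustrative, not the real struct sock layout.

    /* returns 0 if queued, -1 if the caller should drop (TCPBacklogDrop) */
    struct sock_model {
        unsigned int rmem_alloc;    /* bytes charged to the receive queue */
        unsigned int backlog_len;   /* bytes parked while the lock is held */
    };

    static int add_backlog(struct sock_model *sk, unsigned int truesize,
                           unsigned int limit)
    {
        if (sk->rmem_alloc + sk->backlog_len > limit)
            return -1;
        sk->backlog_len += truesize;
        return 0;
    }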
@@ -1875,64 +1888,15 @@ static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { | |||
1875 | static int tcp_v4_init_sock(struct sock *sk) | 1888 | static int tcp_v4_init_sock(struct sock *sk) |
1876 | { | 1889 | { |
1877 | struct inet_connection_sock *icsk = inet_csk(sk); | 1890 | struct inet_connection_sock *icsk = inet_csk(sk); |
1878 | struct tcp_sock *tp = tcp_sk(sk); | ||
1879 | |||
1880 | skb_queue_head_init(&tp->out_of_order_queue); | ||
1881 | tcp_init_xmit_timers(sk); | ||
1882 | tcp_prequeue_init(tp); | ||
1883 | |||
1884 | icsk->icsk_rto = TCP_TIMEOUT_INIT; | ||
1885 | tp->mdev = TCP_TIMEOUT_INIT; | ||
1886 | 1891 | ||
1887 | /* So many TCP implementations out there (incorrectly) count the | 1892 | tcp_init_sock(sk); |
1888 | * initial SYN frame in their delayed-ACK and congestion control | ||
1889 | * algorithms that we must have the following bandaid to talk | ||
1890 | * efficiently to them. -DaveM | ||
1891 | */ | ||
1892 | tp->snd_cwnd = TCP_INIT_CWND; | ||
1893 | |||
1894 | /* See draft-stevens-tcpca-spec-01 for discussion of the | ||
1895 | * initialization of these values. | ||
1896 | */ | ||
1897 | tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; | ||
1898 | tp->snd_cwnd_clamp = ~0; | ||
1899 | tp->mss_cache = TCP_MSS_DEFAULT; | ||
1900 | |||
1901 | tp->reordering = sysctl_tcp_reordering; | ||
1902 | icsk->icsk_ca_ops = &tcp_init_congestion_ops; | ||
1903 | |||
1904 | sk->sk_state = TCP_CLOSE; | ||
1905 | |||
1906 | sk->sk_write_space = sk_stream_write_space; | ||
1907 | sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); | ||
1908 | 1893 | ||
1909 | icsk->icsk_af_ops = &ipv4_specific; | 1894 | icsk->icsk_af_ops = &ipv4_specific; |
1910 | icsk->icsk_sync_mss = tcp_sync_mss; | 1895 | |
1911 | #ifdef CONFIG_TCP_MD5SIG | 1896 | #ifdef CONFIG_TCP_MD5SIG |
1912 | tp->af_specific = &tcp_sock_ipv4_specific; | 1897 | tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific; |
1913 | #endif | 1898 | #endif |
1914 | 1899 | ||
1915 | /* TCP Cookie Transactions */ | ||
1916 | if (sysctl_tcp_cookie_size > 0) { | ||
1917 | /* Default, cookies without s_data_payload. */ | ||
1918 | tp->cookie_values = | ||
1919 | kzalloc(sizeof(*tp->cookie_values), | ||
1920 | sk->sk_allocation); | ||
1921 | if (tp->cookie_values != NULL) | ||
1922 | kref_init(&tp->cookie_values->kref); | ||
1923 | } | ||
1924 | /* Presumed zeroed, in order of appearance: | ||
1925 | * cookie_in_always, cookie_out_never, | ||
1926 | * s_data_constant, s_data_in, s_data_out | ||
1927 | */ | ||
1928 | sk->sk_sndbuf = sysctl_tcp_wmem[1]; | ||
1929 | sk->sk_rcvbuf = sysctl_tcp_rmem[1]; | ||
1930 | |||
1931 | local_bh_disable(); | ||
1932 | sock_update_memcg(sk); | ||
1933 | sk_sockets_allocated_inc(sk); | ||
1934 | local_bh_enable(); | ||
1935 | |||
1936 | return 0; | 1900 | return 0; |
1937 | } | 1901 | } |
1938 | 1902 | ||
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 3cabafb5cdd1..6f6a91832826 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -482,6 +482,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
482 | newtp->sacked_out = 0; | 482 | newtp->sacked_out = 0; |
483 | newtp->fackets_out = 0; | 483 | newtp->fackets_out = 0; |
484 | newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; | 484 | newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; |
485 | tcp_enable_early_retrans(newtp); | ||
485 | 486 | ||
486 | /* So many TCP implementations out there (incorrectly) count the | 487 | /* So many TCP implementations out there (incorrectly) count the |
487 | * initial SYN frame in their delayed-ACK and congestion control | 488 | * initial SYN frame in their delayed-ACK and congestion control |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7ac6423117ad..1a630825c45b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -34,6 +34,8 @@ | |||
34 | * | 34 | * |
35 | */ | 35 | */ |
36 | 36 | ||
37 | #define pr_fmt(fmt) "TCP: " fmt | ||
38 | |||
37 | #include <net/tcp.h> | 39 | #include <net/tcp.h> |
38 | 40 | ||
39 | #include <linux/compiler.h> | 41 | #include <linux/compiler.h> |
@@ -78,9 +80,8 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) | |||
78 | tp->frto_counter = 3; | 80 | tp->frto_counter = 3; |
79 | 81 | ||
80 | tp->packets_out += tcp_skb_pcount(skb); | 82 | tp->packets_out += tcp_skb_pcount(skb); |
81 | if (!prior_packets) | 83 | if (!prior_packets || tp->early_retrans_delayed) |
82 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | 84 | tcp_rearm_rto(sk); |
83 | inet_csk(sk)->icsk_rto, TCP_RTO_MAX); | ||
84 | } | 85 | } |
85 | 86 | ||
86 | /* SND.NXT, if window was not shrunk. | 87 | /* SND.NXT, if window was not shrunk. |
@@ -563,13 +564,13 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, | |||
563 | /* Compute TCP options for SYN packets. This is not the final | 564 | /* Compute TCP options for SYN packets. This is not the final |
564 | * network wire format yet. | 565 | * network wire format yet. |
565 | */ | 566 | */ |
566 | static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, | 567 | static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, |
567 | struct tcp_out_options *opts, | 568 | struct tcp_out_options *opts, |
568 | struct tcp_md5sig_key **md5) | 569 | struct tcp_md5sig_key **md5) |
569 | { | 570 | { |
570 | struct tcp_sock *tp = tcp_sk(sk); | 571 | struct tcp_sock *tp = tcp_sk(sk); |
571 | struct tcp_cookie_values *cvp = tp->cookie_values; | 572 | struct tcp_cookie_values *cvp = tp->cookie_values; |
572 | unsigned remaining = MAX_TCP_OPTION_SPACE; | 573 | unsigned int remaining = MAX_TCP_OPTION_SPACE; |
573 | u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? | 574 | u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? |
574 | tcp_cookie_size_check(cvp->cookie_desired) : | 575 | tcp_cookie_size_check(cvp->cookie_desired) : |
575 | 0; | 576 | 0; |
@@ -663,15 +664,15 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, | |||
663 | } | 664 | } |
664 | 665 | ||
665 | /* Set up TCP options for SYN-ACKs. */ | 666 | /* Set up TCP options for SYN-ACKs. */ |
666 | static unsigned tcp_synack_options(struct sock *sk, | 667 | static unsigned int tcp_synack_options(struct sock *sk, |
667 | struct request_sock *req, | 668 | struct request_sock *req, |
668 | unsigned mss, struct sk_buff *skb, | 669 | unsigned int mss, struct sk_buff *skb, |
669 | struct tcp_out_options *opts, | 670 | struct tcp_out_options *opts, |
670 | struct tcp_md5sig_key **md5, | 671 | struct tcp_md5sig_key **md5, |
671 | struct tcp_extend_values *xvp) | 672 | struct tcp_extend_values *xvp) |
672 | { | 673 | { |
673 | struct inet_request_sock *ireq = inet_rsk(req); | 674 | struct inet_request_sock *ireq = inet_rsk(req); |
674 | unsigned remaining = MAX_TCP_OPTION_SPACE; | 675 | unsigned int remaining = MAX_TCP_OPTION_SPACE; |
675 | u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? | 676 | u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? |
676 | xvp->cookie_plus : | 677 | xvp->cookie_plus : |
677 | 0; | 678 | 0; |
@@ -742,13 +743,13 @@ static unsigned tcp_synack_options(struct sock *sk, | |||
742 | /* Compute TCP options for ESTABLISHED sockets. This is not the | 743 | /* Compute TCP options for ESTABLISHED sockets. This is not the |
743 | * final wire format yet. | 744 | * final wire format yet. |
744 | */ | 745 | */ |
745 | static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, | 746 | static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb, |
746 | struct tcp_out_options *opts, | 747 | struct tcp_out_options *opts, |
747 | struct tcp_md5sig_key **md5) | 748 | struct tcp_md5sig_key **md5) |
748 | { | 749 | { |
749 | struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; | 750 | struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; |
750 | struct tcp_sock *tp = tcp_sk(sk); | 751 | struct tcp_sock *tp = tcp_sk(sk); |
751 | unsigned size = 0; | 752 | unsigned int size = 0; |
752 | unsigned int eff_sacks; | 753 | unsigned int eff_sacks; |
753 | 754 | ||
754 | #ifdef CONFIG_TCP_MD5SIG | 755 | #ifdef CONFIG_TCP_MD5SIG |
@@ -770,9 +771,9 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, | |||
770 | 771 | ||
771 | eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack; | 772 | eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack; |
772 | if (unlikely(eff_sacks)) { | 773 | if (unlikely(eff_sacks)) { |
773 | const unsigned remaining = MAX_TCP_OPTION_SPACE - size; | 774 | const unsigned int remaining = MAX_TCP_OPTION_SPACE - size; |
774 | opts->num_sack_blocks = | 775 | opts->num_sack_blocks = |
775 | min_t(unsigned, eff_sacks, | 776 | min_t(unsigned int, eff_sacks, |
776 | (remaining - TCPOLEN_SACK_BASE_ALIGNED) / | 777 | (remaining - TCPOLEN_SACK_BASE_ALIGNED) / |
777 | TCPOLEN_SACK_PERBLOCK); | 778 | TCPOLEN_SACK_PERBLOCK); |
778 | size += TCPOLEN_SACK_BASE_ALIGNED + | 779 | size += TCPOLEN_SACK_BASE_ALIGNED + |
@@ -801,7 +802,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
801 | struct tcp_sock *tp; | 802 | struct tcp_sock *tp; |
802 | struct tcp_skb_cb *tcb; | 803 | struct tcp_skb_cb *tcb; |
803 | struct tcp_out_options opts; | 804 | struct tcp_out_options opts; |
804 | unsigned tcp_options_size, tcp_header_size; | 805 | unsigned int tcp_options_size, tcp_header_size; |
805 | struct tcp_md5sig_key *md5; | 806 | struct tcp_md5sig_key *md5; |
806 | struct tcphdr *th; | 807 | struct tcphdr *th; |
807 | int err; | 808 | int err; |
@@ -1150,7 +1151,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) | |||
1150 | } | 1151 | } |
1151 | 1152 | ||
1152 | /* Calculate MSS. Not accounting for SACKs here. */ | 1153 | /* Calculate MSS. Not accounting for SACKs here. */ |
1153 | int tcp_mtu_to_mss(const struct sock *sk, int pmtu) | 1154 | int tcp_mtu_to_mss(struct sock *sk, int pmtu) |
1154 | { | 1155 | { |
1155 | const struct tcp_sock *tp = tcp_sk(sk); | 1156 | const struct tcp_sock *tp = tcp_sk(sk); |
1156 | const struct inet_connection_sock *icsk = inet_csk(sk); | 1157 | const struct inet_connection_sock *icsk = inet_csk(sk); |
@@ -1161,6 +1162,14 @@ int tcp_mtu_to_mss(const struct sock *sk, int pmtu) | |||
1161 | */ | 1162 | */ |
1162 | mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr); | 1163 | mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr); |
1163 | 1164 | ||
1165 | /* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */ | ||
1166 | if (icsk->icsk_af_ops->net_frag_header_len) { | ||
1167 | const struct dst_entry *dst = __sk_dst_get(sk); | ||
1168 | |||
1169 | if (dst && dst_allfrag(dst)) | ||
1170 | mss_now -= icsk->icsk_af_ops->net_frag_header_len; | ||
1171 | } | ||
1172 | |||
1164 | /* Clamp it (mss_clamp does not include tcp options) */ | 1173 | /* Clamp it (mss_clamp does not include tcp options) */ |
1165 | if (mss_now > tp->rx_opt.mss_clamp) | 1174 | if (mss_now > tp->rx_opt.mss_clamp) |
1166 | mss_now = tp->rx_opt.mss_clamp; | 1175 | mss_now = tp->rx_opt.mss_clamp; |
@@ -1179,7 +1188,7 @@ int tcp_mtu_to_mss(const struct sock *sk, int pmtu) | |||
1179 | } | 1188 | } |
1180 | 1189 | ||
1181 | /* Inverse of above */ | 1190 | /* Inverse of above */ |
1182 | int tcp_mss_to_mtu(const struct sock *sk, int mss) | 1191 | int tcp_mss_to_mtu(struct sock *sk, int mss) |
1183 | { | 1192 | { |
1184 | const struct tcp_sock *tp = tcp_sk(sk); | 1193 | const struct tcp_sock *tp = tcp_sk(sk); |
1185 | const struct inet_connection_sock *icsk = inet_csk(sk); | 1194 | const struct inet_connection_sock *icsk = inet_csk(sk); |
@@ -1190,6 +1199,13 @@ int tcp_mss_to_mtu(const struct sock *sk, int mss) | |||
1190 | icsk->icsk_ext_hdr_len + | 1199 | icsk->icsk_ext_hdr_len + |
1191 | icsk->icsk_af_ops->net_header_len; | 1200 | icsk->icsk_af_ops->net_header_len; |
1192 | 1201 | ||
1202 | /* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */ | ||
1203 | if (icsk->icsk_af_ops->net_frag_header_len) { | ||
1204 | const struct dst_entry *dst = __sk_dst_get(sk); | ||
1205 | |||
1206 | if (dst && dst_allfrag(dst)) | ||
1207 | mtu += icsk->icsk_af_ops->net_frag_header_len; | ||
1208 | } | ||
1193 | return mtu; | 1209 | return mtu; |
1194 | } | 1210 | } |
1195 | 1211 | ||
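Editor's note: the two hunks above account for the 8-byte IPv6 fragment header that rides on every packet when the destination forces all-fragment mode (dst_allfrag); net_frag_header_len is nonzero only for the IPv6 af_ops. A worked example of the arithmetic, assuming the standard 40-byte IPv6 header and 20-byte TCP header:

    #include <stdio.h>

    static int mtu_to_mss(int pmtu, int net_hdr, int frag_hdr, int allfrag)
    {
        int mss = pmtu - net_hdr - 20;      /* sizeof(struct tcphdr) */

        if (allfrag)
            mss -= frag_hdr;                /* frag_hdr on every packet */
        return mss;
    }

    int main(void)
    {
        printf("%d\n", mtu_to_mss(1500, 40, 8, 0));  /* 1440 */
        printf("%d\n", mtu_to_mss(1500, 40, 8, 1));  /* 1432 */
        return 0;
    }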
@@ -1259,7 +1275,7 @@ unsigned int tcp_current_mss(struct sock *sk) | |||
1259 | const struct tcp_sock *tp = tcp_sk(sk); | 1275 | const struct tcp_sock *tp = tcp_sk(sk); |
1260 | const struct dst_entry *dst = __sk_dst_get(sk); | 1276 | const struct dst_entry *dst = __sk_dst_get(sk); |
1261 | u32 mss_now; | 1277 | u32 mss_now; |
1262 | unsigned header_len; | 1278 | unsigned int header_len; |
1263 | struct tcp_out_options opts; | 1279 | struct tcp_out_options opts; |
1264 | struct tcp_md5sig_key *md5; | 1280 | struct tcp_md5sig_key *md5; |
1265 | 1281 | ||
@@ -1390,7 +1406,7 @@ static inline int tcp_minshall_check(const struct tcp_sock *tp) | |||
1390 | */ | 1406 | */ |
1391 | static inline int tcp_nagle_check(const struct tcp_sock *tp, | 1407 | static inline int tcp_nagle_check(const struct tcp_sock *tp, |
1392 | const struct sk_buff *skb, | 1408 | const struct sk_buff *skb, |
1393 | unsigned mss_now, int nonagle) | 1409 | unsigned int mss_now, int nonagle) |
1394 | { | 1410 | { |
1395 | return skb->len < mss_now && | 1411 | return skb->len < mss_now && |
1396 | ((nonagle & TCP_NAGLE_CORK) || | 1412 | ((nonagle & TCP_NAGLE_CORK) || |
@@ -2167,8 +2183,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
2167 | 2183 | ||
2168 | #if FASTRETRANS_DEBUG > 0 | 2184 | #if FASTRETRANS_DEBUG > 0 |
2169 | if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { | 2185 | if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { |
2170 | if (net_ratelimit()) | 2186 | net_dbg_ratelimited("retrans_out leaked\n"); |
2171 | printk(KERN_DEBUG "retrans_out leaked.\n"); | ||
2172 | } | 2187 | } |
2173 | #endif | 2188 | #endif |
2174 | if (!tp->retrans_out) | 2189 | if (!tp->retrans_out) |
@@ -2402,7 +2417,7 @@ int tcp_send_synack(struct sock *sk) | |||
2402 | 2417 | ||
2403 | skb = tcp_write_queue_head(sk); | 2418 | skb = tcp_write_queue_head(sk); |
2404 | if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { | 2419 | if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { |
2405 | printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); | 2420 | pr_debug("%s: wrong queue state\n", __func__); |
2406 | return -EFAULT; | 2421 | return -EFAULT; |
2407 | } | 2422 | } |
2408 | if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) { | 2423 | if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) { |
@@ -2562,7 +2577,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2562 | EXPORT_SYMBOL(tcp_make_synack); | 2577 | EXPORT_SYMBOL(tcp_make_synack); |
2563 | 2578 | ||
2564 | /* Do all connect socket setups that can be done AF independent. */ | 2579 | /* Do all connect socket setups that can be done AF independent. */ |
2565 | static void tcp_connect_init(struct sock *sk) | 2580 | void tcp_connect_init(struct sock *sk) |
2566 | { | 2581 | { |
2567 | const struct dst_entry *dst = __sk_dst_get(sk); | 2582 | const struct dst_entry *dst = __sk_dst_get(sk); |
2568 | struct tcp_sock *tp = tcp_sk(sk); | 2583 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -2617,9 +2632,12 @@ static void tcp_connect_init(struct sock *sk) | |||
2617 | tp->snd_una = tp->write_seq; | 2632 | tp->snd_una = tp->write_seq; |
2618 | tp->snd_sml = tp->write_seq; | 2633 | tp->snd_sml = tp->write_seq; |
2619 | tp->snd_up = tp->write_seq; | 2634 | tp->snd_up = tp->write_seq; |
2620 | tp->rcv_nxt = 0; | 2635 | tp->snd_nxt = tp->write_seq; |
2621 | tp->rcv_wup = 0; | 2636 | |
2622 | tp->copied_seq = 0; | 2637 | if (likely(!tp->repair)) |
2638 | tp->rcv_nxt = 0; | ||
2639 | tp->rcv_wup = tp->rcv_nxt; | ||
2640 | tp->copied_seq = tp->rcv_nxt; | ||
2623 | 2641 | ||
2624 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; | 2642 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; |
2625 | inet_csk(sk)->icsk_retransmits = 0; | 2643 | inet_csk(sk)->icsk_retransmits = 0; |
@@ -2642,7 +2660,6 @@ int tcp_connect(struct sock *sk) | |||
2642 | /* Reserve space for headers. */ | 2660 | /* Reserve space for headers. */ |
2643 | skb_reserve(buff, MAX_TCP_HEADER); | 2661 | skb_reserve(buff, MAX_TCP_HEADER); |
2644 | 2662 | ||
2645 | tp->snd_nxt = tp->write_seq; | ||
2646 | tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); | 2663 | tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); |
2647 | TCP_ECN_send_syn(sk, buff); | 2664 | TCP_ECN_send_syn(sk, buff); |
2648 | 2665 | ||
@@ -2791,6 +2808,15 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) | |||
2791 | return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); | 2808 | return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); |
2792 | } | 2809 | } |
2793 | 2810 | ||
2811 | void tcp_send_window_probe(struct sock *sk) | ||
2812 | { | ||
2813 | if (sk->sk_state == TCP_ESTABLISHED) { | ||
2814 | tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1; | ||
2815 | tcp_sk(sk)->snd_nxt = tcp_sk(sk)->write_seq; | ||
2816 | tcp_xmit_probe_skb(sk, 0); | ||
2817 | } | ||
2818 | } | ||
2819 | |||
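Editor's note: the expected caller of tcp_send_window_probe() is the repair-off path of setsockopt, so the peer re-learns our receive window after a restore; snd_wl1 is backed up one sequence so the probed ACK is accepted as a window update. A hedged sketch of that trigger from user space; that the repair-off handler actually calls this function is an assumption about code outside this hunk.

    #include <netinet/in.h>
    #include <sys/socket.h>

    #ifndef TCP_REPAIR
    #define TCP_REPAIR 19
    #endif

    static int finish_restore(int fd)
    {
        int off = 0;

        /* assumed: the TCP_REPAIR-off path calls tcp_send_window_probe() */
        return setsockopt(fd, IPPROTO_TCP, TCP_REPAIR, &off, sizeof(off));
    }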
2794 | /* Initiate keepalive or window probe from timer. */ | 2820 | /* Initiate keepalive or window probe from timer. */ |
2795 | int tcp_write_wakeup(struct sock *sk) | 2821 | int tcp_write_wakeup(struct sock *sk) |
2796 | { | 2822 | { |
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index a981cdc0a6e9..4526fe68e60e 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c | |||
@@ -91,7 +91,7 @@ static inline int tcp_probe_avail(void) | |||
91 | * Note: arguments must match tcp_rcv_established()! | 91 | * Note: arguments must match tcp_rcv_established()! |
92 | */ | 92 | */ |
93 | static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, | 93 | static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, |
94 | struct tcphdr *th, unsigned len) | 94 | struct tcphdr *th, unsigned int len) |
95 | { | 95 | { |
96 | const struct tcp_sock *tp = tcp_sk(sk); | 96 | const struct tcp_sock *tp = tcp_sk(sk); |
97 | const struct inet_sock *inet = inet_sk(sk); | 97 | const struct inet_sock *inet = inet_sk(sk); |
@@ -138,7 +138,7 @@ static struct jprobe tcp_jprobe = { | |||
138 | .entry = jtcp_rcv_established, | 138 | .entry = jtcp_rcv_established, |
139 | }; | 139 | }; |
140 | 140 | ||
141 | static int tcpprobe_open(struct inode * inode, struct file * file) | 141 | static int tcpprobe_open(struct inode *inode, struct file *file) |
142 | { | 142 | { |
143 | /* Reset (empty) log */ | 143 | /* Reset (empty) log */ |
144 | spin_lock_bh(&tcp_probe.lock); | 144 | spin_lock_bh(&tcp_probe.lock); |
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 34d4a02c2f16..e911e6c523ec 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -319,6 +319,11 @@ void tcp_retransmit_timer(struct sock *sk) | |||
319 | struct tcp_sock *tp = tcp_sk(sk); | 319 | struct tcp_sock *tp = tcp_sk(sk); |
320 | struct inet_connection_sock *icsk = inet_csk(sk); | 320 | struct inet_connection_sock *icsk = inet_csk(sk); |
321 | 321 | ||
322 | if (tp->early_retrans_delayed) { | ||
323 | tcp_resume_early_retransmit(sk); | ||
324 | return; | ||
325 | } | ||
326 | |||
322 | if (!tp->packets_out) | 327 | if (!tp->packets_out) |
323 | goto out; | 328 | goto out; |
324 | 329 | ||
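Editor's note: when early_retrans_delayed is set, the retransmit timer above fires well before a full RTO and resumes the delayed early retransmit (RFC 5827 style) instead. A hedged model of the delay the series arms, roughly a quarter of the smoothed RTT with a small floor; the microsecond units and the 2 ms floor are assumptions.

    static unsigned int early_retrans_delay_us(unsigned int srtt_us)
    {
        unsigned int delay = srtt_us / 4;     /* about RTT/4 */

        return delay < 2000 ? 2000 : delay;   /* assumed 2 ms floor */
    }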
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index fe141052a1be..279fd0846302 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -107,6 +107,7 @@ | |||
107 | #include <net/checksum.h> | 107 | #include <net/checksum.h> |
108 | #include <net/xfrm.h> | 108 | #include <net/xfrm.h> |
109 | #include <trace/events/udp.h> | 109 | #include <trace/events/udp.h> |
110 | #include <linux/static_key.h> | ||
110 | #include "udp_impl.h" | 111 | #include "udp_impl.h" |
111 | 112 | ||
112 | struct udp_table udp_table __read_mostly; | 113 | struct udp_table udp_table __read_mostly; |
@@ -206,7 +207,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, | |||
206 | 207 | ||
207 | if (!snum) { | 208 | if (!snum) { |
208 | int low, high, remaining; | 209 | int low, high, remaining; |
209 | unsigned rand; | 210 | unsigned int rand; |
210 | unsigned short first, last; | 211 | unsigned short first, last; |
211 | DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); | 212 | DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); |
212 | 213 | ||
@@ -846,7 +847,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
846 | * Get and verify the address. | 847 | * Get and verify the address. |
847 | */ | 848 | */ |
848 | if (msg->msg_name) { | 849 | if (msg->msg_name) { |
849 | struct sockaddr_in * usin = (struct sockaddr_in *)msg->msg_name; | 850 | struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name; |
850 | if (msg->msg_namelen < sizeof(*usin)) | 851 | if (msg->msg_namelen < sizeof(*usin)) |
851 | return -EINVAL; | 852 | return -EINVAL; |
852 | if (usin->sin_family != AF_INET) { | 853 | if (usin->sin_family != AF_INET) { |
@@ -1379,6 +1380,14 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
1379 | 1380 | ||
1380 | } | 1381 | } |
1381 | 1382 | ||
1383 | static struct static_key udp_encap_needed __read_mostly; | ||
1384 | void udp_encap_enable(void) | ||
1385 | { | ||
1386 | if (!static_key_enabled(&udp_encap_needed)) | ||
1387 | static_key_slow_inc(&udp_encap_needed); | ||
1388 | } | ||
1389 | EXPORT_SYMBOL(udp_encap_enable); | ||
1390 | |||
1382 | /* returns: | 1391 | /* returns: |
1383 | * -1: error | 1392 | * -1: error |
1384 | * 0: success | 1393 | * 0: success |
@@ -1400,7 +1409,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
1400 | goto drop; | 1409 | goto drop; |
1401 | nf_reset(skb); | 1410 | nf_reset(skb); |
1402 | 1411 | ||
1403 | if (up->encap_type) { | 1412 | if (static_key_false(&udp_encap_needed) && up->encap_type) { |
1404 | int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); | 1413 | int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); |
1405 | 1414 | ||
1406 | /* | 1415 | /* |
@@ -1470,7 +1479,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
1470 | goto drop; | 1479 | goto drop; |
1471 | 1480 | ||
1472 | 1481 | ||
1473 | if (sk_rcvqueues_full(sk, skb)) | 1482 | if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) |
1474 | goto drop; | 1483 | goto drop; |
1475 | 1484 | ||
1476 | rc = 0; | 1485 | rc = 0; |
@@ -1479,7 +1488,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
1479 | bh_lock_sock(sk); | 1488 | bh_lock_sock(sk); |
1480 | if (!sock_owned_by_user(sk)) | 1489 | if (!sock_owned_by_user(sk)) |
1481 | rc = __udp_queue_rcv_skb(sk, skb); | 1490 | rc = __udp_queue_rcv_skb(sk, skb); |
1482 | else if (sk_add_backlog(sk, skb)) { | 1491 | else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) { |
1483 | bh_unlock_sock(sk); | 1492 | bh_unlock_sock(sk); |
1484 | goto drop; | 1493 | goto drop; |
1485 | } | 1494 | } |
@@ -1760,6 +1769,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, | |||
1760 | /* FALLTHROUGH */ | 1769 | /* FALLTHROUGH */ |
1761 | case UDP_ENCAP_L2TPINUDP: | 1770 | case UDP_ENCAP_L2TPINUDP: |
1762 | up->encap_type = val; | 1771 | up->encap_type = val; |
1772 | udp_encap_enable(); | ||
1763 | break; | 1773 | break; |
1764 | default: | 1774 | default: |
1765 | err = -ENOPROTOOPT; | 1775 | err = -ENOPROTOOPT; |
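Editor's note: the udp_encap_needed static key above makes the encap_type test in the receive fast path free until the first encapsulating socket enables it, at which point the branch is patched in at runtime. A minimal kernel-style sketch of the pattern, not tied to UDP:

    #include <linux/static_key.h>

    static struct static_key feature_needed __read_mostly; /* starts off */

    void feature_enable(void)
    {
        if (!static_key_enabled(&feature_needed))
            static_key_slow_inc(&feature_needed); /* patch branches in */
    }

    static void hot_path(void)
    {
        if (static_key_false(&feature_needed)) {  /* no-op while disabled */
            /* rarely needed slow-path work goes here */
        }
    }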
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index aaad650d47d9..5a681e298b90 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h | |||
@@ -25,7 +25,7 @@ extern int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
25 | size_t len, int noblock, int flags, int *addr_len); | 25 | size_t len, int noblock, int flags, int *addr_len); |
26 | extern int udp_sendpage(struct sock *sk, struct page *page, int offset, | 26 | extern int udp_sendpage(struct sock *sk, struct page *page, int offset, |
27 | size_t size, int flags); | 27 | size_t size, int flags); |
28 | extern int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb); | 28 | extern int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); |
29 | extern void udp_destroy_sock(struct sock *sk); | 29 | extern void udp_destroy_sock(struct sock *sk); |
30 | 30 | ||
31 | #ifdef CONFIG_PROC_FS | 31 | #ifdef CONFIG_PROC_FS |
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index a0b4c5da8d43..0d3426cb5c4f 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
@@ -152,7 +152,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) | |||
152 | 152 | ||
153 | case IPPROTO_AH: | 153 | case IPPROTO_AH: |
154 | if (pskb_may_pull(skb, xprth + 8 - skb->data)) { | 154 | if (pskb_may_pull(skb, xprth + 8 - skb->data)) { |
155 | __be32 *ah_hdr = (__be32*)xprth; | 155 | __be32 *ah_hdr = (__be32 *)xprth; |
156 | 156 | ||
157 | fl4->fl4_ipsec_spi = ah_hdr[1]; | 157 | fl4->fl4_ipsec_spi = ah_hdr[1]; |
158 | } | 158 | } |
@@ -298,8 +298,8 @@ void __init xfrm4_init(int rt_max_size) | |||
298 | xfrm4_state_init(); | 298 | xfrm4_state_init(); |
299 | xfrm4_policy_init(); | 299 | xfrm4_policy_init(); |
300 | #ifdef CONFIG_SYSCTL | 300 | #ifdef CONFIG_SYSCTL |
301 | sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path, | 301 | sysctl_hdr = register_net_sysctl(&init_net, "net/ipv4", |
302 | xfrm4_policy_table); | 302 | xfrm4_policy_table); |
303 | #endif | 303 | #endif |
304 | } | 304 | } |
305 | 305 | ||