diff options
Diffstat (limited to 'net/ipv4')
53 files changed, 1323 insertions, 1529 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index d183262943d9..20f1cb5c8aba 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig | |||
@@ -262,8 +262,8 @@ config ARPD | |||
262 | bool "IP: ARP daemon support" | 262 | bool "IP: ARP daemon support" |
263 | ---help--- | 263 | ---help--- |
264 | The kernel maintains an internal cache which maps IP addresses to | 264 | The kernel maintains an internal cache which maps IP addresses to |
265 | hardware addresses on the local network, so that Ethernet/Token Ring/ | 265 | hardware addresses on the local network, so that Ethernet |
266 | etc. frames are sent to the proper address on the physical networking | 266 | frames are sent to the proper address on the physical networking |
267 | layer. Normally, kernel uses the ARP protocol to resolve these | 267 | layer. Normally, kernel uses the ARP protocol to resolve these |
268 | mappings. | 268 | mappings. |
269 | 269 | ||
@@ -312,7 +312,7 @@ config SYN_COOKIES | |||
312 | 312 | ||
313 | config INET_AH | 313 | config INET_AH |
314 | tristate "IP: AH transformation" | 314 | tristate "IP: AH transformation" |
315 | select XFRM | 315 | select XFRM_ALGO |
316 | select CRYPTO | 316 | select CRYPTO |
317 | select CRYPTO_HMAC | 317 | select CRYPTO_HMAC |
318 | select CRYPTO_MD5 | 318 | select CRYPTO_MD5 |
@@ -324,7 +324,7 @@ config INET_AH | |||
324 | 324 | ||
325 | config INET_ESP | 325 | config INET_ESP |
326 | tristate "IP: ESP transformation" | 326 | tristate "IP: ESP transformation" |
327 | select XFRM | 327 | select XFRM_ALGO |
328 | select CRYPTO | 328 | select CRYPTO |
329 | select CRYPTO_AUTHENC | 329 | select CRYPTO_AUTHENC |
330 | select CRYPTO_HMAC | 330 | select CRYPTO_HMAC |
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 10e3751466b5..c8f7aee587d1 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
@@ -350,7 +350,7 @@ lookup_protocol: | |||
350 | err = 0; | 350 | err = 0; |
351 | sk->sk_no_check = answer_no_check; | 351 | sk->sk_no_check = answer_no_check; |
352 | if (INET_PROTOSW_REUSE & answer_flags) | 352 | if (INET_PROTOSW_REUSE & answer_flags) |
353 | sk->sk_reuse = 1; | 353 | sk->sk_reuse = SK_CAN_REUSE; |
354 | 354 | ||
355 | inet = inet_sk(sk); | 355 | inet = inet_sk(sk); |
356 | inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; | 356 | inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; |
@@ -541,7 +541,7 @@ out: | |||
541 | } | 541 | } |
542 | EXPORT_SYMBOL(inet_bind); | 542 | EXPORT_SYMBOL(inet_bind); |
543 | 543 | ||
544 | int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, | 544 | int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, |
545 | int addr_len, int flags) | 545 | int addr_len, int flags) |
546 | { | 546 | { |
547 | struct sock *sk = sock->sk; | 547 | struct sock *sk = sock->sk; |
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index fd508b526014..e8f2617ecd47 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c | |||
@@ -77,7 +77,7 @@ static inline struct scatterlist *ah_req_sg(struct crypto_ahash *ahash, | |||
77 | 77 | ||
78 | static int ip_clear_mutable_options(const struct iphdr *iph, __be32 *daddr) | 78 | static int ip_clear_mutable_options(const struct iphdr *iph, __be32 *daddr) |
79 | { | 79 | { |
80 | unsigned char * optptr = (unsigned char*)(iph+1); | 80 | unsigned char *optptr = (unsigned char *)(iph+1); |
81 | int l = iph->ihl*4 - sizeof(struct iphdr); | 81 | int l = iph->ihl*4 - sizeof(struct iphdr); |
82 | int optlen; | 82 | int optlen; |
83 | 83 | ||
@@ -406,8 +406,8 @@ static void ah4_err(struct sk_buff *skb, u32 info) | |||
406 | ah->spi, IPPROTO_AH, AF_INET); | 406 | ah->spi, IPPROTO_AH, AF_INET); |
407 | if (!x) | 407 | if (!x) |
408 | return; | 408 | return; |
409 | printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n", | 409 | pr_debug("pmtu discovery on SA AH/%08x/%08x\n", |
410 | ntohl(ah->spi), ntohl(iph->daddr)); | 410 | ntohl(ah->spi), ntohl(iph->daddr)); |
411 | xfrm_state_put(x); | 411 | xfrm_state_put(x); |
412 | } | 412 | } |
413 | 413 | ||
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 18d9b81ecb1a..cda37be02f8d 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c | |||
@@ -73,6 +73,8 @@ | |||
73 | * Jesper D. Brouer: Proxy ARP PVLAN RFC 3069 support. | 73 | * Jesper D. Brouer: Proxy ARP PVLAN RFC 3069 support. |
74 | */ | 74 | */ |
75 | 75 | ||
76 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
77 | |||
76 | #include <linux/module.h> | 78 | #include <linux/module.h> |
77 | #include <linux/types.h> | 79 | #include <linux/types.h> |
78 | #include <linux/string.h> | 80 | #include <linux/string.h> |
@@ -89,7 +91,6 @@ | |||
89 | #include <linux/etherdevice.h> | 91 | #include <linux/etherdevice.h> |
90 | #include <linux/fddidevice.h> | 92 | #include <linux/fddidevice.h> |
91 | #include <linux/if_arp.h> | 93 | #include <linux/if_arp.h> |
92 | #include <linux/trdevice.h> | ||
93 | #include <linux/skbuff.h> | 94 | #include <linux/skbuff.h> |
94 | #include <linux/proc_fs.h> | 95 | #include <linux/proc_fs.h> |
95 | #include <linux/seq_file.h> | 96 | #include <linux/seq_file.h> |
@@ -193,9 +194,6 @@ int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir) | |||
193 | case ARPHRD_IEEE802: | 194 | case ARPHRD_IEEE802: |
194 | ip_eth_mc_map(addr, haddr); | 195 | ip_eth_mc_map(addr, haddr); |
195 | return 0; | 196 | return 0; |
196 | case ARPHRD_IEEE802_TR: | ||
197 | ip_tr_mc_map(addr, haddr); | ||
198 | return 0; | ||
199 | case ARPHRD_INFINIBAND: | 197 | case ARPHRD_INFINIBAND: |
200 | ip_ib_mc_map(addr, dev->broadcast, haddr); | 198 | ip_ib_mc_map(addr, dev->broadcast, haddr); |
201 | return 0; | 199 | return 0; |
@@ -364,8 +362,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) | |||
364 | probes -= neigh->parms->ucast_probes; | 362 | probes -= neigh->parms->ucast_probes; |
365 | if (probes < 0) { | 363 | if (probes < 0) { |
366 | if (!(neigh->nud_state & NUD_VALID)) | 364 | if (!(neigh->nud_state & NUD_VALID)) |
367 | printk(KERN_DEBUG | 365 | pr_debug("trying to ucast probe in NUD_INVALID\n"); |
368 | "trying to ucast probe in NUD_INVALID\n"); | ||
369 | dst_ha = neigh->ha; | 366 | dst_ha = neigh->ha; |
370 | read_lock_bh(&neigh->lock); | 367 | read_lock_bh(&neigh->lock); |
371 | } else { | 368 | } else { |
@@ -452,7 +449,7 @@ static int arp_set_predefined(int addr_hint, unsigned char *haddr, | |||
452 | { | 449 | { |
453 | switch (addr_hint) { | 450 | switch (addr_hint) { |
454 | case RTN_LOCAL: | 451 | case RTN_LOCAL: |
455 | printk(KERN_DEBUG "ARP: arp called for own IP address\n"); | 452 | pr_debug("arp called for own IP address\n"); |
456 | memcpy(haddr, dev->dev_addr, dev->addr_len); | 453 | memcpy(haddr, dev->dev_addr, dev->addr_len); |
457 | return 1; | 454 | return 1; |
458 | case RTN_MULTICAST: | 455 | case RTN_MULTICAST: |
@@ -473,7 +470,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb) | |||
473 | struct neighbour *n; | 470 | struct neighbour *n; |
474 | 471 | ||
475 | if (!skb_dst(skb)) { | 472 | if (!skb_dst(skb)) { |
476 | printk(KERN_DEBUG "arp_find is called with dst==NULL\n"); | 473 | pr_debug("arp_find is called with dst==NULL\n"); |
477 | kfree_skb(skb); | 474 | kfree_skb(skb); |
478 | return 1; | 475 | return 1; |
479 | } | 476 | } |
@@ -648,12 +645,6 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, | |||
648 | arp->ar_pro = htons(ETH_P_IP); | 645 | arp->ar_pro = htons(ETH_P_IP); |
649 | break; | 646 | break; |
650 | #endif | 647 | #endif |
651 | #if IS_ENABLED(CONFIG_TR) | ||
652 | case ARPHRD_IEEE802_TR: | ||
653 | arp->ar_hrd = htons(ARPHRD_IEEE802); | ||
654 | arp->ar_pro = htons(ETH_P_IP); | ||
655 | break; | ||
656 | #endif | ||
657 | } | 648 | } |
658 | 649 | ||
659 | arp->ar_hln = dev->addr_len; | 650 | arp->ar_hln = dev->addr_len; |
@@ -751,11 +742,10 @@ static int arp_process(struct sk_buff *skb) | |||
751 | goto out; | 742 | goto out; |
752 | break; | 743 | break; |
753 | case ARPHRD_ETHER: | 744 | case ARPHRD_ETHER: |
754 | case ARPHRD_IEEE802_TR: | ||
755 | case ARPHRD_FDDI: | 745 | case ARPHRD_FDDI: |
756 | case ARPHRD_IEEE802: | 746 | case ARPHRD_IEEE802: |
757 | /* | 747 | /* |
758 | * ETHERNET, Token Ring and Fibre Channel (which are IEEE 802 | 748 | * ETHERNET, and Fibre Channel (which are IEEE 802 |
759 | * devices, according to RFC 2625) devices will accept ARP | 749 | * devices, according to RFC 2625) devices will accept ARP |
760 | * hardware types of either 1 (Ethernet) or 6 (IEEE 802.2). | 750 | * hardware types of either 1 (Ethernet) or 6 (IEEE 802.2). |
761 | * This is the case also of FDDI, where the RFC 1390 says that | 751 | * This is the case also of FDDI, where the RFC 1390 says that |
@@ -1059,7 +1049,7 @@ static int arp_req_set(struct net *net, struct arpreq *r, | |||
1059 | neigh = __neigh_lookup_errno(&arp_tbl, &ip, dev); | 1049 | neigh = __neigh_lookup_errno(&arp_tbl, &ip, dev); |
1060 | err = PTR_ERR(neigh); | 1050 | err = PTR_ERR(neigh); |
1061 | if (!IS_ERR(neigh)) { | 1051 | if (!IS_ERR(neigh)) { |
1062 | unsigned state = NUD_STALE; | 1052 | unsigned int state = NUD_STALE; |
1063 | if (r->arp_flags & ATF_PERM) | 1053 | if (r->arp_flags & ATF_PERM) |
1064 | state = NUD_PERMANENT; | 1054 | state = NUD_PERMANENT; |
1065 | err = neigh_update(neigh, (r->arp_flags & ATF_COM) ? | 1055 | err = neigh_update(neigh, (r->arp_flags & ATF_COM) ? |
@@ -1071,7 +1061,7 @@ static int arp_req_set(struct net *net, struct arpreq *r, | |||
1071 | return err; | 1061 | return err; |
1072 | } | 1062 | } |
1073 | 1063 | ||
1074 | static unsigned arp_state_to_flags(struct neighbour *neigh) | 1064 | static unsigned int arp_state_to_flags(struct neighbour *neigh) |
1075 | { | 1065 | { |
1076 | if (neigh->nud_state&NUD_PERMANENT) | 1066 | if (neigh->nud_state&NUD_PERMANENT) |
1077 | return ATF_PERM | ATF_COM; | 1067 | return ATF_PERM | ATF_COM; |
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 6e447ff94dfa..10e15a144e95 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c | |||
@@ -217,8 +217,7 @@ void in_dev_finish_destroy(struct in_device *idev) | |||
217 | WARN_ON(idev->ifa_list); | 217 | WARN_ON(idev->ifa_list); |
218 | WARN_ON(idev->mc_list); | 218 | WARN_ON(idev->mc_list); |
219 | #ifdef NET_REFCNT_DEBUG | 219 | #ifdef NET_REFCNT_DEBUG |
220 | printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n", | 220 | pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL"); |
221 | idev, dev ? dev->name : "NIL"); | ||
222 | #endif | 221 | #endif |
223 | dev_put(dev); | 222 | dev_put(dev); |
224 | if (!idev->dead) | 223 | if (!idev->dead) |
@@ -1125,7 +1124,7 @@ skip: | |||
1125 | } | 1124 | } |
1126 | } | 1125 | } |
1127 | 1126 | ||
1128 | static inline bool inetdev_valid_mtu(unsigned mtu) | 1127 | static inline bool inetdev_valid_mtu(unsigned int mtu) |
1129 | { | 1128 | { |
1130 | return mtu >= 68; | 1129 | return mtu >= 68; |
1131 | } | 1130 | } |
@@ -1174,7 +1173,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, | |||
1174 | 1173 | ||
1175 | switch (event) { | 1174 | switch (event) { |
1176 | case NETDEV_REGISTER: | 1175 | case NETDEV_REGISTER: |
1177 | printk(KERN_DEBUG "inetdev_event: bug\n"); | 1176 | pr_debug("%s: bug\n", __func__); |
1178 | RCU_INIT_POINTER(dev->ip_ptr, NULL); | 1177 | RCU_INIT_POINTER(dev->ip_ptr, NULL); |
1179 | break; | 1178 | break; |
1180 | case NETDEV_UP: | 1179 | case NETDEV_UP: |
@@ -1266,17 +1265,15 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, | |||
1266 | ifm->ifa_scope = ifa->ifa_scope; | 1265 | ifm->ifa_scope = ifa->ifa_scope; |
1267 | ifm->ifa_index = ifa->ifa_dev->dev->ifindex; | 1266 | ifm->ifa_index = ifa->ifa_dev->dev->ifindex; |
1268 | 1267 | ||
1269 | if (ifa->ifa_address) | 1268 | if ((ifa->ifa_address && |
1270 | NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address); | 1269 | nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) || |
1271 | 1270 | (ifa->ifa_local && | |
1272 | if (ifa->ifa_local) | 1271 | nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) || |
1273 | NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local); | 1272 | (ifa->ifa_broadcast && |
1274 | 1273 | nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) || | |
1275 | if (ifa->ifa_broadcast) | 1274 | (ifa->ifa_label[0] && |
1276 | NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast); | 1275 | nla_put_string(skb, IFA_LABEL, ifa->ifa_label))) |
1277 | 1276 | goto nla_put_failure; | |
1278 | if (ifa->ifa_label[0]) | ||
1279 | NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label); | ||
1280 | 1277 | ||
1281 | return nlmsg_end(skb, nlh); | 1278 | return nlmsg_end(skb, nlh); |
1282 | 1279 | ||
@@ -1587,7 +1584,6 @@ static int ipv4_doint_and_flush(ctl_table *ctl, int write, | |||
1587 | static struct devinet_sysctl_table { | 1584 | static struct devinet_sysctl_table { |
1588 | struct ctl_table_header *sysctl_header; | 1585 | struct ctl_table_header *sysctl_header; |
1589 | struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX]; | 1586 | struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX]; |
1590 | char *dev_name; | ||
1591 | } devinet_sysctl = { | 1587 | } devinet_sysctl = { |
1592 | .devinet_vars = { | 1588 | .devinet_vars = { |
1593 | DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding", | 1589 | DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding", |
@@ -1629,16 +1625,7 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name, | |||
1629 | { | 1625 | { |
1630 | int i; | 1626 | int i; |
1631 | struct devinet_sysctl_table *t; | 1627 | struct devinet_sysctl_table *t; |
1632 | 1628 | char path[sizeof("net/ipv4/conf/") + IFNAMSIZ]; | |
1633 | #define DEVINET_CTL_PATH_DEV 3 | ||
1634 | |||
1635 | struct ctl_path devinet_ctl_path[] = { | ||
1636 | { .procname = "net", }, | ||
1637 | { .procname = "ipv4", }, | ||
1638 | { .procname = "conf", }, | ||
1639 | { /* to be set */ }, | ||
1640 | { }, | ||
1641 | }; | ||
1642 | 1629 | ||
1643 | t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL); | 1630 | t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL); |
1644 | if (!t) | 1631 | if (!t) |
@@ -1650,27 +1637,15 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name, | |||
1650 | t->devinet_vars[i].extra2 = net; | 1637 | t->devinet_vars[i].extra2 = net; |
1651 | } | 1638 | } |
1652 | 1639 | ||
1653 | /* | 1640 | snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name); |
1654 | * Make a copy of dev_name, because '.procname' is regarded as const | ||
1655 | * by sysctl and we wouldn't want anyone to change it under our feet | ||
1656 | * (see SIOCSIFNAME). | ||
1657 | */ | ||
1658 | t->dev_name = kstrdup(dev_name, GFP_KERNEL); | ||
1659 | if (!t->dev_name) | ||
1660 | goto free; | ||
1661 | |||
1662 | devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name; | ||
1663 | 1641 | ||
1664 | t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path, | 1642 | t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars); |
1665 | t->devinet_vars); | ||
1666 | if (!t->sysctl_header) | 1643 | if (!t->sysctl_header) |
1667 | goto free_procname; | 1644 | goto free; |
1668 | 1645 | ||
1669 | p->sysctl = t; | 1646 | p->sysctl = t; |
1670 | return 0; | 1647 | return 0; |
1671 | 1648 | ||
1672 | free_procname: | ||
1673 | kfree(t->dev_name); | ||
1674 | free: | 1649 | free: |
1675 | kfree(t); | 1650 | kfree(t); |
1676 | out: | 1651 | out: |
@@ -1686,7 +1661,6 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf) | |||
1686 | 1661 | ||
1687 | cnf->sysctl = NULL; | 1662 | cnf->sysctl = NULL; |
1688 | unregister_net_sysctl_table(t->sysctl_header); | 1663 | unregister_net_sysctl_table(t->sysctl_header); |
1689 | kfree(t->dev_name); | ||
1690 | kfree(t); | 1664 | kfree(t); |
1691 | } | 1665 | } |
1692 | 1666 | ||
@@ -1716,12 +1690,6 @@ static struct ctl_table ctl_forward_entry[] = { | |||
1716 | }, | 1690 | }, |
1717 | { }, | 1691 | { }, |
1718 | }; | 1692 | }; |
1719 | |||
1720 | static __net_initdata struct ctl_path net_ipv4_path[] = { | ||
1721 | { .procname = "net", }, | ||
1722 | { .procname = "ipv4", }, | ||
1723 | { }, | ||
1724 | }; | ||
1725 | #endif | 1693 | #endif |
1726 | 1694 | ||
1727 | static __net_init int devinet_init_net(struct net *net) | 1695 | static __net_init int devinet_init_net(struct net *net) |
@@ -1767,7 +1735,7 @@ static __net_init int devinet_init_net(struct net *net) | |||
1767 | goto err_reg_dflt; | 1735 | goto err_reg_dflt; |
1768 | 1736 | ||
1769 | err = -ENOMEM; | 1737 | err = -ENOMEM; |
1770 | forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl); | 1738 | forw_hdr = register_net_sysctl(net, "net/ipv4", tbl); |
1771 | if (forw_hdr == NULL) | 1739 | if (forw_hdr == NULL) |
1772 | goto err_reg_ctl; | 1740 | goto err_reg_ctl; |
1773 | net->ipv4.forw_hdr = forw_hdr; | 1741 | net->ipv4.forw_hdr = forw_hdr; |
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index cbe3a68507cf..3854411fa37c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c | |||
@@ -136,13 +136,13 @@ static void fib_flush(struct net *net) | |||
136 | * Find address type as if only "dev" was present in the system. If | 136 | * Find address type as if only "dev" was present in the system. If |
137 | * on_dev is NULL then all interfaces are taken into consideration. | 137 | * on_dev is NULL then all interfaces are taken into consideration. |
138 | */ | 138 | */ |
139 | static inline unsigned __inet_dev_addr_type(struct net *net, | 139 | static inline unsigned int __inet_dev_addr_type(struct net *net, |
140 | const struct net_device *dev, | 140 | const struct net_device *dev, |
141 | __be32 addr) | 141 | __be32 addr) |
142 | { | 142 | { |
143 | struct flowi4 fl4 = { .daddr = addr }; | 143 | struct flowi4 fl4 = { .daddr = addr }; |
144 | struct fib_result res; | 144 | struct fib_result res; |
145 | unsigned ret = RTN_BROADCAST; | 145 | unsigned int ret = RTN_BROADCAST; |
146 | struct fib_table *local_table; | 146 | struct fib_table *local_table; |
147 | 147 | ||
148 | if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr)) | 148 | if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr)) |
@@ -740,7 +740,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) | |||
740 | #define BRD_OK 2 | 740 | #define BRD_OK 2 |
741 | #define BRD0_OK 4 | 741 | #define BRD0_OK 4 |
742 | #define BRD1_OK 8 | 742 | #define BRD1_OK 8 |
743 | unsigned ok = 0; | 743 | unsigned int ok = 0; |
744 | int subnet = 0; /* Primary network */ | 744 | int subnet = 0; /* Primary network */ |
745 | int gone = 1; /* Address is missing */ | 745 | int gone = 1; /* Address is missing */ |
746 | int same_prefsrc = 0; /* Another primary with same IP */ | 746 | int same_prefsrc = 0; /* Another primary with same IP */ |
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 799fc790b3cf..2d043f71ef70 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c | |||
@@ -221,15 +221,15 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, | |||
221 | frh->src_len = rule4->src_len; | 221 | frh->src_len = rule4->src_len; |
222 | frh->tos = rule4->tos; | 222 | frh->tos = rule4->tos; |
223 | 223 | ||
224 | if (rule4->dst_len) | 224 | if ((rule4->dst_len && |
225 | NLA_PUT_BE32(skb, FRA_DST, rule4->dst); | 225 | nla_put_be32(skb, FRA_DST, rule4->dst)) || |
226 | 226 | (rule4->src_len && | |
227 | if (rule4->src_len) | 227 | nla_put_be32(skb, FRA_SRC, rule4->src))) |
228 | NLA_PUT_BE32(skb, FRA_SRC, rule4->src); | 228 | goto nla_put_failure; |
229 | |||
230 | #ifdef CONFIG_IP_ROUTE_CLASSID | 229 | #ifdef CONFIG_IP_ROUTE_CLASSID |
231 | if (rule4->tclassid) | 230 | if (rule4->tclassid && |
232 | NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid); | 231 | nla_put_u32(skb, FRA_FLOW, rule4->tclassid)) |
232 | goto nla_put_failure; | ||
233 | #endif | 233 | #endif |
234 | return 0; | 234 | return 0; |
235 | 235 | ||
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 5063fa38ac7b..a8bdf7405433 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
@@ -931,33 +931,36 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | |||
931 | rtm->rtm_table = tb_id; | 931 | rtm->rtm_table = tb_id; |
932 | else | 932 | else |
933 | rtm->rtm_table = RT_TABLE_COMPAT; | 933 | rtm->rtm_table = RT_TABLE_COMPAT; |
934 | NLA_PUT_U32(skb, RTA_TABLE, tb_id); | 934 | if (nla_put_u32(skb, RTA_TABLE, tb_id)) |
935 | goto nla_put_failure; | ||
935 | rtm->rtm_type = type; | 936 | rtm->rtm_type = type; |
936 | rtm->rtm_flags = fi->fib_flags; | 937 | rtm->rtm_flags = fi->fib_flags; |
937 | rtm->rtm_scope = fi->fib_scope; | 938 | rtm->rtm_scope = fi->fib_scope; |
938 | rtm->rtm_protocol = fi->fib_protocol; | 939 | rtm->rtm_protocol = fi->fib_protocol; |
939 | 940 | ||
940 | if (rtm->rtm_dst_len) | 941 | if (rtm->rtm_dst_len && |
941 | NLA_PUT_BE32(skb, RTA_DST, dst); | 942 | nla_put_be32(skb, RTA_DST, dst)) |
942 | 943 | goto nla_put_failure; | |
943 | if (fi->fib_priority) | 944 | if (fi->fib_priority && |
944 | NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority); | 945 | nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority)) |
945 | 946 | goto nla_put_failure; | |
946 | if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) | 947 | if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) |
947 | goto nla_put_failure; | 948 | goto nla_put_failure; |
948 | 949 | ||
949 | if (fi->fib_prefsrc) | 950 | if (fi->fib_prefsrc && |
950 | NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc); | 951 | nla_put_be32(skb, RTA_PREFSRC, fi->fib_prefsrc)) |
951 | 952 | goto nla_put_failure; | |
952 | if (fi->fib_nhs == 1) { | 953 | if (fi->fib_nhs == 1) { |
953 | if (fi->fib_nh->nh_gw) | 954 | if (fi->fib_nh->nh_gw && |
954 | NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw); | 955 | nla_put_be32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw)) |
955 | 956 | goto nla_put_failure; | |
956 | if (fi->fib_nh->nh_oif) | 957 | if (fi->fib_nh->nh_oif && |
957 | NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); | 958 | nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif)) |
959 | goto nla_put_failure; | ||
958 | #ifdef CONFIG_IP_ROUTE_CLASSID | 960 | #ifdef CONFIG_IP_ROUTE_CLASSID |
959 | if (fi->fib_nh[0].nh_tclassid) | 961 | if (fi->fib_nh[0].nh_tclassid && |
960 | NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); | 962 | nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid)) |
963 | goto nla_put_failure; | ||
961 | #endif | 964 | #endif |
962 | } | 965 | } |
963 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 966 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
@@ -978,11 +981,13 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | |||
978 | rtnh->rtnh_hops = nh->nh_weight - 1; | 981 | rtnh->rtnh_hops = nh->nh_weight - 1; |
979 | rtnh->rtnh_ifindex = nh->nh_oif; | 982 | rtnh->rtnh_ifindex = nh->nh_oif; |
980 | 983 | ||
981 | if (nh->nh_gw) | 984 | if (nh->nh_gw && |
982 | NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); | 985 | nla_put_be32(skb, RTA_GATEWAY, nh->nh_gw)) |
986 | goto nla_put_failure; | ||
983 | #ifdef CONFIG_IP_ROUTE_CLASSID | 987 | #ifdef CONFIG_IP_ROUTE_CLASSID |
984 | if (nh->nh_tclassid) | 988 | if (nh->nh_tclassid && |
985 | NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); | 989 | nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) |
990 | goto nla_put_failure; | ||
986 | #endif | 991 | #endif |
987 | /* length of rtnetlink header + attributes */ | 992 | /* length of rtnetlink header + attributes */ |
988 | rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; | 993 | rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; |
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index bce36f1a37b4..30b88d7b4bd6 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -1370,6 +1370,8 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, | |||
1370 | 1370 | ||
1371 | if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) | 1371 | if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) |
1372 | continue; | 1372 | continue; |
1373 | if (fi->fib_dead) | ||
1374 | continue; | ||
1373 | if (fa->fa_info->fib_scope < flp->flowi4_scope) | 1375 | if (fa->fa_info->fib_scope < flp->flowi4_scope) |
1374 | continue; | 1376 | continue; |
1375 | fib_alias_accessed(fa); | 1377 | fib_alias_accessed(fa); |
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 2cb2bf845641..c75efbdc71cb 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c | |||
@@ -713,11 +713,10 @@ static void icmp_unreach(struct sk_buff *skb) | |||
713 | 713 | ||
714 | if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses && | 714 | if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses && |
715 | inet_addr_type(net, iph->daddr) == RTN_BROADCAST) { | 715 | inet_addr_type(net, iph->daddr) == RTN_BROADCAST) { |
716 | if (net_ratelimit()) | 716 | net_warn_ratelimited("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n", |
717 | pr_warn("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n", | 717 | &ip_hdr(skb)->saddr, |
718 | &ip_hdr(skb)->saddr, | 718 | icmph->type, icmph->code, |
719 | icmph->type, icmph->code, | 719 | &iph->daddr, skb->dev->name); |
720 | &iph->daddr, skb->dev->name); | ||
721 | goto out; | 720 | goto out; |
722 | } | 721 | } |
723 | 722 | ||
@@ -906,8 +905,7 @@ out_err: | |||
906 | static void icmp_address(struct sk_buff *skb) | 905 | static void icmp_address(struct sk_buff *skb) |
907 | { | 906 | { |
908 | #if 0 | 907 | #if 0 |
909 | if (net_ratelimit()) | 908 | net_dbg_ratelimited("a guy asks for address mask. Who is it?\n"); |
910 | printk(KERN_DEBUG "a guy asks for address mask. Who is it?\n"); | ||
911 | #endif | 909 | #endif |
912 | } | 910 | } |
913 | 911 | ||
@@ -943,10 +941,10 @@ static void icmp_address_reply(struct sk_buff *skb) | |||
943 | inet_ifa_match(ip_hdr(skb)->saddr, ifa)) | 941 | inet_ifa_match(ip_hdr(skb)->saddr, ifa)) |
944 | break; | 942 | break; |
945 | } | 943 | } |
946 | if (!ifa && net_ratelimit()) { | 944 | if (!ifa) |
947 | pr_info("Wrong address mask %pI4 from %s/%pI4\n", | 945 | net_info_ratelimited("Wrong address mask %pI4 from %s/%pI4\n", |
948 | mp, dev->name, &ip_hdr(skb)->saddr); | 946 | mp, |
949 | } | 947 | dev->name, &ip_hdr(skb)->saddr); |
950 | } | 948 | } |
951 | } | 949 | } |
952 | 950 | ||
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 5dfecfd7d5e9..6699f23e6f55 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c | |||
@@ -344,10 +344,10 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) | |||
344 | pip->protocol = IPPROTO_IGMP; | 344 | pip->protocol = IPPROTO_IGMP; |
345 | pip->tot_len = 0; /* filled in later */ | 345 | pip->tot_len = 0; /* filled in later */ |
346 | ip_select_ident(pip, &rt->dst, NULL); | 346 | ip_select_ident(pip, &rt->dst, NULL); |
347 | ((u8*)&pip[1])[0] = IPOPT_RA; | 347 | ((u8 *)&pip[1])[0] = IPOPT_RA; |
348 | ((u8*)&pip[1])[1] = 4; | 348 | ((u8 *)&pip[1])[1] = 4; |
349 | ((u8*)&pip[1])[2] = 0; | 349 | ((u8 *)&pip[1])[2] = 0; |
350 | ((u8*)&pip[1])[3] = 0; | 350 | ((u8 *)&pip[1])[3] = 0; |
351 | 351 | ||
352 | skb->transport_header = skb->network_header + sizeof(struct iphdr) + 4; | 352 | skb->transport_header = skb->network_header + sizeof(struct iphdr) + 4; |
353 | skb_put(skb, sizeof(*pig)); | 353 | skb_put(skb, sizeof(*pig)); |
@@ -688,10 +688,10 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, | |||
688 | iph->saddr = fl4.saddr; | 688 | iph->saddr = fl4.saddr; |
689 | iph->protocol = IPPROTO_IGMP; | 689 | iph->protocol = IPPROTO_IGMP; |
690 | ip_select_ident(iph, &rt->dst, NULL); | 690 | ip_select_ident(iph, &rt->dst, NULL); |
691 | ((u8*)&iph[1])[0] = IPOPT_RA; | 691 | ((u8 *)&iph[1])[0] = IPOPT_RA; |
692 | ((u8*)&iph[1])[1] = 4; | 692 | ((u8 *)&iph[1])[1] = 4; |
693 | ((u8*)&iph[1])[2] = 0; | 693 | ((u8 *)&iph[1])[2] = 0; |
694 | ((u8*)&iph[1])[3] = 0; | 694 | ((u8 *)&iph[1])[3] = 0; |
695 | 695 | ||
696 | ih = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); | 696 | ih = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); |
697 | ih->type = type; | 697 | ih->type = type; |
@@ -774,7 +774,7 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) | |||
774 | if (psf->sf_count[MCAST_INCLUDE] || | 774 | if (psf->sf_count[MCAST_INCLUDE] || |
775 | pmc->sfcount[MCAST_EXCLUDE] != | 775 | pmc->sfcount[MCAST_EXCLUDE] != |
776 | psf->sf_count[MCAST_EXCLUDE]) | 776 | psf->sf_count[MCAST_EXCLUDE]) |
777 | continue; | 777 | break; |
778 | if (srcs[i] == psf->sf_inaddr) { | 778 | if (srcs[i] == psf->sf_inaddr) { |
779 | scount++; | 779 | scount++; |
780 | break; | 780 | break; |
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 19d66cefd7d3..95e61596e605 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -42,7 +42,8 @@ EXPORT_SYMBOL(sysctl_local_reserved_ports); | |||
42 | 42 | ||
43 | void inet_get_local_port_range(int *low, int *high) | 43 | void inet_get_local_port_range(int *low, int *high) |
44 | { | 44 | { |
45 | unsigned seq; | 45 | unsigned int seq; |
46 | |||
46 | do { | 47 | do { |
47 | seq = read_seqbegin(&sysctl_local_ports.lock); | 48 | seq = read_seqbegin(&sysctl_local_ports.lock); |
48 | 49 | ||
@@ -53,7 +54,7 @@ void inet_get_local_port_range(int *low, int *high) | |||
53 | EXPORT_SYMBOL(inet_get_local_port_range); | 54 | EXPORT_SYMBOL(inet_get_local_port_range); |
54 | 55 | ||
55 | int inet_csk_bind_conflict(const struct sock *sk, | 56 | int inet_csk_bind_conflict(const struct sock *sk, |
56 | const struct inet_bind_bucket *tb) | 57 | const struct inet_bind_bucket *tb, bool relax) |
57 | { | 58 | { |
58 | struct sock *sk2; | 59 | struct sock *sk2; |
59 | struct hlist_node *node; | 60 | struct hlist_node *node; |
@@ -79,6 +80,14 @@ int inet_csk_bind_conflict(const struct sock *sk, | |||
79 | sk2_rcv_saddr == sk_rcv_saddr(sk)) | 80 | sk2_rcv_saddr == sk_rcv_saddr(sk)) |
80 | break; | 81 | break; |
81 | } | 82 | } |
83 | if (!relax && reuse && sk2->sk_reuse && | ||
84 | sk2->sk_state != TCP_LISTEN) { | ||
85 | const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); | ||
86 | |||
87 | if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || | ||
88 | sk2_rcv_saddr == sk_rcv_saddr(sk)) | ||
89 | break; | ||
90 | } | ||
82 | } | 91 | } |
83 | } | 92 | } |
84 | return node != NULL; | 93 | return node != NULL; |
@@ -122,12 +131,13 @@ again: | |||
122 | (tb->num_owners < smallest_size || smallest_size == -1)) { | 131 | (tb->num_owners < smallest_size || smallest_size == -1)) { |
123 | smallest_size = tb->num_owners; | 132 | smallest_size = tb->num_owners; |
124 | smallest_rover = rover; | 133 | smallest_rover = rover; |
125 | if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) { | 134 | if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 && |
135 | !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) { | ||
126 | snum = smallest_rover; | 136 | snum = smallest_rover; |
127 | goto tb_found; | 137 | goto tb_found; |
128 | } | 138 | } |
129 | } | 139 | } |
130 | if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { | 140 | if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) { |
131 | snum = rover; | 141 | snum = rover; |
132 | goto tb_found; | 142 | goto tb_found; |
133 | } | 143 | } |
@@ -172,18 +182,22 @@ have_snum: | |||
172 | goto tb_not_found; | 182 | goto tb_not_found; |
173 | tb_found: | 183 | tb_found: |
174 | if (!hlist_empty(&tb->owners)) { | 184 | if (!hlist_empty(&tb->owners)) { |
185 | if (sk->sk_reuse == SK_FORCE_REUSE) | ||
186 | goto success; | ||
187 | |||
175 | if (tb->fastreuse > 0 && | 188 | if (tb->fastreuse > 0 && |
176 | sk->sk_reuse && sk->sk_state != TCP_LISTEN && | 189 | sk->sk_reuse && sk->sk_state != TCP_LISTEN && |
177 | smallest_size == -1) { | 190 | smallest_size == -1) { |
178 | goto success; | 191 | goto success; |
179 | } else { | 192 | } else { |
180 | ret = 1; | 193 | ret = 1; |
181 | if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { | 194 | if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) { |
182 | if (sk->sk_reuse && sk->sk_state != TCP_LISTEN && | 195 | if (sk->sk_reuse && sk->sk_state != TCP_LISTEN && |
183 | smallest_size != -1 && --attempts >= 0) { | 196 | smallest_size != -1 && --attempts >= 0) { |
184 | spin_unlock(&head->lock); | 197 | spin_unlock(&head->lock); |
185 | goto again; | 198 | goto again; |
186 | } | 199 | } |
200 | |||
187 | goto fail_unlock; | 201 | goto fail_unlock; |
188 | } | 202 | } |
189 | } | 203 | } |
@@ -514,7 +528,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, | |||
514 | 528 | ||
515 | /* Normally all the openreqs are young and become mature | 529 | /* Normally all the openreqs are young and become mature |
516 | * (i.e. converted to established socket) for first timeout. | 530 | * (i.e. converted to established socket) for first timeout. |
517 | * If synack was not acknowledged for 3 seconds, it means | 531 | * If synack was not acknowledged for 1 second, it means |
518 | * one of the following things: synack was lost, ack was lost, | 532 | * one of the following things: synack was lost, ack was lost, |
519 | * rtt is high or nobody planned to ack (i.e. synflood). | 533 | * rtt is high or nobody planned to ack (i.e. synflood). |
520 | * When server is a bit loaded, queue is populated with old | 534 | * When server is a bit loaded, queue is populated with old |
@@ -555,8 +569,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, | |||
555 | syn_ack_recalc(req, thresh, max_retries, | 569 | syn_ack_recalc(req, thresh, max_retries, |
556 | queue->rskq_defer_accept, | 570 | queue->rskq_defer_accept, |
557 | &expire, &resend); | 571 | &expire, &resend); |
558 | if (req->rsk_ops->syn_ack_timeout) | 572 | req->rsk_ops->syn_ack_timeout(parent, req); |
559 | req->rsk_ops->syn_ack_timeout(parent, req); | ||
560 | if (!expire && | 573 | if (!expire && |
561 | (!resend || | 574 | (!resend || |
562 | !req->rsk_ops->rtx_syn_ack(parent, req, NULL) || | 575 | !req->rsk_ops->rtx_syn_ack(parent, req, NULL) || |
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 8d25a1c557eb..46d1e7199a8c 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c | |||
@@ -141,7 +141,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, | |||
141 | goto rtattr_failure; | 141 | goto rtattr_failure; |
142 | 142 | ||
143 | if (icsk == NULL) { | 143 | if (icsk == NULL) { |
144 | r->idiag_rqueue = r->idiag_wqueue = 0; | 144 | handler->idiag_get_info(sk, r, NULL); |
145 | goto out; | 145 | goto out; |
146 | } | 146 | } |
147 | 147 | ||
@@ -999,12 +999,12 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) | |||
999 | return inet_diag_get_exact(skb, h, (struct inet_diag_req_v2 *)NLMSG_DATA(h)); | 999 | return inet_diag_get_exact(skb, h, (struct inet_diag_req_v2 *)NLMSG_DATA(h)); |
1000 | } | 1000 | } |
1001 | 1001 | ||
1002 | static struct sock_diag_handler inet_diag_handler = { | 1002 | static const struct sock_diag_handler inet_diag_handler = { |
1003 | .family = AF_INET, | 1003 | .family = AF_INET, |
1004 | .dump = inet_diag_handler_dump, | 1004 | .dump = inet_diag_handler_dump, |
1005 | }; | 1005 | }; |
1006 | 1006 | ||
1007 | static struct sock_diag_handler inet6_diag_handler = { | 1007 | static const struct sock_diag_handler inet6_diag_handler = { |
1008 | .family = AF_INET6, | 1008 | .family = AF_INET6, |
1009 | .dump = inet_diag_handler_dump, | 1009 | .dump = inet_diag_handler_dump, |
1010 | }; | 1010 | }; |
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 984ec656b03b..7880af970208 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c | |||
@@ -217,7 +217,7 @@ begin: | |||
217 | } | 217 | } |
218 | EXPORT_SYMBOL_GPL(__inet_lookup_listener); | 218 | EXPORT_SYMBOL_GPL(__inet_lookup_listener); |
219 | 219 | ||
220 | struct sock * __inet_lookup_established(struct net *net, | 220 | struct sock *__inet_lookup_established(struct net *net, |
221 | struct inet_hashinfo *hashinfo, | 221 | struct inet_hashinfo *hashinfo, |
222 | const __be32 saddr, const __be16 sport, | 222 | const __be32 saddr, const __be16 sport, |
223 | const __be32 daddr, const u16 hnum, | 223 | const __be32 daddr, const u16 hnum, |
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 89168c6351ff..2784db3155fb 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c | |||
@@ -89,8 +89,8 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, | |||
89 | 89 | ||
90 | #ifdef SOCK_REFCNT_DEBUG | 90 | #ifdef SOCK_REFCNT_DEBUG |
91 | if (atomic_read(&tw->tw_refcnt) != 1) { | 91 | if (atomic_read(&tw->tw_refcnt) != 1) { |
92 | printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n", | 92 | pr_debug("%s timewait_sock %p refcnt=%d\n", |
93 | tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); | 93 | tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); |
94 | } | 94 | } |
95 | #endif | 95 | #endif |
96 | while (refcnt) { | 96 | while (refcnt) { |
@@ -263,7 +263,7 @@ rescan: | |||
263 | void inet_twdr_hangman(unsigned long data) | 263 | void inet_twdr_hangman(unsigned long data) |
264 | { | 264 | { |
265 | struct inet_timewait_death_row *twdr; | 265 | struct inet_timewait_death_row *twdr; |
266 | int unsigned need_timer; | 266 | unsigned int need_timer; |
267 | 267 | ||
268 | twdr = (struct inet_timewait_death_row *)data; | 268 | twdr = (struct inet_timewait_death_row *)data; |
269 | spin_lock(&twdr->death_lock); | 269 | spin_lock(&twdr->death_lock); |
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 29a07b6c7168..e5c44fc586ab 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c | |||
@@ -41,7 +41,7 @@ | |||
41 | 41 | ||
42 | static int ip_forward_finish(struct sk_buff *skb) | 42 | static int ip_forward_finish(struct sk_buff *skb) |
43 | { | 43 | { |
44 | struct ip_options * opt = &(IPCB(skb)->opt); | 44 | struct ip_options *opt = &(IPCB(skb)->opt); |
45 | 45 | ||
46 | IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); | 46 | IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); |
47 | 47 | ||
@@ -55,7 +55,7 @@ int ip_forward(struct sk_buff *skb) | |||
55 | { | 55 | { |
56 | struct iphdr *iph; /* Our header */ | 56 | struct iphdr *iph; /* Our header */ |
57 | struct rtable *rt; /* Route we use */ | 57 | struct rtable *rt; /* Route we use */ |
58 | struct ip_options * opt = &(IPCB(skb)->opt); | 58 | struct ip_options *opt = &(IPCB(skb)->opt); |
59 | 59 | ||
60 | if (skb_warn_if_lro(skb)) | 60 | if (skb_warn_if_lro(skb)) |
61 | goto drop; | 61 | goto drop; |
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 3727e234c884..9dbd3dd6022d 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c | |||
@@ -148,17 +148,17 @@ static unsigned int ip4_hashfn(struct inet_frag_queue *q) | |||
148 | return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol); | 148 | return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol); |
149 | } | 149 | } |
150 | 150 | ||
151 | static int ip4_frag_match(struct inet_frag_queue *q, void *a) | 151 | static bool ip4_frag_match(struct inet_frag_queue *q, void *a) |
152 | { | 152 | { |
153 | struct ipq *qp; | 153 | struct ipq *qp; |
154 | struct ip4_create_arg *arg = a; | 154 | struct ip4_create_arg *arg = a; |
155 | 155 | ||
156 | qp = container_of(q, struct ipq, q); | 156 | qp = container_of(q, struct ipq, q); |
157 | return qp->id == arg->iph->id && | 157 | return qp->id == arg->iph->id && |
158 | qp->saddr == arg->iph->saddr && | 158 | qp->saddr == arg->iph->saddr && |
159 | qp->daddr == arg->iph->daddr && | 159 | qp->daddr == arg->iph->daddr && |
160 | qp->protocol == arg->iph->protocol && | 160 | qp->protocol == arg->iph->protocol && |
161 | qp->user == arg->user; | 161 | qp->user == arg->user; |
162 | } | 162 | } |
163 | 163 | ||
164 | /* Memory Tracking Functions. */ | 164 | /* Memory Tracking Functions. */ |
@@ -545,6 +545,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, | |||
545 | int len; | 545 | int len; |
546 | int ihlen; | 546 | int ihlen; |
547 | int err; | 547 | int err; |
548 | int sum_truesize; | ||
548 | u8 ecn; | 549 | u8 ecn; |
549 | 550 | ||
550 | ipq_kill(qp); | 551 | ipq_kill(qp); |
@@ -569,7 +570,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, | |||
569 | skb_morph(head, qp->q.fragments); | 570 | skb_morph(head, qp->q.fragments); |
570 | head->next = qp->q.fragments->next; | 571 | head->next = qp->q.fragments->next; |
571 | 572 | ||
572 | kfree_skb(qp->q.fragments); | 573 | consume_skb(qp->q.fragments); |
573 | qp->q.fragments = head; | 574 | qp->q.fragments = head; |
574 | } | 575 | } |
575 | 576 | ||
@@ -611,19 +612,32 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, | |||
611 | atomic_add(clone->truesize, &qp->q.net->mem); | 612 | atomic_add(clone->truesize, &qp->q.net->mem); |
612 | } | 613 | } |
613 | 614 | ||
614 | skb_shinfo(head)->frag_list = head->next; | ||
615 | skb_push(head, head->data - skb_network_header(head)); | 615 | skb_push(head, head->data - skb_network_header(head)); |
616 | 616 | ||
617 | for (fp=head->next; fp; fp = fp->next) { | 617 | sum_truesize = head->truesize; |
618 | head->data_len += fp->len; | 618 | for (fp = head->next; fp;) { |
619 | head->len += fp->len; | 619 | bool headstolen; |
620 | int delta; | ||
621 | struct sk_buff *next = fp->next; | ||
622 | |||
623 | sum_truesize += fp->truesize; | ||
620 | if (head->ip_summed != fp->ip_summed) | 624 | if (head->ip_summed != fp->ip_summed) |
621 | head->ip_summed = CHECKSUM_NONE; | 625 | head->ip_summed = CHECKSUM_NONE; |
622 | else if (head->ip_summed == CHECKSUM_COMPLETE) | 626 | else if (head->ip_summed == CHECKSUM_COMPLETE) |
623 | head->csum = csum_add(head->csum, fp->csum); | 627 | head->csum = csum_add(head->csum, fp->csum); |
624 | head->truesize += fp->truesize; | 628 | |
629 | if (skb_try_coalesce(head, fp, &headstolen, &delta)) { | ||
630 | kfree_skb_partial(fp, headstolen); | ||
631 | } else { | ||
632 | if (!skb_shinfo(head)->frag_list) | ||
633 | skb_shinfo(head)->frag_list = fp; | ||
634 | head->data_len += fp->len; | ||
635 | head->len += fp->len; | ||
636 | head->truesize += fp->truesize; | ||
637 | } | ||
638 | fp = next; | ||
625 | } | 639 | } |
626 | atomic_sub(head->truesize, &qp->q.net->mem); | 640 | atomic_sub(sum_truesize, &qp->q.net->mem); |
627 | 641 | ||
628 | head->next = NULL; | 642 | head->next = NULL; |
629 | head->dev = dev; | 643 | head->dev = dev; |
@@ -644,8 +658,7 @@ out_nomem: | |||
644 | err = -ENOMEM; | 658 | err = -ENOMEM; |
645 | goto out_fail; | 659 | goto out_fail; |
646 | out_oversize: | 660 | out_oversize: |
647 | if (net_ratelimit()) | 661 | net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->saddr); |
648 | pr_info("Oversized IP packet from %pI4\n", &qp->saddr); | ||
649 | out_fail: | 662 | out_fail: |
650 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); | 663 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); |
651 | return err; | 664 | return err; |
@@ -782,7 +795,7 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net) | |||
782 | table[2].data = &net->ipv4.frags.timeout; | 795 | table[2].data = &net->ipv4.frags.timeout; |
783 | } | 796 | } |
784 | 797 | ||
785 | hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table); | 798 | hdr = register_net_sysctl(net, "net/ipv4", table); |
786 | if (hdr == NULL) | 799 | if (hdr == NULL) |
787 | goto err_reg; | 800 | goto err_reg; |
788 | 801 | ||
@@ -807,7 +820,7 @@ static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net) | |||
807 | 820 | ||
808 | static void ip4_frags_ctl_register(void) | 821 | static void ip4_frags_ctl_register(void) |
809 | { | 822 | { |
810 | register_net_sysctl_rotable(net_ipv4_ctl_path, ip4_frags_ctl_table); | 823 | register_net_sysctl(&init_net, "net/ipv4", ip4_frags_ctl_table); |
811 | } | 824 | } |
812 | #else | 825 | #else |
813 | static inline int ip4_frags_ns_ctl_register(struct net *net) | 826 | static inline int ip4_frags_ns_ctl_register(struct net *net) |
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index b57532d4742c..f49047b79609 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c | |||
@@ -169,37 +169,56 @@ struct ipgre_net { | |||
169 | 169 | ||
170 | /* often modified stats are per cpu, other are shared (netdev->stats) */ | 170 | /* often modified stats are per cpu, other are shared (netdev->stats) */ |
171 | struct pcpu_tstats { | 171 | struct pcpu_tstats { |
172 | unsigned long rx_packets; | 172 | u64 rx_packets; |
173 | unsigned long rx_bytes; | 173 | u64 rx_bytes; |
174 | unsigned long tx_packets; | 174 | u64 tx_packets; |
175 | unsigned long tx_bytes; | 175 | u64 tx_bytes; |
176 | } __attribute__((aligned(4*sizeof(unsigned long)))); | 176 | struct u64_stats_sync syncp; |
177 | }; | ||
177 | 178 | ||
178 | static struct net_device_stats *ipgre_get_stats(struct net_device *dev) | 179 | static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev, |
180 | struct rtnl_link_stats64 *tot) | ||
179 | { | 181 | { |
180 | struct pcpu_tstats sum = { 0 }; | ||
181 | int i; | 182 | int i; |
182 | 183 | ||
183 | for_each_possible_cpu(i) { | 184 | for_each_possible_cpu(i) { |
184 | const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); | 185 | const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); |
185 | 186 | u64 rx_packets, rx_bytes, tx_packets, tx_bytes; | |
186 | sum.rx_packets += tstats->rx_packets; | 187 | unsigned int start; |
187 | sum.rx_bytes += tstats->rx_bytes; | 188 | |
188 | sum.tx_packets += tstats->tx_packets; | 189 | do { |
189 | sum.tx_bytes += tstats->tx_bytes; | 190 | start = u64_stats_fetch_begin_bh(&tstats->syncp); |
191 | rx_packets = tstats->rx_packets; | ||
192 | tx_packets = tstats->tx_packets; | ||
193 | rx_bytes = tstats->rx_bytes; | ||
194 | tx_bytes = tstats->tx_bytes; | ||
195 | } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); | ||
196 | |||
197 | tot->rx_packets += rx_packets; | ||
198 | tot->tx_packets += tx_packets; | ||
199 | tot->rx_bytes += rx_bytes; | ||
200 | tot->tx_bytes += tx_bytes; | ||
190 | } | 201 | } |
191 | dev->stats.rx_packets = sum.rx_packets; | 202 | |
192 | dev->stats.rx_bytes = sum.rx_bytes; | 203 | tot->multicast = dev->stats.multicast; |
193 | dev->stats.tx_packets = sum.tx_packets; | 204 | tot->rx_crc_errors = dev->stats.rx_crc_errors; |
194 | dev->stats.tx_bytes = sum.tx_bytes; | 205 | tot->rx_fifo_errors = dev->stats.rx_fifo_errors; |
195 | return &dev->stats; | 206 | tot->rx_length_errors = dev->stats.rx_length_errors; |
207 | tot->rx_errors = dev->stats.rx_errors; | ||
208 | tot->tx_fifo_errors = dev->stats.tx_fifo_errors; | ||
209 | tot->tx_carrier_errors = dev->stats.tx_carrier_errors; | ||
210 | tot->tx_dropped = dev->stats.tx_dropped; | ||
211 | tot->tx_aborted_errors = dev->stats.tx_aborted_errors; | ||
212 | tot->tx_errors = dev->stats.tx_errors; | ||
213 | |||
214 | return tot; | ||
196 | } | 215 | } |
197 | 216 | ||
198 | /* Given src, dst and key, find appropriate for input tunnel. */ | 217 | /* Given src, dst and key, find appropriate for input tunnel. */ |
199 | 218 | ||
200 | static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, | 219 | static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, |
201 | __be32 remote, __be32 local, | 220 | __be32 remote, __be32 local, |
202 | __be32 key, __be16 gre_proto) | 221 | __be32 key, __be16 gre_proto) |
203 | { | 222 | { |
204 | struct net *net = dev_net(dev); | 223 | struct net *net = dev_net(dev); |
205 | int link = dev->ifindex; | 224 | int link = dev->ifindex; |
@@ -464,7 +483,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) | |||
464 | */ | 483 | */ |
465 | 484 | ||
466 | const struct iphdr *iph = (const struct iphdr *)skb->data; | 485 | const struct iphdr *iph = (const struct iphdr *)skb->data; |
467 | __be16 *p = (__be16*)(skb->data+(iph->ihl<<2)); | 486 | __be16 *p = (__be16 *)(skb->data+(iph->ihl<<2)); |
468 | int grehlen = (iph->ihl<<2) + 4; | 487 | int grehlen = (iph->ihl<<2) + 4; |
469 | const int type = icmp_hdr(skb)->type; | 488 | const int type = icmp_hdr(skb)->type; |
470 | const int code = icmp_hdr(skb)->code; | 489 | const int code = icmp_hdr(skb)->code; |
@@ -574,7 +593,7 @@ static int ipgre_rcv(struct sk_buff *skb) | |||
574 | 593 | ||
575 | iph = ip_hdr(skb); | 594 | iph = ip_hdr(skb); |
576 | h = skb->data; | 595 | h = skb->data; |
577 | flags = *(__be16*)h; | 596 | flags = *(__be16 *)h; |
578 | 597 | ||
579 | if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { | 598 | if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { |
580 | /* - Version must be 0. | 599 | /* - Version must be 0. |
@@ -598,11 +617,11 @@ static int ipgre_rcv(struct sk_buff *skb) | |||
598 | offset += 4; | 617 | offset += 4; |
599 | } | 618 | } |
600 | if (flags&GRE_KEY) { | 619 | if (flags&GRE_KEY) { |
601 | key = *(__be32*)(h + offset); | 620 | key = *(__be32 *)(h + offset); |
602 | offset += 4; | 621 | offset += 4; |
603 | } | 622 | } |
604 | if (flags&GRE_SEQ) { | 623 | if (flags&GRE_SEQ) { |
605 | seqno = ntohl(*(__be32*)(h + offset)); | 624 | seqno = ntohl(*(__be32 *)(h + offset)); |
606 | offset += 4; | 625 | offset += 4; |
607 | } | 626 | } |
608 | } | 627 | } |
@@ -672,8 +691,10 @@ static int ipgre_rcv(struct sk_buff *skb) | |||
672 | } | 691 | } |
673 | 692 | ||
674 | tstats = this_cpu_ptr(tunnel->dev->tstats); | 693 | tstats = this_cpu_ptr(tunnel->dev->tstats); |
694 | u64_stats_update_begin(&tstats->syncp); | ||
675 | tstats->rx_packets++; | 695 | tstats->rx_packets++; |
676 | tstats->rx_bytes += skb->len; | 696 | tstats->rx_bytes += skb->len; |
697 | u64_stats_update_end(&tstats->syncp); | ||
677 | 698 | ||
678 | __skb_tunnel_rx(skb, tunnel->dev); | 699 | __skb_tunnel_rx(skb, tunnel->dev); |
679 | 700 | ||
@@ -900,7 +921,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
900 | htons(ETH_P_TEB) : skb->protocol; | 921 | htons(ETH_P_TEB) : skb->protocol; |
901 | 922 | ||
902 | if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { | 923 | if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { |
903 | __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4); | 924 | __be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4); |
904 | 925 | ||
905 | if (tunnel->parms.o_flags&GRE_SEQ) { | 926 | if (tunnel->parms.o_flags&GRE_SEQ) { |
906 | ++tunnel->o_seqno; | 927 | ++tunnel->o_seqno; |
@@ -913,7 +934,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
913 | } | 934 | } |
914 | if (tunnel->parms.o_flags&GRE_CSUM) { | 935 | if (tunnel->parms.o_flags&GRE_CSUM) { |
915 | *ptr = 0; | 936 | *ptr = 0; |
916 | *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr)); | 937 | *(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr)); |
917 | } | 938 | } |
918 | } | 939 | } |
919 | 940 | ||
@@ -1169,7 +1190,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, | |||
1169 | { | 1190 | { |
1170 | struct ip_tunnel *t = netdev_priv(dev); | 1191 | struct ip_tunnel *t = netdev_priv(dev); |
1171 | struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); | 1192 | struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); |
1172 | __be16 *p = (__be16*)(iph+1); | 1193 | __be16 *p = (__be16 *)(iph+1); |
1173 | 1194 | ||
1174 | memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); | 1195 | memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); |
1175 | p[0] = t->parms.o_flags; | 1196 | p[0] = t->parms.o_flags; |
@@ -1253,7 +1274,7 @@ static const struct net_device_ops ipgre_netdev_ops = { | |||
1253 | .ndo_start_xmit = ipgre_tunnel_xmit, | 1274 | .ndo_start_xmit = ipgre_tunnel_xmit, |
1254 | .ndo_do_ioctl = ipgre_tunnel_ioctl, | 1275 | .ndo_do_ioctl = ipgre_tunnel_ioctl, |
1255 | .ndo_change_mtu = ipgre_tunnel_change_mtu, | 1276 | .ndo_change_mtu = ipgre_tunnel_change_mtu, |
1256 | .ndo_get_stats = ipgre_get_stats, | 1277 | .ndo_get_stats64 = ipgre_get_stats64, |
1257 | }; | 1278 | }; |
1258 | 1279 | ||
1259 | static void ipgre_dev_free(struct net_device *dev) | 1280 | static void ipgre_dev_free(struct net_device *dev) |
@@ -1507,7 +1528,7 @@ static const struct net_device_ops ipgre_tap_netdev_ops = { | |||
1507 | .ndo_set_mac_address = eth_mac_addr, | 1528 | .ndo_set_mac_address = eth_mac_addr, |
1508 | .ndo_validate_addr = eth_validate_addr, | 1529 | .ndo_validate_addr = eth_validate_addr, |
1509 | .ndo_change_mtu = ipgre_tunnel_change_mtu, | 1530 | .ndo_change_mtu = ipgre_tunnel_change_mtu, |
1510 | .ndo_get_stats = ipgre_get_stats, | 1531 | .ndo_get_stats64 = ipgre_get_stats64, |
1511 | }; | 1532 | }; |
1512 | 1533 | ||
1513 | static void ipgre_tap_setup(struct net_device *dev) | 1534 | static void ipgre_tap_setup(struct net_device *dev) |
@@ -1654,17 +1675,18 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) | |||
1654 | struct ip_tunnel *t = netdev_priv(dev); | 1675 | struct ip_tunnel *t = netdev_priv(dev); |
1655 | struct ip_tunnel_parm *p = &t->parms; | 1676 | struct ip_tunnel_parm *p = &t->parms; |
1656 | 1677 | ||
1657 | NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link); | 1678 | if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || |
1658 | NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags); | 1679 | nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) || |
1659 | NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags); | 1680 | nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) || |
1660 | NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key); | 1681 | nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || |
1661 | NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key); | 1682 | nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || |
1662 | NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr); | 1683 | nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) || |
1663 | NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr); | 1684 | nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) || |
1664 | NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl); | 1685 | nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) || |
1665 | NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos); | 1686 | nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) || |
1666 | NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF))); | 1687 | nla_put_u8(skb, IFLA_GRE_PMTUDISC, |
1667 | 1688 | !!(p->iph.frag_off & htons(IP_DF)))) | |
1689 | goto nla_put_failure; | ||
1668 | return 0; | 1690 | return 0; |
1669 | 1691 | ||
1670 | nla_put_failure: | 1692 | nla_put_failure: |
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 26eccc5bab1c..8590144ca330 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c | |||
@@ -210,9 +210,8 @@ static int ip_local_deliver_finish(struct sk_buff *skb) | |||
210 | int ret; | 210 | int ret; |
211 | 211 | ||
212 | if (!net_eq(net, &init_net) && !ipprot->netns_ok) { | 212 | if (!net_eq(net, &init_net) && !ipprot->netns_ok) { |
213 | if (net_ratelimit()) | 213 | net_info_ratelimited("%s: proto %d isn't netns-ready\n", |
214 | printk("%s: proto %d isn't netns-ready\n", | 214 | __func__, protocol); |
215 | __func__, protocol); | ||
216 | kfree_skb(skb); | 215 | kfree_skb(skb); |
217 | goto out; | 216 | goto out; |
218 | } | 217 | } |
@@ -298,10 +297,10 @@ static inline bool ip_rcv_options(struct sk_buff *skb) | |||
298 | 297 | ||
299 | if (in_dev) { | 298 | if (in_dev) { |
300 | if (!IN_DEV_SOURCE_ROUTE(in_dev)) { | 299 | if (!IN_DEV_SOURCE_ROUTE(in_dev)) { |
301 | if (IN_DEV_LOG_MARTIANS(in_dev) && | 300 | if (IN_DEV_LOG_MARTIANS(in_dev)) |
302 | net_ratelimit()) | 301 | net_info_ratelimited("source route option %pI4 -> %pI4\n", |
303 | pr_info("source route option %pI4 -> %pI4\n", | 302 | &iph->saddr, |
304 | &iph->saddr, &iph->daddr); | 303 | &iph->daddr); |
305 | goto drop; | 304 | goto drop; |
306 | } | 305 | } |
307 | } | 306 | } |
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index a0d0d9d9b870..708b99494e23 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c | |||
@@ -210,10 +210,10 @@ int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb) | |||
210 | * Simple and stupid 8), but the most efficient way. | 210 | * Simple and stupid 8), but the most efficient way. |
211 | */ | 211 | */ |
212 | 212 | ||
213 | void ip_options_fragment(struct sk_buff * skb) | 213 | void ip_options_fragment(struct sk_buff *skb) |
214 | { | 214 | { |
215 | unsigned char *optptr = skb_network_header(skb) + sizeof(struct iphdr); | 215 | unsigned char *optptr = skb_network_header(skb) + sizeof(struct iphdr); |
216 | struct ip_options * opt = &(IPCB(skb)->opt); | 216 | struct ip_options *opt = &(IPCB(skb)->opt); |
217 | int l = opt->optlen; | 217 | int l = opt->optlen; |
218 | int optlen; | 218 | int optlen; |
219 | 219 | ||
@@ -248,13 +248,13 @@ void ip_options_fragment(struct sk_buff * skb) | |||
248 | */ | 248 | */ |
249 | 249 | ||
250 | int ip_options_compile(struct net *net, | 250 | int ip_options_compile(struct net *net, |
251 | struct ip_options * opt, struct sk_buff * skb) | 251 | struct ip_options *opt, struct sk_buff *skb) |
252 | { | 252 | { |
253 | int l; | 253 | int l; |
254 | unsigned char * iph; | 254 | unsigned char *iph; |
255 | unsigned char * optptr; | 255 | unsigned char *optptr; |
256 | int optlen; | 256 | int optlen; |
257 | unsigned char * pp_ptr = NULL; | 257 | unsigned char *pp_ptr = NULL; |
258 | struct rtable *rt = NULL; | 258 | struct rtable *rt = NULL; |
259 | 259 | ||
260 | if (skb != NULL) { | 260 | if (skb != NULL) { |
@@ -413,7 +413,7 @@ int ip_options_compile(struct net *net, | |||
413 | opt->is_changed = 1; | 413 | opt->is_changed = 1; |
414 | } | 414 | } |
415 | } else { | 415 | } else { |
416 | unsigned overflow = optptr[3]>>4; | 416 | unsigned int overflow = optptr[3]>>4; |
417 | if (overflow == 15) { | 417 | if (overflow == 15) { |
418 | pp_ptr = optptr + 3; | 418 | pp_ptr = optptr + 3; |
419 | goto error; | 419 | goto error; |
@@ -473,20 +473,20 @@ EXPORT_SYMBOL(ip_options_compile); | |||
473 | * Undo all the changes done by ip_options_compile(). | 473 | * Undo all the changes done by ip_options_compile(). |
474 | */ | 474 | */ |
475 | 475 | ||
476 | void ip_options_undo(struct ip_options * opt) | 476 | void ip_options_undo(struct ip_options *opt) |
477 | { | 477 | { |
478 | if (opt->srr) { | 478 | if (opt->srr) { |
479 | unsigned char * optptr = opt->__data+opt->srr-sizeof(struct iphdr); | 479 | unsigned char *optptr = opt->__data+opt->srr-sizeof(struct iphdr); |
480 | memmove(optptr+7, optptr+3, optptr[1]-7); | 480 | memmove(optptr+7, optptr+3, optptr[1]-7); |
481 | memcpy(optptr+3, &opt->faddr, 4); | 481 | memcpy(optptr+3, &opt->faddr, 4); |
482 | } | 482 | } |
483 | if (opt->rr_needaddr) { | 483 | if (opt->rr_needaddr) { |
484 | unsigned char * optptr = opt->__data+opt->rr-sizeof(struct iphdr); | 484 | unsigned char *optptr = opt->__data+opt->rr-sizeof(struct iphdr); |
485 | optptr[2] -= 4; | 485 | optptr[2] -= 4; |
486 | memset(&optptr[optptr[2]-1], 0, 4); | 486 | memset(&optptr[optptr[2]-1], 0, 4); |
487 | } | 487 | } |
488 | if (opt->ts) { | 488 | if (opt->ts) { |
489 | unsigned char * optptr = opt->__data+opt->ts-sizeof(struct iphdr); | 489 | unsigned char *optptr = opt->__data+opt->ts-sizeof(struct iphdr); |
490 | if (opt->ts_needtime) { | 490 | if (opt->ts_needtime) { |
491 | optptr[2] -= 4; | 491 | optptr[2] -= 4; |
492 | memset(&optptr[optptr[2]-1], 0, 4); | 492 | memset(&optptr[optptr[2]-1], 0, 4); |
@@ -549,8 +549,8 @@ int ip_options_get(struct net *net, struct ip_options_rcu **optp, | |||
549 | 549 | ||
550 | void ip_forward_options(struct sk_buff *skb) | 550 | void ip_forward_options(struct sk_buff *skb) |
551 | { | 551 | { |
552 | struct ip_options * opt = &(IPCB(skb)->opt); | 552 | struct ip_options *opt = &(IPCB(skb)->opt); |
553 | unsigned char * optptr; | 553 | unsigned char *optptr; |
554 | struct rtable *rt = skb_rtable(skb); | 554 | struct rtable *rt = skb_rtable(skb); |
555 | unsigned char *raw = skb_network_header(skb); | 555 | unsigned char *raw = skb_network_header(skb); |
556 | 556 | ||
@@ -578,8 +578,10 @@ void ip_forward_options(struct sk_buff *skb) | |||
578 | ip_hdr(skb)->daddr = opt->nexthop; | 578 | ip_hdr(skb)->daddr = opt->nexthop; |
579 | ip_rt_get_source(&optptr[srrptr-1], skb, rt); | 579 | ip_rt_get_source(&optptr[srrptr-1], skb, rt); |
580 | optptr[2] = srrptr+4; | 580 | optptr[2] = srrptr+4; |
581 | } else if (net_ratelimit()) | 581 | } else { |
582 | pr_crit("%s(): Argh! Destination lost!\n", __func__); | 582 | net_crit_ratelimited("%s(): Argh! Destination lost!\n", |
583 | __func__); | ||
584 | } | ||
583 | if (opt->ts_needaddr) { | 585 | if (opt->ts_needaddr) { |
584 | optptr = raw + opt->ts; | 586 | optptr = raw + opt->ts; |
585 | ip_rt_get_source(&optptr[optptr[2]-9], skb, rt); | 587 | ip_rt_get_source(&optptr[optptr[2]-9], skb, rt); |
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 4910176d24ed..451f97c42eb4 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -214,8 +214,8 @@ static inline int ip_finish_output2(struct sk_buff *skb) | |||
214 | } | 214 | } |
215 | rcu_read_unlock(); | 215 | rcu_read_unlock(); |
216 | 216 | ||
217 | if (net_ratelimit()) | 217 | net_dbg_ratelimited("%s: No header cache and no neighbour!\n", |
218 | printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n"); | 218 | __func__); |
219 | kfree_skb(skb); | 219 | kfree_skb(skb); |
220 | return -EINVAL; | 220 | return -EINVAL; |
221 | } | 221 | } |
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 2fd0fba77124..0d11f234d615 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c | |||
@@ -90,7 +90,7 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb) | |||
90 | static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) | 90 | static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) |
91 | { | 91 | { |
92 | unsigned char optbuf[sizeof(struct ip_options) + 40]; | 92 | unsigned char optbuf[sizeof(struct ip_options) + 40]; |
93 | struct ip_options * opt = (struct ip_options *)optbuf; | 93 | struct ip_options *opt = (struct ip_options *)optbuf; |
94 | 94 | ||
95 | if (IPCB(skb)->opt.optlen == 0) | 95 | if (IPCB(skb)->opt.optlen == 0) |
96 | return; | 96 | return; |
@@ -147,7 +147,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) | |||
147 | void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) | 147 | void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) |
148 | { | 148 | { |
149 | struct inet_sock *inet = inet_sk(skb->sk); | 149 | struct inet_sock *inet = inet_sk(skb->sk); |
150 | unsigned flags = inet->cmsg_flags; | 150 | unsigned int flags = inet->cmsg_flags; |
151 | 151 | ||
152 | /* Ordered by supposed usage frequency */ | 152 | /* Ordered by supposed usage frequency */ |
153 | if (flags & 1) | 153 | if (flags & 1) |
@@ -673,10 +673,15 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
673 | break; | 673 | break; |
674 | } else { | 674 | } else { |
675 | memset(&mreq, 0, sizeof(mreq)); | 675 | memset(&mreq, 0, sizeof(mreq)); |
676 | if (optlen >= sizeof(struct in_addr) && | 676 | if (optlen >= sizeof(struct ip_mreq)) { |
677 | copy_from_user(&mreq.imr_address, optval, | 677 | if (copy_from_user(&mreq, optval, |
678 | sizeof(struct in_addr))) | 678 | sizeof(struct ip_mreq))) |
679 | break; | 679 | break; |
680 | } else if (optlen >= sizeof(struct in_addr)) { | ||
681 | if (copy_from_user(&mreq.imr_address, optval, | ||
682 | sizeof(struct in_addr))) | ||
683 | break; | ||
684 | } | ||
680 | } | 685 | } |
681 | 686 | ||
682 | if (!mreq.imr_ifindex) { | 687 | if (!mreq.imr_ifindex) { |
@@ -1094,7 +1099,7 @@ EXPORT_SYMBOL(compat_ip_setsockopt); | |||
1094 | */ | 1099 | */ |
1095 | 1100 | ||
1096 | static int do_ip_getsockopt(struct sock *sk, int level, int optname, | 1101 | static int do_ip_getsockopt(struct sock *sk, int level, int optname, |
1097 | char __user *optval, int __user *optlen, unsigned flags) | 1102 | char __user *optval, int __user *optlen, unsigned int flags) |
1098 | { | 1103 | { |
1099 | struct inet_sock *inet = inet_sk(sk); | 1104 | struct inet_sock *inet = inet_sk(sk); |
1100 | int val; | 1105 | int val; |
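The do_ip_setsockopt() hunk above changes IP_MULTICAST_IF handling: when the caller passes at least sizeof(struct ip_mreq), the whole structure is now copied, so (as we read the hunk) the second field, imr_interface, lands in mreq.imr_address instead of the first four bytes being misread as the interface address. A hedged userspace sketch of the kind of caller this fixes (error handling trimmed):

	#include <arpa/inet.h>
	#include <netinet/in.h>
	#include <string.h>
	#include <sys/socket.h>

	/* Select the egress interface for multicast by address, using the
	 * struct ip_mreq form of IP_MULTICAST_IF. Illustrative only. */
	int set_mcast_if(int sock, const char *ifaddr)
	{
		struct ip_mreq mreq;

		memset(&mreq, 0, sizeof(mreq));
		if (inet_pton(AF_INET, ifaddr, &mreq.imr_interface) != 1)
			return -1;
		return setsockopt(sock, IPPROTO_IP, IP_MULTICAST_IF,
				  &mreq, sizeof(mreq));
	}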
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 92ac7e7363a0..67e8a6b086ea 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c | |||
@@ -808,8 +808,6 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d | |||
808 | b->op = BOOTP_REQUEST; | 808 | b->op = BOOTP_REQUEST; |
809 | if (dev->type < 256) /* check for false types */ | 809 | if (dev->type < 256) /* check for false types */ |
810 | b->htype = dev->type; | 810 | b->htype = dev->type; |
811 | else if (dev->type == ARPHRD_IEEE802_TR) /* fix for token ring */ | ||
812 | b->htype = ARPHRD_IEEE802; | ||
813 | else if (dev->type == ARPHRD_FDDI) | 811 | else if (dev->type == ARPHRD_FDDI) |
814 | b->htype = ARPHRD_ETHER; | 812 | b->htype = ARPHRD_ETHER; |
815 | else { | 813 | else { |
@@ -955,8 +953,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str | |||
955 | 953 | ||
956 | /* Fragments are not supported */ | 954 | /* Fragments are not supported */ |
957 | if (ip_is_fragment(h)) { | 955 | if (ip_is_fragment(h)) { |
958 | if (net_ratelimit()) | 956 | net_err_ratelimited("DHCP/BOOTP: Ignoring fragmented reply\n"); |
959 | pr_err("DHCP/BOOTP: Ignoring fragmented reply\n"); | ||
960 | goto drop; | 957 | goto drop; |
961 | } | 958 | } |
962 | 959 | ||
@@ -1004,16 +1001,14 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str | |||
1004 | /* Is it a reply to our BOOTP request? */ | 1001 | /* Is it a reply to our BOOTP request? */ |
1005 | if (b->op != BOOTP_REPLY || | 1002 | if (b->op != BOOTP_REPLY || |
1006 | b->xid != d->xid) { | 1003 | b->xid != d->xid) { |
1007 | if (net_ratelimit()) | 1004 | net_err_ratelimited("DHCP/BOOTP: Reply not for us, op[%x] xid[%x]\n", |
1008 | pr_err("DHCP/BOOTP: Reply not for us, op[%x] xid[%x]\n", | 1005 | b->op, b->xid); |
1009 | b->op, b->xid); | ||
1010 | goto drop_unlock; | 1006 | goto drop_unlock; |
1011 | } | 1007 | } |
1012 | 1008 | ||
1013 | /* Is it a reply for the device we are configuring? */ | 1009 | /* Is it a reply for the device we are configuring? */ |
1014 | if (b->xid != ic_dev_xid) { | 1010 | if (b->xid != ic_dev_xid) { |
1015 | if (net_ratelimit()) | 1011 | net_err_ratelimited("DHCP/BOOTP: Ignoring delayed packet\n"); |
1016 | pr_err("DHCP/BOOTP: Ignoring delayed packet\n"); | ||
1017 | goto drop_unlock; | 1012 | goto drop_unlock; |
1018 | } | 1013 | } |
1019 | 1014 | ||
@@ -1198,7 +1193,7 @@ static int __init ic_dynamic(void) | |||
1198 | d = ic_first_dev; | 1193 | d = ic_first_dev; |
1199 | retries = CONF_SEND_RETRIES; | 1194 | retries = CONF_SEND_RETRIES; |
1200 | get_random_bytes(&timeout, sizeof(timeout)); | 1195 | get_random_bytes(&timeout, sizeof(timeout)); |
1201 | timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM); | 1196 | timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned int) CONF_TIMEOUT_RANDOM); |
1202 | for (;;) { | 1197 | for (;;) { |
1203 | /* Track the device we are configuring */ | 1198 | /* Track the device we are configuring */ |
1204 | ic_dev_xid = d->xid; | 1199 | ic_dev_xid = d->xid; |
@@ -1626,11 +1621,13 @@ static int __init ip_auto_config_setup(char *addrs) | |||
1626 | 1621 | ||
1627 | return 1; | 1622 | return 1; |
1628 | } | 1623 | } |
1624 | __setup("ip=", ip_auto_config_setup); | ||
1629 | 1625 | ||
1630 | static int __init nfsaddrs_config_setup(char *addrs) | 1626 | static int __init nfsaddrs_config_setup(char *addrs) |
1631 | { | 1627 | { |
1632 | return ip_auto_config_setup(addrs); | 1628 | return ip_auto_config_setup(addrs); |
1633 | } | 1629 | } |
1630 | __setup("nfsaddrs=", nfsaddrs_config_setup); | ||
1634 | 1631 | ||
1635 | static int __init vendor_class_identifier_setup(char *addrs) | 1632 | static int __init vendor_class_identifier_setup(char *addrs) |
1636 | { | 1633 | { |
@@ -1641,7 +1638,4 @@ static int __init vendor_class_identifier_setup(char *addrs) | |||
1641 | vendor_class_identifier); | 1638 | vendor_class_identifier); |
1642 | return 1; | 1639 | return 1; |
1643 | } | 1640 | } |
1644 | |||
1645 | __setup("ip=", ip_auto_config_setup); | ||
1646 | __setup("nfsaddrs=", nfsaddrs_config_setup); | ||
1647 | __setup("dhcpclass=", vendor_class_identifier_setup); | 1641 | __setup("dhcpclass=", vendor_class_identifier_setup); |
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index ae1413e3f2f8..2d0f99bf61b3 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c | |||
@@ -144,33 +144,48 @@ static void ipip_dev_free(struct net_device *dev); | |||
144 | 144 | ||
145 | /* often modified stats are per cpu, other are shared (netdev->stats) */ | 145 | /* often modified stats are per cpu, other are shared (netdev->stats) */ |
146 | struct pcpu_tstats { | 146 | struct pcpu_tstats { |
147 | unsigned long rx_packets; | 147 | u64 rx_packets; |
148 | unsigned long rx_bytes; | 148 | u64 rx_bytes; |
149 | unsigned long tx_packets; | 149 | u64 tx_packets; |
150 | unsigned long tx_bytes; | 150 | u64 tx_bytes; |
151 | } __attribute__((aligned(4*sizeof(unsigned long)))); | 151 | struct u64_stats_sync syncp; |
152 | }; | ||
152 | 153 | ||
153 | static struct net_device_stats *ipip_get_stats(struct net_device *dev) | 154 | static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev, |
155 | struct rtnl_link_stats64 *tot) | ||
154 | { | 156 | { |
155 | struct pcpu_tstats sum = { 0 }; | ||
156 | int i; | 157 | int i; |
157 | 158 | ||
158 | for_each_possible_cpu(i) { | 159 | for_each_possible_cpu(i) { |
159 | const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); | 160 | const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); |
160 | 161 | u64 rx_packets, rx_bytes, tx_packets, tx_bytes; | |
161 | sum.rx_packets += tstats->rx_packets; | 162 | unsigned int start; |
162 | sum.rx_bytes += tstats->rx_bytes; | 163 | |
163 | sum.tx_packets += tstats->tx_packets; | 164 | do { |
164 | sum.tx_bytes += tstats->tx_bytes; | 165 | start = u64_stats_fetch_begin_bh(&tstats->syncp); |
166 | rx_packets = tstats->rx_packets; | ||
167 | tx_packets = tstats->tx_packets; | ||
168 | rx_bytes = tstats->rx_bytes; | ||
169 | tx_bytes = tstats->tx_bytes; | ||
170 | } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); | ||
171 | |||
172 | tot->rx_packets += rx_packets; | ||
173 | tot->tx_packets += tx_packets; | ||
174 | tot->rx_bytes += rx_bytes; | ||
175 | tot->tx_bytes += tx_bytes; | ||
165 | } | 176 | } |
166 | dev->stats.rx_packets = sum.rx_packets; | 177 | |
167 | dev->stats.rx_bytes = sum.rx_bytes; | 178 | tot->tx_fifo_errors = dev->stats.tx_fifo_errors; |
168 | dev->stats.tx_packets = sum.tx_packets; | 179 | tot->tx_carrier_errors = dev->stats.tx_carrier_errors; |
169 | dev->stats.tx_bytes = sum.tx_bytes; | 180 | tot->tx_dropped = dev->stats.tx_dropped; |
170 | return &dev->stats; | 181 | tot->tx_aborted_errors = dev->stats.tx_aborted_errors; |
182 | tot->tx_errors = dev->stats.tx_errors; | ||
183 | tot->collisions = dev->stats.collisions; | ||
184 | |||
185 | return tot; | ||
171 | } | 186 | } |
172 | 187 | ||
173 | static struct ip_tunnel * ipip_tunnel_lookup(struct net *net, | 188 | static struct ip_tunnel *ipip_tunnel_lookup(struct net *net, |
174 | __be32 remote, __be32 local) | 189 | __be32 remote, __be32 local) |
175 | { | 190 | { |
176 | unsigned int h0 = HASH(remote); | 191 | unsigned int h0 = HASH(remote); |
@@ -245,7 +260,7 @@ static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) | |||
245 | rcu_assign_pointer(*tp, t); | 260 | rcu_assign_pointer(*tp, t); |
246 | } | 261 | } |
247 | 262 | ||
248 | static struct ip_tunnel * ipip_tunnel_locate(struct net *net, | 263 | static struct ip_tunnel *ipip_tunnel_locate(struct net *net, |
249 | struct ip_tunnel_parm *parms, int create) | 264 | struct ip_tunnel_parm *parms, int create) |
250 | { | 265 | { |
251 | __be32 remote = parms->iph.daddr; | 266 | __be32 remote = parms->iph.daddr; |
@@ -404,8 +419,10 @@ static int ipip_rcv(struct sk_buff *skb) | |||
404 | skb->pkt_type = PACKET_HOST; | 419 | skb->pkt_type = PACKET_HOST; |
405 | 420 | ||
406 | tstats = this_cpu_ptr(tunnel->dev->tstats); | 421 | tstats = this_cpu_ptr(tunnel->dev->tstats); |
422 | u64_stats_update_begin(&tstats->syncp); | ||
407 | tstats->rx_packets++; | 423 | tstats->rx_packets++; |
408 | tstats->rx_bytes += skb->len; | 424 | tstats->rx_bytes += skb->len; |
425 | u64_stats_update_end(&tstats->syncp); | ||
409 | 426 | ||
410 | __skb_tunnel_rx(skb, tunnel->dev); | 427 | __skb_tunnel_rx(skb, tunnel->dev); |
411 | 428 | ||
@@ -730,7 +747,7 @@ static const struct net_device_ops ipip_netdev_ops = { | |||
730 | .ndo_start_xmit = ipip_tunnel_xmit, | 747 | .ndo_start_xmit = ipip_tunnel_xmit, |
731 | .ndo_do_ioctl = ipip_tunnel_ioctl, | 748 | .ndo_do_ioctl = ipip_tunnel_ioctl, |
732 | .ndo_change_mtu = ipip_tunnel_change_mtu, | 749 | .ndo_change_mtu = ipip_tunnel_change_mtu, |
733 | .ndo_get_stats = ipip_get_stats, | 750 | .ndo_get_stats64 = ipip_get_stats64, |
734 | }; | 751 | }; |
735 | 752 | ||
736 | static void ipip_dev_free(struct net_device *dev) | 753 | static void ipip_dev_free(struct net_device *dev) |
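The ipip.c conversion above widens the per-cpu tunnel counters to u64, guards them with a u64_stats_sync so 64-bit counters can be read consistently on 32-bit hosts, and moves the driver from ndo_get_stats to ndo_get_stats64. The writer/reader pairing, reduced to its core:

	/* Writers bracket updates; readers retry until the sequence count
	 * is stable, yielding a consistent 64-bit snapshot. */
	struct pcpu_tstats {
		u64 rx_packets;
		u64 rx_bytes;
		struct u64_stats_sync syncp;
	};

	static void rx_account(struct pcpu_tstats *tstats, unsigned int len)
	{
		u64_stats_update_begin(&tstats->syncp);
		tstats->rx_packets++;
		tstats->rx_bytes += len;
		u64_stats_update_end(&tstats->syncp);
	}

	static void rx_snapshot(const struct pcpu_tstats *tstats,
				u64 *packets, u64 *bytes)
	{
		unsigned int start;

		do {
			start = u64_stats_fetch_begin_bh(&tstats->syncp);
			*packets = tstats->rx_packets;
			*bytes = tstats->rx_bytes;
		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
	}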
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 960fbfc3e976..a9e519ad6db5 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c | |||
@@ -949,8 +949,7 @@ static int ipmr_cache_report(struct mr_table *mrt, | |||
949 | ret = sock_queue_rcv_skb(mroute_sk, skb); | 949 | ret = sock_queue_rcv_skb(mroute_sk, skb); |
950 | rcu_read_unlock(); | 950 | rcu_read_unlock(); |
951 | if (ret < 0) { | 951 | if (ret < 0) { |
952 | if (net_ratelimit()) | 952 | net_warn_ratelimited("mroute: pending queue full, dropping entries\n"); |
953 | pr_warn("mroute: pending queue full, dropping entries\n"); | ||
954 | kfree_skb(skb); | 953 | kfree_skb(skb); |
955 | } | 954 | } |
956 | 955 | ||
@@ -2119,15 +2118,16 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, | |||
2119 | rtm->rtm_src_len = 32; | 2118 | rtm->rtm_src_len = 32; |
2120 | rtm->rtm_tos = 0; | 2119 | rtm->rtm_tos = 0; |
2121 | rtm->rtm_table = mrt->id; | 2120 | rtm->rtm_table = mrt->id; |
2122 | NLA_PUT_U32(skb, RTA_TABLE, mrt->id); | 2121 | if (nla_put_u32(skb, RTA_TABLE, mrt->id)) |
2122 | goto nla_put_failure; | ||
2123 | rtm->rtm_type = RTN_MULTICAST; | 2123 | rtm->rtm_type = RTN_MULTICAST; |
2124 | rtm->rtm_scope = RT_SCOPE_UNIVERSE; | 2124 | rtm->rtm_scope = RT_SCOPE_UNIVERSE; |
2125 | rtm->rtm_protocol = RTPROT_UNSPEC; | 2125 | rtm->rtm_protocol = RTPROT_UNSPEC; |
2126 | rtm->rtm_flags = 0; | 2126 | rtm->rtm_flags = 0; |
2127 | 2127 | ||
2128 | NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin); | 2128 | if (nla_put_be32(skb, RTA_SRC, c->mfc_origin) || |
2129 | NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp); | 2129 | nla_put_be32(skb, RTA_DST, c->mfc_mcastgrp)) |
2130 | 2130 | goto nla_put_failure; | |
2131 | if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0) | 2131 | if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0) |
2132 | goto nla_put_failure; | 2132 | goto nla_put_failure; |
2133 | 2133 | ||
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 4f47e064e262..ed1b36783192 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c | |||
@@ -12,7 +12,7 @@ | |||
12 | #include <net/netfilter/nf_queue.h> | 12 | #include <net/netfilter/nf_queue.h> |
13 | 13 | ||
14 | /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ | 14 | /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ |
15 | int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) | 15 | int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type) |
16 | { | 16 | { |
17 | struct net *net = dev_net(skb_dst(skb)->dev); | 17 | struct net *net = dev_net(skb_dst(skb)->dev); |
18 | const struct iphdr *iph = ip_hdr(skb); | 18 | const struct iphdr *iph = ip_hdr(skb); |
@@ -237,13 +237,3 @@ static void ipv4_netfilter_fini(void) | |||
237 | 237 | ||
238 | module_init(ipv4_netfilter_init); | 238 | module_init(ipv4_netfilter_init); |
239 | module_exit(ipv4_netfilter_fini); | 239 | module_exit(ipv4_netfilter_fini); |
240 | |||
241 | #ifdef CONFIG_SYSCTL | ||
242 | struct ctl_path nf_net_ipv4_netfilter_sysctl_path[] = { | ||
243 | { .procname = "net", }, | ||
244 | { .procname = "ipv4", }, | ||
245 | { .procname = "netfilter", }, | ||
246 | { } | ||
247 | }; | ||
248 | EXPORT_SYMBOL_GPL(nf_net_ipv4_netfilter_sysctl_path); | ||
249 | #endif /* CONFIG_SYSCTL */ | ||
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 240b68469a7a..c20674dc9452 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile | |||
@@ -66,6 +66,3 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o | |||
66 | 66 | ||
67 | # just filtering instance of ARP tables for now | 67 | # just filtering instance of ARP tables for now |
68 | obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o | 68 | obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o |
69 | |||
70 | obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o | ||
71 | |||
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index fd7a3f68917f..97e61eadf580 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
@@ -221,9 +221,8 @@ static inline int arp_checkentry(const struct arpt_arp *arp) | |||
221 | static unsigned int | 221 | static unsigned int |
222 | arpt_error(struct sk_buff *skb, const struct xt_action_param *par) | 222 | arpt_error(struct sk_buff *skb, const struct xt_action_param *par) |
223 | { | 223 | { |
224 | if (net_ratelimit()) | 224 | net_err_ratelimited("arp_tables: error: '%s'\n", |
225 | pr_err("arp_tables: error: '%s'\n", | 225 | (const char *)par->targinfo); |
226 | (const char *)par->targinfo); | ||
227 | 226 | ||
228 | return NF_DROP; | 227 | return NF_DROP; |
229 | } | 228 | } |
@@ -303,7 +302,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
303 | if (v < 0) { | 302 | if (v < 0) { |
304 | /* Pop from stack? */ | 303 | /* Pop from stack? */ |
305 | if (v != XT_RETURN) { | 304 | if (v != XT_RETURN) { |
306 | verdict = (unsigned)(-v) - 1; | 305 | verdict = (unsigned int)(-v) - 1; |
307 | break; | 306 | break; |
308 | } | 307 | } |
309 | e = back; | 308 | e = back; |
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c deleted file mode 100644 index 94d45e1f8882..000000000000 --- a/net/ipv4/netfilter/ip_queue.c +++ /dev/null | |||
@@ -1,639 +0,0 @@ | |||
1 | /* | ||
2 | * This is a module which is used for queueing IPv4 packets and | ||
3 | * communicating with userspace via netlink. | ||
4 | * | ||
5 | * (C) 2000-2002 James Morris <jmorris@intercode.com.au> | ||
6 | * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | #include <linux/module.h> | ||
13 | #include <linux/skbuff.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/ip.h> | ||
16 | #include <linux/notifier.h> | ||
17 | #include <linux/netdevice.h> | ||
18 | #include <linux/netfilter.h> | ||
19 | #include <linux/netfilter_ipv4/ip_queue.h> | ||
20 | #include <linux/netfilter_ipv4/ip_tables.h> | ||
21 | #include <linux/netlink.h> | ||
22 | #include <linux/spinlock.h> | ||
23 | #include <linux/sysctl.h> | ||
24 | #include <linux/proc_fs.h> | ||
25 | #include <linux/seq_file.h> | ||
26 | #include <linux/security.h> | ||
27 | #include <linux/net.h> | ||
28 | #include <linux/mutex.h> | ||
29 | #include <linux/slab.h> | ||
30 | #include <net/net_namespace.h> | ||
31 | #include <net/sock.h> | ||
32 | #include <net/route.h> | ||
33 | #include <net/netfilter/nf_queue.h> | ||
34 | #include <net/ip.h> | ||
35 | |||
36 | #define IPQ_QMAX_DEFAULT 1024 | ||
37 | #define IPQ_PROC_FS_NAME "ip_queue" | ||
38 | #define NET_IPQ_QMAX 2088 | ||
39 | #define NET_IPQ_QMAX_NAME "ip_queue_maxlen" | ||
40 | |||
41 | typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long); | ||
42 | |||
43 | static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; | ||
44 | static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; | ||
45 | static DEFINE_SPINLOCK(queue_lock); | ||
46 | static int peer_pid __read_mostly; | ||
47 | static unsigned int copy_range __read_mostly; | ||
48 | static unsigned int queue_total; | ||
49 | static unsigned int queue_dropped = 0; | ||
50 | static unsigned int queue_user_dropped = 0; | ||
51 | static struct sock *ipqnl __read_mostly; | ||
52 | static LIST_HEAD(queue_list); | ||
53 | static DEFINE_MUTEX(ipqnl_mutex); | ||
54 | |||
55 | static inline void | ||
56 | __ipq_enqueue_entry(struct nf_queue_entry *entry) | ||
57 | { | ||
58 | list_add_tail(&entry->list, &queue_list); | ||
59 | queue_total++; | ||
60 | } | ||
61 | |||
62 | static inline int | ||
63 | __ipq_set_mode(unsigned char mode, unsigned int range) | ||
64 | { | ||
65 | int status = 0; | ||
66 | |||
67 | switch(mode) { | ||
68 | case IPQ_COPY_NONE: | ||
69 | case IPQ_COPY_META: | ||
70 | copy_mode = mode; | ||
71 | copy_range = 0; | ||
72 | break; | ||
73 | |||
74 | case IPQ_COPY_PACKET: | ||
75 | if (range > 0xFFFF) | ||
76 | range = 0xFFFF; | ||
77 | copy_range = range; | ||
78 | copy_mode = mode; | ||
79 | break; | ||
80 | |||
81 | default: | ||
82 | status = -EINVAL; | ||
83 | |||
84 | } | ||
85 | return status; | ||
86 | } | ||
87 | |||
88 | static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data); | ||
89 | |||
90 | static inline void | ||
91 | __ipq_reset(void) | ||
92 | { | ||
93 | peer_pid = 0; | ||
94 | net_disable_timestamp(); | ||
95 | __ipq_set_mode(IPQ_COPY_NONE, 0); | ||
96 | __ipq_flush(NULL, 0); | ||
97 | } | ||
98 | |||
99 | static struct nf_queue_entry * | ||
100 | ipq_find_dequeue_entry(unsigned long id) | ||
101 | { | ||
102 | struct nf_queue_entry *entry = NULL, *i; | ||
103 | |||
104 | spin_lock_bh(&queue_lock); | ||
105 | |||
106 | list_for_each_entry(i, &queue_list, list) { | ||
107 | if ((unsigned long)i == id) { | ||
108 | entry = i; | ||
109 | break; | ||
110 | } | ||
111 | } | ||
112 | |||
113 | if (entry) { | ||
114 | list_del(&entry->list); | ||
115 | queue_total--; | ||
116 | } | ||
117 | |||
118 | spin_unlock_bh(&queue_lock); | ||
119 | return entry; | ||
120 | } | ||
121 | |||
122 | static void | ||
123 | __ipq_flush(ipq_cmpfn cmpfn, unsigned long data) | ||
124 | { | ||
125 | struct nf_queue_entry *entry, *next; | ||
126 | |||
127 | list_for_each_entry_safe(entry, next, &queue_list, list) { | ||
128 | if (!cmpfn || cmpfn(entry, data)) { | ||
129 | list_del(&entry->list); | ||
130 | queue_total--; | ||
131 | nf_reinject(entry, NF_DROP); | ||
132 | } | ||
133 | } | ||
134 | } | ||
135 | |||
136 | static void | ||
137 | ipq_flush(ipq_cmpfn cmpfn, unsigned long data) | ||
138 | { | ||
139 | spin_lock_bh(&queue_lock); | ||
140 | __ipq_flush(cmpfn, data); | ||
141 | spin_unlock_bh(&queue_lock); | ||
142 | } | ||
143 | |||
144 | static struct sk_buff * | ||
145 | ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) | ||
146 | { | ||
147 | sk_buff_data_t old_tail; | ||
148 | size_t size = 0; | ||
149 | size_t data_len = 0; | ||
150 | struct sk_buff *skb; | ||
151 | struct ipq_packet_msg *pmsg; | ||
152 | struct nlmsghdr *nlh; | ||
153 | struct timeval tv; | ||
154 | |||
155 | switch (ACCESS_ONCE(copy_mode)) { | ||
156 | case IPQ_COPY_META: | ||
157 | case IPQ_COPY_NONE: | ||
158 | size = NLMSG_SPACE(sizeof(*pmsg)); | ||
159 | break; | ||
160 | |||
161 | case IPQ_COPY_PACKET: | ||
162 | if (entry->skb->ip_summed == CHECKSUM_PARTIAL && | ||
163 | (*errp = skb_checksum_help(entry->skb))) | ||
164 | return NULL; | ||
165 | |||
166 | data_len = ACCESS_ONCE(copy_range); | ||
167 | if (data_len == 0 || data_len > entry->skb->len) | ||
168 | data_len = entry->skb->len; | ||
169 | |||
170 | size = NLMSG_SPACE(sizeof(*pmsg) + data_len); | ||
171 | break; | ||
172 | |||
173 | default: | ||
174 | *errp = -EINVAL; | ||
175 | return NULL; | ||
176 | } | ||
177 | |||
178 | skb = alloc_skb(size, GFP_ATOMIC); | ||
179 | if (!skb) | ||
180 | goto nlmsg_failure; | ||
181 | |||
182 | old_tail = skb->tail; | ||
183 | nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh)); | ||
184 | pmsg = NLMSG_DATA(nlh); | ||
185 | memset(pmsg, 0, sizeof(*pmsg)); | ||
186 | |||
187 | pmsg->packet_id = (unsigned long )entry; | ||
188 | pmsg->data_len = data_len; | ||
189 | tv = ktime_to_timeval(entry->skb->tstamp); | ||
190 | pmsg->timestamp_sec = tv.tv_sec; | ||
191 | pmsg->timestamp_usec = tv.tv_usec; | ||
192 | pmsg->mark = entry->skb->mark; | ||
193 | pmsg->hook = entry->hook; | ||
194 | pmsg->hw_protocol = entry->skb->protocol; | ||
195 | |||
196 | if (entry->indev) | ||
197 | strcpy(pmsg->indev_name, entry->indev->name); | ||
198 | else | ||
199 | pmsg->indev_name[0] = '\0'; | ||
200 | |||
201 | if (entry->outdev) | ||
202 | strcpy(pmsg->outdev_name, entry->outdev->name); | ||
203 | else | ||
204 | pmsg->outdev_name[0] = '\0'; | ||
205 | |||
206 | if (entry->indev && entry->skb->dev && | ||
207 | entry->skb->mac_header != entry->skb->network_header) { | ||
208 | pmsg->hw_type = entry->skb->dev->type; | ||
209 | pmsg->hw_addrlen = dev_parse_header(entry->skb, | ||
210 | pmsg->hw_addr); | ||
211 | } | ||
212 | |||
213 | if (data_len) | ||
214 | if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len)) | ||
215 | BUG(); | ||
216 | |||
217 | nlh->nlmsg_len = skb->tail - old_tail; | ||
218 | return skb; | ||
219 | |||
220 | nlmsg_failure: | ||
221 | kfree_skb(skb); | ||
222 | *errp = -EINVAL; | ||
223 | printk(KERN_ERR "ip_queue: error creating packet message\n"); | ||
224 | return NULL; | ||
225 | } | ||
226 | |||
227 | static int | ||
228 | ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) | ||
229 | { | ||
230 | int status = -EINVAL; | ||
231 | struct sk_buff *nskb; | ||
232 | |||
233 | if (copy_mode == IPQ_COPY_NONE) | ||
234 | return -EAGAIN; | ||
235 | |||
236 | nskb = ipq_build_packet_message(entry, &status); | ||
237 | if (nskb == NULL) | ||
238 | return status; | ||
239 | |||
240 | spin_lock_bh(&queue_lock); | ||
241 | |||
242 | if (!peer_pid) | ||
243 | goto err_out_free_nskb; | ||
244 | |||
245 | if (queue_total >= queue_maxlen) { | ||
246 | queue_dropped++; | ||
247 | status = -ENOSPC; | ||
248 | if (net_ratelimit()) | ||
249 | printk (KERN_WARNING "ip_queue: full at %d entries, " | ||
250 | "dropping packets(s). Dropped: %d\n", queue_total, | ||
251 | queue_dropped); | ||
252 | goto err_out_free_nskb; | ||
253 | } | ||
254 | |||
255 | /* netlink_unicast will either free the nskb or attach it to a socket */ | ||
256 | status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT); | ||
257 | if (status < 0) { | ||
258 | queue_user_dropped++; | ||
259 | goto err_out_unlock; | ||
260 | } | ||
261 | |||
262 | __ipq_enqueue_entry(entry); | ||
263 | |||
264 | spin_unlock_bh(&queue_lock); | ||
265 | return status; | ||
266 | |||
267 | err_out_free_nskb: | ||
268 | kfree_skb(nskb); | ||
269 | |||
270 | err_out_unlock: | ||
271 | spin_unlock_bh(&queue_lock); | ||
272 | return status; | ||
273 | } | ||
274 | |||
275 | static int | ||
276 | ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct nf_queue_entry *e) | ||
277 | { | ||
278 | int diff; | ||
279 | struct iphdr *user_iph = (struct iphdr *)v->payload; | ||
280 | struct sk_buff *nskb; | ||
281 | |||
282 | if (v->data_len < sizeof(*user_iph)) | ||
283 | return 0; | ||
284 | diff = v->data_len - e->skb->len; | ||
285 | if (diff < 0) { | ||
286 | if (pskb_trim(e->skb, v->data_len)) | ||
287 | return -ENOMEM; | ||
288 | } else if (diff > 0) { | ||
289 | if (v->data_len > 0xFFFF) | ||
290 | return -EINVAL; | ||
291 | if (diff > skb_tailroom(e->skb)) { | ||
292 | nskb = skb_copy_expand(e->skb, skb_headroom(e->skb), | ||
293 | diff, GFP_ATOMIC); | ||
294 | if (!nskb) { | ||
295 | printk(KERN_WARNING "ip_queue: error " | ||
296 | "in mangle, dropping packet\n"); | ||
297 | return -ENOMEM; | ||
298 | } | ||
299 | kfree_skb(e->skb); | ||
300 | e->skb = nskb; | ||
301 | } | ||
302 | skb_put(e->skb, diff); | ||
303 | } | ||
304 | if (!skb_make_writable(e->skb, v->data_len)) | ||
305 | return -ENOMEM; | ||
306 | skb_copy_to_linear_data(e->skb, v->payload, v->data_len); | ||
307 | e->skb->ip_summed = CHECKSUM_NONE; | ||
308 | |||
309 | return 0; | ||
310 | } | ||
311 | |||
312 | static int | ||
313 | ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) | ||
314 | { | ||
315 | struct nf_queue_entry *entry; | ||
316 | |||
317 | if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN) | ||
318 | return -EINVAL; | ||
319 | |||
320 | entry = ipq_find_dequeue_entry(vmsg->id); | ||
321 | if (entry == NULL) | ||
322 | return -ENOENT; | ||
323 | else { | ||
324 | int verdict = vmsg->value; | ||
325 | |||
326 | if (vmsg->data_len && vmsg->data_len == len) | ||
327 | if (ipq_mangle_ipv4(vmsg, entry) < 0) | ||
328 | verdict = NF_DROP; | ||
329 | |||
330 | nf_reinject(entry, verdict); | ||
331 | return 0; | ||
332 | } | ||
333 | } | ||
334 | |||
335 | static int | ||
336 | ipq_set_mode(unsigned char mode, unsigned int range) | ||
337 | { | ||
338 | int status; | ||
339 | |||
340 | spin_lock_bh(&queue_lock); | ||
341 | status = __ipq_set_mode(mode, range); | ||
342 | spin_unlock_bh(&queue_lock); | ||
343 | return status; | ||
344 | } | ||
345 | |||
346 | static int | ||
347 | ipq_receive_peer(struct ipq_peer_msg *pmsg, | ||
348 | unsigned char type, unsigned int len) | ||
349 | { | ||
350 | int status = 0; | ||
351 | |||
352 | if (len < sizeof(*pmsg)) | ||
353 | return -EINVAL; | ||
354 | |||
355 | switch (type) { | ||
356 | case IPQM_MODE: | ||
357 | status = ipq_set_mode(pmsg->msg.mode.value, | ||
358 | pmsg->msg.mode.range); | ||
359 | break; | ||
360 | |||
361 | case IPQM_VERDICT: | ||
362 | status = ipq_set_verdict(&pmsg->msg.verdict, | ||
363 | len - sizeof(*pmsg)); | ||
364 | break; | ||
365 | default: | ||
366 | status = -EINVAL; | ||
367 | } | ||
368 | return status; | ||
369 | } | ||
370 | |||
371 | static int | ||
372 | dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) | ||
373 | { | ||
374 | if (entry->indev) | ||
375 | if (entry->indev->ifindex == ifindex) | ||
376 | return 1; | ||
377 | if (entry->outdev) | ||
378 | if (entry->outdev->ifindex == ifindex) | ||
379 | return 1; | ||
380 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
381 | if (entry->skb->nf_bridge) { | ||
382 | if (entry->skb->nf_bridge->physindev && | ||
383 | entry->skb->nf_bridge->physindev->ifindex == ifindex) | ||
384 | return 1; | ||
385 | if (entry->skb->nf_bridge->physoutdev && | ||
386 | entry->skb->nf_bridge->physoutdev->ifindex == ifindex) | ||
387 | return 1; | ||
388 | } | ||
389 | #endif | ||
390 | return 0; | ||
391 | } | ||
392 | |||
393 | static void | ||
394 | ipq_dev_drop(int ifindex) | ||
395 | { | ||
396 | ipq_flush(dev_cmp, ifindex); | ||
397 | } | ||
398 | |||
399 | #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) | ||
400 | |||
401 | static inline void | ||
402 | __ipq_rcv_skb(struct sk_buff *skb) | ||
403 | { | ||
404 | int status, type, pid, flags; | ||
405 | unsigned int nlmsglen, skblen; | ||
406 | struct nlmsghdr *nlh; | ||
407 | bool enable_timestamp = false; | ||
408 | |||
409 | skblen = skb->len; | ||
410 | if (skblen < sizeof(*nlh)) | ||
411 | return; | ||
412 | |||
413 | nlh = nlmsg_hdr(skb); | ||
414 | nlmsglen = nlh->nlmsg_len; | ||
415 | if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen) | ||
416 | return; | ||
417 | |||
418 | pid = nlh->nlmsg_pid; | ||
419 | flags = nlh->nlmsg_flags; | ||
420 | |||
421 | if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI) | ||
422 | RCV_SKB_FAIL(-EINVAL); | ||
423 | |||
424 | if (flags & MSG_TRUNC) | ||
425 | RCV_SKB_FAIL(-ECOMM); | ||
426 | |||
427 | type = nlh->nlmsg_type; | ||
428 | if (type < NLMSG_NOOP || type >= IPQM_MAX) | ||
429 | RCV_SKB_FAIL(-EINVAL); | ||
430 | |||
431 | if (type <= IPQM_BASE) | ||
432 | return; | ||
433 | |||
434 | if (!capable(CAP_NET_ADMIN)) | ||
435 | RCV_SKB_FAIL(-EPERM); | ||
436 | |||
437 | spin_lock_bh(&queue_lock); | ||
438 | |||
439 | if (peer_pid) { | ||
440 | if (peer_pid != pid) { | ||
441 | spin_unlock_bh(&queue_lock); | ||
442 | RCV_SKB_FAIL(-EBUSY); | ||
443 | } | ||
444 | } else { | ||
445 | enable_timestamp = true; | ||
446 | peer_pid = pid; | ||
447 | } | ||
448 | |||
449 | spin_unlock_bh(&queue_lock); | ||
450 | if (enable_timestamp) | ||
451 | net_enable_timestamp(); | ||
452 | status = ipq_receive_peer(NLMSG_DATA(nlh), type, | ||
453 | nlmsglen - NLMSG_LENGTH(0)); | ||
454 | if (status < 0) | ||
455 | RCV_SKB_FAIL(status); | ||
456 | |||
457 | if (flags & NLM_F_ACK) | ||
458 | netlink_ack(skb, nlh, 0); | ||
459 | } | ||
460 | |||
461 | static void | ||
462 | ipq_rcv_skb(struct sk_buff *skb) | ||
463 | { | ||
464 | mutex_lock(&ipqnl_mutex); | ||
465 | __ipq_rcv_skb(skb); | ||
466 | mutex_unlock(&ipqnl_mutex); | ||
467 | } | ||
468 | |||
469 | static int | ||
470 | ipq_rcv_dev_event(struct notifier_block *this, | ||
471 | unsigned long event, void *ptr) | ||
472 | { | ||
473 | struct net_device *dev = ptr; | ||
474 | |||
475 | if (!net_eq(dev_net(dev), &init_net)) | ||
476 | return NOTIFY_DONE; | ||
477 | |||
478 | /* Drop any packets associated with the downed device */ | ||
479 | if (event == NETDEV_DOWN) | ||
480 | ipq_dev_drop(dev->ifindex); | ||
481 | return NOTIFY_DONE; | ||
482 | } | ||
483 | |||
484 | static struct notifier_block ipq_dev_notifier = { | ||
485 | .notifier_call = ipq_rcv_dev_event, | ||
486 | }; | ||
487 | |||
488 | static int | ||
489 | ipq_rcv_nl_event(struct notifier_block *this, | ||
490 | unsigned long event, void *ptr) | ||
491 | { | ||
492 | struct netlink_notify *n = ptr; | ||
493 | |||
494 | if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) { | ||
495 | spin_lock_bh(&queue_lock); | ||
496 | if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) | ||
497 | __ipq_reset(); | ||
498 | spin_unlock_bh(&queue_lock); | ||
499 | } | ||
500 | return NOTIFY_DONE; | ||
501 | } | ||
502 | |||
503 | static struct notifier_block ipq_nl_notifier = { | ||
504 | .notifier_call = ipq_rcv_nl_event, | ||
505 | }; | ||
506 | |||
507 | #ifdef CONFIG_SYSCTL | ||
508 | static struct ctl_table_header *ipq_sysctl_header; | ||
509 | |||
510 | static ctl_table ipq_table[] = { | ||
511 | { | ||
512 | .procname = NET_IPQ_QMAX_NAME, | ||
513 | .data = &queue_maxlen, | ||
514 | .maxlen = sizeof(queue_maxlen), | ||
515 | .mode = 0644, | ||
516 | .proc_handler = proc_dointvec | ||
517 | }, | ||
518 | { } | ||
519 | }; | ||
520 | #endif | ||
521 | |||
522 | #ifdef CONFIG_PROC_FS | ||
523 | static int ip_queue_show(struct seq_file *m, void *v) | ||
524 | { | ||
525 | spin_lock_bh(&queue_lock); | ||
526 | |||
527 | seq_printf(m, | ||
528 | "Peer PID : %d\n" | ||
529 | "Copy mode : %hu\n" | ||
530 | "Copy range : %u\n" | ||
531 | "Queue length : %u\n" | ||
532 | "Queue max. length : %u\n" | ||
533 | "Queue dropped : %u\n" | ||
534 | "Netlink dropped : %u\n", | ||
535 | peer_pid, | ||
536 | copy_mode, | ||
537 | copy_range, | ||
538 | queue_total, | ||
539 | queue_maxlen, | ||
540 | queue_dropped, | ||
541 | queue_user_dropped); | ||
542 | |||
543 | spin_unlock_bh(&queue_lock); | ||
544 | return 0; | ||
545 | } | ||
546 | |||
547 | static int ip_queue_open(struct inode *inode, struct file *file) | ||
548 | { | ||
549 | return single_open(file, ip_queue_show, NULL); | ||
550 | } | ||
551 | |||
552 | static const struct file_operations ip_queue_proc_fops = { | ||
553 | .open = ip_queue_open, | ||
554 | .read = seq_read, | ||
555 | .llseek = seq_lseek, | ||
556 | .release = single_release, | ||
557 | .owner = THIS_MODULE, | ||
558 | }; | ||
559 | #endif | ||
560 | |||
561 | static const struct nf_queue_handler nfqh = { | ||
562 | .name = "ip_queue", | ||
563 | .outfn = &ipq_enqueue_packet, | ||
564 | }; | ||
565 | |||
566 | static int __init ip_queue_init(void) | ||
567 | { | ||
568 | int status = -ENOMEM; | ||
569 | struct proc_dir_entry *proc __maybe_unused; | ||
570 | |||
571 | netlink_register_notifier(&ipq_nl_notifier); | ||
572 | ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0, | ||
573 | ipq_rcv_skb, NULL, THIS_MODULE); | ||
574 | if (ipqnl == NULL) { | ||
575 | printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); | ||
576 | goto cleanup_netlink_notifier; | ||
577 | } | ||
578 | |||
579 | #ifdef CONFIG_PROC_FS | ||
580 | proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net, | ||
581 | &ip_queue_proc_fops); | ||
582 | if (!proc) { | ||
583 | printk(KERN_ERR "ip_queue: failed to create proc entry\n"); | ||
584 | goto cleanup_ipqnl; | ||
585 | } | ||
586 | #endif | ||
587 | register_netdevice_notifier(&ipq_dev_notifier); | ||
588 | #ifdef CONFIG_SYSCTL | ||
589 | ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table); | ||
590 | #endif | ||
591 | status = nf_register_queue_handler(NFPROTO_IPV4, &nfqh); | ||
592 | if (status < 0) { | ||
593 | printk(KERN_ERR "ip_queue: failed to register queue handler\n"); | ||
594 | goto cleanup_sysctl; | ||
595 | } | ||
596 | return status; | ||
597 | |||
598 | cleanup_sysctl: | ||
599 | #ifdef CONFIG_SYSCTL | ||
600 | unregister_sysctl_table(ipq_sysctl_header); | ||
601 | #endif | ||
602 | unregister_netdevice_notifier(&ipq_dev_notifier); | ||
603 | proc_net_remove(&init_net, IPQ_PROC_FS_NAME); | ||
604 | cleanup_ipqnl: __maybe_unused | ||
605 | netlink_kernel_release(ipqnl); | ||
606 | mutex_lock(&ipqnl_mutex); | ||
607 | mutex_unlock(&ipqnl_mutex); | ||
608 | |||
609 | cleanup_netlink_notifier: | ||
610 | netlink_unregister_notifier(&ipq_nl_notifier); | ||
611 | return status; | ||
612 | } | ||
613 | |||
614 | static void __exit ip_queue_fini(void) | ||
615 | { | ||
616 | nf_unregister_queue_handlers(&nfqh); | ||
617 | |||
618 | ipq_flush(NULL, 0); | ||
619 | |||
620 | #ifdef CONFIG_SYSCTL | ||
621 | unregister_sysctl_table(ipq_sysctl_header); | ||
622 | #endif | ||
623 | unregister_netdevice_notifier(&ipq_dev_notifier); | ||
624 | proc_net_remove(&init_net, IPQ_PROC_FS_NAME); | ||
625 | |||
626 | netlink_kernel_release(ipqnl); | ||
627 | mutex_lock(&ipqnl_mutex); | ||
628 | mutex_unlock(&ipqnl_mutex); | ||
629 | |||
630 | netlink_unregister_notifier(&ipq_nl_notifier); | ||
631 | } | ||
632 | |||
633 | MODULE_DESCRIPTION("IPv4 packet queue handler"); | ||
634 | MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); | ||
635 | MODULE_LICENSE("GPL"); | ||
636 | MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_FIREWALL); | ||
637 | |||
638 | module_init(ip_queue_init); | ||
639 | module_exit(ip_queue_fini); | ||
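ip_queue, the old NETLINK_FIREWALL backend for the QUEUE target, is deleted outright here; it had long been superseded by nfnetlink_queue. For anyone porting userspace off the removed interface, a hedged sketch of the equivalent via libnetfilter_queue (check the library headers for exact signatures; error handling mostly omitted; link with -lnetfilter_queue):

	#include <arpa/inet.h>
	#include <stdint.h>
	#include <sys/socket.h>
	#include <unistd.h>
	#include <linux/netfilter.h>
	#include <libnetfilter_queue/libnetfilter_queue.h>

	/* Accept every packet arriving on NFQUEUE queue 0. */
	static int cb(struct nfq_q_handle *qh, struct nfgenmsg *nfmsg,
		      struct nfq_data *nfa, void *data)
	{
		struct nfqnl_msg_packet_hdr *ph = nfq_get_msg_packet_hdr(nfa);
		uint32_t id = ph ? ntohl(ph->packet_id) : 0;

		return nfq_set_verdict(qh, id, NF_ACCEPT, 0, NULL);
	}

	int main(void)
	{
		struct nfq_handle *h = nfq_open();
		struct nfq_q_handle *qh;
		char buf[4096];
		int rv;

		nfq_unbind_pf(h, AF_INET);	/* older kernels need the */
		nfq_bind_pf(h, AF_INET);	/* explicit (re)bind      */
		qh = nfq_create_queue(h, 0, &cb, NULL);
		nfq_set_mode(qh, NFQNL_COPY_PACKET, 0xffff);
		while ((rv = recv(nfq_fd(h), buf, sizeof(buf), 0)) >= 0)
			nfq_handle_packet(h, buf, rv);
		nfq_destroy_queue(qh);
		nfq_close(h);
		return 0;
	}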
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 24e556e83a3b..170b1fdd6b72 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
@@ -153,8 +153,7 @@ ip_checkentry(const struct ipt_ip *ip) | |||
153 | static unsigned int | 153 | static unsigned int |
154 | ipt_error(struct sk_buff *skb, const struct xt_action_param *par) | 154 | ipt_error(struct sk_buff *skb, const struct xt_action_param *par) |
155 | { | 155 | { |
156 | if (net_ratelimit()) | 156 | net_info_ratelimited("error: `%s'\n", (const char *)par->targinfo); |
157 | pr_info("error: `%s'\n", (const char *)par->targinfo); | ||
158 | 157 | ||
159 | return NF_DROP; | 158 | return NF_DROP; |
160 | } | 159 | } |
@@ -377,7 +376,7 @@ ipt_do_table(struct sk_buff *skb, | |||
377 | if (v < 0) { | 376 | if (v < 0) { |
378 | /* Pop from stack? */ | 377 | /* Pop from stack? */ |
379 | if (v != XT_RETURN) { | 378 | if (v != XT_RETURN) { |
380 | verdict = (unsigned)(-v) - 1; | 379 | verdict = (unsigned int)(-v) - 1; |
381 | break; | 380 | break; |
382 | } | 381 | } |
383 | if (*stackptr <= origptr) { | 382 | if (*stackptr <= origptr) { |
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index a639967eb727..fe5daea5214d 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c | |||
@@ -246,8 +246,7 @@ clusterip_hashfn(const struct sk_buff *skb, | |||
246 | dport = ports[1]; | 246 | dport = ports[1]; |
247 | } | 247 | } |
248 | } else { | 248 | } else { |
249 | if (net_ratelimit()) | 249 | net_info_ratelimited("unknown protocol %u\n", iph->protocol); |
250 | pr_info("unknown protocol %u\n", iph->protocol); | ||
251 | } | 250 | } |
252 | 251 | ||
253 | switch (config->hash_mode) { | 252 | switch (config->hash_mode) { |
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index de9da21113a1..91747d4ebc26 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | |||
@@ -74,16 +74,24 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, | |||
74 | 74 | ||
75 | iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); | 75 | iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); |
76 | if (iph == NULL) | 76 | if (iph == NULL) |
77 | return -NF_DROP; | 77 | return -NF_ACCEPT; |
78 | 78 | ||
79 | /* Conntrack defragments packets, we might still see fragments | 79 | /* Conntrack defragments packets, we might still see fragments |
80 | * inside ICMP packets though. */ | 80 | * inside ICMP packets though. */ |
81 | if (iph->frag_off & htons(IP_OFFSET)) | 81 | if (iph->frag_off & htons(IP_OFFSET)) |
82 | return -NF_DROP; | 82 | return -NF_ACCEPT; |
83 | 83 | ||
84 | *dataoff = nhoff + (iph->ihl << 2); | 84 | *dataoff = nhoff + (iph->ihl << 2); |
85 | *protonum = iph->protocol; | 85 | *protonum = iph->protocol; |
86 | 86 | ||
87 | /* Check bogus IP headers */ | ||
88 | if (*dataoff > skb->len) { | ||
89 | pr_debug("nf_conntrack_ipv4: bogus IPv4 packet: " | ||
90 | "nhoff %u, ihl %u, skblen %u\n", | ||
91 | nhoff, iph->ihl << 2, skb->len); | ||
92 | return -NF_ACCEPT; | ||
93 | } | ||
94 | |||
87 | return NF_ACCEPT; | 95 | return NF_ACCEPT; |
88 | } | 96 | } |
89 | 97 | ||
@@ -303,8 +311,9 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) | |||
303 | static int ipv4_tuple_to_nlattr(struct sk_buff *skb, | 311 | static int ipv4_tuple_to_nlattr(struct sk_buff *skb, |
304 | const struct nf_conntrack_tuple *tuple) | 312 | const struct nf_conntrack_tuple *tuple) |
305 | { | 313 | { |
306 | NLA_PUT_BE32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip); | 314 | if (nla_put_be32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) || |
307 | NLA_PUT_BE32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip); | 315 | nla_put_be32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip)) |
316 | goto nla_put_failure; | ||
308 | return 0; | 317 | return 0; |
309 | 318 | ||
310 | nla_put_failure: | 319 | nla_put_failure: |
@@ -356,7 +365,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { | |||
356 | .nla_policy = ipv4_nla_policy, | 365 | .nla_policy = ipv4_nla_policy, |
357 | #endif | 366 | #endif |
358 | #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) | 367 | #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) |
359 | .ctl_table_path = nf_net_ipv4_netfilter_sysctl_path, | 368 | .ctl_table_path = "net/ipv4/netfilter", |
360 | .ctl_table = ip_ct_sysctl_table, | 369 | .ctl_table = ip_ct_sysctl_table, |
361 | #endif | 370 | #endif |
362 | .me = THIS_MODULE, | 371 | .me = THIS_MODULE, |
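Two things happen in the nf_conntrack_l3proto_ipv4.c hunks: malformed or unfetchable IPv4 headers now make conntrack stand aside (-NF_ACCEPT) rather than drop the packet, and a new sanity check skips headers whose claimed length runs past the skb. Both lean on the skb_header_pointer() idiom, sketched here:

	/* skb_header_pointer() returns a pointer into the skb when the
	 * requested bytes are linear, otherwise copies them into the
	 * caller's buffer; NULL means the packet is too short. */
	static int peek_ip_protocol(const struct sk_buff *skb,
				    unsigned int nhoff)
	{
		struct iphdr _iph;
		const struct iphdr *iph;

		iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
		if (iph == NULL)
			return -1;	/* truncated: let it pass untracked */

		return iph->protocol;
	}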
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 7cbe9cb261c2..0847e373d33c 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c | |||
@@ -228,10 +228,10 @@ icmp_error(struct net *net, struct nf_conn *tmpl, | |||
228 | static int icmp_tuple_to_nlattr(struct sk_buff *skb, | 228 | static int icmp_tuple_to_nlattr(struct sk_buff *skb, |
229 | const struct nf_conntrack_tuple *t) | 229 | const struct nf_conntrack_tuple *t) |
230 | { | 230 | { |
231 | NLA_PUT_BE16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id); | 231 | if (nla_put_be16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id) || |
232 | NLA_PUT_U8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type); | 232 | nla_put_u8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type) || |
233 | NLA_PUT_U8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code); | 233 | nla_put_u8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code)) |
234 | 234 | goto nla_put_failure; | |
235 | return 0; | 235 | return 0; |
236 | 236 | ||
237 | nla_put_failure: | 237 | nla_put_failure: |
@@ -293,8 +293,8 @@ icmp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) | |||
293 | { | 293 | { |
294 | const unsigned int *timeout = data; | 294 | const unsigned int *timeout = data; |
295 | 295 | ||
296 | NLA_PUT_BE32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ)); | 296 | if (nla_put_be32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ))) |
297 | 297 | goto nla_put_failure; | |
298 | return 0; | 298 | return 0; |
299 | 299 | ||
300 | nla_put_failure: | 300 | nla_put_failure: |
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index 82536701e3a3..cad29c121318 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c | |||
@@ -42,9 +42,7 @@ static int set_addr(struct sk_buff *skb, | |||
42 | if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, | 42 | if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, |
43 | addroff, sizeof(buf), | 43 | addroff, sizeof(buf), |
44 | (char *) &buf, sizeof(buf))) { | 44 | (char *) &buf, sizeof(buf))) { |
45 | if (net_ratelimit()) | 45 | net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_tcp_packet error\n"); |
46 | pr_notice("nf_nat_h323: nf_nat_mangle_tcp_packet" | ||
47 | " error\n"); | ||
48 | return -1; | 46 | return -1; |
49 | } | 47 | } |
50 | 48 | ||
@@ -58,9 +56,7 @@ static int set_addr(struct sk_buff *skb, | |||
58 | if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, | 56 | if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, |
59 | addroff, sizeof(buf), | 57 | addroff, sizeof(buf), |
60 | (char *) &buf, sizeof(buf))) { | 58 | (char *) &buf, sizeof(buf))) { |
61 | if (net_ratelimit()) | 59 | net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_udp_packet error\n"); |
62 | pr_notice("nf_nat_h323: nf_nat_mangle_udp_packet" | ||
63 | " error\n"); | ||
64 | return -1; | 60 | return -1; |
65 | } | 61 | } |
66 | /* nf_nat_mangle_udp_packet uses skb_make_writable() to copy | 62 | /* nf_nat_mangle_udp_packet uses skb_make_writable() to copy |
@@ -214,8 +210,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, | |||
214 | 210 | ||
215 | /* Run out of expectations */ | 211 | /* Run out of expectations */ |
216 | if (i >= H323_RTP_CHANNEL_MAX) { | 212 | if (i >= H323_RTP_CHANNEL_MAX) { |
217 | if (net_ratelimit()) | 213 | net_notice_ratelimited("nf_nat_h323: out of expectations\n"); |
218 | pr_notice("nf_nat_h323: out of expectations\n"); | ||
219 | return 0; | 214 | return 0; |
220 | } | 215 | } |
221 | 216 | ||
@@ -244,8 +239,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, | |||
244 | } | 239 | } |
245 | 240 | ||
246 | if (nated_port == 0) { /* No port available */ | 241 | if (nated_port == 0) { /* No port available */ |
247 | if (net_ratelimit()) | 242 | net_notice_ratelimited("nf_nat_h323: out of RTP ports\n"); |
248 | pr_notice("nf_nat_h323: out of RTP ports\n"); | ||
249 | return 0; | 243 | return 0; |
250 | } | 244 | } |
251 | 245 | ||
@@ -308,8 +302,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, | |||
308 | } | 302 | } |
309 | 303 | ||
310 | if (nated_port == 0) { /* No port available */ | 304 | if (nated_port == 0) { /* No port available */ |
311 | if (net_ratelimit()) | 305 | net_notice_ratelimited("nf_nat_h323: out of TCP ports\n"); |
312 | pr_notice("nf_nat_h323: out of TCP ports\n"); | ||
313 | return 0; | 306 | return 0; |
314 | } | 307 | } |
315 | 308 | ||
@@ -365,8 +358,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, | |||
365 | } | 358 | } |
366 | 359 | ||
367 | if (nated_port == 0) { /* No port available */ | 360 | if (nated_port == 0) { /* No port available */ |
368 | if (net_ratelimit()) | 361 | net_notice_ratelimited("nf_nat_q931: out of TCP ports\n"); |
369 | pr_notice("nf_nat_q931: out of TCP ports\n"); | ||
370 | return 0; | 362 | return 0; |
371 | } | 363 | } |
372 | 364 | ||
@@ -456,8 +448,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, | |||
456 | } | 448 | } |
457 | 449 | ||
458 | if (nated_port == 0) { /* No port available */ | 450 | if (nated_port == 0) { /* No port available */ |
459 | if (net_ratelimit()) | 451 | net_notice_ratelimited("nf_nat_ras: out of TCP ports\n"); |
460 | pr_notice("nf_nat_ras: out of TCP ports\n"); | ||
461 | return 0; | 452 | return 0; |
462 | } | 453 | } |
463 | 454 | ||
@@ -545,8 +536,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, | |||
545 | } | 536 | } |
546 | 537 | ||
547 | if (nated_port == 0) { /* No port available */ | 538 | if (nated_port == 0) { /* No port available */ |
548 | if (net_ratelimit()) | 539 | net_notice_ratelimited("nf_nat_q931: out of TCP ports\n"); |
549 | pr_notice("nf_nat_q931: out of TCP ports\n"); | ||
550 | return 0; | 540 | return 0; |
551 | } | 541 | } |
552 | 542 | ||
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 57932c43960e..ea4a23813d26 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c | |||
@@ -283,7 +283,7 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff, | |||
283 | __be32 newip; | 283 | __be32 newip; |
284 | u_int16_t port; | 284 | u_int16_t port; |
285 | char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; | 285 | char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; |
286 | unsigned buflen; | 286 | unsigned int buflen; |
287 | 287 | ||
288 | /* Connection will come from reply */ | 288 | /* Connection will come from reply */ |
289 | if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip) | 289 | if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip) |
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 2133c30a4a5f..746edec8b86e 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c | |||
@@ -1206,8 +1206,7 @@ static int snmp_translate(struct nf_conn *ct, | |||
1206 | 1206 | ||
1207 | if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr), | 1207 | if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr), |
1208 | paylen, &map, &udph->check)) { | 1208 | paylen, &map, &udph->check)) { |
1209 | if (net_ratelimit()) | 1209 | net_warn_ratelimited("bsalg: parser failed\n"); |
1210 | printk(KERN_WARNING "bsalg: parser failed\n"); | ||
1211 | return NF_DROP; | 1210 | return NF_DROP; |
1212 | } | 1211 | } |
1213 | return NF_ACCEPT; | 1212 | return NF_ACCEPT; |
@@ -1241,9 +1240,8 @@ static int help(struct sk_buff *skb, unsigned int protoff, | |||
1241 | * can mess around with the payload. | 1240 | * can mess around with the payload. |
1242 | */ | 1241 | */ |
1243 | if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) { | 1242 | if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) { |
1244 | if (net_ratelimit()) | 1243 | net_warn_ratelimited("SNMP: dropping malformed packet src=%pI4 dst=%pI4\n", |
1245 | printk(KERN_WARNING "SNMP: dropping malformed packet src=%pI4 dst=%pI4\n", | 1244 | &iph->saddr, &iph->daddr); |
1246 | &iph->saddr, &iph->daddr); | ||
1247 | return NF_DROP; | 1245 | return NF_DROP; |
1248 | } | 1246 | } |
1249 | 1247 | ||
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 50009c787bcd..6e930c7174dd 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c | |||
@@ -51,15 +51,16 @@ static struct ping_table ping_table; | |||
51 | 51 | ||
52 | static u16 ping_port_rover; | 52 | static u16 ping_port_rover; |
53 | 53 | ||
54 | static inline int ping_hashfn(struct net *net, unsigned num, unsigned mask) | 54 | static inline int ping_hashfn(struct net *net, unsigned int num, unsigned int mask) |
55 | { | 55 | { |
56 | int res = (num + net_hash_mix(net)) & mask; | 56 | int res = (num + net_hash_mix(net)) & mask; |
57 | |||
57 | pr_debug("hash(%d) = %d\n", num, res); | 58 | pr_debug("hash(%d) = %d\n", num, res); |
58 | return res; | 59 | return res; |
59 | } | 60 | } |
60 | 61 | ||
61 | static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table, | 62 | static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table, |
62 | struct net *net, unsigned num) | 63 | struct net *net, unsigned int num) |
63 | { | 64 | { |
64 | return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)]; | 65 | return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)]; |
65 | } | 66 | } |
@@ -188,7 +189,8 @@ static void inet_get_ping_group_range_net(struct net *net, gid_t *low, | |||
188 | gid_t *high) | 189 | gid_t *high) |
189 | { | 190 | { |
190 | gid_t *data = net->ipv4.sysctl_ping_group_range; | 191 | gid_t *data = net->ipv4.sysctl_ping_group_range; |
191 | unsigned seq; | 192 | unsigned int seq; |
193 | |||
192 | do { | 194 | do { |
193 | seq = read_seqbegin(&sysctl_local_ports.lock); | 195 | seq = read_seqbegin(&sysctl_local_ports.lock); |
194 | 196 | ||
@@ -410,7 +412,7 @@ struct pingfakehdr { | |||
410 | __wsum wcheck; | 412 | __wsum wcheck; |
411 | }; | 413 | }; |
412 | 414 | ||
413 | static int ping_getfrag(void *from, char * to, | 415 | static int ping_getfrag(void *from, char *to, |
414 | int offset, int fraglen, int odd, struct sk_buff *skb) | 416 | int offset, int fraglen, int odd, struct sk_buff *skb) |
415 | { | 417 | { |
416 | struct pingfakehdr *pfh = (struct pingfakehdr *)from; | 418 | struct pingfakehdr *pfh = (struct pingfakehdr *)from; |
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index bbd604c68e68..4032b818f3e4 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -288,7 +288,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) | |||
288 | read_unlock(&raw_v4_hashinfo.lock); | 288 | read_unlock(&raw_v4_hashinfo.lock); |
289 | } | 289 | } |
290 | 290 | ||
291 | static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb) | 291 | static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) |
292 | { | 292 | { |
293 | /* Charge it to the socket. */ | 293 | /* Charge it to the socket. */ |
294 | 294 | ||
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4dc1c104c942..ffcb3b016843 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -109,6 +109,7 @@ | |||
109 | #include <net/rtnetlink.h> | 109 | #include <net/rtnetlink.h> |
110 | #ifdef CONFIG_SYSCTL | 110 | #ifdef CONFIG_SYSCTL |
111 | #include <linux/sysctl.h> | 111 | #include <linux/sysctl.h> |
112 | #include <linux/kmemleak.h> | ||
112 | #endif | 113 | #endif |
113 | #include <net/secure_seq.h> | 114 | #include <net/secure_seq.h> |
114 | 115 | ||
@@ -229,7 +230,7 @@ const __u8 ip_tos2prio[16] = { | |||
229 | TC_PRIO_INTERACTIVE_BULK, | 230 | TC_PRIO_INTERACTIVE_BULK, |
230 | ECN_OR_COST(INTERACTIVE_BULK) | 231 | ECN_OR_COST(INTERACTIVE_BULK) |
231 | }; | 232 | }; |
232 | 233 | EXPORT_SYMBOL(ip_tos2prio); | |
233 | 234 | ||
234 | /* | 235 | /* |
235 | * Route cache. | 236 | * Route cache. |
@@ -296,7 +297,7 @@ static inline void rt_hash_lock_init(void) | |||
296 | #endif | 297 | #endif |
297 | 298 | ||
298 | static struct rt_hash_bucket *rt_hash_table __read_mostly; | 299 | static struct rt_hash_bucket *rt_hash_table __read_mostly; |
299 | static unsigned rt_hash_mask __read_mostly; | 300 | static unsigned int rt_hash_mask __read_mostly; |
300 | static unsigned int rt_hash_log __read_mostly; | 301 | static unsigned int rt_hash_log __read_mostly; |
301 | 302 | ||
302 | static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); | 303 | static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); |
@@ -959,8 +960,7 @@ void rt_cache_flush_batch(struct net *net) | |||
959 | 960 | ||
960 | static void rt_emergency_hash_rebuild(struct net *net) | 961 | static void rt_emergency_hash_rebuild(struct net *net) |
961 | { | 962 | { |
962 | if (net_ratelimit()) | 963 | net_warn_ratelimited("Route hash chain too long!\n"); |
963 | pr_warn("Route hash chain too long!\n"); | ||
964 | rt_cache_invalidate(net); | 964 | rt_cache_invalidate(net); |
965 | } | 965 | } |
966 | 966 | ||
@@ -1083,8 +1083,7 @@ static int rt_garbage_collect(struct dst_ops *ops) | |||
1083 | goto out; | 1083 | goto out; |
1084 | if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size) | 1084 | if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size) |
1085 | goto out; | 1085 | goto out; |
1086 | if (net_ratelimit()) | 1086 | net_warn_ratelimited("dst cache overflow\n"); |
1087 | pr_warn("dst cache overflow\n"); | ||
1088 | RT_CACHE_STAT_INC(gc_dst_overflow); | 1087 | RT_CACHE_STAT_INC(gc_dst_overflow); |
1089 | return 1; | 1088 | return 1; |
1090 | 1089 | ||
@@ -1143,7 +1142,7 @@ static int rt_bind_neighbour(struct rtable *rt) | |||
1143 | return 0; | 1142 | return 0; |
1144 | } | 1143 | } |
1145 | 1144 | ||
1146 | static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt, | 1145 | static struct rtable *rt_intern_hash(unsigned int hash, struct rtable *rt, |
1147 | struct sk_buff *skb, int ifindex) | 1146 | struct sk_buff *skb, int ifindex) |
1148 | { | 1147 | { |
1149 | struct rtable *rth, *cand; | 1148 | struct rtable *rth, *cand; |
@@ -1181,8 +1180,7 @@ restart: | |||
1181 | if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { | 1180 | if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { |
1182 | int err = rt_bind_neighbour(rt); | 1181 | int err = rt_bind_neighbour(rt); |
1183 | if (err) { | 1182 | if (err) { |
1184 | if (net_ratelimit()) | 1183 | net_warn_ratelimited("Neighbour table failure & not caching routes\n"); |
1185 | pr_warn("Neighbour table failure & not caching routes\n"); | ||
1186 | ip_rt_put(rt); | 1184 | ip_rt_put(rt); |
1187 | return ERR_PTR(err); | 1185 | return ERR_PTR(err); |
1188 | } | 1186 | } |
@@ -1298,8 +1296,7 @@ restart: | |||
1298 | goto restart; | 1296 | goto restart; |
1299 | } | 1297 | } |
1300 | 1298 | ||
1301 | if (net_ratelimit()) | 1299 | net_warn_ratelimited("Neighbour table overflow\n"); |
1302 | pr_warn("Neighbour table overflow\n"); | ||
1303 | rt_drop(rt); | 1300 | rt_drop(rt); |
1304 | return ERR_PTR(-ENOBUFS); | 1301 | return ERR_PTR(-ENOBUFS); |
1305 | } | 1302 | } |
@@ -1377,14 +1374,13 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) | |||
1377 | return; | 1374 | return; |
1378 | } | 1375 | } |
1379 | } else if (!rt) | 1376 | } else if (!rt) |
1380 | printk(KERN_DEBUG "rt_bind_peer(0) @%p\n", | 1377 | pr_debug("rt_bind_peer(0) @%p\n", __builtin_return_address(0)); |
1381 | __builtin_return_address(0)); | ||
1382 | 1378 | ||
1383 | ip_select_fb_ident(iph); | 1379 | ip_select_fb_ident(iph); |
1384 | } | 1380 | } |
1385 | EXPORT_SYMBOL(__ip_select_ident); | 1381 | EXPORT_SYMBOL(__ip_select_ident); |
1386 | 1382 | ||
1387 | static void rt_del(unsigned hash, struct rtable *rt) | 1383 | static void rt_del(unsigned int hash, struct rtable *rt) |
1388 | { | 1384 | { |
1389 | struct rtable __rcu **rthp; | 1385 | struct rtable __rcu **rthp; |
1390 | struct rtable *aux; | 1386 | struct rtable *aux; |
@@ -1502,11 +1498,11 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1502 | 1498 | ||
1503 | reject_redirect: | 1499 | reject_redirect: |
1504 | #ifdef CONFIG_IP_ROUTE_VERBOSE | 1500 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
1505 | if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) | 1501 | if (IN_DEV_LOG_MARTIANS(in_dev)) |
1506 | pr_info("Redirect from %pI4 on %s about %pI4 ignored\n" | 1502 | net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n" |
1507 | " Advised path = %pI4 -> %pI4\n", | 1503 | " Advised path = %pI4 -> %pI4\n", |
1508 | &old_gw, dev->name, &new_gw, | 1504 | &old_gw, dev->name, &new_gw, |
1509 | &saddr, &daddr); | 1505 | &saddr, &daddr); |
1510 | #endif | 1506 | #endif |
1511 | ; | 1507 | ; |
1512 | } | 1508 | } |
@@ -1538,7 +1534,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) | |||
1538 | ip_rt_put(rt); | 1534 | ip_rt_put(rt); |
1539 | ret = NULL; | 1535 | ret = NULL; |
1540 | } else if (rt->rt_flags & RTCF_REDIRECTED) { | 1536 | } else if (rt->rt_flags & RTCF_REDIRECTED) { |
1541 | unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, | 1537 | unsigned int hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, |
1542 | rt->rt_oif, | 1538 | rt->rt_oif, |
1543 | rt_genid(dev_net(dst->dev))); | 1539 | rt_genid(dev_net(dst->dev))); |
1544 | rt_del(hash, rt); | 1540 | rt_del(hash, rt); |
@@ -1616,11 +1612,10 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
1616 | ++peer->rate_tokens; | 1612 | ++peer->rate_tokens; |
1617 | #ifdef CONFIG_IP_ROUTE_VERBOSE | 1613 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
1618 | if (log_martians && | 1614 | if (log_martians && |
1619 | peer->rate_tokens == ip_rt_redirect_number && | 1615 | peer->rate_tokens == ip_rt_redirect_number) |
1620 | net_ratelimit()) | 1616 | net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", |
1621 | pr_warn("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", | 1617 | &ip_hdr(skb)->saddr, rt->rt_iif, |
1622 | &ip_hdr(skb)->saddr, rt->rt_iif, | 1618 | &rt->rt_dst, &rt->rt_gateway); |
1623 | &rt->rt_dst, &rt->rt_gateway); | ||
1624 | #endif | 1619 | #endif |
1625 | } | 1620 | } |
1626 | } | 1621 | } |
@@ -1843,9 +1838,9 @@ static void ipv4_link_failure(struct sk_buff *skb) | |||
1843 | 1838 | ||
1844 | static int ip_rt_bug(struct sk_buff *skb) | 1839 | static int ip_rt_bug(struct sk_buff *skb) |
1845 | { | 1840 | { |
1846 | printk(KERN_DEBUG "ip_rt_bug: %pI4 -> %pI4, %s\n", | 1841 | pr_debug("%s: %pI4 -> %pI4, %s\n", |
1847 | &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, | 1842 | __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, |
1848 | skb->dev ? skb->dev->name : "?"); | 1843 | skb->dev ? skb->dev->name : "?"); |
1849 | kfree_skb(skb); | 1844 | kfree_skb(skb); |
1850 | WARN_ON(1); | 1845 | WARN_ON(1); |
1851 | return 0; | 1846 | return 0; |
@@ -2041,7 +2036,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2041 | if (err < 0) | 2036 | if (err < 0) |
2042 | goto e_err; | 2037 | goto e_err; |
2043 | } | 2038 | } |
2044 | rth = rt_dst_alloc(init_net.loopback_dev, | 2039 | rth = rt_dst_alloc(dev_net(dev)->loopback_dev, |
2045 | IN_DEV_CONF_GET(in_dev, NOPOLICY), false); | 2040 | IN_DEV_CONF_GET(in_dev, NOPOLICY), false); |
2046 | if (!rth) | 2041 | if (!rth) |
2047 | goto e_nobufs; | 2042 | goto e_nobufs; |
@@ -2134,8 +2129,7 @@ static int __mkroute_input(struct sk_buff *skb, | |||
2134 | /* get a working reference to the output device */ | 2129 | /* get a working reference to the output device */ |
2135 | out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); | 2130 | out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); |
2136 | if (out_dev == NULL) { | 2131 | if (out_dev == NULL) { |
2137 | if (net_ratelimit()) | 2132 | net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n"); |
2138 | pr_crit("Bug in ip_route_input_slow(). Please report.\n"); | ||
2139 | return -EINVAL; | 2133 | return -EINVAL; |
2140 | } | 2134 | } |
2141 | 2135 | ||
@@ -2215,9 +2209,9 @@ static int ip_mkroute_input(struct sk_buff *skb, | |||
2215 | struct in_device *in_dev, | 2209 | struct in_device *in_dev, |
2216 | __be32 daddr, __be32 saddr, u32 tos) | 2210 | __be32 daddr, __be32 saddr, u32 tos) |
2217 | { | 2211 | { |
2218 | struct rtable* rth = NULL; | 2212 | struct rtable *rth = NULL; |
2219 | int err; | 2213 | int err; |
2220 | unsigned hash; | 2214 | unsigned int hash; |
2221 | 2215 | ||
2222 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 2216 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
2223 | if (res->fi && res->fi->fib_nhs > 1) | 2217 | if (res->fi && res->fi->fib_nhs > 1) |
@@ -2255,13 +2249,13 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2255 | struct fib_result res; | 2249 | struct fib_result res; |
2256 | struct in_device *in_dev = __in_dev_get_rcu(dev); | 2250 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
2257 | struct flowi4 fl4; | 2251 | struct flowi4 fl4; |
2258 | unsigned flags = 0; | 2252 | unsigned int flags = 0; |
2259 | u32 itag = 0; | 2253 | u32 itag = 0; |
2260 | struct rtable * rth; | 2254 | struct rtable *rth; |
2261 | unsigned hash; | 2255 | unsigned int hash; |
2262 | __be32 spec_dst; | 2256 | __be32 spec_dst; |
2263 | int err = -EINVAL; | 2257 | int err = -EINVAL; |
2264 | struct net * net = dev_net(dev); | 2258 | struct net *net = dev_net(dev); |
2265 | 2259 | ||
2266 | /* IP on this device is disabled. */ | 2260 | /* IP on this device is disabled. */ |
2267 | 2261 | ||
@@ -2406,9 +2400,9 @@ no_route: | |||
2406 | martian_destination: | 2400 | martian_destination: |
2407 | RT_CACHE_STAT_INC(in_martian_dst); | 2401 | RT_CACHE_STAT_INC(in_martian_dst); |
2408 | #ifdef CONFIG_IP_ROUTE_VERBOSE | 2402 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
2409 | if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) | 2403 | if (IN_DEV_LOG_MARTIANS(in_dev)) |
2410 | pr_warn("martian destination %pI4 from %pI4, dev %s\n", | 2404 | net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n", |
2411 | &daddr, &saddr, dev->name); | 2405 | &daddr, &saddr, dev->name); |
2412 | #endif | 2406 | #endif |
2413 | 2407 | ||
2414 | e_hostunreach: | 2408 | e_hostunreach: |
@@ -2433,8 +2427,8 @@ martian_source_keep_err: | |||
2433 | int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, | 2427 | int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, |
2434 | u8 tos, struct net_device *dev, bool noref) | 2428 | u8 tos, struct net_device *dev, bool noref) |
2435 | { | 2429 | { |
2436 | struct rtable * rth; | 2430 | struct rtable *rth; |
2437 | unsigned hash; | 2431 | unsigned int hash; |
2438 | int iif = dev->ifindex; | 2432 | int iif = dev->ifindex; |
2439 | struct net *net; | 2433 | struct net *net; |
2440 | int res; | 2434 | int res; |
@@ -2972,7 +2966,8 @@ static int rt_fill_info(struct net *net, | |||
2972 | r->rtm_src_len = 0; | 2966 | r->rtm_src_len = 0; |
2973 | r->rtm_tos = rt->rt_key_tos; | 2967 | r->rtm_tos = rt->rt_key_tos; |
2974 | r->rtm_table = RT_TABLE_MAIN; | 2968 | r->rtm_table = RT_TABLE_MAIN; |
2975 | NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); | 2969 | if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN)) |
2970 | goto nla_put_failure; | ||
2976 | r->rtm_type = rt->rt_type; | 2971 | r->rtm_type = rt->rt_type; |
2977 | r->rtm_scope = RT_SCOPE_UNIVERSE; | 2972 | r->rtm_scope = RT_SCOPE_UNIVERSE; |
2978 | r->rtm_protocol = RTPROT_UNSPEC; | 2973 | r->rtm_protocol = RTPROT_UNSPEC; |
@@ -2980,31 +2975,38 @@ static int rt_fill_info(struct net *net, | |||
2980 | if (rt->rt_flags & RTCF_NOTIFY) | 2975 | if (rt->rt_flags & RTCF_NOTIFY) |
2981 | r->rtm_flags |= RTM_F_NOTIFY; | 2976 | r->rtm_flags |= RTM_F_NOTIFY; |
2982 | 2977 | ||
2983 | NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst); | 2978 | if (nla_put_be32(skb, RTA_DST, rt->rt_dst)) |
2984 | 2979 | goto nla_put_failure; | |
2985 | if (rt->rt_key_src) { | 2980 | if (rt->rt_key_src) { |
2986 | r->rtm_src_len = 32; | 2981 | r->rtm_src_len = 32; |
2987 | NLA_PUT_BE32(skb, RTA_SRC, rt->rt_key_src); | 2982 | if (nla_put_be32(skb, RTA_SRC, rt->rt_key_src)) |
2983 | goto nla_put_failure; | ||
2988 | } | 2984 | } |
2989 | if (rt->dst.dev) | 2985 | if (rt->dst.dev && |
2990 | NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); | 2986 | nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) |
2987 | goto nla_put_failure; | ||
2991 | #ifdef CONFIG_IP_ROUTE_CLASSID | 2988 | #ifdef CONFIG_IP_ROUTE_CLASSID |
2992 | if (rt->dst.tclassid) | 2989 | if (rt->dst.tclassid && |
2993 | NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); | 2990 | nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid)) |
2991 | goto nla_put_failure; | ||
2994 | #endif | 2992 | #endif |
2995 | if (rt_is_input_route(rt)) | 2993 | if (rt_is_input_route(rt)) { |
2996 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); | 2994 | if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_spec_dst)) |
2997 | else if (rt->rt_src != rt->rt_key_src) | 2995 | goto nla_put_failure; |
2998 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src); | 2996 | } else if (rt->rt_src != rt->rt_key_src) { |
2999 | 2997 | if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_src)) | |
3000 | if (rt->rt_dst != rt->rt_gateway) | 2998 | goto nla_put_failure; |
3001 | NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); | 2999 | } |
3000 | if (rt->rt_dst != rt->rt_gateway && | ||
3001 | nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway)) | ||
3002 | goto nla_put_failure; | ||
3002 | 3003 | ||
3003 | if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) | 3004 | if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) |
3004 | goto nla_put_failure; | 3005 | goto nla_put_failure; |
3005 | 3006 | ||
3006 | if (rt->rt_mark) | 3007 | if (rt->rt_mark && |
3007 | NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark); | 3008 | nla_put_be32(skb, RTA_MARK, rt->rt_mark)) |
3009 | goto nla_put_failure; | ||
3008 | 3010 | ||
3009 | error = rt->dst.error; | 3011 | error = rt->dst.error; |
3010 | if (peer) { | 3012 | if (peer) { |
@@ -3045,7 +3047,8 @@ static int rt_fill_info(struct net *net, | |||
3045 | } | 3047 | } |
3046 | } else | 3048 | } else |
3047 | #endif | 3049 | #endif |
3048 | NLA_PUT_U32(skb, RTA_IIF, rt->rt_iif); | 3050 | if (nla_put_u32(skb, RTA_IIF, rt->rt_iif)) |
3051 | goto nla_put_failure; | ||
3049 | } | 3052 | } |
3050 | 3053 | ||
3051 | if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, | 3054 | if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, |
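The rt_fill_info() hunks above convert the NLA_PUT*() macros to explicit nla_put_*() calls. The old macros hid a jump in their expansion, roughly the following (reconstructed from the historic include/net/netlink.h, so take the exact shape with a grain of salt):

	#define NLA_PUT(skb, attrtype, attrlen, data)				\
	do {									\
		if (unlikely(nla_put(skb, attrtype, attrlen, data) < 0))	\
			goto nla_put_failure;					\
	} while (0)

Making the goto visible lets conditional attributes collapse into a single if that combines the predicate with the put, as in the RTA_OIF, RTA_FLOW and RTA_MARK cases above.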
@@ -3059,7 +3062,7 @@ nla_put_failure: | |||
3059 | return -EMSGSIZE; | 3062 | return -EMSGSIZE; |
3060 | } | 3063 | } |
3061 | 3064 | ||
3062 | static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) | 3065 | static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) |
3063 | { | 3066 | { |
3064 | struct net *net = sock_net(in_skb->sk); | 3067 | struct net *net = sock_net(in_skb->sk); |
3065 | struct rtmsg *rtm; | 3068 | struct rtmsg *rtm; |
@@ -3334,23 +3337,6 @@ static ctl_table ipv4_route_table[] = { | |||
3334 | { } | 3337 | { } |
3335 | }; | 3338 | }; |
3336 | 3339 | ||
3337 | static struct ctl_table empty[1]; | ||
3338 | |||
3339 | static struct ctl_table ipv4_skeleton[] = | ||
3340 | { | ||
3341 | { .procname = "route", | ||
3342 | .mode = 0555, .child = ipv4_route_table}, | ||
3343 | { .procname = "neigh", | ||
3344 | .mode = 0555, .child = empty}, | ||
3345 | { } | ||
3346 | }; | ||
3347 | |||
3348 | static __net_initdata struct ctl_path ipv4_path[] = { | ||
3349 | { .procname = "net", }, | ||
3350 | { .procname = "ipv4", }, | ||
3351 | { }, | ||
3352 | }; | ||
3353 | |||
3354 | static struct ctl_table ipv4_route_flush_table[] = { | 3340 | static struct ctl_table ipv4_route_flush_table[] = { |
3355 | { | 3341 | { |
3356 | .procname = "flush", | 3342 | .procname = "flush", |
@@ -3361,13 +3347,6 @@ static struct ctl_table ipv4_route_flush_table[] = { | |||
3361 | { }, | 3347 | { }, |
3362 | }; | 3348 | }; |
3363 | 3349 | ||
3364 | static __net_initdata struct ctl_path ipv4_route_path[] = { | ||
3365 | { .procname = "net", }, | ||
3366 | { .procname = "ipv4", }, | ||
3367 | { .procname = "route", }, | ||
3368 | { }, | ||
3369 | }; | ||
3370 | |||
3371 | static __net_init int sysctl_route_net_init(struct net *net) | 3350 | static __net_init int sysctl_route_net_init(struct net *net) |
3372 | { | 3351 | { |
3373 | struct ctl_table *tbl; | 3352 | struct ctl_table *tbl; |
@@ -3380,8 +3359,7 @@ static __net_init int sysctl_route_net_init(struct net *net) | |||
3380 | } | 3359 | } |
3381 | tbl[0].extra1 = net; | 3360 | tbl[0].extra1 = net; |
3382 | 3361 | ||
3383 | net->ipv4.route_hdr = | 3362 | net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl); |
3384 | register_net_sysctl_table(net, ipv4_route_path, tbl); | ||
3385 | if (net->ipv4.route_hdr == NULL) | 3363 | if (net->ipv4.route_hdr == NULL) |
3386 | goto err_reg; | 3364 | goto err_reg; |
3387 | return 0; | 3365 | return 0; |
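This is the consumer side of the new register_net_sysctl() interface: the ctl_path arrays ("net", "ipv4", "route", ...) deleted above are replaced by a plain path string. The helper itself plausibly reduces to a one-line wrapper (a sketch, assuming the per-net sysctl set is reachable from struct net):

	struct ctl_table_header *register_net_sysctl(struct net *net,
						     const char *path,
						     struct ctl_table *table)
	{
		return __register_sysctl_table(&net->sysctls, path, table);
	}

The same conversion is applied to "net/ipv4" in sysctl_net_ipv4.c below, which lets the exported net_ipv4_ctl_path array disappear entirely.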
@@ -3430,9 +3408,15 @@ struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; | |||
3430 | static __initdata unsigned long rhash_entries; | 3408 | static __initdata unsigned long rhash_entries; |
3431 | static int __init set_rhash_entries(char *str) | 3409 | static int __init set_rhash_entries(char *str) |
3432 | { | 3410 | { |
3411 | ssize_t ret; | ||
3412 | |||
3433 | if (!str) | 3413 | if (!str) |
3434 | return 0; | 3414 | return 0; |
3435 | rhash_entries = simple_strtoul(str, &str, 0); | 3415 | |
3416 | ret = kstrtoul(str, 0, &rhash_entries); | ||
3417 | if (ret) | ||
3418 | return 0; | ||
3419 | |||
3436 | return 1; | 3420 | return 1; |
3437 | } | 3421 | } |
3438 | __setup("rhash_entries=", set_rhash_entries); | 3422 | __setup("rhash_entries=", set_rhash_entries); |
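simple_strtoul() silently stops at the first non-digit and has no way to report overflow; kstrtoul() parses the whole string and returns an error code. A small illustration with made-up inputs:

	unsigned long v;

	simple_strtoul("123abc", NULL, 0);	/* yields 123, "abc" ignored */
	kstrtoul("123abc", 0, &v);		/* returns -EINVAL, v untouched */
	kstrtoul("123", 0, &v);			/* returns 0, v == 123 */

(kstrtoul() returns int; storing it in the ssize_t ret above is harmless, just wider than needed.) The identical conversion is applied to thash_entries in tcp.c further down.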
@@ -3505,6 +3489,6 @@ int __init ip_rt_init(void) | |||
3505 | */ | 3489 | */ |
3506 | void __init ip_static_sysctl_init(void) | 3490 | void __init ip_static_sysctl_init(void) |
3507 | { | 3491 | { |
3508 | register_sysctl_paths(ipv4_path, ipv4_skeleton); | 3492 | register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table); |
3509 | } | 3493 | } |
3510 | #endif | 3494 | #endif |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 7a7724da9bff..ef32956ed655 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <net/tcp_memcontrol.h> | 27 | #include <net/tcp_memcontrol.h> |
28 | 28 | ||
29 | static int zero; | 29 | static int zero; |
30 | static int two = 2; | ||
30 | static int tcp_retr1_max = 255; | 31 | static int tcp_retr1_max = 255; |
31 | static int ip_local_port_range_min[] = { 1, 1 }; | 32 | static int ip_local_port_range_min[] = { 1, 1 }; |
32 | static int ip_local_port_range_max[] = { 65535, 65535 }; | 33 | static int ip_local_port_range_max[] = { 65535, 65535 }; |
@@ -78,7 +79,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, | |||
78 | static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high) | 79 | static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high) |
79 | { | 80 | { |
80 | gid_t *data = table->data; | 81 | gid_t *data = table->data; |
81 | unsigned seq; | 82 | unsigned int seq; |
82 | do { | 83 | do { |
83 | seq = read_seqbegin(&sysctl_local_ports.lock); | 84 | seq = read_seqbegin(&sysctl_local_ports.lock); |
84 | 85 | ||
@@ -677,6 +678,15 @@ static struct ctl_table ipv4_table[] = { | |||
677 | .proc_handler = proc_dointvec | 678 | .proc_handler = proc_dointvec |
678 | }, | 679 | }, |
679 | { | 680 | { |
681 | .procname = "tcp_early_retrans", | ||
682 | .data = &sysctl_tcp_early_retrans, | ||
683 | .maxlen = sizeof(int), | ||
684 | .mode = 0644, | ||
685 | .proc_handler = proc_dointvec_minmax, | ||
686 | .extra1 = &zero, | ||
687 | .extra2 = &two, | ||
688 | }, | ||
689 | { | ||
680 | .procname = "udp_mem", | 690 | .procname = "udp_mem", |
681 | .data = &sysctl_udp_mem, | 691 | .data = &sysctl_udp_mem, |
682 | .maxlen = sizeof(sysctl_udp_mem), | 692 | .maxlen = sizeof(sysctl_udp_mem), |
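The new tcp_early_retrans entry uses proc_dointvec_minmax with extra1/extra2 pointing at zero and the new two, so only the values 0, 1 and 2 can ever be written through /proc. The core of that validation behaves roughly like this (an illustrative sketch, not the verbatim handler):

	static int minmax_check(int *val, int new, const int *min, const int *max)
	{
		if ((min && new < *min) || (max && new > *max))
			return -EINVAL;	/* write rejected, *val unchanged */
		*val = new;
		return 0;
	}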
@@ -768,13 +778,6 @@ static struct ctl_table ipv4_net_table[] = { | |||
768 | { } | 778 | { } |
769 | }; | 779 | }; |
770 | 780 | ||
771 | struct ctl_path net_ipv4_ctl_path[] = { | ||
772 | { .procname = "net", }, | ||
773 | { .procname = "ipv4", }, | ||
774 | { }, | ||
775 | }; | ||
776 | EXPORT_SYMBOL_GPL(net_ipv4_ctl_path); | ||
777 | |||
778 | static __net_init int ipv4_sysctl_init_net(struct net *net) | 781 | static __net_init int ipv4_sysctl_init_net(struct net *net) |
779 | { | 782 | { |
780 | struct ctl_table *table; | 783 | struct ctl_table *table; |
@@ -815,8 +818,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) | |||
815 | 818 | ||
816 | tcp_init_mem(net); | 819 | tcp_init_mem(net); |
817 | 820 | ||
818 | net->ipv4.ipv4_hdr = register_net_sysctl_table(net, | 821 | net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); |
819 | net_ipv4_ctl_path, table); | ||
820 | if (net->ipv4.ipv4_hdr == NULL) | 822 | if (net->ipv4.ipv4_hdr == NULL) |
821 | goto err_reg; | 823 | goto err_reg; |
822 | 824 | ||
@@ -857,12 +859,12 @@ static __init int sysctl_ipv4_init(void) | |||
857 | if (!i->procname) | 859 | if (!i->procname) |
858 | return -EINVAL; | 860 | return -EINVAL; |
859 | 861 | ||
860 | hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table); | 862 | hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table); |
861 | if (hdr == NULL) | 863 | if (hdr == NULL) |
862 | return -ENOMEM; | 864 | return -ENOMEM; |
863 | 865 | ||
864 | if (register_pernet_subsys(&ipv4_sysctl_ops)) { | 866 | if (register_pernet_subsys(&ipv4_sysctl_ops)) { |
865 | unregister_sysctl_table(hdr); | 867 | unregister_net_sysctl_table(hdr); |
866 | return -ENOMEM; | 868 | return -ENOMEM; |
867 | } | 869 | } |
868 | 870 | ||
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index cfd7edda0a8e..bb485fcb077e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -363,6 +363,71 @@ static int retrans_to_secs(u8 retrans, int timeout, int rto_max) | |||
363 | return period; | 363 | return period; |
364 | } | 364 | } |
365 | 365 | ||
366 | /* Address-family independent initialization for a tcp_sock. | ||
367 | * | ||
368 | * NOTE: A lot of things set to zero explicitly by call to | ||
369 | * sk_alloc() so need not be done here. | ||
370 | */ | ||
371 | void tcp_init_sock(struct sock *sk) | ||
372 | { | ||
373 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
374 | struct tcp_sock *tp = tcp_sk(sk); | ||
375 | |||
376 | skb_queue_head_init(&tp->out_of_order_queue); | ||
377 | tcp_init_xmit_timers(sk); | ||
378 | tcp_prequeue_init(tp); | ||
379 | |||
380 | icsk->icsk_rto = TCP_TIMEOUT_INIT; | ||
381 | tp->mdev = TCP_TIMEOUT_INIT; | ||
382 | |||
383 | /* So many TCP implementations out there (incorrectly) count the | ||
384 | * initial SYN frame in their delayed-ACK and congestion control | ||
385 | * algorithms that we must have the following bandaid to talk | ||
386 | * efficiently to them. -DaveM | ||
387 | */ | ||
388 | tp->snd_cwnd = TCP_INIT_CWND; | ||
389 | |||
390 | /* See draft-stevens-tcpca-spec-01 for discussion of the | ||
391 | * initialization of these values. | ||
392 | */ | ||
393 | tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; | ||
394 | tp->snd_cwnd_clamp = ~0; | ||
395 | tp->mss_cache = TCP_MSS_DEFAULT; | ||
396 | |||
397 | tp->reordering = sysctl_tcp_reordering; | ||
398 | tcp_enable_early_retrans(tp); | ||
399 | icsk->icsk_ca_ops = &tcp_init_congestion_ops; | ||
400 | |||
401 | sk->sk_state = TCP_CLOSE; | ||
402 | |||
403 | sk->sk_write_space = sk_stream_write_space; | ||
404 | sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); | ||
405 | |||
406 | icsk->icsk_sync_mss = tcp_sync_mss; | ||
407 | |||
408 | /* TCP Cookie Transactions */ | ||
409 | if (sysctl_tcp_cookie_size > 0) { | ||
410 | /* Default, cookies without s_data_payload. */ | ||
411 | tp->cookie_values = | ||
412 | kzalloc(sizeof(*tp->cookie_values), | ||
413 | sk->sk_allocation); | ||
414 | if (tp->cookie_values != NULL) | ||
415 | kref_init(&tp->cookie_values->kref); | ||
416 | } | ||
417 | /* Presumed zeroed, in order of appearance: | ||
418 | * cookie_in_always, cookie_out_never, | ||
419 | * s_data_constant, s_data_in, s_data_out | ||
420 | */ | ||
421 | sk->sk_sndbuf = sysctl_tcp_wmem[1]; | ||
422 | sk->sk_rcvbuf = sysctl_tcp_rmem[1]; | ||
423 | |||
424 | local_bh_disable(); | ||
425 | sock_update_memcg(sk); | ||
426 | sk_sockets_allocated_inc(sk); | ||
427 | local_bh_enable(); | ||
428 | } | ||
429 | EXPORT_SYMBOL(tcp_init_sock); | ||
430 | |||
366 | /* | 431 | /* |
367 | * Wait for a TCP event. | 432 | * Wait for a TCP event. |
368 | * | 433 | * |
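tcp_init_sock() hoists the address-family independent setup that was previously duplicated between the IPv4 and IPv6 socket init paths. A sketch of what a per-family caller is left with afterwards (shape assumed from this series; only the af_ops and MD5 hooks stay family-specific):

	static int tcp_v4_init_sock(struct sock *sk)
	{
		struct inet_connection_sock *icsk = inet_csk(sk);

		tcp_init_sock(sk);			/* shared TCP setup */

		icsk->icsk_af_ops = &ipv4_specific;	/* v4-specific remainder */
	#ifdef CONFIG_TCP_MD5SIG
		tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
	#endif
		return 0;
	}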
@@ -528,7 +593,7 @@ static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) | |||
528 | tp->pushed_seq = tp->write_seq; | 593 | tp->pushed_seq = tp->write_seq; |
529 | } | 594 | } |
530 | 595 | ||
531 | static inline int forced_push(const struct tcp_sock *tp) | 596 | static inline bool forced_push(const struct tcp_sock *tp) |
532 | { | 597 | { |
533 | return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1)); | 598 | return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1)); |
534 | } | 599 | } |
@@ -701,11 +766,12 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) | |||
701 | skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp); | 766 | skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp); |
702 | if (skb) { | 767 | if (skb) { |
703 | if (sk_wmem_schedule(sk, skb->truesize)) { | 768 | if (sk_wmem_schedule(sk, skb->truesize)) { |
769 | skb_reserve(skb, sk->sk_prot->max_header); | ||
704 | /* | 770 | /* |
705 | * Make sure that we have exactly size bytes | 771 | * Make sure that we have exactly size bytes |
706 | * available to the caller, no more, no less. | 772 | * available to the caller, no more, no less. |
707 | */ | 773 | */ |
708 | skb_reserve(skb, skb_tailroom(skb) - size); | 774 | skb->avail_size = size; |
709 | return skb; | 775 | return skb; |
710 | } | 776 | } |
711 | __kfree_skb(skb); | 777 | __kfree_skb(skb); |
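sk_stream_alloc_skb() now reserves max_header immediately and records the space the caller asked for in skb->avail_size, instead of shrinking the tailroom to fit. The tcp_sendmsg() hunk below reads that back through skb_availroom(); a sketch of the helper as this series plausibly defines it:

	static inline int skb_availroom(const struct sk_buff *skb)
	{
		return skb_is_nonlinear(skb) ? 0 : skb->avail_size - skb->len;
	}

The effect is that TCP stops treating the slack added by kmalloc() size rounding as usable payload room, so the skb head keeps the size that was actually requested.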
@@ -783,9 +849,10 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse | |||
783 | while (psize > 0) { | 849 | while (psize > 0) { |
784 | struct sk_buff *skb = tcp_write_queue_tail(sk); | 850 | struct sk_buff *skb = tcp_write_queue_tail(sk); |
785 | struct page *page = pages[poffset / PAGE_SIZE]; | 851 | struct page *page = pages[poffset / PAGE_SIZE]; |
786 | int copy, i, can_coalesce; | 852 | int copy, i; |
787 | int offset = poffset % PAGE_SIZE; | 853 | int offset = poffset % PAGE_SIZE; |
788 | int size = min_t(size_t, psize, PAGE_SIZE - offset); | 854 | int size = min_t(size_t, psize, PAGE_SIZE - offset); |
855 | bool can_coalesce; | ||
789 | 856 | ||
790 | if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) { | 857 | if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) { |
791 | new_segment: | 858 | new_segment: |
@@ -850,8 +917,7 @@ new_segment: | |||
850 | wait_for_sndbuf: | 917 | wait_for_sndbuf: |
851 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | 918 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); |
852 | wait_for_memory: | 919 | wait_for_memory: |
853 | if (copied) | 920 | tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); |
854 | tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); | ||
855 | 921 | ||
856 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) | 922 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) |
857 | goto do_error; | 923 | goto do_error; |
@@ -860,7 +926,7 @@ wait_for_memory: | |||
860 | } | 926 | } |
861 | 927 | ||
862 | out: | 928 | out: |
863 | if (copied) | 929 | if (copied && !(flags & MSG_SENDPAGE_NOTLAST)) |
864 | tcp_push(sk, flags, mss_now, tp->nonagle); | 930 | tcp_push(sk, flags, mss_now, tp->nonagle); |
865 | return copied; | 931 | return copied; |
866 | 932 | ||
@@ -918,7 +984,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
918 | struct tcp_sock *tp = tcp_sk(sk); | 984 | struct tcp_sock *tp = tcp_sk(sk); |
919 | struct sk_buff *skb; | 985 | struct sk_buff *skb; |
920 | int iovlen, flags, err, copied; | 986 | int iovlen, flags, err, copied; |
921 | int mss_now, size_goal; | 987 | int mss_now = 0, size_goal; |
922 | bool sg; | 988 | bool sg; |
923 | long timeo; | 989 | long timeo; |
924 | 990 | ||
@@ -932,6 +998,19 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
932 | if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) | 998 | if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) |
933 | goto out_err; | 999 | goto out_err; |
934 | 1000 | ||
1001 | if (unlikely(tp->repair)) { | ||
1002 | if (tp->repair_queue == TCP_RECV_QUEUE) { | ||
1003 | copied = tcp_send_rcvq(sk, msg, size); | ||
1004 | goto out; | ||
1005 | } | ||
1006 | |||
1007 | err = -EINVAL; | ||
1008 | if (tp->repair_queue == TCP_NO_QUEUE) | ||
1009 | goto out_err; | ||
1010 | |||
1011 | /* 'common' sending to sendq */ | ||
1012 | } | ||
1013 | |||
935 | /* This should be in poll */ | 1014 | /* This should be in poll */ |
936 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); | 1015 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); |
937 | 1016 | ||
@@ -995,15 +1074,14 @@ new_segment: | |||
995 | copy = seglen; | 1074 | copy = seglen; |
996 | 1075 | ||
997 | /* Where to copy to? */ | 1076 | /* Where to copy to? */ |
998 | if (skb_tailroom(skb) > 0) { | 1077 | if (skb_availroom(skb) > 0) { |
999 | /* We have some space in skb head. Superb! */ | 1078 | /* We have some space in skb head. Superb! */ |
1000 | if (copy > skb_tailroom(skb)) | 1079 | copy = min_t(int, copy, skb_availroom(skb)); |
1001 | copy = skb_tailroom(skb); | ||
1002 | err = skb_add_data_nocache(sk, skb, from, copy); | 1080 | err = skb_add_data_nocache(sk, skb, from, copy); |
1003 | if (err) | 1081 | if (err) |
1004 | goto do_fault; | 1082 | goto do_fault; |
1005 | } else { | 1083 | } else { |
1006 | int merge = 0; | 1084 | bool merge = false; |
1007 | int i = skb_shinfo(skb)->nr_frags; | 1085 | int i = skb_shinfo(skb)->nr_frags; |
1008 | struct page *page = sk->sk_sndmsg_page; | 1086 | struct page *page = sk->sk_sndmsg_page; |
1009 | int off; | 1087 | int off; |
@@ -1017,7 +1095,7 @@ new_segment: | |||
1017 | off != PAGE_SIZE) { | 1095 | off != PAGE_SIZE) { |
1018 | /* We can extend the last page | 1096 | /* We can extend the last page |
1019 | * fragment. */ | 1097 | * fragment. */ |
1020 | merge = 1; | 1098 | merge = true; |
1021 | } else if (i == MAX_SKB_FRAGS || !sg) { | 1099 | } else if (i == MAX_SKB_FRAGS || !sg) { |
1022 | /* Need to add new fragment and cannot | 1100 | /* Need to add new fragment and cannot |
1023 | * do this because interface is non-SG, | 1101 | * do this because interface is non-SG, |
@@ -1089,7 +1167,7 @@ new_segment: | |||
1089 | if ((seglen -= copy) == 0 && iovlen == 0) | 1167 | if ((seglen -= copy) == 0 && iovlen == 0) |
1090 | goto out; | 1168 | goto out; |
1091 | 1169 | ||
1092 | if (skb->len < max || (flags & MSG_OOB)) | 1170 | if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair)) |
1093 | continue; | 1171 | continue; |
1094 | 1172 | ||
1095 | if (forced_push(tp)) { | 1173 | if (forced_push(tp)) { |
@@ -1102,7 +1180,7 @@ new_segment: | |||
1102 | wait_for_sndbuf: | 1180 | wait_for_sndbuf: |
1103 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | 1181 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); |
1104 | wait_for_memory: | 1182 | wait_for_memory: |
1105 | if (copied) | 1183 | if (copied && likely(!tp->repair)) |
1106 | tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); | 1184 | tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); |
1107 | 1185 | ||
1108 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) | 1186 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) |
@@ -1113,7 +1191,7 @@ wait_for_memory: | |||
1113 | } | 1191 | } |
1114 | 1192 | ||
1115 | out: | 1193 | out: |
1116 | if (copied) | 1194 | if (copied && likely(!tp->repair)) |
1117 | tcp_push(sk, flags, mss_now, tp->nonagle); | 1195 | tcp_push(sk, flags, mss_now, tp->nonagle); |
1118 | release_sock(sk); | 1196 | release_sock(sk); |
1119 | return copied; | 1197 | return copied; |
@@ -1187,6 +1265,24 @@ static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags) | |||
1187 | return -EAGAIN; | 1265 | return -EAGAIN; |
1188 | } | 1266 | } |
1189 | 1267 | ||
1268 | static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len) | ||
1269 | { | ||
1270 | struct sk_buff *skb; | ||
1271 | int copied = 0, err = 0; | ||
1272 | |||
1273 | /* XXX -- need to support SO_PEEK_OFF */ | ||
1274 | |||
1275 | skb_queue_walk(&sk->sk_write_queue, skb) { | ||
1276 | err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, skb->len); | ||
1277 | if (err) | ||
1278 | break; | ||
1279 | |||
1280 | copied += skb->len; | ||
1281 | } | ||
1282 | |||
1283 | return err ?: copied; | ||
1284 | } | ||
1285 | |||
1190 | /* Clean up the receive buffer for full frames taken by the user, | 1286 | /* Clean up the receive buffer for full frames taken by the user, |
1191 | * then send an ACK if necessary. COPIED is the number of bytes | 1287 | * then send an ACK if necessary. COPIED is the number of bytes |
1192 | * tcp_recvmsg has given to the user so far, it speeds up the | 1288 | * tcp_recvmsg has given to the user so far, it speeds up the |
@@ -1196,7 +1292,7 @@ static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags) | |||
1196 | void tcp_cleanup_rbuf(struct sock *sk, int copied) | 1292 | void tcp_cleanup_rbuf(struct sock *sk, int copied) |
1197 | { | 1293 | { |
1198 | struct tcp_sock *tp = tcp_sk(sk); | 1294 | struct tcp_sock *tp = tcp_sk(sk); |
1199 | int time_to_ack = 0; | 1295 | bool time_to_ack = false; |
1200 | 1296 | ||
1201 | struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); | 1297 | struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); |
1202 | 1298 | ||
@@ -1222,7 +1318,7 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) | |||
1222 | ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && | 1318 | ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && |
1223 | !icsk->icsk_ack.pingpong)) && | 1319 | !icsk->icsk_ack.pingpong)) && |
1224 | !atomic_read(&sk->sk_rmem_alloc))) | 1320 | !atomic_read(&sk->sk_rmem_alloc))) |
1225 | time_to_ack = 1; | 1321 | time_to_ack = true; |
1226 | } | 1322 | } |
1227 | 1323 | ||
1228 | /* We send an ACK if we can now advertise a non-zero window | 1324 | /* We send an ACK if we can now advertise a non-zero window |
@@ -1244,7 +1340,7 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) | |||
1244 | * "Lots" means "at least twice" here. | 1340 | * "Lots" means "at least twice" here. |
1245 | */ | 1341 | */ |
1246 | if (new_window && new_window >= 2 * rcv_window_now) | 1342 | if (new_window && new_window >= 2 * rcv_window_now) |
1247 | time_to_ack = 1; | 1343 | time_to_ack = true; |
1248 | } | 1344 | } |
1249 | } | 1345 | } |
1250 | if (time_to_ack) | 1346 | if (time_to_ack) |
@@ -1376,11 +1472,11 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, | |||
1376 | break; | 1472 | break; |
1377 | } | 1473 | } |
1378 | if (tcp_hdr(skb)->fin) { | 1474 | if (tcp_hdr(skb)->fin) { |
1379 | sk_eat_skb(sk, skb, 0); | 1475 | sk_eat_skb(sk, skb, false); |
1380 | ++seq; | 1476 | ++seq; |
1381 | break; | 1477 | break; |
1382 | } | 1478 | } |
1383 | sk_eat_skb(sk, skb, 0); | 1479 | sk_eat_skb(sk, skb, false); |
1384 | if (!desc->count) | 1480 | if (!desc->count) |
1385 | break; | 1481 | break; |
1386 | tp->copied_seq = seq; | 1482 | tp->copied_seq = seq; |
@@ -1416,7 +1512,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
1416 | int target; /* Read at least this many bytes */ | 1512 | int target; /* Read at least this many bytes */ |
1417 | long timeo; | 1513 | long timeo; |
1418 | struct task_struct *user_recv = NULL; | 1514 | struct task_struct *user_recv = NULL; |
1419 | int copied_early = 0; | 1515 | bool copied_early = false; |
1420 | struct sk_buff *skb; | 1516 | struct sk_buff *skb; |
1421 | u32 urg_hole = 0; | 1517 | u32 urg_hole = 0; |
1422 | 1518 | ||
@@ -1432,6 +1528,21 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
1432 | if (flags & MSG_OOB) | 1528 | if (flags & MSG_OOB) |
1433 | goto recv_urg; | 1529 | goto recv_urg; |
1434 | 1530 | ||
1531 | if (unlikely(tp->repair)) { | ||
1532 | err = -EPERM; | ||
1533 | if (!(flags & MSG_PEEK)) | ||
1534 | goto out; | ||
1535 | |||
1536 | if (tp->repair_queue == TCP_SEND_QUEUE) | ||
1537 | goto recv_sndq; | ||
1538 | |||
1539 | err = -EINVAL; | ||
1540 | if (tp->repair_queue == TCP_NO_QUEUE) | ||
1541 | goto out; | ||
1542 | |||
1543 | /* 'common' recv queue MSG_PEEK-ing */ | ||
1544 | } | ||
1545 | |||
1435 | seq = &tp->copied_seq; | 1546 | seq = &tp->copied_seq; |
1436 | if (flags & MSG_PEEK) { | 1547 | if (flags & MSG_PEEK) { |
1437 | peek_seq = tp->copied_seq; | 1548 | peek_seq = tp->copied_seq; |
@@ -1452,7 +1563,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
1452 | if ((available < target) && | 1563 | if ((available < target) && |
1453 | (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && | 1564 | (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && |
1454 | !sysctl_tcp_low_latency && | 1565 | !sysctl_tcp_low_latency && |
1455 | dma_find_channel(DMA_MEMCPY)) { | 1566 | net_dma_find_channel()) { |
1456 | preempt_enable_no_resched(); | 1567 | preempt_enable_no_resched(); |
1457 | tp->ucopy.pinned_list = | 1568 | tp->ucopy.pinned_list = |
1458 | dma_pin_iovec_pages(msg->msg_iov, len); | 1569 | dma_pin_iovec_pages(msg->msg_iov, len); |
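Both DMA offload call sites (here and in the receive copy loop below) move from dma_find_channel(DMA_MEMCPY) to a networking-specific wrapper. The wrapper presumably exists so net code has one place to attach channel policy, reportedly an alignment capability check in this series; the minimal sketch below ignores that and shows only the simplest possible shape, so treat it as an assumption:

	static inline struct dma_chan *net_dma_find_channel(void)
	{
	#ifdef CONFIG_NET_DMA
		return dma_find_channel(DMA_MEMCPY);
	#else
		return NULL;
	#endif
	}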
@@ -1633,9 +1744,9 @@ do_prequeue: | |||
1633 | } | 1744 | } |
1634 | if ((flags & MSG_PEEK) && | 1745 | if ((flags & MSG_PEEK) && |
1635 | (peek_seq - copied - urg_hole != tp->copied_seq)) { | 1746 | (peek_seq - copied - urg_hole != tp->copied_seq)) { |
1636 | if (net_ratelimit()) | 1747 | net_dbg_ratelimited("TCP(%s:%d): Application bug, race in MSG_PEEK\n", |
1637 | printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n", | 1748 | current->comm, |
1638 | current->comm, task_pid_nr(current)); | 1749 | task_pid_nr(current)); |
1639 | peek_seq = tp->copied_seq; | 1750 | peek_seq = tp->copied_seq; |
1640 | } | 1751 | } |
1641 | continue; | 1752 | continue; |
@@ -1667,7 +1778,7 @@ do_prequeue: | |||
1667 | if (!(flags & MSG_TRUNC)) { | 1778 | if (!(flags & MSG_TRUNC)) { |
1668 | #ifdef CONFIG_NET_DMA | 1779 | #ifdef CONFIG_NET_DMA |
1669 | if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) | 1780 | if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) |
1670 | tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY); | 1781 | tp->ucopy.dma_chan = net_dma_find_channel(); |
1671 | 1782 | ||
1672 | if (tp->ucopy.dma_chan) { | 1783 | if (tp->ucopy.dma_chan) { |
1673 | tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec( | 1784 | tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec( |
@@ -1689,7 +1800,7 @@ do_prequeue: | |||
1689 | dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); | 1800 | dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); |
1690 | 1801 | ||
1691 | if ((offset + used) == skb->len) | 1802 | if ((offset + used) == skb->len) |
1692 | copied_early = 1; | 1803 | copied_early = true; |
1693 | 1804 | ||
1694 | } else | 1805 | } else |
1695 | #endif | 1806 | #endif |
@@ -1723,7 +1834,7 @@ skip_copy: | |||
1723 | goto found_fin_ok; | 1834 | goto found_fin_ok; |
1724 | if (!(flags & MSG_PEEK)) { | 1835 | if (!(flags & MSG_PEEK)) { |
1725 | sk_eat_skb(sk, skb, copied_early); | 1836 | sk_eat_skb(sk, skb, copied_early); |
1726 | copied_early = 0; | 1837 | copied_early = false; |
1727 | } | 1838 | } |
1728 | continue; | 1839 | continue; |
1729 | 1840 | ||
@@ -1732,7 +1843,7 @@ skip_copy: | |||
1732 | ++*seq; | 1843 | ++*seq; |
1733 | if (!(flags & MSG_PEEK)) { | 1844 | if (!(flags & MSG_PEEK)) { |
1734 | sk_eat_skb(sk, skb, copied_early); | 1845 | sk_eat_skb(sk, skb, copied_early); |
1735 | copied_early = 0; | 1846 | copied_early = false; |
1736 | } | 1847 | } |
1737 | break; | 1848 | break; |
1738 | } while (len > 0); | 1849 | } while (len > 0); |
@@ -1783,6 +1894,10 @@ out: | |||
1783 | recv_urg: | 1894 | recv_urg: |
1784 | err = tcp_recv_urg(sk, msg, len, flags); | 1895 | err = tcp_recv_urg(sk, msg, len, flags); |
1785 | goto out; | 1896 | goto out; |
1897 | |||
1898 | recv_sndq: | ||
1899 | err = tcp_peek_sndq(sk, msg, len); | ||
1900 | goto out; | ||
1786 | } | 1901 | } |
1787 | EXPORT_SYMBOL(tcp_recvmsg); | 1902 | EXPORT_SYMBOL(tcp_recvmsg); |
1788 | 1903 | ||
@@ -1886,10 +2001,10 @@ bool tcp_check_oom(struct sock *sk, int shift) | |||
1886 | too_many_orphans = tcp_too_many_orphans(sk, shift); | 2001 | too_many_orphans = tcp_too_many_orphans(sk, shift); |
1887 | out_of_socket_memory = tcp_out_of_memory(sk); | 2002 | out_of_socket_memory = tcp_out_of_memory(sk); |
1888 | 2003 | ||
1889 | if (too_many_orphans && net_ratelimit()) | 2004 | if (too_many_orphans) |
1890 | pr_info("too many orphaned sockets\n"); | 2005 | net_info_ratelimited("too many orphaned sockets\n"); |
1891 | if (out_of_socket_memory && net_ratelimit()) | 2006 | if (out_of_socket_memory) |
1892 | pr_info("out of memory -- consider tuning tcp_mem\n"); | 2007 | net_info_ratelimited("out of memory -- consider tuning tcp_mem\n"); |
1893 | return too_many_orphans || out_of_socket_memory; | 2008 | return too_many_orphans || out_of_socket_memory; |
1894 | } | 2009 | } |
1895 | 2010 | ||
@@ -1935,7 +2050,9 @@ void tcp_close(struct sock *sk, long timeout) | |||
1935 | * advertise a zero window, then kill -9 the FTP client, wheee... | 2050 | * advertise a zero window, then kill -9 the FTP client, wheee... |
1936 | * Note: timeout is always zero in such a case. | 2051 | * Note: timeout is always zero in such a case. |
1937 | */ | 2052 | */ |
1938 | if (data_was_unread) { | 2053 | if (unlikely(tcp_sk(sk)->repair)) { |
2054 | sk->sk_prot->disconnect(sk, 0); | ||
2055 | } else if (data_was_unread) { | ||
1939 | /* Unread data was tossed, zap the connection. */ | 2056 | /* Unread data was tossed, zap the connection. */ |
1940 | NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); | 2057 | NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); |
1941 | tcp_set_state(sk, TCP_CLOSE); | 2058 | tcp_set_state(sk, TCP_CLOSE); |
@@ -2053,7 +2170,7 @@ EXPORT_SYMBOL(tcp_close); | |||
2053 | 2170 | ||
2054 | /* These states need RST on ABORT according to RFC793 */ | 2171 | /* These states need RST on ABORT according to RFC793 */ |
2055 | 2172 | ||
2056 | static inline int tcp_need_reset(int state) | 2173 | static inline bool tcp_need_reset(int state) |
2057 | { | 2174 | { |
2058 | return (1 << state) & | 2175 | return (1 << state) & |
2059 | (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 | | 2176 | (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 | |
@@ -2074,6 +2191,8 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
2074 | /* ABORT function of RFC793 */ | 2191 | /* ABORT function of RFC793 */ |
2075 | if (old_state == TCP_LISTEN) { | 2192 | if (old_state == TCP_LISTEN) { |
2076 | inet_csk_listen_stop(sk); | 2193 | inet_csk_listen_stop(sk); |
2194 | } else if (unlikely(tp->repair)) { | ||
2195 | sk->sk_err = ECONNABORTED; | ||
2077 | } else if (tcp_need_reset(old_state) || | 2196 | } else if (tcp_need_reset(old_state) || |
2078 | (tp->snd_nxt != tp->write_seq && | 2197 | (tp->snd_nxt != tp->write_seq && |
2079 | (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { | 2198 | (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { |
@@ -2125,6 +2244,54 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
2125 | } | 2244 | } |
2126 | EXPORT_SYMBOL(tcp_disconnect); | 2245 | EXPORT_SYMBOL(tcp_disconnect); |
2127 | 2246 | ||
2247 | static inline bool tcp_can_repair_sock(const struct sock *sk) | ||
2248 | { | ||
2249 | return capable(CAP_NET_ADMIN) && | ||
2250 | ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED)); | ||
2251 | } | ||
2252 | |||
2253 | static int tcp_repair_options_est(struct tcp_sock *tp, | ||
2254 | struct tcp_repair_opt __user *optbuf, unsigned int len) | ||
2255 | { | ||
2256 | struct tcp_repair_opt opt; | ||
2257 | |||
2258 | while (len >= sizeof(opt)) { | ||
2259 | if (copy_from_user(&opt, optbuf, sizeof(opt))) | ||
2260 | return -EFAULT; | ||
2261 | |||
2262 | optbuf++; | ||
2263 | len -= sizeof(opt); | ||
2264 | |||
2265 | switch (opt.opt_code) { | ||
2266 | case TCPOPT_MSS: | ||
2267 | tp->rx_opt.mss_clamp = opt.opt_val; | ||
2268 | break; | ||
2269 | case TCPOPT_WINDOW: | ||
2270 | if (opt.opt_val > 14) | ||
2271 | return -EFBIG; | ||
2272 | |||
2273 | tp->rx_opt.snd_wscale = opt.opt_val; | ||
2274 | break; | ||
2275 | case TCPOPT_SACK_PERM: | ||
2276 | if (opt.opt_val != 0) | ||
2277 | return -EINVAL; | ||
2278 | |||
2279 | tp->rx_opt.sack_ok |= TCP_SACK_SEEN; | ||
2280 | if (sysctl_tcp_fack) | ||
2281 | tcp_enable_fack(tp); | ||
2282 | break; | ||
2283 | case TCPOPT_TIMESTAMP: | ||
2284 | if (opt.opt_val != 0) | ||
2285 | return -EINVAL; | ||
2286 | |||
2287 | tp->rx_opt.tstamp_ok = 1; | ||
2288 | break; | ||
2289 | } | ||
2290 | } | ||
2291 | |||
2292 | return 0; | ||
2293 | } | ||
2294 | |||
2128 | /* | 2295 | /* |
2129 | * Socket option code for TCP. | 2296 | * Socket option code for TCP. |
2130 | */ | 2297 | */ |
@@ -2295,6 +2462,55 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
2295 | err = -EINVAL; | 2462 | err = -EINVAL; |
2296 | else | 2463 | else |
2297 | tp->thin_dupack = val; | 2464 | tp->thin_dupack = val; |
2465 | if (tp->thin_dupack) | ||
2466 | tcp_disable_early_retrans(tp); | ||
2467 | break; | ||
2468 | |||
2469 | case TCP_REPAIR: | ||
2470 | if (!tcp_can_repair_sock(sk)) | ||
2471 | err = -EPERM; | ||
2472 | else if (val == 1) { | ||
2473 | tp->repair = 1; | ||
2474 | sk->sk_reuse = SK_FORCE_REUSE; | ||
2475 | tp->repair_queue = TCP_NO_QUEUE; | ||
2476 | } else if (val == 0) { | ||
2477 | tp->repair = 0; | ||
2478 | sk->sk_reuse = SK_NO_REUSE; | ||
2479 | tcp_send_window_probe(sk); | ||
2480 | } else | ||
2481 | err = -EINVAL; | ||
2482 | |||
2483 | break; | ||
2484 | |||
2485 | case TCP_REPAIR_QUEUE: | ||
2486 | if (!tp->repair) | ||
2487 | err = -EPERM; | ||
2488 | else if (val < TCP_QUEUES_NR) | ||
2489 | tp->repair_queue = val; | ||
2490 | else | ||
2491 | err = -EINVAL; | ||
2492 | break; | ||
2493 | |||
2494 | case TCP_QUEUE_SEQ: | ||
2495 | if (sk->sk_state != TCP_CLOSE) | ||
2496 | err = -EPERM; | ||
2497 | else if (tp->repair_queue == TCP_SEND_QUEUE) | ||
2498 | tp->write_seq = val; | ||
2499 | else if (tp->repair_queue == TCP_RECV_QUEUE) | ||
2500 | tp->rcv_nxt = val; | ||
2501 | else | ||
2502 | err = -EINVAL; | ||
2503 | break; | ||
2504 | |||
2505 | case TCP_REPAIR_OPTIONS: | ||
2506 | if (!tp->repair) | ||
2507 | err = -EINVAL; | ||
2508 | else if (sk->sk_state == TCP_ESTABLISHED) | ||
2509 | err = tcp_repair_options_est(tp, | ||
2510 | (struct tcp_repair_opt __user *)optval, | ||
2511 | optlen); | ||
2512 | else | ||
2513 | err = -EPERM; | ||
2298 | break; | 2514 | break; |
2299 | 2515 | ||
2300 | case TCP_CORK: | 2516 | case TCP_CORK: |
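Taken together, TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ and TCP_REPAIR_OPTIONS let a checkpoint/restore tool rebuild an established socket without the peer's involvement. A hypothetical restore sequence from userspace, with error handling omitted and the saved state (addresses and sequence numbers) coming from a checkpoint image; the TCP_REPAIR* constants are from the new linux/tcp.h ABI and CAP_NET_ADMIN is required:

	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <linux/tcp.h>

	static int tcp_restore(struct sockaddr_in *self, struct sockaddr_in *peer,
			       unsigned int rcv_nxt, unsigned int write_seq)
	{
		int s = socket(AF_INET, SOCK_STREAM, 0);
		int on = 1, off = 0, q;

		setsockopt(s, IPPROTO_TCP, TCP_REPAIR, &on, sizeof(on));

		q = TCP_RECV_QUEUE;	/* restore the receive-side sequence */
		setsockopt(s, IPPROTO_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q));
		setsockopt(s, IPPROTO_TCP, TCP_QUEUE_SEQ, &rcv_nxt, sizeof(rcv_nxt));

		q = TCP_SEND_QUEUE;	/* and the send-side sequence */
		setsockopt(s, IPPROTO_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q));
		setsockopt(s, IPPROTO_TCP, TCP_QUEUE_SEQ, &write_seq, sizeof(write_seq));

		bind(s, (struct sockaddr *)self, sizeof(*self));
		connect(s, (struct sockaddr *)peer, sizeof(*peer)); /* no handshake
								       in repair mode */

		setsockopt(s, IPPROTO_TCP, TCP_REPAIR, &off, sizeof(off)); /* resume */
		return s;
	}

Leaving repair mode triggers tcp_send_window_probe(), visible in the TCP_REPAIR case above, so the peer immediately learns the restored window.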
@@ -2530,6 +2746,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level, | |||
2530 | val = tp->mss_cache; | 2746 | val = tp->mss_cache; |
2531 | if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) | 2747 | if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) |
2532 | val = tp->rx_opt.user_mss; | 2748 | val = tp->rx_opt.user_mss; |
2749 | if (tp->repair) | ||
2750 | val = tp->rx_opt.mss_clamp; | ||
2533 | break; | 2751 | break; |
2534 | case TCP_NODELAY: | 2752 | case TCP_NODELAY: |
2535 | val = !!(tp->nonagle&TCP_NAGLE_OFF); | 2753 | val = !!(tp->nonagle&TCP_NAGLE_OFF); |
@@ -2632,6 +2850,26 @@ static int do_tcp_getsockopt(struct sock *sk, int level, | |||
2632 | val = tp->thin_dupack; | 2850 | val = tp->thin_dupack; |
2633 | break; | 2851 | break; |
2634 | 2852 | ||
2853 | case TCP_REPAIR: | ||
2854 | val = tp->repair; | ||
2855 | break; | ||
2856 | |||
2857 | case TCP_REPAIR_QUEUE: | ||
2858 | if (tp->repair) | ||
2859 | val = tp->repair_queue; | ||
2860 | else | ||
2861 | return -EINVAL; | ||
2862 | break; | ||
2863 | |||
2864 | case TCP_QUEUE_SEQ: | ||
2865 | if (tp->repair_queue == TCP_SEND_QUEUE) | ||
2866 | val = tp->write_seq; | ||
2867 | else if (tp->repair_queue == TCP_RECV_QUEUE) | ||
2868 | val = tp->rcv_nxt; | ||
2869 | else | ||
2870 | return -EINVAL; | ||
2871 | break; | ||
2872 | |||
2635 | case TCP_USER_TIMEOUT: | 2873 | case TCP_USER_TIMEOUT: |
2636 | val = jiffies_to_msecs(icsk->icsk_user_timeout); | 2874 | val = jiffies_to_msecs(icsk->icsk_user_timeout); |
2637 | break; | 2875 | break; |
@@ -2675,7 +2913,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, | |||
2675 | { | 2913 | { |
2676 | struct sk_buff *segs = ERR_PTR(-EINVAL); | 2914 | struct sk_buff *segs = ERR_PTR(-EINVAL); |
2677 | struct tcphdr *th; | 2915 | struct tcphdr *th; |
2678 | unsigned thlen; | 2916 | unsigned int thlen; |
2679 | unsigned int seq; | 2917 | unsigned int seq; |
2680 | __be32 delta; | 2918 | __be32 delta; |
2681 | unsigned int oldlen; | 2919 | unsigned int oldlen; |
@@ -2933,13 +3171,13 @@ out_free: | |||
2933 | struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk) | 3171 | struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk) |
2934 | { | 3172 | { |
2935 | struct tcp_md5sig_pool __percpu *pool; | 3173 | struct tcp_md5sig_pool __percpu *pool; |
2936 | int alloc = 0; | 3174 | bool alloc = false; |
2937 | 3175 | ||
2938 | retry: | 3176 | retry: |
2939 | spin_lock_bh(&tcp_md5sig_pool_lock); | 3177 | spin_lock_bh(&tcp_md5sig_pool_lock); |
2940 | pool = tcp_md5sig_pool; | 3178 | pool = tcp_md5sig_pool; |
2941 | if (tcp_md5sig_users++ == 0) { | 3179 | if (tcp_md5sig_users++ == 0) { |
2942 | alloc = 1; | 3180 | alloc = true; |
2943 | spin_unlock_bh(&tcp_md5sig_pool_lock); | 3181 | spin_unlock_bh(&tcp_md5sig_pool_lock); |
2944 | } else if (!pool) { | 3182 | } else if (!pool) { |
2945 | tcp_md5sig_users--; | 3183 | tcp_md5sig_users--; |
@@ -3033,9 +3271,9 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, | |||
3033 | struct scatterlist sg; | 3271 | struct scatterlist sg; |
3034 | const struct tcphdr *tp = tcp_hdr(skb); | 3272 | const struct tcphdr *tp = tcp_hdr(skb); |
3035 | struct hash_desc *desc = &hp->md5_desc; | 3273 | struct hash_desc *desc = &hp->md5_desc; |
3036 | unsigned i; | 3274 | unsigned int i; |
3037 | const unsigned head_data_len = skb_headlen(skb) > header_len ? | 3275 | const unsigned int head_data_len = skb_headlen(skb) > header_len ? |
3038 | skb_headlen(skb) - header_len : 0; | 3276 | skb_headlen(skb) - header_len : 0; |
3039 | const struct skb_shared_info *shi = skb_shinfo(skb); | 3277 | const struct skb_shared_info *shi = skb_shinfo(skb); |
3040 | struct sk_buff *frag_iter; | 3278 | struct sk_buff *frag_iter; |
3041 | 3279 | ||
@@ -3223,9 +3461,15 @@ extern struct tcp_congestion_ops tcp_reno; | |||
3223 | static __initdata unsigned long thash_entries; | 3461 | static __initdata unsigned long thash_entries; |
3224 | static int __init set_thash_entries(char *str) | 3462 | static int __init set_thash_entries(char *str) |
3225 | { | 3463 | { |
3464 | ssize_t ret; | ||
3465 | |||
3226 | if (!str) | 3466 | if (!str) |
3227 | return 0; | 3467 | return 0; |
3228 | thash_entries = simple_strtoul(str, &str, 0); | 3468 | |
3469 | ret = kstrtoul(str, 0, &thash_entries); | ||
3470 | if (ret) | ||
3471 | return 0; | ||
3472 | |||
3229 | return 1; | 3473 | return 1; |
3230 | } | 3474 | } |
3231 | __setup("thash_entries=", set_thash_entries); | 3475 | __setup("thash_entries=", set_thash_entries); |
@@ -3243,7 +3487,7 @@ void __init tcp_init(void) | |||
3243 | { | 3487 | { |
3244 | struct sk_buff *skb = NULL; | 3488 | struct sk_buff *skb = NULL; |
3245 | unsigned long limit; | 3489 | unsigned long limit; |
3246 | int max_share, cnt; | 3490 | int max_rshare, max_wshare, cnt; |
3247 | unsigned int i; | 3491 | unsigned int i; |
3248 | unsigned long jiffy = jiffies; | 3492 | unsigned long jiffy = jiffies; |
3249 | 3493 | ||
@@ -3302,17 +3546,17 @@ void __init tcp_init(void) | |||
3302 | 3546 | ||
3303 | tcp_init_mem(&init_net); | 3547 | tcp_init_mem(&init_net); |
3304 | /* Set per-socket limits to no more than 1/128 the pressure threshold */ | 3548 | /* Set per-socket limits to no more than 1/128 the pressure threshold */ |
3305 | limit = nr_free_buffer_pages() << (PAGE_SHIFT - 10); | 3549 | limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); |
3306 | limit = max(limit, 128UL); | 3550 | max_wshare = min(4UL*1024*1024, limit); |
3307 | max_share = min(4UL*1024*1024, limit); | 3551 | max_rshare = min(6UL*1024*1024, limit); |
3308 | 3552 | ||
3309 | sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; | 3553 | sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; |
3310 | sysctl_tcp_wmem[1] = 16*1024; | 3554 | sysctl_tcp_wmem[1] = 16*1024; |
3311 | sysctl_tcp_wmem[2] = max(64*1024, max_share); | 3555 | sysctl_tcp_wmem[2] = max(64*1024, max_wshare); |
3312 | 3556 | ||
3313 | sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; | 3557 | sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; |
3314 | sysctl_tcp_rmem[1] = 87380; | 3558 | sysctl_tcp_rmem[1] = 87380; |
3315 | sysctl_tcp_rmem[2] = max(87380, max_share); | 3559 | sysctl_tcp_rmem[2] = max(87380, max_rshare); |
3316 | 3560 | ||
3317 | pr_info("Hash tables configured (established %u bind %u)\n", | 3561 | pr_info("Hash tables configured (established %u bind %u)\n", |
3318 | tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); | 3562 | tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); |
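The per-socket defaults now derive from memory in bytes rather than the old kilobyte-flavored shift, and read and write get separate caps. A worked example assuming 4 KiB pages and 1 GiB of free buffer pages (262144 pages):

	/* limit      = 262144 << (PAGE_SHIFT - 7) = 262144 << 5 = 8 MiB,
	 *              which really is 1/128 of 1 GiB as the comment claims;
	 * max_wshare = min(4 MiB, 8 MiB) = 4 MiB  ->  tcp_wmem[2]
	 * max_rshare = min(6 MiB, 8 MiB) = 6 MiB  ->  tcp_rmem[2]
	 * The old shift (PAGE_SHIFT - 10) gave the same box only
	 * 262144 << 2 = 1 MiB before clamping. */

The larger receive cap pairs with the tcp_adv_win_scale default change in tcp_input.c below.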
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 272a84593c85..04dbd7ae7c62 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c | |||
@@ -280,19 +280,19 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) | |||
280 | /* RFC2861 Check whether we are limited by application or congestion window | 280 | /* RFC2861 Check whether we are limited by application or congestion window |
281 | * This is the inverse of cwnd check in tcp_tso_should_defer | 281 | * This is the inverse of cwnd check in tcp_tso_should_defer |
282 | */ | 282 | */ |
283 | int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) | 283 | bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) |
284 | { | 284 | { |
285 | const struct tcp_sock *tp = tcp_sk(sk); | 285 | const struct tcp_sock *tp = tcp_sk(sk); |
286 | u32 left; | 286 | u32 left; |
287 | 287 | ||
288 | if (in_flight >= tp->snd_cwnd) | 288 | if (in_flight >= tp->snd_cwnd) |
289 | return 1; | 289 | return true; |
290 | 290 | ||
291 | left = tp->snd_cwnd - in_flight; | 291 | left = tp->snd_cwnd - in_flight; |
292 | if (sk_can_gso(sk) && | 292 | if (sk_can_gso(sk) && |
293 | left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && | 293 | left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && |
294 | left * tp->mss_cache < sk->sk_gso_max_size) | 294 | left * tp->mss_cache < sk->sk_gso_max_size) |
295 | return 1; | 295 | return true; |
296 | return left <= tcp_max_tso_deferred_mss(tp); | 296 | return left <= tcp_max_tso_deferred_mss(tp); |
297 | } | 297 | } |
298 | EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited); | 298 | EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited); |
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index fe3ecf484b44..57bdd17dff4d 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c | |||
@@ -15,7 +15,7 @@ | |||
15 | 15 | ||
16 | /* Tcp Hybla structure. */ | 16 | /* Tcp Hybla structure. */ |
17 | struct hybla { | 17 | struct hybla { |
18 | u8 hybla_en; | 18 | bool hybla_en; |
19 | u32 snd_cwnd_cents; /* Keeps increment values when it is <1, <<7 */ | 19 | u32 snd_cwnd_cents; /* Keeps increment values when it is <1, <<7 */ |
20 | u32 rho; /* Rho parameter, integer part */ | 20 | u32 rho; /* Rho parameter, integer part */ |
21 | u32 rho2; /* Rho * Rho, integer part */ | 21 | u32 rho2; /* Rho * Rho, integer part */ |
@@ -24,8 +24,7 @@ struct hybla { | |||
24 | u32 minrtt; /* Minimum smoothed round trip time value seen */ | 24 | u32 minrtt; /* Minimum smoothed round trip time value seen */ |
25 | }; | 25 | }; |
26 | 26 | ||
27 | /* Hybla reference round trip time (default= 1/40 sec = 25 ms), | 27 | /* Hybla reference round trip time (default= 1/40 sec = 25 ms), in ms */ |
28 | expressed in jiffies */ | ||
29 | static int rtt0 = 25; | 28 | static int rtt0 = 25; |
30 | module_param(rtt0, int, 0644); | 29 | module_param(rtt0, int, 0644); |
31 | MODULE_PARM_DESC(rtt0, "reference round trip time (ms)"); | 30 | MODULE_PARM_DESC(rtt0, "reference round trip time (ms)"); |
@@ -39,7 +38,7 @@ static inline void hybla_recalc_param (struct sock *sk) | |||
39 | ca->rho_3ls = max_t(u32, tcp_sk(sk)->srtt / msecs_to_jiffies(rtt0), 8); | 38 | ca->rho_3ls = max_t(u32, tcp_sk(sk)->srtt / msecs_to_jiffies(rtt0), 8); |
40 | ca->rho = ca->rho_3ls >> 3; | 39 | ca->rho = ca->rho_3ls >> 3; |
41 | ca->rho2_7ls = (ca->rho_3ls * ca->rho_3ls) << 1; | 40 | ca->rho2_7ls = (ca->rho_3ls * ca->rho_3ls) << 1; |
42 | ca->rho2 = ca->rho2_7ls >>7; | 41 | ca->rho2 = ca->rho2_7ls >> 7; |
43 | } | 42 | } |
44 | 43 | ||
45 | static void hybla_init(struct sock *sk) | 44 | static void hybla_init(struct sock *sk) |
@@ -52,7 +51,7 @@ static void hybla_init(struct sock *sk) | |||
52 | ca->rho_3ls = 0; | 51 | ca->rho_3ls = 0; |
53 | ca->rho2_7ls = 0; | 52 | ca->rho2_7ls = 0; |
54 | ca->snd_cwnd_cents = 0; | 53 | ca->snd_cwnd_cents = 0; |
55 | ca->hybla_en = 1; | 54 | ca->hybla_en = true; |
56 | tp->snd_cwnd = 2; | 55 | tp->snd_cwnd = 2; |
57 | tp->snd_cwnd_clamp = 65535; | 56 | tp->snd_cwnd_clamp = 65535; |
58 | 57 | ||
@@ -67,6 +66,7 @@ static void hybla_init(struct sock *sk) | |||
67 | static void hybla_state(struct sock *sk, u8 ca_state) | 66 | static void hybla_state(struct sock *sk, u8 ca_state) |
68 | { | 67 | { |
69 | struct hybla *ca = inet_csk_ca(sk); | 68 | struct hybla *ca = inet_csk_ca(sk); |
69 | |||
70 | ca->hybla_en = (ca_state == TCP_CA_Open); | 70 | ca->hybla_en = (ca_state == TCP_CA_Open); |
71 | } | 71 | } |
72 | 72 | ||
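For readers following the fixed-point math in hybla_recalc_param(): tp->srtt is stored as 8 x RTT in jiffies, so rho_3ls carries rho in 1/8 units and rho2_7ls carries rho^2 in 1/128 units. A worked example, assuming HZ = 1000 and an illustrative 200 ms SRTT:

/* SRTT = 200 ms, rtt0 = 25 ms, HZ = 1000, so tp->srtt = 1600 (kept <<3) */
u32 srtt     = 1600;
u32 rho_3ls  = max_t(u32, srtt / msecs_to_jiffies(25), 8);	/* 1600/25 = 64 */
u32 rho      = rho_3ls >> 3;					/* 64 >> 3 = 8 */
u32 rho2_7ls = (rho_3ls * rho_3ls) << 1;			/* 64*64*2 = 8192 */
u32 rho2     = rho2_7ls >> 7;					/* 8192 >> 7 = 64 = rho^2 */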
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e886e2f7fa8d..cfa2aa128342 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -85,7 +85,7 @@ int sysctl_tcp_ecn __read_mostly = 2; | |||
85 | EXPORT_SYMBOL(sysctl_tcp_ecn); | 85 | EXPORT_SYMBOL(sysctl_tcp_ecn); |
86 | int sysctl_tcp_dsack __read_mostly = 1; | 86 | int sysctl_tcp_dsack __read_mostly = 1; |
87 | int sysctl_tcp_app_win __read_mostly = 31; | 87 | int sysctl_tcp_app_win __read_mostly = 31; |
88 | int sysctl_tcp_adv_win_scale __read_mostly = 2; | 88 | int sysctl_tcp_adv_win_scale __read_mostly = 1; |
89 | EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); | 89 | EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); |
90 | 90 | ||
91 | int sysctl_tcp_stdurg __read_mostly; | 91 | int sysctl_tcp_stdurg __read_mostly; |
@@ -99,6 +99,7 @@ int sysctl_tcp_thin_dupack __read_mostly; | |||
99 | 99 | ||
100 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; | 100 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; |
101 | int sysctl_tcp_abc __read_mostly; | 101 | int sysctl_tcp_abc __read_mostly; |
102 | int sysctl_tcp_early_retrans __read_mostly = 2; | ||
102 | 103 | ||
103 | #define FLAG_DATA 0x01 /* Incoming frame contained data. */ | 104 | #define FLAG_DATA 0x01 /* Incoming frame contained data. */ |
104 | #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ | 105 | #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ |
@@ -175,7 +176,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) | |||
175 | static void tcp_incr_quickack(struct sock *sk) | 176 | static void tcp_incr_quickack(struct sock *sk) |
176 | { | 177 | { |
177 | struct inet_connection_sock *icsk = inet_csk(sk); | 178 | struct inet_connection_sock *icsk = inet_csk(sk); |
178 | unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss); | 179 | unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss); |
179 | 180 | ||
180 | if (quickacks == 0) | 181 | if (quickacks == 0) |
181 | quickacks = 2; | 182 | quickacks = 2; |
@@ -195,9 +196,10 @@ static void tcp_enter_quickack_mode(struct sock *sk) | |||
195 | * and the session is not interactive. | 196 | * and the session is not interactive. |
196 | */ | 197 | */ |
197 | 198 | ||
198 | static inline int tcp_in_quickack_mode(const struct sock *sk) | 199 | static inline bool tcp_in_quickack_mode(const struct sock *sk) |
199 | { | 200 | { |
200 | const struct inet_connection_sock *icsk = inet_csk(sk); | 201 | const struct inet_connection_sock *icsk = inet_csk(sk); |
202 | |||
201 | return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong; | 203 | return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong; |
202 | } | 204 | } |
203 | 205 | ||
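The quick-ACK budget computed in tcp_incr_quickack() above is half the receive window measured in MSS-sized segments, floored at 2. Illustrative numbers:

/* rcv_wnd = 65536, rcv_mss = 1460 */
unsigned int quickacks = 65536 / (2 * 1460);	/* = 22 forced quick ACKs */
if (quickacks == 0)
	quickacks = 2;				/* never fewer than two */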
@@ -252,11 +254,11 @@ static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th) | |||
252 | tp->ecn_flags &= ~TCP_ECN_OK; | 254 | tp->ecn_flags &= ~TCP_ECN_OK; |
253 | } | 255 | } |
254 | 256 | ||
255 | static inline int TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th) | 257 | static bool TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th) |
256 | { | 258 | { |
257 | if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK)) | 259 | if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK)) |
258 | return 1; | 260 | return true; |
259 | return 0; | 261 | return false; |
260 | } | 262 | } |
261 | 263 | ||
262 | /* Buffer size and advertised window tuning. | 264 | /* Buffer size and advertised window tuning. |
@@ -335,6 +337,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) | |||
335 | incr = __tcp_grow_window(sk, skb); | 337 | incr = __tcp_grow_window(sk, skb); |
336 | 338 | ||
337 | if (incr) { | 339 | if (incr) { |
340 | incr = max_t(int, incr, 2 * skb->len); | ||
338 | tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, | 341 | tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, |
339 | tp->window_clamp); | 342 | tp->window_clamp); |
340 | inet_csk(sk)->icsk_ack.quick |= 1; | 343 | inet_csk(sk)->icsk_ack.quick |= 1; |
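The added max_t() guarantees rcv_ssthresh advances by at least twice the payload of the segment that triggered the growth, so senders using small segments still see the advertised window open at a useful rate. Illustrative effect:

/* __tcp_grow_window() suggested a small increment */
int incr = 100;				/* value from rcvbuf pressure */
int skb_len = 1460;			/* payload of the received skb */
incr = max_t(int, incr, 2 * skb_len);	/* bumped to 2920 */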
@@ -474,8 +477,11 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep) | |||
474 | if (!win_dep) { | 477 | if (!win_dep) { |
475 | m -= (new_sample >> 3); | 478 | m -= (new_sample >> 3); |
476 | new_sample += m; | 479 | new_sample += m; |
477 | } else if (m < new_sample) | 480 | } else { |
478 | new_sample = m << 3; | 481 | m <<= 3; |
482 | if (m < new_sample) | ||
483 | new_sample = m; | ||
484 | } | ||
479 | } else { | 485 | } else { |
480 | /* No previous measure. */ | 486 | /* No previous measure. */ |
481 | new_sample = m << 3; | 487 | new_sample = m << 3; |
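This hunk fixes a units bug in the receiver-side RTT estimator: new_sample holds the average left-shifted by 3 (8 x RTT), while the old win_dep branch compared the raw sample m against that scaled value, so the "take the minimum" path could actually raise the estimate. Worked numbers (illustrative):

/* Estimate is 40 ms: new_sample = 40 << 3 = 320. A win_dep sample
 * m = 60 ms arrives.
 * Old code: m (60) < new_sample (320), so new_sample = 60 << 3 = 480,
 *           raising the estimate on a branch meant to lower it.
 * New code: m <<= 3 gives 480; 480 < 320 is false, estimate unchanged.
 */
u32 new_sample = 320;
u32 m = 60;
m <<= 3;			/* bring the sample into the <<3 domain */
if (m < new_sample)		/* now a true minimum */
	new_sample = m;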
@@ -491,7 +497,7 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp) | |||
491 | goto new_measure; | 497 | goto new_measure; |
492 | if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq)) | 498 | if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq)) |
493 | return; | 499 | return; |
494 | tcp_rcv_rtt_update(tp, jiffies - tp->rcv_rtt_est.time, 1); | 500 | tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_rtt_est.time, 1); |
495 | 501 | ||
496 | new_measure: | 502 | new_measure: |
497 | tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd; | 503 | tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd; |
@@ -902,6 +908,7 @@ static void tcp_init_metrics(struct sock *sk) | |||
902 | if (dst_metric(dst, RTAX_REORDERING) && | 908 | if (dst_metric(dst, RTAX_REORDERING) && |
903 | tp->reordering != dst_metric(dst, RTAX_REORDERING)) { | 909 | tp->reordering != dst_metric(dst, RTAX_REORDERING)) { |
904 | tcp_disable_fack(tp); | 910 | tcp_disable_fack(tp); |
911 | tcp_disable_early_retrans(tp); | ||
905 | tp->reordering = dst_metric(dst, RTAX_REORDERING); | 912 | tp->reordering = dst_metric(dst, RTAX_REORDERING); |
906 | } | 913 | } |
907 | 914 | ||
@@ -933,7 +940,7 @@ static void tcp_init_metrics(struct sock *sk) | |||
933 | tcp_set_rto(sk); | 940 | tcp_set_rto(sk); |
934 | reset: | 941 | reset: |
935 | if (tp->srtt == 0) { | 942 | if (tp->srtt == 0) { |
936 | /* RFC2988bis: We've failed to get a valid RTT sample from | 943 | /* RFC6298: 5.7 We've failed to get a valid RTT sample from |
937 | * 3WHS. This is most likely due to retransmission, | 944 | * 3WHS. This is most likely due to retransmission, |
938 | * including spurious one. Reset the RTO back to 3secs | 945 | * including spurious one. Reset the RTO back to 3secs |
939 | * from the more aggressive 1sec to avoid more spurious | 946 | * from the more aggressive 1sec to avoid more spurious |
@@ -943,7 +950,7 @@ reset: | |||
943 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; | 950 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; |
944 | } | 951 | } |
945 | /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been | 952 | /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been |
946 | * retransmitted. In light of RFC2988bis' more aggressive 1sec | 953 | * retransmitted. In light of RFC6298 more aggressive 1sec |
947 | * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK | 954 | * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK |
948 | * retransmission has occurred. | 955 | * retransmission has occurred. |
949 | */ | 956 | */ |
@@ -975,15 +982,18 @@ static void tcp_update_reordering(struct sock *sk, const int metric, | |||
975 | 982 | ||
976 | NET_INC_STATS_BH(sock_net(sk), mib_idx); | 983 | NET_INC_STATS_BH(sock_net(sk), mib_idx); |
977 | #if FASTRETRANS_DEBUG > 1 | 984 | #if FASTRETRANS_DEBUG > 1 |
978 | printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n", | 985 | pr_debug("Disorder%d %d %u f%u s%u rr%d\n", |
979 | tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, | 986 | tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, |
980 | tp->reordering, | 987 | tp->reordering, |
981 | tp->fackets_out, | 988 | tp->fackets_out, |
982 | tp->sacked_out, | 989 | tp->sacked_out, |
983 | tp->undo_marker ? tp->undo_retrans : 0); | 990 | tp->undo_marker ? tp->undo_retrans : 0); |
984 | #endif | 991 | #endif |
985 | tcp_disable_fack(tp); | 992 | tcp_disable_fack(tp); |
986 | } | 993 | } |
994 | |||
995 | if (metric > 0) | ||
996 | tcp_disable_early_retrans(tp); | ||
987 | } | 997 | } |
988 | 998 | ||
989 | /* This must be called before lost_out is incremented */ | 999 | /* This must be called before lost_out is incremented */ |
@@ -1114,36 +1124,36 @@ static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, | |||
1114 | * the exact amount is rather hard to quantify. However, tp->max_window can | 1124 | * the exact amount is rather hard to quantify. However, tp->max_window can |
1115 | * be used as an exaggerated estimate. | 1125 | * be used as an exaggerated estimate. |
1116 | */ | 1126 | */ |
1117 | static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack, | 1127 | static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack, |
1118 | u32 start_seq, u32 end_seq) | 1128 | u32 start_seq, u32 end_seq) |
1119 | { | 1129 | { |
1120 | /* Too far in future, or reversed (interpretation is ambiguous) */ | 1130 | /* Too far in future, or reversed (interpretation is ambiguous) */ |
1121 | if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq)) | 1131 | if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq)) |
1122 | return 0; | 1132 | return false; |
1123 | 1133 | ||
1124 | /* Nasty start_seq wrap-around check (see comments above) */ | 1134 | /* Nasty start_seq wrap-around check (see comments above) */ |
1125 | if (!before(start_seq, tp->snd_nxt)) | 1135 | if (!before(start_seq, tp->snd_nxt)) |
1126 | return 0; | 1136 | return false; |
1127 | 1137 | ||
1128 | /* In outstanding window? ...This is valid exit for D-SACKs too. | 1138 | /* In outstanding window? ...This is valid exit for D-SACKs too. |
1129 | * start_seq == snd_una is non-sensical (see comments above) | 1139 | * start_seq == snd_una is non-sensical (see comments above) |
1130 | */ | 1140 | */ |
1131 | if (after(start_seq, tp->snd_una)) | 1141 | if (after(start_seq, tp->snd_una)) |
1132 | return 1; | 1142 | return true; |
1133 | 1143 | ||
1134 | if (!is_dsack || !tp->undo_marker) | 1144 | if (!is_dsack || !tp->undo_marker) |
1135 | return 0; | 1145 | return false; |
1136 | 1146 | ||
1137 | /* ...Then it's D-SACK, and must reside below snd_una completely */ | 1147 | /* ...Then it's D-SACK, and must reside below snd_una completely */ |
1138 | if (after(end_seq, tp->snd_una)) | 1148 | if (after(end_seq, tp->snd_una)) |
1139 | return 0; | 1149 | return false; |
1140 | 1150 | ||
1141 | if (!before(start_seq, tp->undo_marker)) | 1151 | if (!before(start_seq, tp->undo_marker)) |
1142 | return 1; | 1152 | return true; |
1143 | 1153 | ||
1144 | /* Too old */ | 1154 | /* Too old */ |
1145 | if (!after(end_seq, tp->undo_marker)) | 1155 | if (!after(end_seq, tp->undo_marker)) |
1146 | return 0; | 1156 | return false; |
1147 | 1157 | ||
1148 | /* Undo_marker boundary crossing (overestimates a lot). Known already: | 1158 | /* Undo_marker boundary crossing (overestimates a lot). Known already: |
1149 | * start_seq < undo_marker and end_seq >= undo_marker. | 1159 | * start_seq < undo_marker and end_seq >= undo_marker. |
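All of the validity tests above rely on the modular sequence-number comparators from net/tcp.h, which stay correct across 32-bit wraparound by testing the sign of the difference. Essentially as defined there (modulo the int-to-bool conversions elsewhere in this series):

static inline int before(__u32 seq1, __u32 seq2)
{
	return (__s32)(seq1 - seq2) < 0;	/* signed diff survives wrap */
}
#define after(seq2, seq1)	before(seq1, seq2)

/* Example: 0xFFFFFFF0 precedes 0x00000010 despite being numerically
 * larger, since (s32)(0xFFFFFFF0 - 0x10) = -32 < 0.
 */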
@@ -1215,17 +1225,17 @@ static void tcp_mark_lost_retrans(struct sock *sk) | |||
1215 | tp->lost_retrans_low = new_low_seq; | 1225 | tp->lost_retrans_low = new_low_seq; |
1216 | } | 1226 | } |
1217 | 1227 | ||
1218 | static int tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, | 1228 | static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, |
1219 | struct tcp_sack_block_wire *sp, int num_sacks, | 1229 | struct tcp_sack_block_wire *sp, int num_sacks, |
1220 | u32 prior_snd_una) | 1230 | u32 prior_snd_una) |
1221 | { | 1231 | { |
1222 | struct tcp_sock *tp = tcp_sk(sk); | 1232 | struct tcp_sock *tp = tcp_sk(sk); |
1223 | u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq); | 1233 | u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq); |
1224 | u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq); | 1234 | u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq); |
1225 | int dup_sack = 0; | 1235 | bool dup_sack = false; |
1226 | 1236 | ||
1227 | if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) { | 1237 | if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) { |
1228 | dup_sack = 1; | 1238 | dup_sack = true; |
1229 | tcp_dsack_seen(tp); | 1239 | tcp_dsack_seen(tp); |
1230 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV); | 1240 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV); |
1231 | } else if (num_sacks > 1) { | 1241 | } else if (num_sacks > 1) { |
@@ -1234,7 +1244,7 @@ static int tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, | |||
1234 | 1244 | ||
1235 | if (!after(end_seq_0, end_seq_1) && | 1245 | if (!after(end_seq_0, end_seq_1) && |
1236 | !before(start_seq_0, start_seq_1)) { | 1246 | !before(start_seq_0, start_seq_1)) { |
1237 | dup_sack = 1; | 1247 | dup_sack = true; |
1238 | tcp_dsack_seen(tp); | 1248 | tcp_dsack_seen(tp); |
1239 | NET_INC_STATS_BH(sock_net(sk), | 1249 | NET_INC_STATS_BH(sock_net(sk), |
1240 | LINUX_MIB_TCPDSACKOFORECV); | 1250 | LINUX_MIB_TCPDSACKOFORECV); |
@@ -1265,9 +1275,10 @@ struct tcp_sacktag_state { | |||
1265 | * FIXME: this could be merged to shift decision code | 1275 | * FIXME: this could be merged to shift decision code |
1266 | */ | 1276 | */ |
1267 | static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, | 1277 | static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, |
1268 | u32 start_seq, u32 end_seq) | 1278 | u32 start_seq, u32 end_seq) |
1269 | { | 1279 | { |
1270 | int in_sack, err; | 1280 | int err; |
1281 | bool in_sack; | ||
1271 | unsigned int pkt_len; | 1282 | unsigned int pkt_len; |
1272 | unsigned int mss; | 1283 | unsigned int mss; |
1273 | 1284 | ||
@@ -1313,7 +1324,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, | |||
1313 | static u8 tcp_sacktag_one(struct sock *sk, | 1324 | static u8 tcp_sacktag_one(struct sock *sk, |
1314 | struct tcp_sacktag_state *state, u8 sacked, | 1325 | struct tcp_sacktag_state *state, u8 sacked, |
1315 | u32 start_seq, u32 end_seq, | 1326 | u32 start_seq, u32 end_seq, |
1316 | int dup_sack, int pcount) | 1327 | bool dup_sack, int pcount) |
1317 | { | 1328 | { |
1318 | struct tcp_sock *tp = tcp_sk(sk); | 1329 | struct tcp_sock *tp = tcp_sk(sk); |
1319 | int fack_count = state->fack_count; | 1330 | int fack_count = state->fack_count; |
@@ -1393,10 +1404,10 @@ static u8 tcp_sacktag_one(struct sock *sk, | |||
1393 | /* Shift newly-SACKed bytes from this skb to the immediately previous | 1404 | /* Shift newly-SACKed bytes from this skb to the immediately previous |
1394 | * already-SACKed sk_buff. Mark the newly-SACKed bytes as such. | 1405 | * already-SACKed sk_buff. Mark the newly-SACKed bytes as such. |
1395 | */ | 1406 | */ |
1396 | static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, | 1407 | static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, |
1397 | struct tcp_sacktag_state *state, | 1408 | struct tcp_sacktag_state *state, |
1398 | unsigned int pcount, int shifted, int mss, | 1409 | unsigned int pcount, int shifted, int mss, |
1399 | int dup_sack) | 1410 | bool dup_sack) |
1400 | { | 1411 | { |
1401 | struct tcp_sock *tp = tcp_sk(sk); | 1412 | struct tcp_sock *tp = tcp_sk(sk); |
1402 | struct sk_buff *prev = tcp_write_queue_prev(sk, skb); | 1413 | struct sk_buff *prev = tcp_write_queue_prev(sk, skb); |
@@ -1446,7 +1457,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, | |||
1446 | if (skb->len > 0) { | 1457 | if (skb->len > 0) { |
1447 | BUG_ON(!tcp_skb_pcount(skb)); | 1458 | BUG_ON(!tcp_skb_pcount(skb)); |
1448 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED); | 1459 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED); |
1449 | return 0; | 1460 | return false; |
1450 | } | 1461 | } |
1451 | 1462 | ||
1452 | /* Whole SKB was eaten :-) */ | 1463 | /* Whole SKB was eaten :-) */ |
@@ -1469,7 +1480,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, | |||
1469 | 1480 | ||
1470 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED); | 1481 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED); |
1471 | 1482 | ||
1472 | return 1; | 1483 | return true; |
1473 | } | 1484 | } |
1474 | 1485 | ||
1475 | /* I wish gso_size would have a bit more sane initialization than | 1486 | /* I wish gso_size would have a bit more sane initialization than |
@@ -1492,7 +1503,7 @@ static int skb_can_shift(const struct sk_buff *skb) | |||
1492 | static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, | 1503 | static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, |
1493 | struct tcp_sacktag_state *state, | 1504 | struct tcp_sacktag_state *state, |
1494 | u32 start_seq, u32 end_seq, | 1505 | u32 start_seq, u32 end_seq, |
1495 | int dup_sack) | 1506 | bool dup_sack) |
1496 | { | 1507 | { |
1497 | struct tcp_sock *tp = tcp_sk(sk); | 1508 | struct tcp_sock *tp = tcp_sk(sk); |
1498 | struct sk_buff *prev; | 1509 | struct sk_buff *prev; |
@@ -1631,14 +1642,14 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, | |||
1631 | struct tcp_sack_block *next_dup, | 1642 | struct tcp_sack_block *next_dup, |
1632 | struct tcp_sacktag_state *state, | 1643 | struct tcp_sacktag_state *state, |
1633 | u32 start_seq, u32 end_seq, | 1644 | u32 start_seq, u32 end_seq, |
1634 | int dup_sack_in) | 1645 | bool dup_sack_in) |
1635 | { | 1646 | { |
1636 | struct tcp_sock *tp = tcp_sk(sk); | 1647 | struct tcp_sock *tp = tcp_sk(sk); |
1637 | struct sk_buff *tmp; | 1648 | struct sk_buff *tmp; |
1638 | 1649 | ||
1639 | tcp_for_write_queue_from(skb, sk) { | 1650 | tcp_for_write_queue_from(skb, sk) { |
1640 | int in_sack = 0; | 1651 | int in_sack = 0; |
1641 | int dup_sack = dup_sack_in; | 1652 | bool dup_sack = dup_sack_in; |
1642 | 1653 | ||
1643 | if (skb == tcp_send_head(sk)) | 1654 | if (skb == tcp_send_head(sk)) |
1644 | break; | 1655 | break; |
@@ -1653,7 +1664,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, | |||
1653 | next_dup->start_seq, | 1664 | next_dup->start_seq, |
1654 | next_dup->end_seq); | 1665 | next_dup->end_seq); |
1655 | if (in_sack > 0) | 1666 | if (in_sack > 0) |
1656 | dup_sack = 1; | 1667 | dup_sack = true; |
1657 | } | 1668 | } |
1658 | 1669 | ||
1659 | /* skb reference here is a bit tricky to get right, since | 1670 | /* skb reference here is a bit tricky to get right, since |
@@ -1758,7 +1769,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, | |||
1758 | struct sk_buff *skb; | 1769 | struct sk_buff *skb; |
1759 | int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3); | 1770 | int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3); |
1760 | int used_sacks; | 1771 | int used_sacks; |
1761 | int found_dup_sack = 0; | 1772 | bool found_dup_sack = false; |
1762 | int i, j; | 1773 | int i, j; |
1763 | int first_sack_index; | 1774 | int first_sack_index; |
1764 | 1775 | ||
@@ -1789,7 +1800,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, | |||
1789 | used_sacks = 0; | 1800 | used_sacks = 0; |
1790 | first_sack_index = 0; | 1801 | first_sack_index = 0; |
1791 | for (i = 0; i < num_sacks; i++) { | 1802 | for (i = 0; i < num_sacks; i++) { |
1792 | int dup_sack = !i && found_dup_sack; | 1803 | bool dup_sack = !i && found_dup_sack; |
1793 | 1804 | ||
1794 | sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq); | 1805 | sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq); |
1795 | sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq); | 1806 | sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq); |
@@ -1856,7 +1867,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, | |||
1856 | while (i < used_sacks) { | 1867 | while (i < used_sacks) { |
1857 | u32 start_seq = sp[i].start_seq; | 1868 | u32 start_seq = sp[i].start_seq; |
1858 | u32 end_seq = sp[i].end_seq; | 1869 | u32 end_seq = sp[i].end_seq; |
1859 | int dup_sack = (found_dup_sack && (i == first_sack_index)); | 1870 | bool dup_sack = (found_dup_sack && (i == first_sack_index)); |
1860 | struct tcp_sack_block *next_dup = NULL; | 1871 | struct tcp_sack_block *next_dup = NULL; |
1861 | 1872 | ||
1862 | if (found_dup_sack && ((i + 1) == first_sack_index)) | 1873 | if (found_dup_sack && ((i + 1) == first_sack_index)) |
@@ -1958,9 +1969,9 @@ out: | |||
1958 | } | 1969 | } |
1959 | 1970 | ||
1960 | /* Limits sacked_out so that sum with lost_out isn't ever larger than | 1971 | /* Limits sacked_out so that sum with lost_out isn't ever larger than |
1961 | * packets_out. Returns zero if sacked_out adjustment wasn't necessary. | 1972 | * packets_out. Returns false if sacked_out adjustment wasn't necessary. |
1962 | */ | 1973 | */ |
1963 | static int tcp_limit_reno_sacked(struct tcp_sock *tp) | 1974 | static bool tcp_limit_reno_sacked(struct tcp_sock *tp) |
1964 | { | 1975 | { |
1965 | u32 holes; | 1976 | u32 holes; |
1966 | 1977 | ||
@@ -1969,9 +1980,9 @@ static int tcp_limit_reno_sacked(struct tcp_sock *tp) | |||
1969 | 1980 | ||
1970 | if ((tp->sacked_out + holes) > tp->packets_out) { | 1981 | if ((tp->sacked_out + holes) > tp->packets_out) { |
1971 | tp->sacked_out = tp->packets_out - holes; | 1982 | tp->sacked_out = tp->packets_out - holes; |
1972 | return 1; | 1983 | return true; |
1973 | } | 1984 | } |
1974 | return 0; | 1985 | return false; |
1975 | } | 1986 | } |
1976 | 1987 | ||
1977 | /* If we receive more dupacks than we expected counting segments | 1988 | /* If we receive more dupacks than we expected counting segments |
@@ -2025,40 +2036,40 @@ static int tcp_is_sackfrto(const struct tcp_sock *tp) | |||
2025 | /* F-RTO can only be used if TCP has never retransmitted anything other than | 2036 | /* F-RTO can only be used if TCP has never retransmitted anything other than |
2026 | * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here) | 2037 | * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here) |
2027 | */ | 2038 | */ |
2028 | int tcp_use_frto(struct sock *sk) | 2039 | bool tcp_use_frto(struct sock *sk) |
2029 | { | 2040 | { |
2030 | const struct tcp_sock *tp = tcp_sk(sk); | 2041 | const struct tcp_sock *tp = tcp_sk(sk); |
2031 | const struct inet_connection_sock *icsk = inet_csk(sk); | 2042 | const struct inet_connection_sock *icsk = inet_csk(sk); |
2032 | struct sk_buff *skb; | 2043 | struct sk_buff *skb; |
2033 | 2044 | ||
2034 | if (!sysctl_tcp_frto) | 2045 | if (!sysctl_tcp_frto) |
2035 | return 0; | 2046 | return false; |
2036 | 2047 | ||
2037 | /* MTU probe and F-RTO won't really play nicely along currently */ | 2048 | /* MTU probe and F-RTO won't really play nicely along currently */ |
2038 | if (icsk->icsk_mtup.probe_size) | 2049 | if (icsk->icsk_mtup.probe_size) |
2039 | return 0; | 2050 | return false; |
2040 | 2051 | ||
2041 | if (tcp_is_sackfrto(tp)) | 2052 | if (tcp_is_sackfrto(tp)) |
2042 | return 1; | 2053 | return true; |
2043 | 2054 | ||
2044 | /* Avoid expensive walking of rexmit queue if possible */ | 2055 | /* Avoid expensive walking of rexmit queue if possible */ |
2045 | if (tp->retrans_out > 1) | 2056 | if (tp->retrans_out > 1) |
2046 | return 0; | 2057 | return false; |
2047 | 2058 | ||
2048 | skb = tcp_write_queue_head(sk); | 2059 | skb = tcp_write_queue_head(sk); |
2049 | if (tcp_skb_is_last(sk, skb)) | 2060 | if (tcp_skb_is_last(sk, skb)) |
2050 | return 1; | 2061 | return true; |
2051 | skb = tcp_write_queue_next(sk, skb); /* Skips head */ | 2062 | skb = tcp_write_queue_next(sk, skb); /* Skips head */ |
2052 | tcp_for_write_queue_from(skb, sk) { | 2063 | tcp_for_write_queue_from(skb, sk) { |
2053 | if (skb == tcp_send_head(sk)) | 2064 | if (skb == tcp_send_head(sk)) |
2054 | break; | 2065 | break; |
2055 | if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) | 2066 | if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) |
2056 | return 0; | 2067 | return false; |
2057 | /* Short-circuit when first non-SACKed skb has been checked */ | 2068 | /* Short-circuit when first non-SACKed skb has been checked */ |
2058 | if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) | 2069 | if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) |
2059 | break; | 2070 | break; |
2060 | } | 2071 | } |
2061 | return 1; | 2072 | return true; |
2062 | } | 2073 | } |
2063 | 2074 | ||
2064 | /* RTO occurred, but do not yet enter Loss state. Instead, defer RTO | 2075 | /* RTO occurred, but do not yet enter Loss state. Instead, defer RTO |
@@ -2294,7 +2305,7 @@ void tcp_enter_loss(struct sock *sk, int how) | |||
2294 | * | 2305 | * |
2295 | * Do processing similar to RTO timeout. | 2306 | * Do processing similar to RTO timeout. |
2296 | */ | 2307 | */ |
2297 | static int tcp_check_sack_reneging(struct sock *sk, int flag) | 2308 | static bool tcp_check_sack_reneging(struct sock *sk, int flag) |
2298 | { | 2309 | { |
2299 | if (flag & FLAG_SACK_RENEGING) { | 2310 | if (flag & FLAG_SACK_RENEGING) { |
2300 | struct inet_connection_sock *icsk = inet_csk(sk); | 2311 | struct inet_connection_sock *icsk = inet_csk(sk); |
@@ -2305,9 +2316,9 @@ static int tcp_check_sack_reneging(struct sock *sk, int flag) | |||
2305 | tcp_retransmit_skb(sk, tcp_write_queue_head(sk)); | 2316 | tcp_retransmit_skb(sk, tcp_write_queue_head(sk)); |
2306 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | 2317 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
2307 | icsk->icsk_rto, TCP_RTO_MAX); | 2318 | icsk->icsk_rto, TCP_RTO_MAX); |
2308 | return 1; | 2319 | return true; |
2309 | } | 2320 | } |
2310 | return 0; | 2321 | return false; |
2311 | } | 2322 | } |
2312 | 2323 | ||
2313 | static inline int tcp_fackets_out(const struct tcp_sock *tp) | 2324 | static inline int tcp_fackets_out(const struct tcp_sock *tp) |
@@ -2335,6 +2346,27 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) | |||
2335 | return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; | 2346 | return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; |
2336 | } | 2347 | } |
2337 | 2348 | ||
2349 | static bool tcp_pause_early_retransmit(struct sock *sk, int flag) | ||
2350 | { | ||
2351 | struct tcp_sock *tp = tcp_sk(sk); | ||
2352 | unsigned long delay; | ||
2353 | |||
2354 | /* Delay early retransmit and entering fast recovery for | ||
2355 | * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples | ||
2356 | * available, or RTO is scheduled to fire first. | ||
2357 | */ | ||
2358 | if (sysctl_tcp_early_retrans < 2 || (flag & FLAG_ECE) || !tp->srtt) | ||
2359 | return false; | ||
2360 | |||
2361 | delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2)); | ||
2362 | if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay))) | ||
2363 | return false; | ||
2364 | |||
2365 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, TCP_RTO_MAX); | ||
2366 | tp->early_retrans_delayed = 1; | ||
2367 | return true; | ||
2368 | } | ||
2369 | |||
2338 | static inline int tcp_skb_timedout(const struct sock *sk, | 2370 | static inline int tcp_skb_timedout(const struct sock *sk, |
2339 | const struct sk_buff *skb) | 2371 | const struct sk_buff *skb) |
2340 | { | 2372 | { |
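In tcp_pause_early_retransmit() above, tp->srtt is stored as 8 x SRTT in jiffies, so srtt >> 5 is SRTT/4, giving the max(RTT/4, 2 ms) delay the comment describes. Worked numbers, assuming HZ = 1000 (illustrative):

/* SRTT = 80 ms -> tp->srtt = 640 (kept <<3 in jiffies) */
u32 srtt = 640;
unsigned long delay = max_t(unsigned long,
			    srtt >> 5,		  /* 640/32 = 20 jiffies = RTT/4 */
			    msecs_to_jiffies(2)); /* 2 ms floor */
/* delay = 20 jiffies; ER is skipped when the RTO would fire sooner */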
@@ -2442,28 +2474,28 @@ static inline int tcp_head_timedout(const struct sock *sk) | |||
2442 | * Main question: may we further continue forward transmission | 2474 | * Main question: may we further continue forward transmission |
2443 | * with the same cwnd? | 2475 | * with the same cwnd? |
2444 | */ | 2476 | */ |
2445 | static int tcp_time_to_recover(struct sock *sk) | 2477 | static bool tcp_time_to_recover(struct sock *sk, int flag) |
2446 | { | 2478 | { |
2447 | struct tcp_sock *tp = tcp_sk(sk); | 2479 | struct tcp_sock *tp = tcp_sk(sk); |
2448 | __u32 packets_out; | 2480 | __u32 packets_out; |
2449 | 2481 | ||
2450 | /* Do not perform any recovery during F-RTO algorithm */ | 2482 | /* Do not perform any recovery during F-RTO algorithm */ |
2451 | if (tp->frto_counter) | 2483 | if (tp->frto_counter) |
2452 | return 0; | 2484 | return false; |
2453 | 2485 | ||
2454 | /* Trick#1: The loss is proven. */ | 2486 | /* Trick#1: The loss is proven. */ |
2455 | if (tp->lost_out) | 2487 | if (tp->lost_out) |
2456 | return 1; | 2488 | return true; |
2457 | 2489 | ||
2458 | /* Not-A-Trick#2 : Classic rule... */ | 2490 | /* Not-A-Trick#2 : Classic rule... */ |
2459 | if (tcp_dupack_heuristics(tp) > tp->reordering) | 2491 | if (tcp_dupack_heuristics(tp) > tp->reordering) |
2460 | return 1; | 2492 | return true; |
2461 | 2493 | ||
2462 | /* Trick#3 : when we use RFC2988 timer restart, fast | 2494 | /* Trick#3 : when we use RFC2988 timer restart, fast |
2463 | * retransmit can be triggered by timeout of queue head. | 2495 | * retransmit can be triggered by timeout of queue head. |
2464 | */ | 2496 | */ |
2465 | if (tcp_is_fack(tp) && tcp_head_timedout(sk)) | 2497 | if (tcp_is_fack(tp) && tcp_head_timedout(sk)) |
2466 | return 1; | 2498 | return true; |
2467 | 2499 | ||
2468 | /* Trick#4: It is still not OK... But will it be useful to delay | 2500 | /* Trick#4: It is still not OK... But will it be useful to delay |
2469 | * recovery more? | 2501 | * recovery more? |
@@ -2475,7 +2507,7 @@ static int tcp_time_to_recover(struct sock *sk) | |||
2475 | /* We have nothing to send. This connection is limited | 2507 | /* We have nothing to send. This connection is limited |
2476 | * either by receiver window or by application. | 2508 | * either by receiver window or by application. |
2477 | */ | 2509 | */ |
2478 | return 1; | 2510 | return true; |
2479 | } | 2511 | } |
2480 | 2512 | ||
2481 | /* If a thin stream is detected, retransmit after first | 2513 | /* If a thin stream is detected, retransmit after first |
@@ -2486,9 +2518,19 @@ static int tcp_time_to_recover(struct sock *sk) | |||
2486 | if ((tp->thin_dupack || sysctl_tcp_thin_dupack) && | 2518 | if ((tp->thin_dupack || sysctl_tcp_thin_dupack) && |
2487 | tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 && | 2519 | tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 && |
2488 | tcp_is_sack(tp) && !tcp_send_head(sk)) | 2520 | tcp_is_sack(tp) && !tcp_send_head(sk)) |
2489 | return 1; | 2521 | return true; |
2490 | 2522 | ||
2491 | return 0; | 2523 | /* Trick#6: TCP early retransmit, per RFC5827. To avoid spurious |
2524 | * retransmissions due to small network reorderings, we implement | ||
2525 | * Mitigation A.3 in the RFC and delay the retransmission for a short | ||
2526 | * interval if appropriate. | ||
2527 | */ | ||
2528 | if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out && | ||
2529 | (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) && | ||
2530 | !tcp_may_send_now(sk)) | ||
2531 | return !tcp_pause_early_retransmit(sk, flag); | ||
2532 | |||
2533 | return false; | ||
2492 | } | 2534 | } |
2493 | 2535 | ||
2494 | /* New heuristics: it is possible only after we switched to restart timer | 2536 | /* New heuristics: it is possible only after we switched to restart timer |
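The Trick#6 condition arms early retransmit only in the small-window case RFC 5827 targets: nothing retransmitted yet, exactly one outstanding segment still un-SACKed (packets_out == sacked_out + 1) among fewer than four in flight, and no new data sendable, so the third duplicate ACK that classic fast retransmit waits for can never arrive. An illustrative state in which it fires:

/* Three segments outstanding, the first lost, the next two SACKed */
tp->packets_out = 3;	/* < 4 in flight */
tp->sacked_out  = 2;	/* packets_out == sacked_out + 1 */
tp->retrans_out = 0;	/* nothing retransmitted yet */
/* With tcp_may_send_now() false, only two dupacks can ever arrive,
 * so ER (possibly delayed by tcp_pause_early_retransmit) recovers.
 */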
@@ -2676,22 +2718,22 @@ static void DBGUNDO(struct sock *sk, const char *msg) | |||
2676 | struct inet_sock *inet = inet_sk(sk); | 2718 | struct inet_sock *inet = inet_sk(sk); |
2677 | 2719 | ||
2678 | if (sk->sk_family == AF_INET) { | 2720 | if (sk->sk_family == AF_INET) { |
2679 | printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", | 2721 | pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", |
2680 | msg, | 2722 | msg, |
2681 | &inet->inet_daddr, ntohs(inet->inet_dport), | 2723 | &inet->inet_daddr, ntohs(inet->inet_dport), |
2682 | tp->snd_cwnd, tcp_left_out(tp), | 2724 | tp->snd_cwnd, tcp_left_out(tp), |
2683 | tp->snd_ssthresh, tp->prior_ssthresh, | 2725 | tp->snd_ssthresh, tp->prior_ssthresh, |
2684 | tp->packets_out); | 2726 | tp->packets_out); |
2685 | } | 2727 | } |
2686 | #if IS_ENABLED(CONFIG_IPV6) | 2728 | #if IS_ENABLED(CONFIG_IPV6) |
2687 | else if (sk->sk_family == AF_INET6) { | 2729 | else if (sk->sk_family == AF_INET6) { |
2688 | struct ipv6_pinfo *np = inet6_sk(sk); | 2730 | struct ipv6_pinfo *np = inet6_sk(sk); |
2689 | printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", | 2731 | pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", |
2690 | msg, | 2732 | msg, |
2691 | &np->daddr, ntohs(inet->inet_dport), | 2733 | &np->daddr, ntohs(inet->inet_dport), |
2692 | tp->snd_cwnd, tcp_left_out(tp), | 2734 | tp->snd_cwnd, tcp_left_out(tp), |
2693 | tp->snd_ssthresh, tp->prior_ssthresh, | 2735 | tp->snd_ssthresh, tp->prior_ssthresh, |
2694 | tp->packets_out); | 2736 | tp->packets_out); |
2695 | } | 2737 | } |
2696 | #endif | 2738 | #endif |
2697 | } | 2739 | } |
@@ -2727,7 +2769,7 @@ static inline int tcp_may_undo(const struct tcp_sock *tp) | |||
2727 | } | 2769 | } |
2728 | 2770 | ||
2729 | /* People celebrate: "We love our President!" */ | 2771 | /* People celebrate: "We love our President!" */ |
2730 | static int tcp_try_undo_recovery(struct sock *sk) | 2772 | static bool tcp_try_undo_recovery(struct sock *sk) |
2731 | { | 2773 | { |
2732 | struct tcp_sock *tp = tcp_sk(sk); | 2774 | struct tcp_sock *tp = tcp_sk(sk); |
2733 | 2775 | ||
@@ -2752,10 +2794,10 @@ static int tcp_try_undo_recovery(struct sock *sk) | |||
2752 | * is ACKed. For Reno it is MUST to prevent false | 2794 | * is ACKed. For Reno it is MUST to prevent false |
2753 | * fast retransmits (RFC2582). SACK TCP is safe. */ | 2795 | * fast retransmits (RFC2582). SACK TCP is safe. */ |
2754 | tcp_moderate_cwnd(tp); | 2796 | tcp_moderate_cwnd(tp); |
2755 | return 1; | 2797 | return true; |
2756 | } | 2798 | } |
2757 | tcp_set_ca_state(sk, TCP_CA_Open); | 2799 | tcp_set_ca_state(sk, TCP_CA_Open); |
2758 | return 0; | 2800 | return false; |
2759 | } | 2801 | } |
2760 | 2802 | ||
2761 | /* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */ | 2803 | /* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */ |
@@ -2785,19 +2827,19 @@ static void tcp_try_undo_dsack(struct sock *sk) | |||
2785 | * that successive retransmissions of a segment must not advance | 2827 | * that successive retransmissions of a segment must not advance |
2786 | * retrans_stamp under any conditions. | 2828 | * retrans_stamp under any conditions. |
2787 | */ | 2829 | */ |
2788 | static int tcp_any_retrans_done(const struct sock *sk) | 2830 | static bool tcp_any_retrans_done(const struct sock *sk) |
2789 | { | 2831 | { |
2790 | const struct tcp_sock *tp = tcp_sk(sk); | 2832 | const struct tcp_sock *tp = tcp_sk(sk); |
2791 | struct sk_buff *skb; | 2833 | struct sk_buff *skb; |
2792 | 2834 | ||
2793 | if (tp->retrans_out) | 2835 | if (tp->retrans_out) |
2794 | return 1; | 2836 | return true; |
2795 | 2837 | ||
2796 | skb = tcp_write_queue_head(sk); | 2838 | skb = tcp_write_queue_head(sk); |
2797 | if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS)) | 2839 | if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS)) |
2798 | return 1; | 2840 | return true; |
2799 | 2841 | ||
2800 | return 0; | 2842 | return false; |
2801 | } | 2843 | } |
2802 | 2844 | ||
2803 | /* Undo during fast recovery after partial ACK. */ | 2845 | /* Undo during fast recovery after partial ACK. */ |
@@ -2831,7 +2873,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked) | |||
2831 | } | 2873 | } |
2832 | 2874 | ||
2833 | /* Undo during loss recovery after partial ACK. */ | 2875 | /* Undo during loss recovery after partial ACK. */ |
2834 | static int tcp_try_undo_loss(struct sock *sk) | 2876 | static bool tcp_try_undo_loss(struct sock *sk) |
2835 | { | 2877 | { |
2836 | struct tcp_sock *tp = tcp_sk(sk); | 2878 | struct tcp_sock *tp = tcp_sk(sk); |
2837 | 2879 | ||
@@ -2853,9 +2895,9 @@ static int tcp_try_undo_loss(struct sock *sk) | |||
2853 | tp->undo_marker = 0; | 2895 | tp->undo_marker = 0; |
2854 | if (tcp_is_sack(tp)) | 2896 | if (tcp_is_sack(tp)) |
2855 | tcp_set_ca_state(sk, TCP_CA_Open); | 2897 | tcp_set_ca_state(sk, TCP_CA_Open); |
2856 | return 1; | 2898 | return true; |
2857 | } | 2899 | } |
2858 | return 0; | 2900 | return false; |
2859 | } | 2901 | } |
2860 | 2902 | ||
2861 | static inline void tcp_complete_cwr(struct sock *sk) | 2903 | static inline void tcp_complete_cwr(struct sock *sk) |
@@ -2864,11 +2906,14 @@ static inline void tcp_complete_cwr(struct sock *sk) | |||
2864 | 2906 | ||
2865 | /* Do not moderate cwnd if it's already undone in cwr or recovery. */ | 2907 | /* Do not moderate cwnd if it's already undone in cwr or recovery. */ |
2866 | if (tp->undo_marker) { | 2908 | if (tp->undo_marker) { |
2867 | if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) | 2909 | if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) { |
2868 | tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); | 2910 | tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); |
2869 | else /* PRR */ | 2911 | tp->snd_cwnd_stamp = tcp_time_stamp; |
2912 | } else if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH) { | ||
2913 | /* PRR algorithm. */ | ||
2870 | tp->snd_cwnd = tp->snd_ssthresh; | 2914 | tp->snd_cwnd = tp->snd_ssthresh; |
2871 | tp->snd_cwnd_stamp = tcp_time_stamp; | 2915 | tp->snd_cwnd_stamp = tcp_time_stamp; |
2916 | } | ||
2872 | } | 2917 | } |
2873 | tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); | 2918 | tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); |
2874 | } | 2919 | } |
@@ -3018,6 +3063,38 @@ static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked, | |||
3018 | tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; | 3063 | tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; |
3019 | } | 3064 | } |
3020 | 3065 | ||
3066 | static void tcp_enter_recovery(struct sock *sk, bool ece_ack) | ||
3067 | { | ||
3068 | struct tcp_sock *tp = tcp_sk(sk); | ||
3069 | int mib_idx; | ||
3070 | |||
3071 | if (tcp_is_reno(tp)) | ||
3072 | mib_idx = LINUX_MIB_TCPRENORECOVERY; | ||
3073 | else | ||
3074 | mib_idx = LINUX_MIB_TCPSACKRECOVERY; | ||
3075 | |||
3076 | NET_INC_STATS_BH(sock_net(sk), mib_idx); | ||
3077 | |||
3078 | tp->high_seq = tp->snd_nxt; | ||
3079 | tp->prior_ssthresh = 0; | ||
3080 | tp->undo_marker = tp->snd_una; | ||
3081 | tp->undo_retrans = tp->retrans_out; | ||
3082 | |||
3083 | if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { | ||
3084 | if (!ece_ack) | ||
3085 | tp->prior_ssthresh = tcp_current_ssthresh(sk); | ||
3086 | tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); | ||
3087 | TCP_ECN_queue_cwr(tp); | ||
3088 | } | ||
3089 | |||
3090 | tp->bytes_acked = 0; | ||
3091 | tp->snd_cwnd_cnt = 0; | ||
3092 | tp->prior_cwnd = tp->snd_cwnd; | ||
3093 | tp->prr_delivered = 0; | ||
3094 | tp->prr_out = 0; | ||
3095 | tcp_set_ca_state(sk, TCP_CA_Recovery); | ||
3096 | } | ||
3097 | |||
3021 | /* Process an event, which can update packets-in-flight not trivially. | 3098 | /* Process an event, which can update packets-in-flight not trivially. |
3022 | * Main goal of this function is to calculate new estimate for left_out, | 3099 | * Main goal of this function is to calculate new estimate for left_out, |
3023 | * taking into account both packets sitting in receiver's buffer and | 3100 | * taking into account both packets sitting in receiver's buffer and |
@@ -3037,7 +3114,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
3037 | struct tcp_sock *tp = tcp_sk(sk); | 3114 | struct tcp_sock *tp = tcp_sk(sk); |
3038 | int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && | 3115 | int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && |
3039 | (tcp_fackets_out(tp) > tp->reordering)); | 3116 | (tcp_fackets_out(tp) > tp->reordering)); |
3040 | int fast_rexmit = 0, mib_idx; | 3117 | int fast_rexmit = 0; |
3041 | 3118 | ||
3042 | if (WARN_ON(!tp->packets_out && tp->sacked_out)) | 3119 | if (WARN_ON(!tp->packets_out && tp->sacked_out)) |
3043 | tp->sacked_out = 0; | 3120 | tp->sacked_out = 0; |
@@ -3121,7 +3198,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
3121 | if (icsk->icsk_ca_state <= TCP_CA_Disorder) | 3198 | if (icsk->icsk_ca_state <= TCP_CA_Disorder) |
3122 | tcp_try_undo_dsack(sk); | 3199 | tcp_try_undo_dsack(sk); |
3123 | 3200 | ||
3124 | if (!tcp_time_to_recover(sk)) { | 3201 | if (!tcp_time_to_recover(sk, flag)) { |
3125 | tcp_try_to_open(sk, flag); | 3202 | tcp_try_to_open(sk, flag); |
3126 | return; | 3203 | return; |
3127 | } | 3204 | } |
@@ -3138,32 +3215,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
3138 | } | 3215 | } |
3139 | 3216 | ||
3140 | /* Otherwise enter Recovery state */ | 3217 | /* Otherwise enter Recovery state */ |
3141 | 3218 | tcp_enter_recovery(sk, (flag & FLAG_ECE)); | |
3142 | if (tcp_is_reno(tp)) | ||
3143 | mib_idx = LINUX_MIB_TCPRENORECOVERY; | ||
3144 | else | ||
3145 | mib_idx = LINUX_MIB_TCPSACKRECOVERY; | ||
3146 | |||
3147 | NET_INC_STATS_BH(sock_net(sk), mib_idx); | ||
3148 | |||
3149 | tp->high_seq = tp->snd_nxt; | ||
3150 | tp->prior_ssthresh = 0; | ||
3151 | tp->undo_marker = tp->snd_una; | ||
3152 | tp->undo_retrans = tp->retrans_out; | ||
3153 | |||
3154 | if (icsk->icsk_ca_state < TCP_CA_CWR) { | ||
3155 | if (!(flag & FLAG_ECE)) | ||
3156 | tp->prior_ssthresh = tcp_current_ssthresh(sk); | ||
3157 | tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); | ||
3158 | TCP_ECN_queue_cwr(tp); | ||
3159 | } | ||
3160 | |||
3161 | tp->bytes_acked = 0; | ||
3162 | tp->snd_cwnd_cnt = 0; | ||
3163 | tp->prior_cwnd = tp->snd_cwnd; | ||
3164 | tp->prr_delivered = 0; | ||
3165 | tp->prr_out = 0; | ||
3166 | tcp_set_ca_state(sk, TCP_CA_Recovery); | ||
3167 | fast_rexmit = 1; | 3219 | fast_rexmit = 1; |
3168 | } | 3220 | } |
3169 | 3221 | ||
@@ -3245,16 +3297,47 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
3245 | /* Restart timer after forward progress on connection. | 3297 | /* Restart timer after forward progress on connection. |
3246 | * RFC2988 recommends to restart timer to now+rto. | 3298 | * RFC2988 recommends to restart timer to now+rto. |
3247 | */ | 3299 | */ |
3248 | static void tcp_rearm_rto(struct sock *sk) | 3300 | void tcp_rearm_rto(struct sock *sk) |
3249 | { | 3301 | { |
3250 | const struct tcp_sock *tp = tcp_sk(sk); | 3302 | struct tcp_sock *tp = tcp_sk(sk); |
3251 | 3303 | ||
3252 | if (!tp->packets_out) { | 3304 | if (!tp->packets_out) { |
3253 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); | 3305 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); |
3254 | } else { | 3306 | } else { |
3255 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | 3307 | u32 rto = inet_csk(sk)->icsk_rto; |
3256 | inet_csk(sk)->icsk_rto, TCP_RTO_MAX); | 3308 | /* Offset the time elapsed after installing regular RTO */ |
3309 | if (tp->early_retrans_delayed) { | ||
3310 | struct sk_buff *skb = tcp_write_queue_head(sk); | ||
3311 | const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto; | ||
3312 | s32 delta = (s32)(rto_time_stamp - tcp_time_stamp); | ||
3313 | /* delta may not be positive if the socket is locked | ||
3314 | * when the delayed ER timer fires and is rescheduled. | ||
3315 | */ | ||
3316 | if (delta > 0) | ||
3317 | rto = delta; | ||
3318 | } | ||
3319 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, | ||
3320 | TCP_RTO_MAX); | ||
3257 | } | 3321 | } |
3322 | tp->early_retrans_delayed = 0; | ||
3323 | } | ||
3324 | |||
3325 | /* This function is called when the delayed ER timer fires. TCP enters | ||
3326 | * fast recovery and performs fast-retransmit. | ||
3327 | */ | ||
3328 | void tcp_resume_early_retransmit(struct sock *sk) | ||
3329 | { | ||
3330 | struct tcp_sock *tp = tcp_sk(sk); | ||
3331 | |||
3332 | tcp_rearm_rto(sk); | ||
3333 | |||
3334 | /* Stop if ER is disabled after the delayed ER timer is scheduled */ | ||
3335 | if (!tp->do_early_retrans) | ||
3336 | return; | ||
3337 | |||
3338 | tcp_enter_recovery(sk, false); | ||
3339 | tcp_update_scoreboard(sk, 1); | ||
3340 | tcp_xmit_retransmit_queue(sk); | ||
3258 | } | 3341 | } |
3259 | 3342 | ||
3260 | /* If we get here, the whole TSO packet has not been acked. */ | 3343 | /* If we get here, the whole TSO packet has not been acked. */ |
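The rearm logic offsets the timer by the time already consumed: when the delayed-ER timer had replaced the RTO, re-arming must cover only the remainder of the original deadline, not a fresh icsk_rto from now. Worked example with illustrative jiffies values:

/* Head skb stamped at when = 1000, icsk_rto = 200, now = 1080 */
u32 when = 1000, rto = 200, now = 1080;
u32 rto_time_stamp = when + rto;		/* absolute deadline: 1200 */
s32 delta = (s32)(rto_time_stamp - now);	/* 120 jiffies remain */
if (delta > 0)
	rto = delta;	/* re-arm for the remainder, not a fresh 200 */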
@@ -3289,7 +3372,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, | |||
3289 | const struct inet_connection_sock *icsk = inet_csk(sk); | 3372 | const struct inet_connection_sock *icsk = inet_csk(sk); |
3290 | struct sk_buff *skb; | 3373 | struct sk_buff *skb; |
3291 | u32 now = tcp_time_stamp; | 3374 | u32 now = tcp_time_stamp; |
3292 | int fully_acked = 1; | 3375 | int fully_acked = true; |
3293 | int flag = 0; | 3376 | int flag = 0; |
3294 | u32 pkts_acked = 0; | 3377 | u32 pkts_acked = 0; |
3295 | u32 reord = tp->packets_out; | 3378 | u32 reord = tp->packets_out; |
@@ -3313,7 +3396,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, | |||
3313 | if (!acked_pcount) | 3396 | if (!acked_pcount) |
3314 | break; | 3397 | break; |
3315 | 3398 | ||
3316 | fully_acked = 0; | 3399 | fully_acked = false; |
3317 | } else { | 3400 | } else { |
3318 | acked_pcount = tcp_skb_pcount(skb); | 3401 | acked_pcount = tcp_skb_pcount(skb); |
3319 | } | 3402 | } |
@@ -3430,18 +3513,18 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, | |||
3430 | if (!tp->packets_out && tcp_is_sack(tp)) { | 3513 | if (!tp->packets_out && tcp_is_sack(tp)) { |
3431 | icsk = inet_csk(sk); | 3514 | icsk = inet_csk(sk); |
3432 | if (tp->lost_out) { | 3515 | if (tp->lost_out) { |
3433 | printk(KERN_DEBUG "Leak l=%u %d\n", | 3516 | pr_debug("Leak l=%u %d\n", |
3434 | tp->lost_out, icsk->icsk_ca_state); | 3517 | tp->lost_out, icsk->icsk_ca_state); |
3435 | tp->lost_out = 0; | 3518 | tp->lost_out = 0; |
3436 | } | 3519 | } |
3437 | if (tp->sacked_out) { | 3520 | if (tp->sacked_out) { |
3438 | printk(KERN_DEBUG "Leak s=%u %d\n", | 3521 | pr_debug("Leak s=%u %d\n", |
3439 | tp->sacked_out, icsk->icsk_ca_state); | 3522 | tp->sacked_out, icsk->icsk_ca_state); |
3440 | tp->sacked_out = 0; | 3523 | tp->sacked_out = 0; |
3441 | } | 3524 | } |
3442 | if (tp->retrans_out) { | 3525 | if (tp->retrans_out) { |
3443 | printk(KERN_DEBUG "Leak r=%u %d\n", | 3526 | pr_debug("Leak r=%u %d\n", |
3444 | tp->retrans_out, icsk->icsk_ca_state); | 3527 | tp->retrans_out, icsk->icsk_ca_state); |
3445 | tp->retrans_out = 0; | 3528 | tp->retrans_out = 0; |
3446 | } | 3529 | } |
3447 | } | 3530 | } |
@@ -3592,7 +3675,7 @@ static void tcp_undo_spur_to_response(struct sock *sk, int flag) | |||
3592 | * to prove that the RTO is indeed spurious. It transfers the control | 3675 | * to prove that the RTO is indeed spurious. It transfers the control |
3593 | * from F-RTO to the conventional RTO recovery | 3676 | * from F-RTO to the conventional RTO recovery |
3594 | */ | 3677 | */ |
3595 | static int tcp_process_frto(struct sock *sk, int flag) | 3678 | static bool tcp_process_frto(struct sock *sk, int flag) |
3596 | { | 3679 | { |
3597 | struct tcp_sock *tp = tcp_sk(sk); | 3680 | struct tcp_sock *tp = tcp_sk(sk); |
3598 | 3681 | ||
@@ -3608,7 +3691,7 @@ static int tcp_process_frto(struct sock *sk, int flag) | |||
3608 | 3691 | ||
3609 | if (!before(tp->snd_una, tp->frto_highmark)) { | 3692 | if (!before(tp->snd_una, tp->frto_highmark)) { |
3610 | tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag); | 3693 | tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag); |
3611 | return 1; | 3694 | return true; |
3612 | } | 3695 | } |
3613 | 3696 | ||
3614 | if (!tcp_is_sackfrto(tp)) { | 3697 | if (!tcp_is_sackfrto(tp)) { |
@@ -3617,19 +3700,19 @@ static int tcp_process_frto(struct sock *sk, int flag) | |||
3617 | * data, winupdate | 3700 | * data, winupdate |
3618 | */ | 3701 | */ |
3619 | if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP)) | 3702 | if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP)) |
3620 | return 1; | 3703 | return true; |
3621 | 3704 | ||
3622 | if (!(flag & FLAG_DATA_ACKED)) { | 3705 | if (!(flag & FLAG_DATA_ACKED)) { |
3623 | tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), | 3706 | tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), |
3624 | flag); | 3707 | flag); |
3625 | return 1; | 3708 | return true; |
3626 | } | 3709 | } |
3627 | } else { | 3710 | } else { |
3628 | if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) { | 3711 | if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) { |
3629 | /* Prevent sending of new data. */ | 3712 | /* Prevent sending of new data. */ |
3630 | tp->snd_cwnd = min(tp->snd_cwnd, | 3713 | tp->snd_cwnd = min(tp->snd_cwnd, |
3631 | tcp_packets_in_flight(tp)); | 3714 | tcp_packets_in_flight(tp)); |
3632 | return 1; | 3715 | return true; |
3633 | } | 3716 | } |
3634 | 3717 | ||
3635 | if ((tp->frto_counter >= 2) && | 3718 | if ((tp->frto_counter >= 2) && |
@@ -3639,10 +3722,10 @@ static int tcp_process_frto(struct sock *sk, int flag) | |||
3639 | /* RFC4138 shortcoming (see comment above) */ | 3722 | /* RFC4138 shortcoming (see comment above) */ |
3640 | if (!(flag & FLAG_FORWARD_PROGRESS) && | 3723 | if (!(flag & FLAG_FORWARD_PROGRESS) && |
3641 | (flag & FLAG_NOT_DUP)) | 3724 | (flag & FLAG_NOT_DUP)) |
3642 | return 1; | 3725 | return true; |
3643 | 3726 | ||
3644 | tcp_enter_frto_loss(sk, 3, flag); | 3727 | tcp_enter_frto_loss(sk, 3, flag); |
3645 | return 1; | 3728 | return true; |
3646 | } | 3729 | } |
3647 | } | 3730 | } |
3648 | 3731 | ||
@@ -3654,7 +3737,7 @@ static int tcp_process_frto(struct sock *sk, int flag) | |||
3654 | if (!tcp_may_send_now(sk)) | 3737 | if (!tcp_may_send_now(sk)) |
3655 | tcp_enter_frto_loss(sk, 2, flag); | 3738 | tcp_enter_frto_loss(sk, 2, flag); |
3656 | 3739 | ||
3657 | return 1; | 3740 | return true; |
3658 | } else { | 3741 | } else { |
3659 | switch (sysctl_tcp_frto_response) { | 3742 | switch (sysctl_tcp_frto_response) { |
3660 | case 2: | 3743 | case 2: |
@@ -3671,7 +3754,7 @@ static int tcp_process_frto(struct sock *sk, int flag) | |||
3671 | tp->undo_marker = 0; | 3754 | tp->undo_marker = 0; |
3672 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); | 3755 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); |
3673 | } | 3756 | } |
3674 | return 0; | 3757 | return false; |
3675 | } | 3758 | } |
3676 | 3759 | ||
3677 | /* This routine deals with incoming acks, but not outgoing ones. */ | 3760 | /* This routine deals with incoming acks, but not outgoing ones. */ |
@@ -3689,7 +3772,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
3689 | int prior_sacked = tp->sacked_out; | 3772 | int prior_sacked = tp->sacked_out; |
3690 | int pkts_acked = 0; | 3773 | int pkts_acked = 0; |
3691 | int newly_acked_sacked = 0; | 3774 | int newly_acked_sacked = 0; |
3692 | int frto_cwnd = 0; | 3775 | bool frto_cwnd = false; |
3693 | 3776 | ||
3694 | /* If the ack is older than previous acks | 3777 | /* If the ack is older than previous acks |
3695 | * then we can probably ignore it. | 3778 | * then we can probably ignore it. |
@@ -3703,6 +3786,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
3703 | if (after(ack, tp->snd_nxt)) | 3786 | if (after(ack, tp->snd_nxt)) |
3704 | goto invalid_ack; | 3787 | goto invalid_ack; |
3705 | 3788 | ||
3789 | if (tp->early_retrans_delayed) | ||
3790 | tcp_rearm_rto(sk); | ||
3791 | |||
3706 | if (after(ack, prior_snd_una)) | 3792 | if (after(ack, prior_snd_una)) |
3707 | flag |= FLAG_SND_UNA_ADVANCED; | 3793 | flag |= FLAG_SND_UNA_ADVANCED; |
3708 | 3794 | ||
@@ -3868,10 +3954,9 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o | |||
3868 | __u8 snd_wscale = *(__u8 *)ptr; | 3954 | __u8 snd_wscale = *(__u8 *)ptr; |
3869 | opt_rx->wscale_ok = 1; | 3955 | opt_rx->wscale_ok = 1; |
3870 | if (snd_wscale > 14) { | 3956 | if (snd_wscale > 14) { |
3871 | if (net_ratelimit()) | 3957 | net_info_ratelimited("%s: Illegal window scaling value %d >14 received\n", |
3872 | pr_info("%s: Illegal window scaling value %d >14 received\n", | 3958 | __func__, |
3873 | __func__, | 3959 | snd_wscale); |
3874 | snd_wscale); | ||
3875 | snd_wscale = 14; | 3960 | snd_wscale = 14; |
3876 | } | 3961 | } |
3877 | opt_rx->snd_wscale = snd_wscale; | 3962 | opt_rx->snd_wscale = snd_wscale; |
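The clamp to 14 follows the window-scaling spec (RFC 1323, later RFC 7323), which caps the shift so the scaled window stays below 2^30 and sequence comparisons remain unambiguous. Quick check:

/* Largest legal advertised window under window scaling */
u32 max_win = 65535U << 14;	/* = 1073725440, just under 1 GiB */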
@@ -3942,7 +4027,7 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o | |||
3942 | } | 4027 | } |
3943 | EXPORT_SYMBOL(tcp_parse_options); | 4028 | EXPORT_SYMBOL(tcp_parse_options); |
3944 | 4029 | ||
3945 | static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th) | 4030 | static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th) |
3946 | { | 4031 | { |
3947 | const __be32 *ptr = (const __be32 *)(th + 1); | 4032 | const __be32 *ptr = (const __be32 *)(th + 1); |
3948 | 4033 | ||
@@ -3953,31 +4038,31 @@ static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr | |||
3953 | tp->rx_opt.rcv_tsval = ntohl(*ptr); | 4038 | tp->rx_opt.rcv_tsval = ntohl(*ptr); |
3954 | ++ptr; | 4039 | ++ptr; |
3955 | tp->rx_opt.rcv_tsecr = ntohl(*ptr); | 4040 | tp->rx_opt.rcv_tsecr = ntohl(*ptr); |
3956 | return 1; | 4041 | return true; |
3957 | } | 4042 | } |
3958 | return 0; | 4043 | return false; |
3959 | } | 4044 | } |
3960 | 4045 | ||
3961 | /* Fast parse options. This hopes to only see timestamps. | 4046 | /* Fast parse options. This hopes to only see timestamps. |
3962 | * If it is wrong it falls back on tcp_parse_options(). | 4047 | * If it is wrong it falls back on tcp_parse_options(). |
3963 | */ | 4048 | */ |
3964 | static int tcp_fast_parse_options(const struct sk_buff *skb, | 4049 | static bool tcp_fast_parse_options(const struct sk_buff *skb, |
3965 | const struct tcphdr *th, | 4050 | const struct tcphdr *th, |
3966 | struct tcp_sock *tp, const u8 **hvpp) | 4051 | struct tcp_sock *tp, const u8 **hvpp) |
3967 | { | 4052 | { |
3968 | /* In the spirit of fast parsing, compare doff directly to constant | 4053 | /* In the spirit of fast parsing, compare doff directly to constant |
3969 | * values. Because equality is used, short doff can be ignored here. | 4054 | * values. Because equality is used, short doff can be ignored here. |
3970 | */ | 4055 | */ |
3971 | if (th->doff == (sizeof(*th) / 4)) { | 4056 | if (th->doff == (sizeof(*th) / 4)) { |
3972 | tp->rx_opt.saw_tstamp = 0; | 4057 | tp->rx_opt.saw_tstamp = 0; |
3973 | return 0; | 4058 | return false; |
3974 | } else if (tp->rx_opt.tstamp_ok && | 4059 | } else if (tp->rx_opt.tstamp_ok && |
3975 | th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) { | 4060 | th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) { |
3976 | if (tcp_parse_aligned_timestamp(tp, th)) | 4061 | if (tcp_parse_aligned_timestamp(tp, th)) |
3977 | return 1; | 4062 | return true; |
3978 | } | 4063 | } |
3979 | tcp_parse_options(skb, &tp->rx_opt, hvpp, 1); | 4064 | tcp_parse_options(skb, &tp->rx_opt, hvpp, 1); |
3980 | return 1; | 4065 | return true; |
3981 | } | 4066 | } |
3982 | 4067 | ||
3983 | #ifdef CONFIG_TCP_MD5SIG | 4068 | #ifdef CONFIG_TCP_MD5SIG |
@@ -4218,7 +4303,7 @@ static void tcp_fin(struct sock *sk) | |||
4218 | } | 4303 | } |
4219 | } | 4304 | } |
4220 | 4305 | ||
4221 | static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, | 4306 | static inline bool tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, |
4222 | u32 end_seq) | 4307 | u32 end_seq) |
4223 | { | 4308 | { |
4224 | if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) { | 4309 | if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) { |
@@ -4226,9 +4311,9 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, | |||
4226 | sp->start_seq = seq; | 4311 | sp->start_seq = seq; |
4227 | if (after(end_seq, sp->end_seq)) | 4312 | if (after(end_seq, sp->end_seq)) |
4228 | sp->end_seq = end_seq; | 4313 | sp->end_seq = end_seq; |
4229 | return 1; | 4314 | return true; |
4230 | } | 4315 | } |
4231 | return 0; | 4316 | return false; |
4232 | } | 4317 | } |
4233 | 4318 | ||
4234 | static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) | 4319 | static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) |
@@ -4424,10 +4509,10 @@ static void tcp_ofo_queue(struct sock *sk) | |||
4424 | } | 4509 | } |
4425 | } | 4510 | } |
4426 | 4511 | ||
4427 | static int tcp_prune_ofo_queue(struct sock *sk); | 4512 | static bool tcp_prune_ofo_queue(struct sock *sk); |
4428 | static int tcp_prune_queue(struct sock *sk); | 4513 | static int tcp_prune_queue(struct sock *sk); |
4429 | 4514 | ||
4430 | static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) | 4515 | static int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) |
4431 | { | 4516 | { |
4432 | if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || | 4517 | if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || |
4433 | !sk_rmem_schedule(sk, size)) { | 4518 | !sk_rmem_schedule(sk, size)) { |
@@ -4446,6 +4531,41 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) | |||
4446 | return 0; | 4531 | return 0; |
4447 | } | 4532 | } |
4448 | 4533 | ||
4534 | /** | ||
4535 | * tcp_try_coalesce - try to merge skb to prior one | ||
4536 | * @sk: socket | ||
4537 | * @to: prior buffer | ||
4538 | * @from: buffer to add in queue | ||
4539 | * @fragstolen: pointer to boolean | ||
4540 | * | ||
4541 | * Before queueing skb @from after @to, try to merge them | ||
4542 | * to reduce overall memory use and queue lengths, if cost is small. | ||
4543 | * Packets in ofo or receive queues can stay a long time. | ||
4544 | * Better try to coalesce them right now to avoid future collapses. | ||
4545 | * Returns true if caller should free @from instead of queueing it | ||
4546 | */ | ||
4547 | static bool tcp_try_coalesce(struct sock *sk, | ||
4548 | struct sk_buff *to, | ||
4549 | struct sk_buff *from, | ||
4550 | bool *fragstolen) | ||
4551 | { | ||
4552 | int delta; | ||
4553 | |||
4554 | *fragstolen = false; | ||
4555 | |||
4556 | if (tcp_hdr(from)->fin) | ||
4557 | return false; | ||
4558 | if (!skb_try_coalesce(to, from, fragstolen, &delta)) | ||
4559 | return false; | ||
4560 | |||
4561 | atomic_add(delta, &sk->sk_rmem_alloc); | ||
4562 | sk_mem_charge(sk, delta); | ||
4563 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE); | ||
4564 | TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq; | ||
4565 | TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq; | ||
4566 | return true; | ||
4567 | } | ||
4568 | |||
4449 | static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) | 4569 | static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) |
4450 | { | 4570 | { |
4451 | struct tcp_sock *tp = tcp_sk(sk); | 4571 | struct tcp_sock *tp = tcp_sk(sk); |
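tcp_try_coalesce() charges the socket only for the delta bytes actually absorbed (skb_try_coalesce() may steal page fragments or the head of @from), so a successful merge avoids paying the full truesize of a second skb on the queue. The caller contract — free @from on success, queue it otherwise — looks like this in practice (the same pattern tcp_queue_rcv() below follows):

    bool fragstolen;

    if (tail && tcp_try_coalesce(sk, tail, skb, &fragstolen)) {
            /* payload now lives in 'tail'; release what is left of
             * 'skb' (its head may have been stolen into a fragment) */
            kfree_skb_partial(skb, fragstolen);
    } else {
            __skb_queue_tail(&sk->sk_receive_queue, skb);
            skb_set_owner_r(skb, sk);
    }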
@@ -4484,23 +4604,13 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) | |||
4484 | end_seq = TCP_SKB_CB(skb)->end_seq; | 4604 | end_seq = TCP_SKB_CB(skb)->end_seq; |
4485 | 4605 | ||
4486 | if (seq == TCP_SKB_CB(skb1)->end_seq) { | 4606 | if (seq == TCP_SKB_CB(skb1)->end_seq) { |
4487 | /* Packets in ofo can stay in queue a long time. | 4607 | bool fragstolen; |
4488 | * Better try to coalesce them right now | 4608 | |
4489 | * to avoid future tcp_collapse_ofo_queue(), | 4609 | if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { |
4490 | * probably the most expensive function in tcp stack. | ||
4491 | */ | ||
4492 | if (skb->len <= skb_tailroom(skb1) && !tcp_hdr(skb)->fin) { | ||
4493 | NET_INC_STATS_BH(sock_net(sk), | ||
4494 | LINUX_MIB_TCPRCVCOALESCE); | ||
4495 | BUG_ON(skb_copy_bits(skb, 0, | ||
4496 | skb_put(skb1, skb->len), | ||
4497 | skb->len)); | ||
4498 | TCP_SKB_CB(skb1)->end_seq = end_seq; | ||
4499 | TCP_SKB_CB(skb1)->ack_seq = TCP_SKB_CB(skb)->ack_seq; | ||
4500 | __kfree_skb(skb); | ||
4501 | skb = NULL; | ||
4502 | } else { | ||
4503 | __skb_queue_after(&tp->out_of_order_queue, skb1, skb); | 4610 | __skb_queue_after(&tp->out_of_order_queue, skb1, skb); |
4611 | } else { | ||
4612 | kfree_skb_partial(skb, fragstolen); | ||
4613 | skb = NULL; | ||
4504 | } | 4614 | } |
4505 | 4615 | ||
4506 | if (!tp->rx_opt.num_sacks || | 4616 | if (!tp->rx_opt.num_sacks || |
@@ -4576,12 +4686,65 @@ end: | |||
4576 | skb_set_owner_r(skb, sk); | 4686 | skb_set_owner_r(skb, sk); |
4577 | } | 4687 | } |
4578 | 4688 | ||
4689 | static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen, | ||
4690 | bool *fragstolen) | ||
4691 | { | ||
4692 | int eaten; | ||
4693 | struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue); | ||
4694 | |||
4695 | __skb_pull(skb, hdrlen); | ||
4696 | eaten = (tail && | ||
4697 | tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0; | ||
4698 | tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | ||
4699 | if (!eaten) { | ||
4700 | __skb_queue_tail(&sk->sk_receive_queue, skb); | ||
4701 | skb_set_owner_r(skb, sk); | ||
4702 | } | ||
4703 | return eaten; | ||
4704 | } | ||
4705 | |||
4706 | int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) | ||
4707 | { | ||
4708 | struct sk_buff *skb; | ||
4709 | struct tcphdr *th; | ||
4710 | bool fragstolen; | ||
4711 | |||
4712 | if (tcp_try_rmem_schedule(sk, size + sizeof(*th))) | ||
4713 | goto err; | ||
4714 | |||
4715 | skb = alloc_skb(size + sizeof(*th), sk->sk_allocation); | ||
4716 | if (!skb) | ||
4717 | goto err; | ||
4718 | |||
4719 | th = (struct tcphdr *)skb_put(skb, sizeof(*th)); | ||
4720 | skb_reset_transport_header(skb); | ||
4721 | memset(th, 0, sizeof(*th)); | ||
4722 | |||
4723 | if (memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size)) | ||
4724 | goto err_free; | ||
4725 | |||
4726 | TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt; | ||
4727 | TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size; | ||
4728 | TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1; | ||
4729 | |||
4730 | if (tcp_queue_rcv(sk, skb, sizeof(*th), &fragstolen)) { | ||
4731 | WARN_ON_ONCE(fragstolen); /* should not happen */ | ||
4732 | __kfree_skb(skb); | ||
4733 | } | ||
4734 | return size; | ||
4735 | |||
4736 | err_free: | ||
4737 | kfree_skb(skb); | ||
4738 | err: | ||
4739 | return -ENOMEM; | ||
4740 | } | ||
4579 | 4741 | ||
4580 | static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) | 4742 | static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) |
4581 | { | 4743 | { |
4582 | const struct tcphdr *th = tcp_hdr(skb); | 4744 | const struct tcphdr *th = tcp_hdr(skb); |
4583 | struct tcp_sock *tp = tcp_sk(sk); | 4745 | struct tcp_sock *tp = tcp_sk(sk); |
4584 | int eaten = -1; | 4746 | int eaten = -1; |
4747 | bool fragstolen = false; | ||
4585 | 4748 | ||
4586 | if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) | 4749 | if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) |
4587 | goto drop; | 4750 | goto drop; |
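tcp_send_rcvq() is the receive-queue half of the TCP repair API: it wraps user data in an skb with a zeroed, header-only tcphdr and queues it as though it had arrived from the network, with seq/end_seq continuing from rcv_nxt. A hypothetical user-space sketch of restoring a checkpointed connection's receive queue (option names and values follow the repair patch set; verify them against your kernel headers):

    #include <netinet/in.h>
    #include <netinet/tcp.h>
    #include <sys/socket.h>

    #ifndef TCP_REPAIR
    #define TCP_REPAIR       19
    #define TCP_REPAIR_QUEUE 20
    #endif
    #ifndef TCP_RECV_QUEUE
    #define TCP_RECV_QUEUE   1
    #endif

    /* Refill the receive queue of a repair-mode socket. */
    static int restore_rcv_queue(int fd, const void *data, size_t len)
    {
            int on = 1, q = TCP_RECV_QUEUE;

            if (setsockopt(fd, IPPROTO_TCP, TCP_REPAIR, &on, sizeof(on)) < 0)
                    return -1;
            if (setsockopt(fd, IPPROTO_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q)) < 0)
                    return -1;
            /* send() on a repair-mode socket ends up in tcp_send_rcvq()
             * instead of transmitting anything on the wire */
            return send(fd, data, len, 0) == (ssize_t)len ? 0 : -1;
    }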
@@ -4626,8 +4789,7 @@ queue_and_out: | |||
4626 | tcp_try_rmem_schedule(sk, skb->truesize)) | 4789 | tcp_try_rmem_schedule(sk, skb->truesize)) |
4627 | goto drop; | 4790 | goto drop; |
4628 | 4791 | ||
4629 | skb_set_owner_r(skb, sk); | 4792 | eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); |
4630 | __skb_queue_tail(&sk->sk_receive_queue, skb); | ||
4631 | } | 4793 | } |
4632 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | 4794 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; |
4633 | if (skb->len) | 4795 | if (skb->len) |
@@ -4651,7 +4813,7 @@ queue_and_out: | |||
4651 | tcp_fast_path_check(sk); | 4813 | tcp_fast_path_check(sk); |
4652 | 4814 | ||
4653 | if (eaten > 0) | 4815 | if (eaten > 0) |
4654 | __kfree_skb(skb); | 4816 | kfree_skb_partial(skb, fragstolen); |
4655 | else if (!sock_flag(sk, SOCK_DEAD)) | 4817 | else if (!sock_flag(sk, SOCK_DEAD)) |
4656 | sk->sk_data_ready(sk, 0); | 4818 | sk->sk_data_ready(sk, 0); |
4657 | return; | 4819 | return; |
@@ -4871,10 +5033,10 @@ static void tcp_collapse_ofo_queue(struct sock *sk) | |||
4871 | * Purge the out-of-order queue. | 5033 | * Purge the out-of-order queue. |
4872 | * Return true if queue was pruned. | 5034 | * Return true if queue was pruned. |
4873 | */ | 5035 | */ |
4874 | static int tcp_prune_ofo_queue(struct sock *sk) | 5036 | static bool tcp_prune_ofo_queue(struct sock *sk) |
4875 | { | 5037 | { |
4876 | struct tcp_sock *tp = tcp_sk(sk); | 5038 | struct tcp_sock *tp = tcp_sk(sk); |
4877 | int res = 0; | 5039 | bool res = false; |
4878 | 5040 | ||
4879 | if (!skb_queue_empty(&tp->out_of_order_queue)) { | 5041 | if (!skb_queue_empty(&tp->out_of_order_queue)) { |
4880 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED); | 5042 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED); |
@@ -4888,7 +5050,7 @@ static int tcp_prune_ofo_queue(struct sock *sk) | |||
4888 | if (tp->rx_opt.sack_ok) | 5050 | if (tp->rx_opt.sack_ok) |
4889 | tcp_sack_reset(&tp->rx_opt); | 5051 | tcp_sack_reset(&tp->rx_opt); |
4890 | sk_mem_reclaim(sk); | 5052 | sk_mem_reclaim(sk); |
4891 | res = 1; | 5053 | res = true; |
4892 | } | 5054 | } |
4893 | return res; | 5055 | return res; |
4894 | } | 5056 | } |
@@ -4965,7 +5127,7 @@ void tcp_cwnd_application_limited(struct sock *sk) | |||
4965 | tp->snd_cwnd_stamp = tcp_time_stamp; | 5127 | tp->snd_cwnd_stamp = tcp_time_stamp; |
4966 | } | 5128 | } |
4967 | 5129 | ||
4968 | static int tcp_should_expand_sndbuf(const struct sock *sk) | 5130 | static bool tcp_should_expand_sndbuf(const struct sock *sk) |
4969 | { | 5131 | { |
4970 | const struct tcp_sock *tp = tcp_sk(sk); | 5132 | const struct tcp_sock *tp = tcp_sk(sk); |
4971 | 5133 | ||
@@ -4973,21 +5135,21 @@ static int tcp_should_expand_sndbuf(const struct sock *sk) | |||
4973 | * not modify it. | 5135 | * not modify it. |
4974 | */ | 5136 | */ |
4975 | if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) | 5137 | if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) |
4976 | return 0; | 5138 | return false; |
4977 | 5139 | ||
4978 | /* If we are under global TCP memory pressure, do not expand. */ | 5140 | /* If we are under global TCP memory pressure, do not expand. */ |
4979 | if (sk_under_memory_pressure(sk)) | 5141 | if (sk_under_memory_pressure(sk)) |
4980 | return 0; | 5142 | return false; |
4981 | 5143 | ||
4982 | /* If we are under soft global TCP memory pressure, do not expand. */ | 5144 | /* If we are under soft global TCP memory pressure, do not expand. */ |
4983 | if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0)) | 5145 | if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0)) |
4984 | return 0; | 5146 | return false; |
4985 | 5147 | ||
4986 | /* If we filled the congestion window, do not expand. */ | 5148 | /* If we filled the congestion window, do not expand. */ |
4987 | if (tp->packets_out >= tp->snd_cwnd) | 5149 | if (tp->packets_out >= tp->snd_cwnd) |
4988 | return 0; | 5150 | return false; |
4989 | 5151 | ||
4990 | return 1; | 5152 | return true; |
4991 | } | 5153 | } |
4992 | 5154 | ||
4993 | /* When incoming ACK allowed to free some skb from write_queue, | 5155 | /* When incoming ACK allowed to free some skb from write_queue, |
@@ -5213,19 +5375,19 @@ static inline int tcp_checksum_complete_user(struct sock *sk, | |||
5213 | } | 5375 | } |
5214 | 5376 | ||
5215 | #ifdef CONFIG_NET_DMA | 5377 | #ifdef CONFIG_NET_DMA |
5216 | static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, | 5378 | static bool tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, |
5217 | int hlen) | 5379 | int hlen) |
5218 | { | 5380 | { |
5219 | struct tcp_sock *tp = tcp_sk(sk); | 5381 | struct tcp_sock *tp = tcp_sk(sk); |
5220 | int chunk = skb->len - hlen; | 5382 | int chunk = skb->len - hlen; |
5221 | int dma_cookie; | 5383 | int dma_cookie; |
5222 | int copied_early = 0; | 5384 | bool copied_early = false; |
5223 | 5385 | ||
5224 | if (tp->ucopy.wakeup) | 5386 | if (tp->ucopy.wakeup) |
5225 | return 0; | 5387 | return false; |
5226 | 5388 | ||
5227 | if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) | 5389 | if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) |
5228 | tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY); | 5390 | tp->ucopy.dma_chan = net_dma_find_channel(); |
5229 | 5391 | ||
5230 | if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) { | 5392 | if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) { |
5231 | 5393 | ||
@@ -5238,7 +5400,7 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, | |||
5238 | goto out; | 5400 | goto out; |
5239 | 5401 | ||
5240 | tp->ucopy.dma_cookie = dma_cookie; | 5402 | tp->ucopy.dma_cookie = dma_cookie; |
5241 | copied_early = 1; | 5403 | copied_early = true; |
5242 | 5404 | ||
5243 | tp->ucopy.len -= chunk; | 5405 | tp->ucopy.len -= chunk; |
5244 | tp->copied_seq += chunk; | 5406 | tp->copied_seq += chunk; |
@@ -5430,6 +5592,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
5430 | } else { | 5592 | } else { |
5431 | int eaten = 0; | 5593 | int eaten = 0; |
5432 | int copied_early = 0; | 5594 | int copied_early = 0; |
5595 | bool fragstolen = false; | ||
5433 | 5596 | ||
5434 | if (tp->copied_seq == tp->rcv_nxt && | 5597 | if (tp->copied_seq == tp->rcv_nxt && |
5435 | len - tcp_header_len <= tp->ucopy.len) { | 5598 | len - tcp_header_len <= tp->ucopy.len) { |
@@ -5487,10 +5650,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
5487 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS); | 5650 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS); |
5488 | 5651 | ||
5489 | /* Bulk data transfer: receiver */ | 5652 | /* Bulk data transfer: receiver */ |
5490 | __skb_pull(skb, tcp_header_len); | 5653 | eaten = tcp_queue_rcv(sk, skb, tcp_header_len, |
5491 | __skb_queue_tail(&sk->sk_receive_queue, skb); | 5654 | &fragstolen); |
5492 | skb_set_owner_r(skb, sk); | ||
5493 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | ||
5494 | } | 5655 | } |
5495 | 5656 | ||
5496 | tcp_event_data_recv(sk, skb); | 5657 | tcp_event_data_recv(sk, skb); |
@@ -5512,7 +5673,7 @@ no_ack: | |||
5512 | else | 5673 | else |
5513 | #endif | 5674 | #endif |
5514 | if (eaten) | 5675 | if (eaten) |
5515 | __kfree_skb(skb); | 5676 | kfree_skb_partial(skb, fragstolen); |
5516 | else | 5677 | else |
5517 | sk->sk_data_ready(sk, 0); | 5678 | sk->sk_data_ready(sk, 0); |
5518 | return 0; | 5679 | return 0; |
@@ -5556,6 +5717,44 @@ discard: | |||
5556 | } | 5717 | } |
5557 | EXPORT_SYMBOL(tcp_rcv_established); | 5718 | EXPORT_SYMBOL(tcp_rcv_established); |
5558 | 5719 | ||
5720 | void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) | ||
5721 | { | ||
5722 | struct tcp_sock *tp = tcp_sk(sk); | ||
5723 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
5724 | |||
5725 | tcp_set_state(sk, TCP_ESTABLISHED); | ||
5726 | |||
5727 | if (skb != NULL) | ||
5728 | security_inet_conn_established(sk, skb); | ||
5729 | |||
5730 | /* Make sure socket is routed, for correct metrics. */ | ||
5731 | icsk->icsk_af_ops->rebuild_header(sk); | ||
5732 | |||
5733 | tcp_init_metrics(sk); | ||
5734 | |||
5735 | tcp_init_congestion_control(sk); | ||
5736 | |||
5737 | /* Prevent spurious tcp_cwnd_restart() on first data | ||
5738 | * packet. | ||
5739 | */ | ||
5740 | tp->lsndtime = tcp_time_stamp; | ||
5741 | |||
5742 | tcp_init_buffer_space(sk); | ||
5743 | |||
5744 | if (sock_flag(sk, SOCK_KEEPOPEN)) | ||
5745 | inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp)); | ||
5746 | |||
5747 | if (!tp->rx_opt.snd_wscale) | ||
5748 | __tcp_fast_path_on(tp, tp->snd_wnd); | ||
5749 | else | ||
5750 | tp->pred_flags = 0; | ||
5751 | |||
5752 | if (!sock_flag(sk, SOCK_DEAD)) { | ||
5753 | sk->sk_state_change(sk); | ||
5754 | sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); | ||
5755 | } | ||
5756 | } | ||
5757 | |||
5559 | static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | 5758 | static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, |
5560 | const struct tcphdr *th, unsigned int len) | 5759 | const struct tcphdr *th, unsigned int len) |
5561 | { | 5760 | { |
@@ -5688,36 +5887,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
5688 | } | 5887 | } |
5689 | 5888 | ||
5690 | smp_mb(); | 5889 | smp_mb(); |
5691 | tcp_set_state(sk, TCP_ESTABLISHED); | ||
5692 | |||
5693 | security_inet_conn_established(sk, skb); | ||
5694 | |||
5695 | /* Make sure socket is routed, for correct metrics. */ | ||
5696 | icsk->icsk_af_ops->rebuild_header(sk); | ||
5697 | 5890 | ||
5698 | tcp_init_metrics(sk); | 5891 | tcp_finish_connect(sk, skb); |
5699 | |||
5700 | tcp_init_congestion_control(sk); | ||
5701 | |||
5702 | /* Prevent spurious tcp_cwnd_restart() on first data | ||
5703 | * packet. | ||
5704 | */ | ||
5705 | tp->lsndtime = tcp_time_stamp; | ||
5706 | |||
5707 | tcp_init_buffer_space(sk); | ||
5708 | |||
5709 | if (sock_flag(sk, SOCK_KEEPOPEN)) | ||
5710 | inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp)); | ||
5711 | |||
5712 | if (!tp->rx_opt.snd_wscale) | ||
5713 | __tcp_fast_path_on(tp, tp->snd_wnd); | ||
5714 | else | ||
5715 | tp->pred_flags = 0; | ||
5716 | |||
5717 | if (!sock_flag(sk, SOCK_DEAD)) { | ||
5718 | sk->sk_state_change(sk); | ||
5719 | sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); | ||
5720 | } | ||
5721 | 5892 | ||
5722 | if (sk->sk_write_pending || | 5893 | if (sk->sk_write_pending || |
5723 | icsk->icsk_accept_queue.rskq_defer_accept || | 5894 | icsk->icsk_accept_queue.rskq_defer_accept || |
@@ -5731,8 +5902,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
5731 | */ | 5902 | */ |
5732 | inet_csk_schedule_ack(sk); | 5903 | inet_csk_schedule_ack(sk); |
5733 | icsk->icsk_ack.lrcvtime = tcp_time_stamp; | 5904 | icsk->icsk_ack.lrcvtime = tcp_time_stamp; |
5734 | icsk->icsk_ack.ato = TCP_ATO_MIN; | ||
5735 | tcp_incr_quickack(sk); | ||
5736 | tcp_enter_quickack_mode(sk); | 5905 | tcp_enter_quickack_mode(sk); |
5737 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, | 5906 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, |
5738 | TCP_DELACK_MAX, TCP_RTO_MAX); | 5907 | TCP_DELACK_MAX, TCP_RTO_MAX); |
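Factoring the ESTABLISHED-transition bookkeeping out of tcp_rcv_synsent_state_process() into tcp_finish_connect() is what enables the repair path: the helper tolerates skb == NULL, so a socket can be promoted to ESTABLISHED without ever seeing a SYN-ACK. The resulting two-line caller (shown in full in the tcp_ipv4.c hunk below):

    tcp_connect_init(sk);          /* normal connect-time initialization */
    tcp_finish_connect(sk, NULL);  /* no SYN-ACK skb in repair mode */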
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3a25cf743f8b..a43b87dfe800 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -138,6 +138,14 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) | |||
138 | } | 138 | } |
139 | EXPORT_SYMBOL_GPL(tcp_twsk_unique); | 139 | EXPORT_SYMBOL_GPL(tcp_twsk_unique); |
140 | 140 | ||
141 | static int tcp_repair_connect(struct sock *sk) | ||
142 | { | ||
143 | tcp_connect_init(sk); | ||
144 | tcp_finish_connect(sk, NULL); | ||
145 | |||
146 | return 0; | ||
147 | } | ||
148 | |||
141 | /* This will initiate an outgoing connection. */ | 149 | /* This will initiate an outgoing connection. */ |
142 | int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | 150 | int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) |
143 | { | 151 | { |
@@ -196,7 +204,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
196 | /* Reset inherited state */ | 204 | /* Reset inherited state */ |
197 | tp->rx_opt.ts_recent = 0; | 205 | tp->rx_opt.ts_recent = 0; |
198 | tp->rx_opt.ts_recent_stamp = 0; | 206 | tp->rx_opt.ts_recent_stamp = 0; |
199 | tp->write_seq = 0; | 207 | if (likely(!tp->repair)) |
208 | tp->write_seq = 0; | ||
200 | } | 209 | } |
201 | 210 | ||
202 | if (tcp_death_row.sysctl_tw_recycle && | 211 | if (tcp_death_row.sysctl_tw_recycle && |
@@ -247,7 +256,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
247 | sk->sk_gso_type = SKB_GSO_TCPV4; | 256 | sk->sk_gso_type = SKB_GSO_TCPV4; |
248 | sk_setup_caps(sk, &rt->dst); | 257 | sk_setup_caps(sk, &rt->dst); |
249 | 258 | ||
250 | if (!tp->write_seq) | 259 | if (!tp->write_seq && likely(!tp->repair)) |
251 | tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, | 260 | tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, |
252 | inet->inet_daddr, | 261 | inet->inet_daddr, |
253 | inet->inet_sport, | 262 | inet->inet_sport, |
@@ -255,7 +264,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
255 | 264 | ||
256 | inet->inet_id = tp->write_seq ^ jiffies; | 265 | inet->inet_id = tp->write_seq ^ jiffies; |
257 | 266 | ||
258 | err = tcp_connect(sk); | 267 | if (likely(!tp->repair)) |
268 | err = tcp_connect(sk); | ||
269 | else | ||
270 | err = tcp_repair_connect(sk); | ||
271 | |||
259 | rt = NULL; | 272 | rt = NULL; |
260 | if (err) | 273 | if (err) |
261 | goto failure; | 274 | goto failure; |
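With tp->repair set, connect() now skips both the write_seq reset and the SYN handshake entirely. A hypothetical user-space sequence for restoring the send side of a connection, continuing the sketch above (TCP_QUEUE_SEQ and the queue ids are assumptions from the repair series, and 'peer' is an already prepared sockaddr_in; check your headers):

    #ifndef TCP_QUEUE_SEQ
    #define TCP_QUEUE_SEQ  21
    #define TCP_SEND_QUEUE 2
    #endif

    int on = 1, q = TCP_SEND_QUEUE;
    uint32_t snd_seq = saved_write_seq;   /* captured at checkpoint time */

    setsockopt(fd, IPPROTO_TCP, TCP_REPAIR, &on, sizeof(on));
    setsockopt(fd, IPPROTO_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q));
    setsockopt(fd, IPPROTO_TCP, TCP_QUEUE_SEQ, &snd_seq, sizeof(snd_seq));
    /* no SYN is emitted; the socket lands directly in ESTABLISHED */
    connect(fd, (struct sockaddr *)&peer, sizeof(peer));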
@@ -853,14 +866,14 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) | |||
853 | } | 866 | } |
854 | 867 | ||
855 | /* | 868 | /* |
856 | * Return 1 if a syncookie should be sent | 869 | * Return true if a syncookie should be sent |
857 | */ | 870 | */ |
858 | int tcp_syn_flood_action(struct sock *sk, | 871 | bool tcp_syn_flood_action(struct sock *sk, |
859 | const struct sk_buff *skb, | 872 | const struct sk_buff *skb, |
860 | const char *proto) | 873 | const char *proto) |
861 | { | 874 | { |
862 | const char *msg = "Dropping request"; | 875 | const char *msg = "Dropping request"; |
863 | int want_cookie = 0; | 876 | bool want_cookie = false; |
864 | struct listen_sock *lopt; | 877 | struct listen_sock *lopt; |
865 | 878 | ||
866 | 879 | ||
@@ -868,7 +881,7 @@ int tcp_syn_flood_action(struct sock *sk, | |||
868 | #ifdef CONFIG_SYN_COOKIES | 881 | #ifdef CONFIG_SYN_COOKIES |
869 | if (sysctl_tcp_syncookies) { | 882 | if (sysctl_tcp_syncookies) { |
870 | msg = "Sending cookies"; | 883 | msg = "Sending cookies"; |
871 | want_cookie = 1; | 884 | want_cookie = true; |
872 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); | 885 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); |
873 | } else | 886 | } else |
874 | #endif | 887 | #endif |
@@ -1183,7 +1196,7 @@ clear_hash_noput: | |||
1183 | } | 1196 | } |
1184 | EXPORT_SYMBOL(tcp_v4_md5_hash_skb); | 1197 | EXPORT_SYMBOL(tcp_v4_md5_hash_skb); |
1185 | 1198 | ||
1186 | static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) | 1199 | static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) |
1187 | { | 1200 | { |
1188 | /* | 1201 | /* |
1189 | * This gets called for each TCP segment that arrives | 1202 | * This gets called for each TCP segment that arrives |
@@ -1206,16 +1219,16 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) | |||
1206 | 1219 | ||
1207 | /* We've parsed the options - do we have a hash? */ | 1220 | /* We've parsed the options - do we have a hash? */ |
1208 | if (!hash_expected && !hash_location) | 1221 | if (!hash_expected && !hash_location) |
1209 | return 0; | 1222 | return false; |
1210 | 1223 | ||
1211 | if (hash_expected && !hash_location) { | 1224 | if (hash_expected && !hash_location) { |
1212 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); | 1225 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); |
1213 | return 1; | 1226 | return true; |
1214 | } | 1227 | } |
1215 | 1228 | ||
1216 | if (!hash_expected && hash_location) { | 1229 | if (!hash_expected && hash_location) { |
1217 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); | 1230 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); |
1218 | return 1; | 1231 | return true; |
1219 | } | 1232 | } |
1220 | 1233 | ||
1221 | /* Okay, so this is hash_expected and hash_location - | 1234 | /* Okay, so this is hash_expected and hash_location - |
@@ -1226,15 +1239,14 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) | |||
1226 | NULL, NULL, skb); | 1239 | NULL, NULL, skb); |
1227 | 1240 | ||
1228 | if (genhash || memcmp(hash_location, newhash, 16) != 0) { | 1241 | if (genhash || memcmp(hash_location, newhash, 16) != 0) { |
1229 | if (net_ratelimit()) { | 1242 | net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", |
1230 | pr_info("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", | 1243 | &iph->saddr, ntohs(th->source), |
1231 | &iph->saddr, ntohs(th->source), | 1244 | &iph->daddr, ntohs(th->dest), |
1232 | &iph->daddr, ntohs(th->dest), | 1245 | genhash ? " tcp_v4_calc_md5_hash failed" |
1233 | genhash ? " tcp_v4_calc_md5_hash failed" : ""); | 1246 | : ""); |
1234 | } | 1247 | return true; |
1235 | return 1; | ||
1236 | } | 1248 | } |
1237 | return 0; | 1249 | return false; |
1238 | } | 1250 | } |
1239 | 1251 | ||
1240 | #endif | 1252 | #endif |
@@ -1268,7 +1280,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1268 | __be32 saddr = ip_hdr(skb)->saddr; | 1280 | __be32 saddr = ip_hdr(skb)->saddr; |
1269 | __be32 daddr = ip_hdr(skb)->daddr; | 1281 | __be32 daddr = ip_hdr(skb)->daddr; |
1270 | __u32 isn = TCP_SKB_CB(skb)->when; | 1282 | __u32 isn = TCP_SKB_CB(skb)->when; |
1271 | int want_cookie = 0; | 1283 | bool want_cookie = false; |
1272 | 1284 | ||
1273 | /* Never answer to SYNs send to broadcast or multicast */ | 1285 | /* Never answer to SYNs send to broadcast or multicast */ |
1274 | if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) | 1286 | if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) |
@@ -1327,7 +1339,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1327 | while (l-- > 0) | 1339 | while (l-- > 0) |
1328 | *c++ ^= *hash_location++; | 1340 | *c++ ^= *hash_location++; |
1329 | 1341 | ||
1330 | want_cookie = 0; /* not our kind of cookie */ | 1342 | want_cookie = false; /* not our kind of cookie */ |
1331 | tmp_ext.cookie_out_never = 0; /* false */ | 1343 | tmp_ext.cookie_out_never = 0; /* false */ |
1332 | tmp_ext.cookie_plus = tmp_opt.cookie_plus; | 1344 | tmp_ext.cookie_plus = tmp_opt.cookie_plus; |
1333 | } else if (!tp->rx_opt.cookie_in_always) { | 1345 | } else if (!tp->rx_opt.cookie_in_always) { |
@@ -1355,7 +1367,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1355 | goto drop_and_free; | 1367 | goto drop_and_free; |
1356 | 1368 | ||
1357 | if (!want_cookie || tmp_opt.tstamp_ok) | 1369 | if (!want_cookie || tmp_opt.tstamp_ok) |
1358 | TCP_ECN_create_request(req, tcp_hdr(skb)); | 1370 | TCP_ECN_create_request(req, skb); |
1359 | 1371 | ||
1360 | if (want_cookie) { | 1372 | if (want_cookie) { |
1361 | isn = cookie_v4_init_sequence(sk, skb, &req->mss); | 1373 | isn = cookie_v4_init_sequence(sk, skb, &req->mss); |
@@ -1730,7 +1742,7 @@ process: | |||
1730 | #ifdef CONFIG_NET_DMA | 1742 | #ifdef CONFIG_NET_DMA |
1731 | struct tcp_sock *tp = tcp_sk(sk); | 1743 | struct tcp_sock *tp = tcp_sk(sk); |
1732 | if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) | 1744 | if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) |
1733 | tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY); | 1745 | tp->ucopy.dma_chan = net_dma_find_channel(); |
1734 | if (tp->ucopy.dma_chan) | 1746 | if (tp->ucopy.dma_chan) |
1735 | ret = tcp_v4_do_rcv(sk, skb); | 1747 | ret = tcp_v4_do_rcv(sk, skb); |
1736 | else | 1748 | else |
@@ -1739,7 +1751,8 @@ process: | |||
1739 | if (!tcp_prequeue(sk, skb)) | 1751 | if (!tcp_prequeue(sk, skb)) |
1740 | ret = tcp_v4_do_rcv(sk, skb); | 1752 | ret = tcp_v4_do_rcv(sk, skb); |
1741 | } | 1753 | } |
1742 | } else if (unlikely(sk_add_backlog(sk, skb))) { | 1754 | } else if (unlikely(sk_add_backlog(sk, skb, |
1755 | sk->sk_rcvbuf + sk->sk_sndbuf))) { | ||
1743 | bh_unlock_sock(sk); | 1756 | bh_unlock_sock(sk); |
1744 | NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); | 1757 | NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); |
1745 | goto discard_and_relse; | 1758 | goto discard_and_relse; |
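sk_add_backlog() now takes an explicit limit, so a socket whose owner holds the lock can no longer accumulate an unbounded backlog; rcvbuf + sndbuf is a sensible cap because ACK processing can also free send-side memory. The underlying check is presumably along these lines (a sketch, not the exact net/core implementation):

    static inline bool sk_rcvqueues_full(const struct sock *sk,
                                         unsigned int limit)
    {
            unsigned int qsize = sk->sk_backlog.len +
                                 atomic_read(&sk->sk_rmem_alloc);

            return qsize > limit;   /* drop the skb once past the cap */
    }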
@@ -1875,64 +1888,15 @@ static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { | |||
1875 | static int tcp_v4_init_sock(struct sock *sk) | 1888 | static int tcp_v4_init_sock(struct sock *sk) |
1876 | { | 1889 | { |
1877 | struct inet_connection_sock *icsk = inet_csk(sk); | 1890 | struct inet_connection_sock *icsk = inet_csk(sk); |
1878 | struct tcp_sock *tp = tcp_sk(sk); | ||
1879 | 1891 | ||
1880 | skb_queue_head_init(&tp->out_of_order_queue); | 1892 | tcp_init_sock(sk); |
1881 | tcp_init_xmit_timers(sk); | ||
1882 | tcp_prequeue_init(tp); | ||
1883 | |||
1884 | icsk->icsk_rto = TCP_TIMEOUT_INIT; | ||
1885 | tp->mdev = TCP_TIMEOUT_INIT; | ||
1886 | |||
1887 | /* So many TCP implementations out there (incorrectly) count the | ||
1888 | * initial SYN frame in their delayed-ACK and congestion control | ||
1889 | * algorithms that we must have the following bandaid to talk | ||
1890 | * efficiently to them. -DaveM | ||
1891 | */ | ||
1892 | tp->snd_cwnd = TCP_INIT_CWND; | ||
1893 | |||
1894 | /* See draft-stevens-tcpca-spec-01 for discussion of the | ||
1895 | * initialization of these values. | ||
1896 | */ | ||
1897 | tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; | ||
1898 | tp->snd_cwnd_clamp = ~0; | ||
1899 | tp->mss_cache = TCP_MSS_DEFAULT; | ||
1900 | |||
1901 | tp->reordering = sysctl_tcp_reordering; | ||
1902 | icsk->icsk_ca_ops = &tcp_init_congestion_ops; | ||
1903 | |||
1904 | sk->sk_state = TCP_CLOSE; | ||
1905 | |||
1906 | sk->sk_write_space = sk_stream_write_space; | ||
1907 | sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); | ||
1908 | 1893 | ||
1909 | icsk->icsk_af_ops = &ipv4_specific; | 1894 | icsk->icsk_af_ops = &ipv4_specific; |
1910 | icsk->icsk_sync_mss = tcp_sync_mss; | 1895 | |
1911 | #ifdef CONFIG_TCP_MD5SIG | 1896 | #ifdef CONFIG_TCP_MD5SIG |
1912 | tp->af_specific = &tcp_sock_ipv4_specific; | 1897 | tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific; |
1913 | #endif | 1898 | #endif |
1914 | 1899 | ||
1915 | /* TCP Cookie Transactions */ | ||
1916 | if (sysctl_tcp_cookie_size > 0) { | ||
1917 | /* Default, cookies without s_data_payload. */ | ||
1918 | tp->cookie_values = | ||
1919 | kzalloc(sizeof(*tp->cookie_values), | ||
1920 | sk->sk_allocation); | ||
1921 | if (tp->cookie_values != NULL) | ||
1922 | kref_init(&tp->cookie_values->kref); | ||
1923 | } | ||
1924 | /* Presumed zeroed, in order of appearance: | ||
1925 | * cookie_in_always, cookie_out_never, | ||
1926 | * s_data_constant, s_data_in, s_data_out | ||
1927 | */ | ||
1928 | sk->sk_sndbuf = sysctl_tcp_wmem[1]; | ||
1929 | sk->sk_rcvbuf = sysctl_tcp_rmem[1]; | ||
1930 | |||
1931 | local_bh_disable(); | ||
1932 | sock_update_memcg(sk); | ||
1933 | sk_sockets_allocated_inc(sk); | ||
1934 | local_bh_enable(); | ||
1935 | |||
1936 | return 0; | 1900 | return 0; |
1937 | } | 1901 | } |
1938 | 1902 | ||
@@ -2109,7 +2073,7 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos) | |||
2109 | return rc; | 2073 | return rc; |
2110 | } | 2074 | } |
2111 | 2075 | ||
2112 | static inline int empty_bucket(struct tcp_iter_state *st) | 2076 | static inline bool empty_bucket(struct tcp_iter_state *st) |
2113 | { | 2077 | { |
2114 | return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) && | 2078 | return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) && |
2115 | hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); | 2079 | hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 3cabafb5cdd1..b85d9fe7d663 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -55,7 +55,7 @@ EXPORT_SYMBOL_GPL(tcp_death_row); | |||
55 | * state. | 55 | * state. |
56 | */ | 56 | */ |
57 | 57 | ||
58 | static int tcp_remember_stamp(struct sock *sk) | 58 | static bool tcp_remember_stamp(struct sock *sk) |
59 | { | 59 | { |
60 | const struct inet_connection_sock *icsk = inet_csk(sk); | 60 | const struct inet_connection_sock *icsk = inet_csk(sk); |
61 | struct tcp_sock *tp = tcp_sk(sk); | 61 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -72,13 +72,13 @@ static int tcp_remember_stamp(struct sock *sk) | |||
72 | } | 72 | } |
73 | if (release_it) | 73 | if (release_it) |
74 | inet_putpeer(peer); | 74 | inet_putpeer(peer); |
75 | return 1; | 75 | return true; |
76 | } | 76 | } |
77 | 77 | ||
78 | return 0; | 78 | return false; |
79 | } | 79 | } |
80 | 80 | ||
81 | static int tcp_tw_remember_stamp(struct inet_timewait_sock *tw) | 81 | static bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw) |
82 | { | 82 | { |
83 | struct sock *sk = (struct sock *) tw; | 83 | struct sock *sk = (struct sock *) tw; |
84 | struct inet_peer *peer; | 84 | struct inet_peer *peer; |
@@ -94,17 +94,17 @@ static int tcp_tw_remember_stamp(struct inet_timewait_sock *tw) | |||
94 | peer->tcp_ts = tcptw->tw_ts_recent; | 94 | peer->tcp_ts = tcptw->tw_ts_recent; |
95 | } | 95 | } |
96 | inet_putpeer(peer); | 96 | inet_putpeer(peer); |
97 | return 1; | 97 | return true; |
98 | } | 98 | } |
99 | return 0; | 99 | return false; |
100 | } | 100 | } |
101 | 101 | ||
102 | static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) | 102 | static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) |
103 | { | 103 | { |
104 | if (seq == s_win) | 104 | if (seq == s_win) |
105 | return 1; | 105 | return true; |
106 | if (after(end_seq, s_win) && before(seq, e_win)) | 106 | if (after(end_seq, s_win) && before(seq, e_win)) |
107 | return 1; | 107 | return true; |
108 | return seq == e_win && seq == end_seq; | 108 | return seq == e_win && seq == end_seq; |
109 | } | 109 | } |
110 | 110 | ||
@@ -143,7 +143,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, | |||
143 | struct tcp_options_received tmp_opt; | 143 | struct tcp_options_received tmp_opt; |
144 | const u8 *hash_location; | 144 | const u8 *hash_location; |
145 | struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); | 145 | struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); |
146 | int paws_reject = 0; | 146 | bool paws_reject = false; |
147 | 147 | ||
148 | tmp_opt.saw_tstamp = 0; | 148 | tmp_opt.saw_tstamp = 0; |
149 | if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { | 149 | if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { |
@@ -316,7 +316,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) | |||
316 | struct inet_timewait_sock *tw = NULL; | 316 | struct inet_timewait_sock *tw = NULL; |
317 | const struct inet_connection_sock *icsk = inet_csk(sk); | 317 | const struct inet_connection_sock *icsk = inet_csk(sk); |
318 | const struct tcp_sock *tp = tcp_sk(sk); | 318 | const struct tcp_sock *tp = tcp_sk(sk); |
319 | int recycle_ok = 0; | 319 | bool recycle_ok = false; |
320 | 320 | ||
321 | if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) | 321 | if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) |
322 | recycle_ok = tcp_remember_stamp(sk); | 322 | recycle_ok = tcp_remember_stamp(sk); |
@@ -482,6 +482,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
482 | newtp->sacked_out = 0; | 482 | newtp->sacked_out = 0; |
483 | newtp->fackets_out = 0; | 483 | newtp->fackets_out = 0; |
484 | newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; | 484 | newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; |
485 | tcp_enable_early_retrans(newtp); | ||
485 | 486 | ||
486 | /* So many TCP implementations out there (incorrectly) count the | 487 | /* So many TCP implementations out there (incorrectly) count the |
487 | * initial SYN frame in their delayed-ACK and congestion control | 488 | * initial SYN frame in their delayed-ACK and congestion control |
@@ -574,7 +575,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
574 | struct sock *child; | 575 | struct sock *child; |
575 | const struct tcphdr *th = tcp_hdr(skb); | 576 | const struct tcphdr *th = tcp_hdr(skb); |
576 | __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); | 577 | __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); |
577 | int paws_reject = 0; | 578 | bool paws_reject = false; |
578 | 579 | ||
579 | tmp_opt.saw_tstamp = 0; | 580 | tmp_opt.saw_tstamp = 0; |
580 | if (th->doff > (sizeof(struct tcphdr)>>2)) { | 581 | if (th->doff > (sizeof(struct tcphdr)>>2)) { |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 364784a91939..803cbfe82fbc 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -34,6 +34,8 @@ | |||
34 | * | 34 | * |
35 | */ | 35 | */ |
36 | 36 | ||
37 | #define pr_fmt(fmt) "TCP: " fmt | ||
38 | |||
37 | #include <net/tcp.h> | 39 | #include <net/tcp.h> |
38 | 40 | ||
39 | #include <linux/compiler.h> | 41 | #include <linux/compiler.h> |
@@ -78,9 +80,8 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) | |||
78 | tp->frto_counter = 3; | 80 | tp->frto_counter = 3; |
79 | 81 | ||
80 | tp->packets_out += tcp_skb_pcount(skb); | 82 | tp->packets_out += tcp_skb_pcount(skb); |
81 | if (!prior_packets) | 83 | if (!prior_packets || tp->early_retrans_delayed) |
82 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | 84 | tcp_rearm_rto(sk); |
83 | inet_csk(sk)->icsk_rto, TCP_RTO_MAX); | ||
84 | } | 85 | } |
85 | 86 | ||
86 | /* SND.NXT, if window was not shrunk. | 87 | /* SND.NXT, if window was not shrunk. |
@@ -369,7 +370,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) | |||
369 | TCP_SKB_CB(skb)->end_seq = seq; | 370 | TCP_SKB_CB(skb)->end_seq = seq; |
370 | } | 371 | } |
371 | 372 | ||
372 | static inline int tcp_urg_mode(const struct tcp_sock *tp) | 373 | static inline bool tcp_urg_mode(const struct tcp_sock *tp) |
373 | { | 374 | { |
374 | return tp->snd_una != tp->snd_up; | 375 | return tp->snd_una != tp->snd_up; |
375 | } | 376 | } |
@@ -563,13 +564,13 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, | |||
563 | /* Compute TCP options for SYN packets. This is not the final | 564 | /* Compute TCP options for SYN packets. This is not the final |
564 | * network wire format yet. | 565 | * network wire format yet. |
565 | */ | 566 | */ |
566 | static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, | 567 | static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, |
567 | struct tcp_out_options *opts, | 568 | struct tcp_out_options *opts, |
568 | struct tcp_md5sig_key **md5) | 569 | struct tcp_md5sig_key **md5) |
569 | { | 570 | { |
570 | struct tcp_sock *tp = tcp_sk(sk); | 571 | struct tcp_sock *tp = tcp_sk(sk); |
571 | struct tcp_cookie_values *cvp = tp->cookie_values; | 572 | struct tcp_cookie_values *cvp = tp->cookie_values; |
572 | unsigned remaining = MAX_TCP_OPTION_SPACE; | 573 | unsigned int remaining = MAX_TCP_OPTION_SPACE; |
573 | u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? | 574 | u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? |
574 | tcp_cookie_size_check(cvp->cookie_desired) : | 575 | tcp_cookie_size_check(cvp->cookie_desired) : |
575 | 0; | 576 | 0; |
@@ -663,15 +664,15 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, | |||
663 | } | 664 | } |
664 | 665 | ||
665 | /* Set up TCP options for SYN-ACKs. */ | 666 | /* Set up TCP options for SYN-ACKs. */ |
666 | static unsigned tcp_synack_options(struct sock *sk, | 667 | static unsigned int tcp_synack_options(struct sock *sk, |
667 | struct request_sock *req, | 668 | struct request_sock *req, |
668 | unsigned mss, struct sk_buff *skb, | 669 | unsigned int mss, struct sk_buff *skb, |
669 | struct tcp_out_options *opts, | 670 | struct tcp_out_options *opts, |
670 | struct tcp_md5sig_key **md5, | 671 | struct tcp_md5sig_key **md5, |
671 | struct tcp_extend_values *xvp) | 672 | struct tcp_extend_values *xvp) |
672 | { | 673 | { |
673 | struct inet_request_sock *ireq = inet_rsk(req); | 674 | struct inet_request_sock *ireq = inet_rsk(req); |
674 | unsigned remaining = MAX_TCP_OPTION_SPACE; | 675 | unsigned int remaining = MAX_TCP_OPTION_SPACE; |
675 | u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? | 676 | u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? |
676 | xvp->cookie_plus : | 677 | xvp->cookie_plus : |
677 | 0; | 678 | 0; |
@@ -742,13 +743,13 @@ static unsigned tcp_synack_options(struct sock *sk, | |||
742 | /* Compute TCP options for ESTABLISHED sockets. This is not the | 743 | /* Compute TCP options for ESTABLISHED sockets. This is not the |
743 | * final wire format yet. | 744 | * final wire format yet. |
744 | */ | 745 | */ |
745 | static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, | 746 | static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb, |
746 | struct tcp_out_options *opts, | 747 | struct tcp_out_options *opts, |
747 | struct tcp_md5sig_key **md5) | 748 | struct tcp_md5sig_key **md5) |
748 | { | 749 | { |
749 | struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; | 750 | struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; |
750 | struct tcp_sock *tp = tcp_sk(sk); | 751 | struct tcp_sock *tp = tcp_sk(sk); |
751 | unsigned size = 0; | 752 | unsigned int size = 0; |
752 | unsigned int eff_sacks; | 753 | unsigned int eff_sacks; |
753 | 754 | ||
754 | #ifdef CONFIG_TCP_MD5SIG | 755 | #ifdef CONFIG_TCP_MD5SIG |
@@ -770,9 +771,9 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, | |||
770 | 771 | ||
771 | eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack; | 772 | eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack; |
772 | if (unlikely(eff_sacks)) { | 773 | if (unlikely(eff_sacks)) { |
773 | const unsigned remaining = MAX_TCP_OPTION_SPACE - size; | 774 | const unsigned int remaining = MAX_TCP_OPTION_SPACE - size; |
774 | opts->num_sack_blocks = | 775 | opts->num_sack_blocks = |
775 | min_t(unsigned, eff_sacks, | 776 | min_t(unsigned int, eff_sacks, |
776 | (remaining - TCPOLEN_SACK_BASE_ALIGNED) / | 777 | (remaining - TCPOLEN_SACK_BASE_ALIGNED) / |
777 | TCPOLEN_SACK_PERBLOCK); | 778 | TCPOLEN_SACK_PERBLOCK); |
778 | size += TCPOLEN_SACK_BASE_ALIGNED + | 779 | size += TCPOLEN_SACK_BASE_ALIGNED + |
@@ -801,7 +802,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
801 | struct tcp_sock *tp; | 802 | struct tcp_sock *tp; |
802 | struct tcp_skb_cb *tcb; | 803 | struct tcp_skb_cb *tcb; |
803 | struct tcp_out_options opts; | 804 | struct tcp_out_options opts; |
804 | unsigned tcp_options_size, tcp_header_size; | 805 | unsigned int tcp_options_size, tcp_header_size; |
805 | struct tcp_md5sig_key *md5; | 806 | struct tcp_md5sig_key *md5; |
806 | struct tcphdr *th; | 807 | struct tcphdr *th; |
807 | int err; | 808 | int err; |
@@ -1096,6 +1097,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) | |||
1096 | eat = min_t(int, len, skb_headlen(skb)); | 1097 | eat = min_t(int, len, skb_headlen(skb)); |
1097 | if (eat) { | 1098 | if (eat) { |
1098 | __skb_pull(skb, eat); | 1099 | __skb_pull(skb, eat); |
1100 | skb->avail_size -= eat; | ||
1099 | len -= eat; | 1101 | len -= eat; |
1100 | if (!len) | 1102 | if (!len) |
1101 | return; | 1103 | return; |
@@ -1149,7 +1151,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) | |||
1149 | } | 1151 | } |
1150 | 1152 | ||
1151 | /* Calculate MSS. Not accounting for SACKs here. */ | 1153 | /* Calculate MSS. Not accounting for SACKs here. */ |
1152 | int tcp_mtu_to_mss(const struct sock *sk, int pmtu) | 1154 | int tcp_mtu_to_mss(struct sock *sk, int pmtu) |
1153 | { | 1155 | { |
1154 | const struct tcp_sock *tp = tcp_sk(sk); | 1156 | const struct tcp_sock *tp = tcp_sk(sk); |
1155 | const struct inet_connection_sock *icsk = inet_csk(sk); | 1157 | const struct inet_connection_sock *icsk = inet_csk(sk); |
@@ -1160,6 +1162,14 @@ int tcp_mtu_to_mss(const struct sock *sk, int pmtu) | |||
1160 | */ | 1162 | */ |
1161 | mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr); | 1163 | mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr); |
1162 | 1164 | ||
1165 | /* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */ | ||
1166 | if (icsk->icsk_af_ops->net_frag_header_len) { | ||
1167 | const struct dst_entry *dst = __sk_dst_get(sk); | ||
1168 | |||
1169 | if (dst && dst_allfrag(dst)) | ||
1170 | mss_now -= icsk->icsk_af_ops->net_frag_header_len; | ||
1171 | } | ||
1172 | |||
1163 | /* Clamp it (mss_clamp does not include tcp options) */ | 1173 | /* Clamp it (mss_clamp does not include tcp options) */ |
1164 | if (mss_now > tp->rx_opt.mss_clamp) | 1174 | if (mss_now > tp->rx_opt.mss_clamp) |
1165 | mss_now = tp->rx_opt.mss_clamp; | 1175 | mss_now = tp->rx_opt.mss_clamp; |
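When RTAX_FEATURE_ALLFRAG is set, IPv6 prepends an 8-byte fragment header to every packet, and that space has to come out of the MSS or every full-sized segment overshoots the path MTU. Worked numbers, in a simplified model of the calculation above:

    /* Simplified sketch:
     *   IPv6, PMTU 1280, allfrag: 1280 - 40 - 20 - 8 = 1212 bytes of MSS
     *   IPv4 is unaffected since its net_frag_header_len is 0. */
    static int mtu_to_mss(int pmtu, int net_header_len,
                          int net_frag_header_len, int allfrag)
    {
            int mss = pmtu - net_header_len - 20 /* sizeof tcphdr */;

            if (allfrag)
                    mss -= net_frag_header_len;
            return mss;
    }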
@@ -1178,7 +1188,7 @@ int tcp_mtu_to_mss(const struct sock *sk, int pmtu) | |||
1178 | } | 1188 | } |
1179 | 1189 | ||
1180 | /* Inverse of above */ | 1190 | /* Inverse of above */ |
1181 | int tcp_mss_to_mtu(const struct sock *sk, int mss) | 1191 | int tcp_mss_to_mtu(struct sock *sk, int mss) |
1182 | { | 1192 | { |
1183 | const struct tcp_sock *tp = tcp_sk(sk); | 1193 | const struct tcp_sock *tp = tcp_sk(sk); |
1184 | const struct inet_connection_sock *icsk = inet_csk(sk); | 1194 | const struct inet_connection_sock *icsk = inet_csk(sk); |
@@ -1189,6 +1199,13 @@ int tcp_mss_to_mtu(const struct sock *sk, int mss) | |||
1189 | icsk->icsk_ext_hdr_len + | 1199 | icsk->icsk_ext_hdr_len + |
1190 | icsk->icsk_af_ops->net_header_len; | 1200 | icsk->icsk_af_ops->net_header_len; |
1191 | 1201 | ||
1202 | /* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */ | ||
1203 | if (icsk->icsk_af_ops->net_frag_header_len) { | ||
1204 | const struct dst_entry *dst = __sk_dst_get(sk); | ||
1205 | |||
1206 | if (dst && dst_allfrag(dst)) | ||
1207 | mtu += icsk->icsk_af_ops->net_frag_header_len; | ||
1208 | } | ||
1192 | return mtu; | 1209 | return mtu; |
1193 | } | 1210 | } |
1194 | 1211 | ||
@@ -1258,7 +1275,7 @@ unsigned int tcp_current_mss(struct sock *sk) | |||
1258 | const struct tcp_sock *tp = tcp_sk(sk); | 1275 | const struct tcp_sock *tp = tcp_sk(sk); |
1259 | const struct dst_entry *dst = __sk_dst_get(sk); | 1276 | const struct dst_entry *dst = __sk_dst_get(sk); |
1260 | u32 mss_now; | 1277 | u32 mss_now; |
1261 | unsigned header_len; | 1278 | unsigned int header_len; |
1262 | struct tcp_out_options opts; | 1279 | struct tcp_out_options opts; |
1263 | struct tcp_md5sig_key *md5; | 1280 | struct tcp_md5sig_key *md5; |
1264 | 1281 | ||
@@ -1374,33 +1391,33 @@ static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb, | |||
1374 | } | 1391 | } |
1375 | 1392 | ||
1376 | /* Minshall's variant of the Nagle send check. */ | 1393 | /* Minshall's variant of the Nagle send check. */ |
1377 | static inline int tcp_minshall_check(const struct tcp_sock *tp) | 1394 | static inline bool tcp_minshall_check(const struct tcp_sock *tp) |
1378 | { | 1395 | { |
1379 | return after(tp->snd_sml, tp->snd_una) && | 1396 | return after(tp->snd_sml, tp->snd_una) && |
1380 | !after(tp->snd_sml, tp->snd_nxt); | 1397 | !after(tp->snd_sml, tp->snd_nxt); |
1381 | } | 1398 | } |
1382 | 1399 | ||
1383 | /* Return 0 if packet can be sent now without violating Nagle's rules: | 1400 | /* Return false if packet can be sent now without violating Nagle's rules: |
1384 | * 1. It is full sized. | 1401 | * 1. It is full sized. |
1385 | * 2. Or it contains FIN. (already checked by caller) | 1402 | * 2. Or it contains FIN. (already checked by caller) |
1386 | * 3. Or TCP_CORK is not set, and TCP_NODELAY is set. | 1403 | * 3. Or TCP_CORK is not set, and TCP_NODELAY is set. |
1387 | * 4. Or TCP_CORK is not set, and all sent packets are ACKed. | 1404 | * 4. Or TCP_CORK is not set, and all sent packets are ACKed. |
1388 | * With Minshall's modification: all sent small packets are ACKed. | 1405 | * With Minshall's modification: all sent small packets are ACKed. |
1389 | */ | 1406 | */ |
1390 | static inline int tcp_nagle_check(const struct tcp_sock *tp, | 1407 | static inline bool tcp_nagle_check(const struct tcp_sock *tp, |
1391 | const struct sk_buff *skb, | 1408 | const struct sk_buff *skb, |
1392 | unsigned mss_now, int nonagle) | 1409 | unsigned int mss_now, int nonagle) |
1393 | { | 1410 | { |
1394 | return skb->len < mss_now && | 1411 | return skb->len < mss_now && |
1395 | ((nonagle & TCP_NAGLE_CORK) || | 1412 | ((nonagle & TCP_NAGLE_CORK) || |
1396 | (!nonagle && tp->packets_out && tcp_minshall_check(tp))); | 1413 | (!nonagle && tp->packets_out && tcp_minshall_check(tp))); |
1397 | } | 1414 | } |
1398 | 1415 | ||
1399 | /* Return non-zero if the Nagle test allows this packet to be | 1416 | /* Return true if the Nagle test allows this packet to be |
1400 | * sent now. | 1417 | * sent now. |
1401 | */ | 1418 | */ |
1402 | static inline int tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb, | 1419 | static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb, |
1403 | unsigned int cur_mss, int nonagle) | 1420 | unsigned int cur_mss, int nonagle) |
1404 | { | 1421 | { |
1405 | /* Nagle rule does not apply to frames which sit in the middle of the | 1422 | /* Nagle rule does not apply to frames which sit in the middle of the |
1406 | * write_queue (they have no chance to get new data). | 1423 | * write_queue (they have no chance to get new data). |
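Read together, tcp_minshall_check() and tcp_nagle_check() implement Nagle with Minshall's refinement: hold back a sub-MSS segment only while an earlier small segment is still unacknowledged (snd_sml beyond snd_una but not beyond snd_nxt). Stripped of kernel detail, the decision reduces to this illustrative sketch:

    /* Hold back a small segment? (illustrative, not the kernel code) */
    static bool nagle_hold(size_t len, size_t mss, bool cork,
                           bool nodelay, bool small_pkt_unacked)
    {
            if (len >= mss)
                    return false;          /* full-sized: send now */
            if (cork)
                    return true;           /* TCP_CORK: always hold */
            if (nodelay)
                    return false;          /* TCP_NODELAY: never hold */
            return small_pkt_unacked;      /* Minshall: one small pkt in flight */
    }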
@@ -1409,24 +1426,25 @@ static inline int tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff | |||
1409 | * argument based upon the location of SKB in the send queue. | 1426 | * argument based upon the location of SKB in the send queue. |
1410 | */ | 1427 | */ |
1411 | if (nonagle & TCP_NAGLE_PUSH) | 1428 | if (nonagle & TCP_NAGLE_PUSH) |
1412 | return 1; | 1429 | return true; |
1413 | 1430 | ||
1414 | /* Don't use the nagle rule for urgent data (or for the final FIN). | 1431 | /* Don't use the nagle rule for urgent data (or for the final FIN). |
1415 | * Nagle can be ignored during F-RTO too (see RFC4138). | 1432 | * Nagle can be ignored during F-RTO too (see RFC4138). |
1416 | */ | 1433 | */ |
1417 | if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || | 1434 | if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || |
1418 | (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) | 1435 | (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) |
1419 | return 1; | 1436 | return true; |
1420 | 1437 | ||
1421 | if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) | 1438 | if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) |
1422 | return 1; | 1439 | return true; |
1423 | 1440 | ||
1424 | return 0; | 1441 | return false; |
1425 | } | 1442 | } |
1426 | 1443 | ||
1427 | /* Does at least the first segment of SKB fit into the send window? */ | 1444 | /* Does at least the first segment of SKB fit into the send window? */ |
1428 | static inline int tcp_snd_wnd_test(const struct tcp_sock *tp, const struct sk_buff *skb, | 1445 | static bool tcp_snd_wnd_test(const struct tcp_sock *tp, |
1429 | unsigned int cur_mss) | 1446 | const struct sk_buff *skb, |
1447 | unsigned int cur_mss) | ||
1430 | { | 1448 | { |
1431 | u32 end_seq = TCP_SKB_CB(skb)->end_seq; | 1449 | u32 end_seq = TCP_SKB_CB(skb)->end_seq; |
1432 | 1450 | ||
@@ -1459,7 +1477,7 @@ static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb, | |||
1459 | } | 1477 | } |
1460 | 1478 | ||
1461 | /* Test if sending is allowed right now. */ | 1479 | /* Test if sending is allowed right now. */ |
1462 | int tcp_may_send_now(struct sock *sk) | 1480 | bool tcp_may_send_now(struct sock *sk) |
1463 | { | 1481 | { |
1464 | const struct tcp_sock *tp = tcp_sk(sk); | 1482 | const struct tcp_sock *tp = tcp_sk(sk); |
1465 | struct sk_buff *skb = tcp_send_head(sk); | 1483 | struct sk_buff *skb = tcp_send_head(sk); |
@@ -1529,7 +1547,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, | |||
1529 | * | 1547 | * |
1530 | * This algorithm is from John Heffner. | 1548 | * This algorithm is from John Heffner. |
1531 | */ | 1549 | */ |
1532 | static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) | 1550 | static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) |
1533 | { | 1551 | { |
1534 | struct tcp_sock *tp = tcp_sk(sk); | 1552 | struct tcp_sock *tp = tcp_sk(sk); |
1535 | const struct inet_connection_sock *icsk = inet_csk(sk); | 1553 | const struct inet_connection_sock *icsk = inet_csk(sk); |
@@ -1589,11 +1607,11 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) | |||
1589 | /* Ok, it looks like it is advisable to defer. */ | 1607 | /* Ok, it looks like it is advisable to defer. */ |
1590 | tp->tso_deferred = 1 | (jiffies << 1); | 1608 | tp->tso_deferred = 1 | (jiffies << 1); |
1591 | 1609 | ||
1592 | return 1; | 1610 | return true; |
1593 | 1611 | ||
1594 | send_now: | 1612 | send_now: |
1595 | tp->tso_deferred = 0; | 1613 | tp->tso_deferred = 0; |
1596 | return 0; | 1614 | return false; |
1597 | } | 1615 | } |
1598 | 1616 | ||
1599 | /* Create a new MTU probe if we are ready. | 1617 | /* Create a new MTU probe if we are ready. |
@@ -1735,11 +1753,11 @@ static int tcp_mtu_probe(struct sock *sk) | |||
1735 | * snd_up-64k-mss .. snd_up cannot be large. However, taking into | 1753 | * snd_up-64k-mss .. snd_up cannot be large. However, taking into |
1736 | * account rare use of URG, this is not a big flaw. | 1754 | * account rare use of URG, this is not a big flaw. |
1737 | * | 1755 | * |
1738 | * Returns 1 if no segments are in flight and we have queued segments, but | 1756 | * Returns true if no segments are in flight and we have queued segments, |
1739 | * cannot send anything now because of SWS or another problem. | 1757 | * but cannot send anything now because of SWS or another problem. |
1740 | */ | 1758 | */ |
1741 | static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | 1759 | static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, |
1742 | int push_one, gfp_t gfp) | 1760 | int push_one, gfp_t gfp) |
1743 | { | 1761 | { |
1744 | struct tcp_sock *tp = tcp_sk(sk); | 1762 | struct tcp_sock *tp = tcp_sk(sk); |
1745 | struct sk_buff *skb; | 1763 | struct sk_buff *skb; |
@@ -1753,7 +1771,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | |||
1753 | /* Do MTU probing. */ | 1771 | /* Do MTU probing. */ |
1754 | result = tcp_mtu_probe(sk); | 1772 | result = tcp_mtu_probe(sk); |
1755 | if (!result) { | 1773 | if (!result) { |
1756 | return 0; | 1774 | return false; |
1757 | } else if (result > 0) { | 1775 | } else if (result > 0) { |
1758 | sent_pkts = 1; | 1776 | sent_pkts = 1; |
1759 | } | 1777 | } |
@@ -1812,7 +1830,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | |||
1812 | 1830 | ||
1813 | if (likely(sent_pkts)) { | 1831 | if (likely(sent_pkts)) { |
1814 | tcp_cwnd_validate(sk); | 1832 | tcp_cwnd_validate(sk); |
1815 | return 0; | 1833 | return false; |
1816 | } | 1834 | } |
1817 | return !tp->packets_out && tcp_send_head(sk); | 1835 | return !tp->packets_out && tcp_send_head(sk); |
1818 | } | 1836 | } |
@@ -2011,22 +2029,22 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) | |||
2011 | } | 2029 | } |
2012 | 2030 | ||
2013 | /* Check if coalescing SKBs is legal. */ | 2031 | /* Check if coalescing SKBs is legal. */ |
2014 | static int tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb) | 2032 | static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb) |
2015 | { | 2033 | { |
2016 | if (tcp_skb_pcount(skb) > 1) | 2034 | if (tcp_skb_pcount(skb) > 1) |
2017 | return 0; | 2035 | return false; |
2018 | /* TODO: SACK collapsing could be used to remove this condition */ | 2036 | /* TODO: SACK collapsing could be used to remove this condition */ |
2019 | if (skb_shinfo(skb)->nr_frags != 0) | 2037 | if (skb_shinfo(skb)->nr_frags != 0) |
2020 | return 0; | 2038 | return false; |
2021 | if (skb_cloned(skb)) | 2039 | if (skb_cloned(skb)) |
2022 | return 0; | 2040 | return false; |
2023 | if (skb == tcp_send_head(sk)) | 2041 | if (skb == tcp_send_head(sk)) |
2024 | return 0; | 2042 | return false; |
2025 | /* Some heuristics for collapsing over SACK'd could be invented */ | 2043 | /* Some heuristics for collapsing over SACK'd could be invented */ |
2026 | if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) | 2044 | if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) |
2027 | return 0; | 2045 | return false; |
2028 | 2046 | ||
2029 | return 1; | 2047 | return true; |
2030 | } | 2048 | } |
2031 | 2049 | ||
2032 | /* Collapse packets in the retransmit queue to create | 2050 | /* Collapse packets in the retransmit queue to create |
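The hunks above belong to a tree-wide cleanup converting TCP predicates that only ever returned 0 or 1 to bool. A minimal userspace sketch of the idiom, with illustrative names not taken from the patch:

#include <stdbool.h>
#include <stddef.h>

struct pkt {
	size_t nr_frags;	/* stand-in for skb_shinfo(skb)->nr_frags */
	int cloned;		/* stand-in for skb_cloned(skb) */
};

/* Before the cleanup this would read "static int ..." with 0/1
 * returns; bool documents that the function is a pure predicate. */
static bool pkt_can_collapse(const struct pkt *p)
{
	if (p->nr_frags != 0)
		return false;
	if (p->cloned)
		return false;
	return true;
}

int main(void)
{
	struct pkt p = { .nr_frags = 0, .cloned = 0 };
	return pkt_can_collapse(&p) ? 0 : 1;
}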
@@ -2037,7 +2055,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, | |||
2037 | { | 2055 | { |
2038 | struct tcp_sock *tp = tcp_sk(sk); | 2056 | struct tcp_sock *tp = tcp_sk(sk); |
2039 | struct sk_buff *skb = to, *tmp; | 2057 | struct sk_buff *skb = to, *tmp; |
2040 | int first = 1; | 2058 | bool first = true; |
2041 | 2059 | ||
2042 | if (!sysctl_tcp_retrans_collapse) | 2060 | if (!sysctl_tcp_retrans_collapse) |
2043 | return; | 2061 | return; |
@@ -2051,7 +2069,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, | |||
2051 | space -= skb->len; | 2069 | space -= skb->len; |
2052 | 2070 | ||
2053 | if (first) { | 2071 | if (first) { |
2054 | first = 0; | 2072 | first = false; |
2055 | continue; | 2073 | continue; |
2056 | } | 2074 | } |
2057 | 2075 | ||
@@ -2060,7 +2078,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, | |||
2060 | /* Punt if not enough space exists in the first SKB for | 2078 | /* Punt if not enough space exists in the first SKB for |
2061 | * the data in the second | 2079 | * the data in the second |
2062 | */ | 2080 | */ |
2063 | if (skb->len > skb_tailroom(to)) | 2081 | if (skb->len > skb_availroom(to)) |
2064 | break; | 2082 | break; |
2065 | 2083 | ||
2066 | if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp))) | 2084 | if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp))) |
@@ -2166,8 +2184,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
2166 | 2184 | ||
2167 | #if FASTRETRANS_DEBUG > 0 | 2185 | #if FASTRETRANS_DEBUG > 0 |
2168 | if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { | 2186 | if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { |
2169 | if (net_ratelimit()) | 2187 | net_dbg_ratelimited("retrans_out leaked\n"); |
2170 | printk(KERN_DEBUG "retrans_out leaked.\n"); | ||
2171 | } | 2188 | } |
2172 | #endif | 2189 | #endif |
2173 | if (!tp->retrans_out) | 2190 | if (!tp->retrans_out) |
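net_dbg_ratelimited() replaces the open-coded net_ratelimit()/printk(KERN_DEBUG ...) pair above. A sketch of roughly what the helper boils down to; the real macro lives in include/linux/net.h and is built on net_ratelimited_function(), so this is an approximation, not the verbatim kernel definition:

/* Approximate expansion for illustration (kernel context assumed). */
#define my_net_dbg_ratelimited(fmt, ...)		\
	do {						\
		if (net_ratelimit())			\
			pr_debug(fmt, ##__VA_ARGS__);	\
	} while (0)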
@@ -2192,18 +2209,18 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
2192 | /* Check if forward retransmits are possible in the current | 2209 | /* Check if forward retransmits are possible in the current |
2193 | * window/congestion state. | 2210 | * window/congestion state. |
2194 | */ | 2211 | */ |
2195 | static int tcp_can_forward_retransmit(struct sock *sk) | 2212 | static bool tcp_can_forward_retransmit(struct sock *sk) |
2196 | { | 2213 | { |
2197 | const struct inet_connection_sock *icsk = inet_csk(sk); | 2214 | const struct inet_connection_sock *icsk = inet_csk(sk); |
2198 | const struct tcp_sock *tp = tcp_sk(sk); | 2215 | const struct tcp_sock *tp = tcp_sk(sk); |
2199 | 2216 | ||
2200 | /* Forward retransmissions are possible only during Recovery. */ | 2217 | /* Forward retransmissions are possible only during Recovery. */ |
2201 | if (icsk->icsk_ca_state != TCP_CA_Recovery) | 2218 | if (icsk->icsk_ca_state != TCP_CA_Recovery) |
2202 | return 0; | 2219 | return false; |
2203 | 2220 | ||
2204 | /* No forward retransmissions in Reno are possible. */ | 2221 | /* No forward retransmissions in Reno are possible. */ |
2205 | if (tcp_is_reno(tp)) | 2222 | if (tcp_is_reno(tp)) |
2206 | return 0; | 2223 | return false; |
2207 | 2224 | ||
2208 | /* Yeah, we have to make a difficult choice between forward transmission | 2225 | /* Yeah, we have to make a difficult choice between forward transmission |
2209 | * and retransmission... Both ways have their merits... | 2226 | * and retransmission... Both ways have their merits... |
@@ -2214,9 +2231,9 @@ static int tcp_can_forward_retransmit(struct sock *sk) | |||
2214 | */ | 2231 | */ |
2215 | 2232 | ||
2216 | if (tcp_may_send_now(sk)) | 2233 | if (tcp_may_send_now(sk)) |
2217 | return 0; | 2234 | return false; |
2218 | 2235 | ||
2219 | return 1; | 2236 | return true; |
2220 | } | 2237 | } |
2221 | 2238 | ||
2222 | /* This gets called after a retransmit timeout, and the initially | 2239 | /* This gets called after a retransmit timeout, and the initially |
@@ -2401,7 +2418,7 @@ int tcp_send_synack(struct sock *sk) | |||
2401 | 2418 | ||
2402 | skb = tcp_write_queue_head(sk); | 2419 | skb = tcp_write_queue_head(sk); |
2403 | if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { | 2420 | if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { |
2404 | printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); | 2421 | pr_debug("%s: wrong queue state\n", __func__); |
2405 | return -EFAULT; | 2422 | return -EFAULT; |
2406 | } | 2423 | } |
2407 | if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) { | 2424 | if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) { |
@@ -2561,7 +2578,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2561 | EXPORT_SYMBOL(tcp_make_synack); | 2578 | EXPORT_SYMBOL(tcp_make_synack); |
2562 | 2579 | ||
2563 | /* Do all connect socket setups that can be done AF independent. */ | 2580 | /* Do all connect socket setups that can be done AF independent. */ |
2564 | static void tcp_connect_init(struct sock *sk) | 2581 | void tcp_connect_init(struct sock *sk) |
2565 | { | 2582 | { |
2566 | const struct dst_entry *dst = __sk_dst_get(sk); | 2583 | const struct dst_entry *dst = __sk_dst_get(sk); |
2567 | struct tcp_sock *tp = tcp_sk(sk); | 2584 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -2616,9 +2633,12 @@ static void tcp_connect_init(struct sock *sk) | |||
2616 | tp->snd_una = tp->write_seq; | 2633 | tp->snd_una = tp->write_seq; |
2617 | tp->snd_sml = tp->write_seq; | 2634 | tp->snd_sml = tp->write_seq; |
2618 | tp->snd_up = tp->write_seq; | 2635 | tp->snd_up = tp->write_seq; |
2619 | tp->rcv_nxt = 0; | 2636 | tp->snd_nxt = tp->write_seq; |
2620 | tp->rcv_wup = 0; | 2637 | |
2621 | tp->copied_seq = 0; | 2638 | if (likely(!tp->repair)) |
2639 | tp->rcv_nxt = 0; | ||
2640 | tp->rcv_wup = tp->rcv_nxt; | ||
2641 | tp->copied_seq = tp->rcv_nxt; | ||
2622 | 2642 | ||
2623 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; | 2643 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; |
2624 | inet_csk(sk)->icsk_retransmits = 0; | 2644 | inet_csk(sk)->icsk_retransmits = 0; |
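The tp->repair guard above is part of the TCP connection-repair series: when a checkpoint/restore tool has already loaded sequence state into the socket, connect() must not zero it, and tcp_connect_init() loses its static linkage so the repair path can reuse it. A hedged userspace sketch of that flow; the constants mirror this series' uapi additions, the helper name is invented, and error handling is mostly omitted:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

#ifndef TCP_REPAIR		/* uapi values introduced by this series */
#define TCP_REPAIR		19
#define TCP_REPAIR_QUEUE	20
#define TCP_QUEUE_SEQ		21
#endif
#ifndef TCP_RECV_QUEUE
#define TCP_RECV_QUEUE		1
#define TCP_SEND_QUEUE		2
#endif

/* Restore checkpointed sequence numbers, then connect(); with repair
 * mode on, tcp_connect_init() keeps the restored rcv_nxt intact. */
static int repair_connect(int sk, unsigned int snd_seq, unsigned int rcv_seq,
			  const struct sockaddr_in *peer)
{
	int on = 1, q;

	if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &on, sizeof(on)) < 0)
		return -1;

	q = TCP_SEND_QUEUE;
	setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q));
	setsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, &snd_seq, sizeof(snd_seq));

	q = TCP_RECV_QUEUE;
	setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q));
	setsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, &rcv_seq, sizeof(rcv_seq));

	/* In repair mode connect() restores state without emitting a SYN. */
	return connect(sk, (const struct sockaddr *)peer, sizeof(*peer));
}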
@@ -2641,7 +2661,6 @@ int tcp_connect(struct sock *sk) | |||
2641 | /* Reserve space for headers. */ | 2661 | /* Reserve space for headers. */ |
2642 | skb_reserve(buff, MAX_TCP_HEADER); | 2662 | skb_reserve(buff, MAX_TCP_HEADER); |
2643 | 2663 | ||
2644 | tp->snd_nxt = tp->write_seq; | ||
2645 | tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); | 2664 | tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); |
2646 | TCP_ECN_send_syn(sk, buff); | 2665 | TCP_ECN_send_syn(sk, buff); |
2647 | 2666 | ||
@@ -2790,6 +2809,15 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) | |||
2790 | return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); | 2809 | return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); |
2791 | } | 2810 | } |
2792 | 2811 | ||
2812 | void tcp_send_window_probe(struct sock *sk) | ||
2813 | { | ||
2814 | if (sk->sk_state == TCP_ESTABLISHED) { | ||
2815 | tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1; | ||
2816 | tcp_sk(sk)->snd_nxt = tcp_sk(sk)->write_seq; | ||
2817 | tcp_xmit_probe_skb(sk, 0); | ||
2818 | } | ||
2819 | } | ||
2820 | |||
2793 | /* Initiate keepalive or window probe from timer. */ | 2821 | /* Initiate keepalive or window probe from timer. */ |
2794 | int tcp_write_wakeup(struct sock *sk) | 2822 | int tcp_write_wakeup(struct sock *sk) |
2795 | { | 2823 | { |
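The new tcp_send_window_probe() rewinds snd_wl1 to rcv_nxt - 1 so the very next in-order segment is allowed to refresh the send window. The trick is plain wrap-safe 32-bit sequence arithmetic; a standalone sketch, where seq_after() mirrors the kernel's after() from include/net/tcp.h:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Wrap-safe "a is after b", equivalent to the kernel's after(a, b). */
static bool seq_after(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) > 0;
}

int main(void)
{
	uint32_t rcv_nxt = 0x80000000u;	/* arbitrary example value */
	uint32_t snd_wl1 = rcv_nxt - 1;	/* what the probe sets */

	/* The next in-order segment carries seq == rcv_nxt, so the
	 * after(seq, snd_wl1) test in the window-update path passes and
	 * the peer's advertised window is accepted even if nothing else
	 * about the segment changed. */
	printf("window update accepted: %d\n", seq_after(rcv_nxt, snd_wl1));
	return 0;
}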
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index a981cdc0a6e9..4526fe68e60e 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c | |||
@@ -91,7 +91,7 @@ static inline int tcp_probe_avail(void) | |||
91 | * Note: arguments must match tcp_rcv_established()! | 91 | * Note: arguments must match tcp_rcv_established()! |
92 | */ | 92 | */ |
93 | static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, | 93 | static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, |
94 | struct tcphdr *th, unsigned len) | 94 | struct tcphdr *th, unsigned int len) |
95 | { | 95 | { |
96 | const struct tcp_sock *tp = tcp_sk(sk); | 96 | const struct tcp_sock *tp = tcp_sk(sk); |
97 | const struct inet_sock *inet = inet_sk(sk); | 97 | const struct inet_sock *inet = inet_sk(sk); |
@@ -138,7 +138,7 @@ static struct jprobe tcp_jprobe = { | |||
138 | .entry = jtcp_rcv_established, | 138 | .entry = jtcp_rcv_established, |
139 | }; | 139 | }; |
140 | 140 | ||
141 | static int tcpprobe_open(struct inode * inode, struct file * file) | 141 | static int tcpprobe_open(struct inode *inode, struct file *file) |
142 | { | 142 | { |
143 | /* Reset (empty) log */ | 143 | /* Reset (empty) log */ |
144 | spin_lock_bh(&tcp_probe.lock); | 144 | spin_lock_bh(&tcp_probe.lock); |
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 34d4a02c2f16..e911e6c523ec 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -319,6 +319,11 @@ void tcp_retransmit_timer(struct sock *sk) | |||
319 | struct tcp_sock *tp = tcp_sk(sk); | 319 | struct tcp_sock *tp = tcp_sk(sk); |
320 | struct inet_connection_sock *icsk = inet_csk(sk); | 320 | struct inet_connection_sock *icsk = inet_csk(sk); |
321 | 321 | ||
322 | if (tp->early_retrans_delayed) { | ||
323 | tcp_resume_early_retransmit(sk); | ||
324 | return; | ||
325 | } | ||
326 | |||
322 | if (!tp->packets_out) | 327 | if (!tp->packets_out) |
323 | goto out; | 328 | goto out; |
324 | 329 | ||
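The tcp_timer.c hunk multiplexes one timer: when tp->early_retrans_delayed is set, the firing retransmit timer resumes a delayed early retransmit (from the RFC 5827 early-retransmit series) instead of running the full RTO path. A schematic userspace sketch of that dispatch pattern, all names illustrative:

#include <stdbool.h>
#include <stdio.h>

struct conn {
	bool early_retrans_delayed;	/* timer was armed for delayed ER */
};

static void resume_early_retransmit(struct conn *c)
{
	c->early_retrans_delayed = false;
	puts("early retransmit resumed");
}

static void handle_rto(struct conn *c)
{
	(void)c;
	puts("full RTO recovery");
}

/* One timer, two meanings: a flag set when arming decides the path. */
static void retransmit_timer_fired(struct conn *c)
{
	if (c->early_retrans_delayed) {
		resume_early_retransmit(c);
		return;
	}
	handle_rto(c);
}

int main(void)
{
	struct conn c = { .early_retrans_delayed = true };
	retransmit_timer_fired(&c);	/* takes the ER path */
	retransmit_timer_fired(&c);	/* now the RTO path */
	return 0;
}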
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index fe141052a1be..609397ee78fb 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -107,6 +107,7 @@ | |||
107 | #include <net/checksum.h> | 107 | #include <net/checksum.h> |
108 | #include <net/xfrm.h> | 108 | #include <net/xfrm.h> |
109 | #include <trace/events/udp.h> | 109 | #include <trace/events/udp.h> |
110 | #include <linux/static_key.h> | ||
110 | #include "udp_impl.h" | 111 | #include "udp_impl.h" |
111 | 112 | ||
112 | struct udp_table udp_table __read_mostly; | 113 | struct udp_table udp_table __read_mostly; |
@@ -206,7 +207,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, | |||
206 | 207 | ||
207 | if (!snum) { | 208 | if (!snum) { |
208 | int low, high, remaining; | 209 | int low, high, remaining; |
209 | unsigned rand; | 210 | unsigned int rand; |
210 | unsigned short first, last; | 211 | unsigned short first, last; |
211 | DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); | 212 | DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); |
212 | 213 | ||
@@ -846,7 +847,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
846 | * Get and verify the address. | 847 | * Get and verify the address. |
847 | */ | 848 | */ |
848 | if (msg->msg_name) { | 849 | if (msg->msg_name) { |
849 | struct sockaddr_in * usin = (struct sockaddr_in *)msg->msg_name; | 850 | struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name; |
850 | if (msg->msg_namelen < sizeof(*usin)) | 851 | if (msg->msg_namelen < sizeof(*usin)) |
851 | return -EINVAL; | 852 | return -EINVAL; |
852 | if (usin->sin_family != AF_INET) { | 853 | if (usin->sin_family != AF_INET) { |
@@ -1379,6 +1380,14 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
1379 | 1380 | ||
1380 | } | 1381 | } |
1381 | 1382 | ||
1383 | static struct static_key udp_encap_needed __read_mostly; | ||
1384 | void udp_encap_enable(void) | ||
1385 | { | ||
1386 | if (!static_key_enabled(&udp_encap_needed)) | ||
1387 | static_key_slow_inc(&udp_encap_needed); | ||
1388 | } | ||
1389 | EXPORT_SYMBOL(udp_encap_enable); | ||
1390 | |||
1382 | /* returns: | 1391 | /* returns: |
1383 | * -1: error | 1392 | * -1: error |
1384 | * 0: success | 1393 | * 0: success |
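udp_encap_needed is a static key: on architectures with jump-label support, static_key_false() compiles to straight-line code until the first encapsulating socket calls udp_encap_enable(), at which point the branch is runtime-patched in, so ordinary UDP traffic never pays for the encap check. A hedged in-kernel sketch of the same pattern, module boilerplate omitted and names invented:

#include <linux/static_key.h>

static struct static_key my_feature_enabled = STATIC_KEY_INIT_FALSE;

void my_feature_enable(void)
{
	/* Patch the branch in exactly once; this is the slow path. */
	if (!static_key_enabled(&my_feature_enabled))
		static_key_slow_inc(&my_feature_enabled);
}

void my_hot_path(void)
{
	/* A no-op jump label while the key is disabled. */
	if (static_key_false(&my_feature_enabled)) {
		/* rarely-enabled feature work goes here */
	}
}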
@@ -1400,7 +1409,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
1400 | goto drop; | 1409 | goto drop; |
1401 | nf_reset(skb); | 1410 | nf_reset(skb); |
1402 | 1411 | ||
1403 | if (up->encap_type) { | 1412 | if (static_key_false(&udp_encap_needed) && up->encap_type) { |
1404 | int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); | 1413 | int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); |
1405 | 1414 | ||
1406 | /* | 1415 | /* |
@@ -1470,7 +1479,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
1470 | goto drop; | 1479 | goto drop; |
1471 | 1480 | ||
1472 | 1481 | ||
1473 | if (sk_rcvqueues_full(sk, skb)) | 1482 | if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) |
1474 | goto drop; | 1483 | goto drop; |
1475 | 1484 | ||
1476 | rc = 0; | 1485 | rc = 0; |
@@ -1479,7 +1488,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
1479 | bh_lock_sock(sk); | 1488 | bh_lock_sock(sk); |
1480 | if (!sock_owned_by_user(sk)) | 1489 | if (!sock_owned_by_user(sk)) |
1481 | rc = __udp_queue_rcv_skb(sk, skb); | 1490 | rc = __udp_queue_rcv_skb(sk, skb); |
1482 | else if (sk_add_backlog(sk, skb)) { | 1491 | else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) { |
1483 | bh_unlock_sock(sk); | 1492 | bh_unlock_sock(sk); |
1484 | goto drop; | 1493 | goto drop; |
1485 | } | 1494 | } |
@@ -1760,6 +1769,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, | |||
1760 | /* FALLTHROUGH */ | 1769 | /* FALLTHROUGH */ |
1761 | case UDP_ENCAP_L2TPINUDP: | 1770 | case UDP_ENCAP_L2TPINUDP: |
1762 | up->encap_type = val; | 1771 | up->encap_type = val; |
1772 | udp_encap_enable(); | ||
1763 | break; | 1773 | break; |
1764 | default: | 1774 | default: |
1765 | err = -ENOPROTOOPT; | 1775 | err = -ENOPROTOOPT; |
@@ -2163,9 +2173,15 @@ void udp4_proc_exit(void) | |||
2163 | static __initdata unsigned long uhash_entries; | 2173 | static __initdata unsigned long uhash_entries; |
2164 | static int __init set_uhash_entries(char *str) | 2174 | static int __init set_uhash_entries(char *str) |
2165 | { | 2175 | { |
2176 | ssize_t ret; | ||
2177 | |||
2166 | if (!str) | 2178 | if (!str) |
2167 | return 0; | 2179 | return 0; |
2168 | uhash_entries = simple_strtoul(str, &str, 0); | 2180 | |
2181 | ret = kstrtoul(str, 0, &uhash_entries); | ||
2182 | if (ret) | ||
2183 | return 0; | ||
2184 | |||
2169 | if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN) | 2185 | if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN) |
2170 | uhash_entries = UDP_HTABLE_SIZE_MIN; | 2186 | uhash_entries = UDP_HTABLE_SIZE_MIN; |
2171 | return 1; | 2187 | return 1; |
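The switch from simple_strtoul() to kstrtoul() tightens boot-parameter parsing: kstrtoul() rejects trailing garbage and reports overflow through its return value instead of silently truncating. A rough userspace approximation of the stricter semantics, built on strtoul(); the kernel helper itself is declared in include/linux/kernel.h:

#include <errno.h>
#include <stdlib.h>

/* Userspace analogue of kstrtoul(): 0 on success, negative
 * errno-style value on malformed or overflowing input. */
static int parse_ulong_strict(const char *s, unsigned int base,
			      unsigned long *res)
{
	char *end;
	unsigned long val;

	errno = 0;
	val = strtoul(s, &end, base);
	if (errno == ERANGE)
		return -ERANGE;
	if (end == s || *end != '\0')	/* empty input or trailing junk */
		return -EINVAL;
	*res = val;
	return 0;
}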
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 8a949f19deb6..a7f86a3cd502 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c | |||
@@ -146,9 +146,17 @@ static int udp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, | |||
146 | return udp_dump_one(&udp_table, in_skb, nlh, req); | 146 | return udp_dump_one(&udp_table, in_skb, nlh, req); |
147 | } | 147 | } |
148 | 148 | ||
149 | static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, | ||
150 | void *info) | ||
151 | { | ||
152 | r->idiag_rqueue = sk_rmem_alloc_get(sk); | ||
153 | r->idiag_wqueue = sk_wmem_alloc_get(sk); | ||
154 | } | ||
155 | |||
149 | static const struct inet_diag_handler udp_diag_handler = { | 156 | static const struct inet_diag_handler udp_diag_handler = { |
150 | .dump = udp_diag_dump, | 157 | .dump = udp_diag_dump, |
151 | .dump_one = udp_diag_dump_one, | 158 | .dump_one = udp_diag_dump_one, |
159 | .idiag_get_info = udp_diag_get_info, | ||
152 | .idiag_type = IPPROTO_UDP, | 160 | .idiag_type = IPPROTO_UDP, |
153 | }; | 161 | }; |
154 | 162 | ||
@@ -167,6 +175,7 @@ static int udplite_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr * | |||
167 | static const struct inet_diag_handler udplite_diag_handler = { | 175 | static const struct inet_diag_handler udplite_diag_handler = { |
168 | .dump = udplite_diag_dump, | 176 | .dump = udplite_diag_dump, |
169 | .dump_one = udplite_diag_dump_one, | 177 | .dump_one = udplite_diag_dump_one, |
178 | .idiag_get_info = udp_diag_get_info, | ||
170 | .idiag_type = IPPROTO_UDPLITE, | 179 | .idiag_type = IPPROTO_UDPLITE, |
171 | }; | 180 | }; |
172 | 181 | ||
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index aaad650d47d9..5a681e298b90 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h | |||
@@ -25,7 +25,7 @@ extern int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
25 | size_t len, int noblock, int flags, int *addr_len); | 25 | size_t len, int noblock, int flags, int *addr_len); |
26 | extern int udp_sendpage(struct sock *sk, struct page *page, int offset, | 26 | extern int udp_sendpage(struct sock *sk, struct page *page, int offset, |
27 | size_t size, int flags); | 27 | size_t size, int flags); |
28 | extern int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb); | 28 | extern int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); |
29 | extern void udp_destroy_sock(struct sock *sk); | 29 | extern void udp_destroy_sock(struct sock *sk); |
30 | 30 | ||
31 | #ifdef CONFIG_PROC_FS | 31 | #ifdef CONFIG_PROC_FS |
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index a0b4c5da8d43..0d3426cb5c4f 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
@@ -152,7 +152,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) | |||
152 | 152 | ||
153 | case IPPROTO_AH: | 153 | case IPPROTO_AH: |
154 | if (pskb_may_pull(skb, xprth + 8 - skb->data)) { | 154 | if (pskb_may_pull(skb, xprth + 8 - skb->data)) { |
155 | __be32 *ah_hdr = (__be32*)xprth; | 155 | __be32 *ah_hdr = (__be32 *)xprth; |
156 | 156 | ||
157 | fl4->fl4_ipsec_spi = ah_hdr[1]; | 157 | fl4->fl4_ipsec_spi = ah_hdr[1]; |
158 | } | 158 | } |
@@ -298,8 +298,8 @@ void __init xfrm4_init(int rt_max_size) | |||
298 | xfrm4_state_init(); | 298 | xfrm4_state_init(); |
299 | xfrm4_policy_init(); | 299 | xfrm4_policy_init(); |
300 | #ifdef CONFIG_SYSCTL | 300 | #ifdef CONFIG_SYSCTL |
301 | sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path, | 301 | sysctl_hdr = register_net_sysctl(&init_net, "net/ipv4", |
302 | xfrm4_policy_table); | 302 | xfrm4_policy_table); |
303 | #endif | 303 | #endif |
304 | } | 304 | } |
305 | 305 | ||