diff options
Diffstat (limited to 'net/ipv4')
68 files changed, 1650 insertions, 850 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index cbb505ba9324..d183262943d9 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig | |||
| @@ -163,8 +163,6 @@ config IP_PNP_RARP | |||
| 163 | operating on your network. Read | 163 | operating on your network. Read |
| 164 | <file:Documentation/filesystems/nfs/nfsroot.txt> for details. | 164 | <file:Documentation/filesystems/nfs/nfsroot.txt> for details. |
| 165 | 165 | ||
| 166 | # not yet ready.. | ||
| 167 | # bool ' IP: ARP support' CONFIG_IP_PNP_ARP | ||
| 168 | config NET_IPIP | 166 | config NET_IPIP |
| 169 | tristate "IP: tunneling" | 167 | tristate "IP: tunneling" |
| 170 | select INET_TUNNEL | 168 | select INET_TUNNEL |
| @@ -409,6 +407,14 @@ config INET_TCP_DIAG | |||
| 409 | depends on INET_DIAG | 407 | depends on INET_DIAG |
| 410 | def_tristate INET_DIAG | 408 | def_tristate INET_DIAG |
| 411 | 409 | ||
| 410 | config INET_UDP_DIAG | ||
| 411 | tristate "UDP: socket monitoring interface" | ||
| 412 | depends on INET_DIAG && (IPV6 || IPV6=n) | ||
| 413 | default n | ||
| 414 | ---help--- | ||
| 415 | Support for UDP socket monitoring interface used by the ss tool. | ||
| 416 | If unsure, say Y. | ||
| 417 | |||
| 412 | menuconfig TCP_CONG_ADVANCED | 418 | menuconfig TCP_CONG_ADVANCED |
| 413 | bool "TCP: advanced congestion control" | 419 | bool "TCP: advanced congestion control" |
| 414 | ---help--- | 420 | ---help--- |
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index f2dc69cffb57..ff75d3bbcd6a 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile | |||
| @@ -34,6 +34,7 @@ obj-$(CONFIG_IP_PNP) += ipconfig.o | |||
| 34 | obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/ | 34 | obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/ |
| 35 | obj-$(CONFIG_INET_DIAG) += inet_diag.o | 35 | obj-$(CONFIG_INET_DIAG) += inet_diag.o |
| 36 | obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o | 36 | obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o |
| 37 | obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o | ||
| 37 | obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o | 38 | obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o |
| 38 | obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o | 39 | obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o |
| 39 | obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o | 40 | obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o |
| @@ -47,6 +48,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o | |||
| 47 | obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o | 48 | obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o |
| 48 | obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o | 49 | obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o |
| 49 | obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o | 50 | obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o |
| 51 | obj-$(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) += tcp_memcontrol.o | ||
| 50 | obj-$(CONFIG_NETLABEL) += cipso_ipv4.o | 52 | obj-$(CONFIG_NETLABEL) += cipso_ipv4.o |
| 51 | 53 | ||
| 52 | obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ | 54 | obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ |
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1b5096a9875a..f7b5670744f0 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
| @@ -1250,7 +1250,8 @@ out: | |||
| 1250 | return err; | 1250 | return err; |
| 1251 | } | 1251 | } |
| 1252 | 1252 | ||
| 1253 | static struct sk_buff *inet_gso_segment(struct sk_buff *skb, u32 features) | 1253 | static struct sk_buff *inet_gso_segment(struct sk_buff *skb, |
| 1254 | netdev_features_t features) | ||
| 1254 | { | 1255 | { |
| 1255 | struct sk_buff *segs = ERR_PTR(-EINVAL); | 1256 | struct sk_buff *segs = ERR_PTR(-EINVAL); |
| 1256 | struct iphdr *iph; | 1257 | struct iphdr *iph; |
| @@ -1572,9 +1573,9 @@ static __net_init int ipv4_mib_init_net(struct net *net) | |||
| 1572 | sizeof(struct icmp_mib), | 1573 | sizeof(struct icmp_mib), |
| 1573 | __alignof__(struct icmp_mib)) < 0) | 1574 | __alignof__(struct icmp_mib)) < 0) |
| 1574 | goto err_icmp_mib; | 1575 | goto err_icmp_mib; |
| 1575 | if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics, | 1576 | net->mib.icmpmsg_statistics = kzalloc(sizeof(struct icmpmsg_mib), |
| 1576 | sizeof(struct icmpmsg_mib), | 1577 | GFP_KERNEL); |
| 1577 | __alignof__(struct icmpmsg_mib)) < 0) | 1578 | if (!net->mib.icmpmsg_statistics) |
| 1578 | goto err_icmpmsg_mib; | 1579 | goto err_icmpmsg_mib; |
| 1579 | 1580 | ||
| 1580 | tcp_mib_init(net); | 1581 | tcp_mib_init(net); |
| @@ -1598,7 +1599,7 @@ err_tcp_mib: | |||
| 1598 | 1599 | ||
| 1599 | static __net_exit void ipv4_mib_exit_net(struct net *net) | 1600 | static __net_exit void ipv4_mib_exit_net(struct net *net) |
| 1600 | { | 1601 | { |
| 1601 | snmp_mib_free((void __percpu **)net->mib.icmpmsg_statistics); | 1602 | kfree(net->mib.icmpmsg_statistics); |
| 1602 | snmp_mib_free((void __percpu **)net->mib.icmp_statistics); | 1603 | snmp_mib_free((void __percpu **)net->mib.icmp_statistics); |
| 1603 | snmp_mib_free((void __percpu **)net->mib.udplite_statistics); | 1604 | snmp_mib_free((void __percpu **)net->mib.udplite_statistics); |
| 1604 | snmp_mib_free((void __percpu **)net->mib.udp_statistics); | 1605 | snmp_mib_free((void __percpu **)net->mib.udp_statistics); |
| @@ -1671,6 +1672,8 @@ static int __init inet_init(void) | |||
| 1671 | ip_static_sysctl_init(); | 1672 | ip_static_sysctl_init(); |
| 1672 | #endif | 1673 | #endif |
| 1673 | 1674 | ||
| 1675 | tcp_prot.sysctl_mem = init_net.ipv4.sysctl_tcp_mem; | ||
| 1676 | |||
| 1674 | /* | 1677 | /* |
| 1675 | * Add all the base protocols. | 1678 | * Add all the base protocols. |
| 1676 | */ | 1679 | */ |
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 96a164aa1367..63e49890ad31 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c | |||
| @@ -112,11 +112,6 @@ | |||
| 112 | #include <net/arp.h> | 112 | #include <net/arp.h> |
| 113 | #include <net/ax25.h> | 113 | #include <net/ax25.h> |
| 114 | #include <net/netrom.h> | 114 | #include <net/netrom.h> |
| 115 | #if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) | ||
| 116 | #include <net/atmclip.h> | ||
| 117 | struct neigh_table *clip_tbl_hook; | ||
| 118 | EXPORT_SYMBOL(clip_tbl_hook); | ||
| 119 | #endif | ||
| 120 | 115 | ||
| 121 | #include <asm/system.h> | 116 | #include <asm/system.h> |
| 122 | #include <linux/uaccess.h> | 117 | #include <linux/uaccess.h> |
| @@ -126,7 +121,7 @@ EXPORT_SYMBOL(clip_tbl_hook); | |||
| 126 | /* | 121 | /* |
| 127 | * Interface to generic neighbour cache. | 122 | * Interface to generic neighbour cache. |
| 128 | */ | 123 | */ |
| 129 | static u32 arp_hash(const void *pkey, const struct net_device *dev, __u32 rnd); | 124 | static u32 arp_hash(const void *pkey, const struct net_device *dev, __u32 *hash_rnd); |
| 130 | static int arp_constructor(struct neighbour *neigh); | 125 | static int arp_constructor(struct neighbour *neigh); |
| 131 | static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb); | 126 | static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb); |
| 132 | static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb); | 127 | static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb); |
| @@ -164,7 +159,6 @@ static const struct neigh_ops arp_broken_ops = { | |||
| 164 | 159 | ||
| 165 | struct neigh_table arp_tbl = { | 160 | struct neigh_table arp_tbl = { |
| 166 | .family = AF_INET, | 161 | .family = AF_INET, |
| 167 | .entry_size = sizeof(struct neighbour) + 4, | ||
| 168 | .key_len = 4, | 162 | .key_len = 4, |
| 169 | .hash = arp_hash, | 163 | .hash = arp_hash, |
| 170 | .constructor = arp_constructor, | 164 | .constructor = arp_constructor, |
| @@ -177,7 +171,7 @@ struct neigh_table arp_tbl = { | |||
| 177 | .gc_staletime = 60 * HZ, | 171 | .gc_staletime = 60 * HZ, |
| 178 | .reachable_time = 30 * HZ, | 172 | .reachable_time = 30 * HZ, |
| 179 | .delay_probe_time = 5 * HZ, | 173 | .delay_probe_time = 5 * HZ, |
| 180 | .queue_len = 3, | 174 | .queue_len_bytes = 64*1024, |
| 181 | .ucast_probes = 3, | 175 | .ucast_probes = 3, |
| 182 | .mcast_probes = 3, | 176 | .mcast_probes = 3, |
| 183 | .anycast_delay = 1 * HZ, | 177 | .anycast_delay = 1 * HZ, |
| @@ -221,9 +215,9 @@ int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir) | |||
| 221 | 215 | ||
| 222 | static u32 arp_hash(const void *pkey, | 216 | static u32 arp_hash(const void *pkey, |
| 223 | const struct net_device *dev, | 217 | const struct net_device *dev, |
| 224 | __u32 hash_rnd) | 218 | __u32 *hash_rnd) |
| 225 | { | 219 | { |
| 226 | return arp_hashfn(*(u32 *)pkey, dev, hash_rnd); | 220 | return arp_hashfn(*(u32 *)pkey, dev, *hash_rnd); |
| 227 | } | 221 | } |
| 228 | 222 | ||
| 229 | static int arp_constructor(struct neighbour *neigh) | 223 | static int arp_constructor(struct neighbour *neigh) |
| @@ -283,9 +277,9 @@ static int arp_constructor(struct neighbour *neigh) | |||
| 283 | default: | 277 | default: |
| 284 | break; | 278 | break; |
| 285 | case ARPHRD_ROSE: | 279 | case ARPHRD_ROSE: |
| 286 | #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) | 280 | #if IS_ENABLED(CONFIG_AX25) |
| 287 | case ARPHRD_AX25: | 281 | case ARPHRD_AX25: |
| 288 | #if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE) | 282 | #if IS_ENABLED(CONFIG_NETROM) |
| 289 | case ARPHRD_NETROM: | 283 | case ARPHRD_NETROM: |
| 290 | #endif | 284 | #endif |
| 291 | neigh->ops = &arp_broken_ops; | 285 | neigh->ops = &arp_broken_ops; |
| @@ -592,16 +586,18 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, | |||
| 592 | struct sk_buff *skb; | 586 | struct sk_buff *skb; |
| 593 | struct arphdr *arp; | 587 | struct arphdr *arp; |
| 594 | unsigned char *arp_ptr; | 588 | unsigned char *arp_ptr; |
| 589 | int hlen = LL_RESERVED_SPACE(dev); | ||
| 590 | int tlen = dev->needed_tailroom; | ||
| 595 | 591 | ||
| 596 | /* | 592 | /* |
| 597 | * Allocate a buffer | 593 | * Allocate a buffer |
| 598 | */ | 594 | */ |
| 599 | 595 | ||
| 600 | skb = alloc_skb(arp_hdr_len(dev) + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); | 596 | skb = alloc_skb(arp_hdr_len(dev) + hlen + tlen, GFP_ATOMIC); |
| 601 | if (skb == NULL) | 597 | if (skb == NULL) |
| 602 | return NULL; | 598 | return NULL; |
| 603 | 599 | ||
| 604 | skb_reserve(skb, LL_RESERVED_SPACE(dev)); | 600 | skb_reserve(skb, hlen); |
| 605 | skb_reset_network_header(skb); | 601 | skb_reset_network_header(skb); |
| 606 | arp = (struct arphdr *) skb_put(skb, arp_hdr_len(dev)); | 602 | arp = (struct arphdr *) skb_put(skb, arp_hdr_len(dev)); |
| 607 | skb->dev = dev; | 603 | skb->dev = dev; |
| @@ -633,13 +629,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, | |||
| 633 | arp->ar_pro = htons(ETH_P_IP); | 629 | arp->ar_pro = htons(ETH_P_IP); |
| 634 | break; | 630 | break; |
| 635 | 631 | ||
| 636 | #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) | 632 | #if IS_ENABLED(CONFIG_AX25) |
| 637 | case ARPHRD_AX25: | 633 | case ARPHRD_AX25: |
| 638 | arp->ar_hrd = htons(ARPHRD_AX25); | 634 | arp->ar_hrd = htons(ARPHRD_AX25); |
| 639 | arp->ar_pro = htons(AX25_P_IP); | 635 | arp->ar_pro = htons(AX25_P_IP); |
| 640 | break; | 636 | break; |
| 641 | 637 | ||
| 642 | #if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE) | 638 | #if IS_ENABLED(CONFIG_NETROM) |
| 643 | case ARPHRD_NETROM: | 639 | case ARPHRD_NETROM: |
| 644 | arp->ar_hrd = htons(ARPHRD_NETROM); | 640 | arp->ar_hrd = htons(ARPHRD_NETROM); |
| 645 | arp->ar_pro = htons(AX25_P_IP); | 641 | arp->ar_pro = htons(AX25_P_IP); |
| @@ -647,13 +643,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, | |||
| 647 | #endif | 643 | #endif |
| 648 | #endif | 644 | #endif |
| 649 | 645 | ||
| 650 | #if defined(CONFIG_FDDI) || defined(CONFIG_FDDI_MODULE) | 646 | #if IS_ENABLED(CONFIG_FDDI) |
| 651 | case ARPHRD_FDDI: | 647 | case ARPHRD_FDDI: |
| 652 | arp->ar_hrd = htons(ARPHRD_ETHER); | 648 | arp->ar_hrd = htons(ARPHRD_ETHER); |
| 653 | arp->ar_pro = htons(ETH_P_IP); | 649 | arp->ar_pro = htons(ETH_P_IP); |
| 654 | break; | 650 | break; |
| 655 | #endif | 651 | #endif |
| 656 | #if defined(CONFIG_TR) || defined(CONFIG_TR_MODULE) | 652 | #if IS_ENABLED(CONFIG_TR) |
| 657 | case ARPHRD_IEEE802_TR: | 653 | case ARPHRD_IEEE802_TR: |
| 658 | arp->ar_hrd = htons(ARPHRD_IEEE802); | 654 | arp->ar_hrd = htons(ARPHRD_IEEE802); |
| 659 | arp->ar_pro = htons(ETH_P_IP); | 655 | arp->ar_pro = htons(ETH_P_IP); |
| @@ -867,7 +863,8 @@ static int arp_process(struct sk_buff *skb) | |||
| 867 | if (addr_type == RTN_UNICAST && | 863 | if (addr_type == RTN_UNICAST && |
| 868 | (arp_fwd_proxy(in_dev, dev, rt) || | 864 | (arp_fwd_proxy(in_dev, dev, rt) || |
| 869 | arp_fwd_pvlan(in_dev, dev, rt, sip, tip) || | 865 | arp_fwd_pvlan(in_dev, dev, rt, sip, tip) || |
| 870 | pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) { | 866 | (rt->dst.dev != dev && |
| 867 | pneigh_lookup(&arp_tbl, net, &tip, dev, 0)))) { | ||
| 871 | n = neigh_event_ns(&arp_tbl, sha, &sip, dev); | 868 | n = neigh_event_ns(&arp_tbl, sha, &sip, dev); |
| 872 | if (n) | 869 | if (n) |
| 873 | neigh_release(n); | 870 | neigh_release(n); |
| @@ -1040,7 +1037,7 @@ static int arp_req_set(struct net *net, struct arpreq *r, | |||
| 1040 | return -EINVAL; | 1037 | return -EINVAL; |
| 1041 | } | 1038 | } |
| 1042 | switch (dev->type) { | 1039 | switch (dev->type) { |
| 1043 | #if defined(CONFIG_FDDI) || defined(CONFIG_FDDI_MODULE) | 1040 | #if IS_ENABLED(CONFIG_FDDI) |
| 1044 | case ARPHRD_FDDI: | 1041 | case ARPHRD_FDDI: |
| 1045 | /* | 1042 | /* |
| 1046 | * According to RFC 1390, FDDI devices should accept ARP | 1043 | * According to RFC 1390, FDDI devices should accept ARP |
| @@ -1286,7 +1283,7 @@ void __init arp_init(void) | |||
| 1286 | } | 1283 | } |
| 1287 | 1284 | ||
| 1288 | #ifdef CONFIG_PROC_FS | 1285 | #ifdef CONFIG_PROC_FS |
| 1289 | #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) | 1286 | #if IS_ENABLED(CONFIG_AX25) |
| 1290 | 1287 | ||
| 1291 | /* ------------------------------------------------------------------------ */ | 1288 | /* ------------------------------------------------------------------------ */ |
| 1292 | /* | 1289 | /* |
| @@ -1334,7 +1331,7 @@ static void arp_format_neigh_entry(struct seq_file *seq, | |||
| 1334 | 1331 | ||
| 1335 | read_lock(&n->lock); | 1332 | read_lock(&n->lock); |
| 1336 | /* Convert hardware address to XX:XX:XX:XX ... form. */ | 1333 | /* Convert hardware address to XX:XX:XX:XX ... form. */ |
| 1337 | #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) | 1334 | #if IS_ENABLED(CONFIG_AX25) |
| 1338 | if (hatype == ARPHRD_AX25 || hatype == ARPHRD_NETROM) | 1335 | if (hatype == ARPHRD_AX25 || hatype == ARPHRD_NETROM) |
| 1339 | ax2asc2((ax25_address *)n->ha, hbuffer); | 1336 | ax2asc2((ax25_address *)n->ha, hbuffer); |
| 1340 | else { | 1337 | else { |
| @@ -1347,7 +1344,7 @@ static void arp_format_neigh_entry(struct seq_file *seq, | |||
| 1347 | if (k != 0) | 1344 | if (k != 0) |
| 1348 | --k; | 1345 | --k; |
| 1349 | hbuffer[k] = 0; | 1346 | hbuffer[k] = 0; |
| 1350 | #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) | 1347 | #if IS_ENABLED(CONFIG_AX25) |
| 1351 | } | 1348 | } |
| 1352 | #endif | 1349 | #endif |
| 1353 | sprintf(tbuf, "%pI4", n->primary_key); | 1350 | sprintf(tbuf, "%pI4", n->primary_key); |
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 65f01dc47565..e41c40f48cfe 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c | |||
| @@ -258,7 +258,7 @@ static struct in_device *inetdev_init(struct net_device *dev) | |||
| 258 | ip_mc_up(in_dev); | 258 | ip_mc_up(in_dev); |
| 259 | 259 | ||
| 260 | /* we can receive as soon as ip_ptr is set -- do this last */ | 260 | /* we can receive as soon as ip_ptr is set -- do this last */ |
| 261 | RCU_INIT_POINTER(dev->ip_ptr, in_dev); | 261 | rcu_assign_pointer(dev->ip_ptr, in_dev); |
| 262 | out: | 262 | out: |
| 263 | return in_dev; | 263 | return in_dev; |
| 264 | out_kfree: | 264 | out_kfree: |
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 46339ba7a2d3..799fc790b3cf 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c | |||
| @@ -67,6 +67,7 @@ int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) | |||
| 67 | 67 | ||
| 68 | return err; | 68 | return err; |
| 69 | } | 69 | } |
| 70 | EXPORT_SYMBOL_GPL(fib_lookup); | ||
| 70 | 71 | ||
| 71 | static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, | 72 | static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, |
| 72 | int flags, struct fib_lookup_arg *arg) | 73 | int flags, struct fib_lookup_arg *arg) |
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 37b671185c81..2b555a5521e0 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
| @@ -205,7 +205,7 @@ static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node) | |||
| 205 | return (struct tnode *)(parent & ~NODE_TYPE_MASK); | 205 | return (struct tnode *)(parent & ~NODE_TYPE_MASK); |
| 206 | } | 206 | } |
| 207 | 207 | ||
| 208 | /* Same as RCU_INIT_POINTER | 208 | /* Same as rcu_assign_pointer |
| 209 | * but that macro() assumes that value is a pointer. | 209 | * but that macro() assumes that value is a pointer. |
| 210 | */ | 210 | */ |
| 211 | static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) | 211 | static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) |
| @@ -529,7 +529,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node * | |||
| 529 | if (n) | 529 | if (n) |
| 530 | node_set_parent(n, tn); | 530 | node_set_parent(n, tn); |
| 531 | 531 | ||
| 532 | RCU_INIT_POINTER(tn->child[i], n); | 532 | rcu_assign_pointer(tn->child[i], n); |
| 533 | } | 533 | } |
| 534 | 534 | ||
| 535 | #define MAX_WORK 10 | 535 | #define MAX_WORK 10 |
| @@ -1015,7 +1015,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) | |||
| 1015 | 1015 | ||
| 1016 | tp = node_parent((struct rt_trie_node *) tn); | 1016 | tp = node_parent((struct rt_trie_node *) tn); |
| 1017 | if (!tp) | 1017 | if (!tp) |
| 1018 | RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn); | 1018 | rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); |
| 1019 | 1019 | ||
| 1020 | tnode_free_flush(); | 1020 | tnode_free_flush(); |
| 1021 | if (!tp) | 1021 | if (!tp) |
| @@ -1027,7 +1027,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) | |||
| 1027 | if (IS_TNODE(tn)) | 1027 | if (IS_TNODE(tn)) |
| 1028 | tn = (struct tnode *)resize(t, (struct tnode *)tn); | 1028 | tn = (struct tnode *)resize(t, (struct tnode *)tn); |
| 1029 | 1029 | ||
| 1030 | RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn); | 1030 | rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); |
| 1031 | tnode_free_flush(); | 1031 | tnode_free_flush(); |
| 1032 | } | 1032 | } |
| 1033 | 1033 | ||
| @@ -1164,7 +1164,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) | |||
| 1164 | put_child(t, (struct tnode *)tp, cindex, | 1164 | put_child(t, (struct tnode *)tp, cindex, |
| 1165 | (struct rt_trie_node *)tn); | 1165 | (struct rt_trie_node *)tn); |
| 1166 | } else { | 1166 | } else { |
| 1167 | RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn); | 1167 | rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); |
| 1168 | tp = tn; | 1168 | tp = tn; |
| 1169 | } | 1169 | } |
| 1170 | } | 1170 | } |
| @@ -1607,6 +1607,7 @@ found: | |||
| 1607 | rcu_read_unlock(); | 1607 | rcu_read_unlock(); |
| 1608 | return ret; | 1608 | return ret; |
| 1609 | } | 1609 | } |
| 1610 | EXPORT_SYMBOL_GPL(fib_table_lookup); | ||
| 1610 | 1611 | ||
| 1611 | /* | 1612 | /* |
| 1612 | * Remove the leaf and return parent. | 1613 | * Remove the leaf and return parent. |
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index b2ca095cb9da..450e5d21ed2a 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c | |||
| @@ -304,9 +304,11 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) | |||
| 304 | struct igmpv3_report *pig; | 304 | struct igmpv3_report *pig; |
| 305 | struct net *net = dev_net(dev); | 305 | struct net *net = dev_net(dev); |
| 306 | struct flowi4 fl4; | 306 | struct flowi4 fl4; |
| 307 | int hlen = LL_RESERVED_SPACE(dev); | ||
| 308 | int tlen = dev->needed_tailroom; | ||
| 307 | 309 | ||
| 308 | while (1) { | 310 | while (1) { |
| 309 | skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), | 311 | skb = alloc_skb(size + hlen + tlen, |
| 310 | GFP_ATOMIC | __GFP_NOWARN); | 312 | GFP_ATOMIC | __GFP_NOWARN); |
| 311 | if (skb) | 313 | if (skb) |
| 312 | break; | 314 | break; |
| @@ -327,7 +329,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) | |||
| 327 | skb_dst_set(skb, &rt->dst); | 329 | skb_dst_set(skb, &rt->dst); |
| 328 | skb->dev = dev; | 330 | skb->dev = dev; |
| 329 | 331 | ||
| 330 | skb_reserve(skb, LL_RESERVED_SPACE(dev)); | 332 | skb_reserve(skb, hlen); |
| 331 | 333 | ||
| 332 | skb_reset_network_header(skb); | 334 | skb_reset_network_header(skb); |
| 333 | pip = ip_hdr(skb); | 335 | pip = ip_hdr(skb); |
| @@ -647,6 +649,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, | |||
| 647 | __be32 group = pmc ? pmc->multiaddr : 0; | 649 | __be32 group = pmc ? pmc->multiaddr : 0; |
| 648 | struct flowi4 fl4; | 650 | struct flowi4 fl4; |
| 649 | __be32 dst; | 651 | __be32 dst; |
| 652 | int hlen, tlen; | ||
| 650 | 653 | ||
| 651 | if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) | 654 | if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) |
| 652 | return igmpv3_send_report(in_dev, pmc); | 655 | return igmpv3_send_report(in_dev, pmc); |
| @@ -661,7 +664,9 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, | |||
| 661 | if (IS_ERR(rt)) | 664 | if (IS_ERR(rt)) |
| 662 | return -1; | 665 | return -1; |
| 663 | 666 | ||
| 664 | skb = alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); | 667 | hlen = LL_RESERVED_SPACE(dev); |
| 668 | tlen = dev->needed_tailroom; | ||
| 669 | skb = alloc_skb(IGMP_SIZE + hlen + tlen, GFP_ATOMIC); | ||
| 665 | if (skb == NULL) { | 670 | if (skb == NULL) { |
| 666 | ip_rt_put(rt); | 671 | ip_rt_put(rt); |
| 667 | return -1; | 672 | return -1; |
| @@ -669,7 +674,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, | |||
| 669 | 674 | ||
| 670 | skb_dst_set(skb, &rt->dst); | 675 | skb_dst_set(skb, &rt->dst); |
| 671 | 676 | ||
| 672 | skb_reserve(skb, LL_RESERVED_SPACE(dev)); | 677 | skb_reserve(skb, hlen); |
| 673 | 678 | ||
| 674 | skb_reset_network_header(skb); | 679 | skb_reset_network_header(skb); |
| 675 | iph = ip_hdr(skb); | 680 | iph = ip_hdr(skb); |
| @@ -875,6 +880,8 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, | |||
| 875 | * to be intended in a v3 query. | 880 | * to be intended in a v3 query. |
| 876 | */ | 881 | */ |
| 877 | max_delay = IGMPV3_MRC(ih3->code)*(HZ/IGMP_TIMER_SCALE); | 882 | max_delay = IGMPV3_MRC(ih3->code)*(HZ/IGMP_TIMER_SCALE); |
| 883 | if (!max_delay) | ||
| 884 | max_delay = 1; /* can't mod w/ 0 */ | ||
| 878 | } else { /* v3 */ | 885 | } else { /* v3 */ |
| 879 | if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) | 886 | if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) |
| 880 | return; | 887 | return; |
| @@ -1242,7 +1249,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) | |||
| 1242 | 1249 | ||
| 1243 | im->next_rcu = in_dev->mc_list; | 1250 | im->next_rcu = in_dev->mc_list; |
| 1244 | in_dev->mc_count++; | 1251 | in_dev->mc_count++; |
| 1245 | RCU_INIT_POINTER(in_dev->mc_list, im); | 1252 | rcu_assign_pointer(in_dev->mc_list, im); |
| 1246 | 1253 | ||
| 1247 | #ifdef CONFIG_IP_MULTICAST | 1254 | #ifdef CONFIG_IP_MULTICAST |
| 1248 | igmpv3_del_delrec(in_dev, im->multiaddr); | 1255 | igmpv3_del_delrec(in_dev, im->multiaddr); |
| @@ -1574,7 +1581,7 @@ out_unlock: | |||
| 1574 | * Add multicast single-source filter to the interface list | 1581 | * Add multicast single-source filter to the interface list |
| 1575 | */ | 1582 | */ |
| 1576 | static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode, | 1583 | static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode, |
| 1577 | __be32 *psfsrc, int delta) | 1584 | __be32 *psfsrc) |
| 1578 | { | 1585 | { |
| 1579 | struct ip_sf_list *psf, *psf_prev; | 1586 | struct ip_sf_list *psf, *psf_prev; |
| 1580 | 1587 | ||
| @@ -1709,7 +1716,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, | |||
| 1709 | pmc->sfcount[sfmode]++; | 1716 | pmc->sfcount[sfmode]++; |
| 1710 | err = 0; | 1717 | err = 0; |
| 1711 | for (i=0; i<sfcount; i++) { | 1718 | for (i=0; i<sfcount; i++) { |
| 1712 | err = ip_mc_add1_src(pmc, sfmode, &psfsrc[i], delta); | 1719 | err = ip_mc_add1_src(pmc, sfmode, &psfsrc[i]); |
| 1713 | if (err) | 1720 | if (err) |
| 1714 | break; | 1721 | break; |
| 1715 | } | 1722 | } |
| @@ -1814,7 +1821,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) | |||
| 1814 | iml->next_rcu = inet->mc_list; | 1821 | iml->next_rcu = inet->mc_list; |
| 1815 | iml->sflist = NULL; | 1822 | iml->sflist = NULL; |
| 1816 | iml->sfmode = MCAST_EXCLUDE; | 1823 | iml->sfmode = MCAST_EXCLUDE; |
| 1817 | RCU_INIT_POINTER(inet->mc_list, iml); | 1824 | rcu_assign_pointer(inet->mc_list, iml); |
| 1818 | ip_mc_inc_group(in_dev, addr); | 1825 | ip_mc_inc_group(in_dev, addr); |
| 1819 | err = 0; | 1826 | err = 0; |
| 1820 | done: | 1827 | done: |
| @@ -2001,7 +2008,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct | |||
| 2001 | atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); | 2008 | atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); |
| 2002 | kfree_rcu(psl, rcu); | 2009 | kfree_rcu(psl, rcu); |
| 2003 | } | 2010 | } |
| 2004 | RCU_INIT_POINTER(pmc->sflist, newpsl); | 2011 | rcu_assign_pointer(pmc->sflist, newpsl); |
| 2005 | psl = newpsl; | 2012 | psl = newpsl; |
| 2006 | } | 2013 | } |
| 2007 | rv = 1; /* > 0 for insert logic below if sl_count is 0 */ | 2014 | rv = 1; /* > 0 for insert logic below if sl_count is 0 */ |
| @@ -2104,7 +2111,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) | |||
| 2104 | } else | 2111 | } else |
| 2105 | (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, | 2112 | (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, |
| 2106 | 0, NULL, 0); | 2113 | 0, NULL, 0); |
| 2107 | RCU_INIT_POINTER(pmc->sflist, newpsl); | 2114 | rcu_assign_pointer(pmc->sflist, newpsl); |
| 2108 | pmc->sfmode = msf->imsf_fmode; | 2115 | pmc->sfmode = msf->imsf_fmode; |
| 2109 | err = 0; | 2116 | err = 0; |
| 2110 | done: | 2117 | done: |
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index c14d88ad348d..19d66cefd7d3 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
| @@ -123,11 +123,14 @@ again: | |||
| 123 | smallest_size = tb->num_owners; | 123 | smallest_size = tb->num_owners; |
| 124 | smallest_rover = rover; | 124 | smallest_rover = rover; |
| 125 | if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) { | 125 | if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) { |
| 126 | spin_unlock(&head->lock); | ||
| 127 | snum = smallest_rover; | 126 | snum = smallest_rover; |
| 128 | goto have_snum; | 127 | goto tb_found; |
| 129 | } | 128 | } |
| 130 | } | 129 | } |
| 130 | if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { | ||
| 131 | snum = rover; | ||
| 132 | goto tb_found; | ||
| 133 | } | ||
| 131 | goto next; | 134 | goto next; |
| 132 | } | 135 | } |
| 133 | break; | 136 | break; |
| @@ -418,7 +421,7 @@ static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, | |||
| 418 | return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1); | 421 | return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1); |
| 419 | } | 422 | } |
| 420 | 423 | ||
| 421 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 424 | #if IS_ENABLED(CONFIG_IPV6) |
| 422 | #define AF_INET_FAMILY(fam) ((fam) == AF_INET) | 425 | #define AF_INET_FAMILY(fam) ((fam) == AF_INET) |
| 423 | #else | 426 | #else |
| 424 | #define AF_INET_FAMILY(fam) 1 | 427 | #define AF_INET_FAMILY(fam) 1 |
| @@ -588,10 +591,19 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, | |||
| 588 | } | 591 | } |
| 589 | EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); | 592 | EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); |
| 590 | 593 | ||
| 591 | struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, | 594 | /** |
| 592 | const gfp_t priority) | 595 | * inet_csk_clone_lock - clone an inet socket, and lock its clone |
| 596 | * @sk: the socket to clone | ||
| 597 | * @req: request_sock | ||
| 598 | * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) | ||
| 599 | * | ||
| 600 | * Caller must unlock socket even in error path (bh_unlock_sock(newsk)) | ||
| 601 | */ | ||
| 602 | struct sock *inet_csk_clone_lock(const struct sock *sk, | ||
| 603 | const struct request_sock *req, | ||
| 604 | const gfp_t priority) | ||
| 593 | { | 605 | { |
| 594 | struct sock *newsk = sk_clone(sk, priority); | 606 | struct sock *newsk = sk_clone_lock(sk, priority); |
| 595 | 607 | ||
| 596 | if (newsk != NULL) { | 608 | if (newsk != NULL) { |
| 597 | struct inet_connection_sock *newicsk = inet_csk(newsk); | 609 | struct inet_connection_sock *newicsk = inet_csk(newsk); |
| @@ -615,7 +627,7 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, | |||
| 615 | } | 627 | } |
| 616 | return newsk; | 628 | return newsk; |
| 617 | } | 629 | } |
| 618 | EXPORT_SYMBOL_GPL(inet_csk_clone); | 630 | EXPORT_SYMBOL_GPL(inet_csk_clone_lock); |
| 619 | 631 | ||
| 620 | /* | 632 | /* |
| 621 | * At this point, there should be no process reference to this | 633 | * At this point, there should be no process reference to this |
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index ccee270a9b65..fcf281819cd4 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c | |||
| @@ -33,6 +33,7 @@ | |||
| 33 | #include <linux/stddef.h> | 33 | #include <linux/stddef.h> |
| 34 | 34 | ||
| 35 | #include <linux/inet_diag.h> | 35 | #include <linux/inet_diag.h> |
| 36 | #include <linux/sock_diag.h> | ||
| 36 | 37 | ||
| 37 | static const struct inet_diag_handler **inet_diag_table; | 38 | static const struct inet_diag_handler **inet_diag_table; |
| 38 | 39 | ||
| @@ -45,24 +46,22 @@ struct inet_diag_entry { | |||
| 45 | u16 userlocks; | 46 | u16 userlocks; |
| 46 | }; | 47 | }; |
| 47 | 48 | ||
| 48 | static struct sock *idiagnl; | ||
| 49 | |||
| 50 | #define INET_DIAG_PUT(skb, attrtype, attrlen) \ | 49 | #define INET_DIAG_PUT(skb, attrtype, attrlen) \ |
| 51 | RTA_DATA(__RTA_PUT(skb, attrtype, attrlen)) | 50 | RTA_DATA(__RTA_PUT(skb, attrtype, attrlen)) |
| 52 | 51 | ||
| 53 | static DEFINE_MUTEX(inet_diag_table_mutex); | 52 | static DEFINE_MUTEX(inet_diag_table_mutex); |
| 54 | 53 | ||
| 55 | static const struct inet_diag_handler *inet_diag_lock_handler(int type) | 54 | static const struct inet_diag_handler *inet_diag_lock_handler(int proto) |
| 56 | { | 55 | { |
| 57 | if (!inet_diag_table[type]) | 56 | if (!inet_diag_table[proto]) |
| 58 | request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, | 57 | request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK, |
| 59 | NETLINK_INET_DIAG, type); | 58 | NETLINK_SOCK_DIAG, AF_INET, proto); |
| 60 | 59 | ||
| 61 | mutex_lock(&inet_diag_table_mutex); | 60 | mutex_lock(&inet_diag_table_mutex); |
| 62 | if (!inet_diag_table[type]) | 61 | if (!inet_diag_table[proto]) |
| 63 | return ERR_PTR(-ENOENT); | 62 | return ERR_PTR(-ENOENT); |
| 64 | 63 | ||
| 65 | return inet_diag_table[type]; | 64 | return inet_diag_table[proto]; |
| 66 | } | 65 | } |
| 67 | 66 | ||
| 68 | static inline void inet_diag_unlock_handler( | 67 | static inline void inet_diag_unlock_handler( |
| @@ -71,21 +70,21 @@ static inline void inet_diag_unlock_handler( | |||
| 71 | mutex_unlock(&inet_diag_table_mutex); | 70 | mutex_unlock(&inet_diag_table_mutex); |
| 72 | } | 71 | } |
| 73 | 72 | ||
| 74 | static int inet_csk_diag_fill(struct sock *sk, | 73 | int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, |
| 75 | struct sk_buff *skb, | 74 | struct sk_buff *skb, struct inet_diag_req_v2 *req, |
| 76 | int ext, u32 pid, u32 seq, u16 nlmsg_flags, | 75 | u32 pid, u32 seq, u16 nlmsg_flags, |
| 77 | const struct nlmsghdr *unlh) | 76 | const struct nlmsghdr *unlh) |
| 78 | { | 77 | { |
| 79 | const struct inet_sock *inet = inet_sk(sk); | 78 | const struct inet_sock *inet = inet_sk(sk); |
| 80 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
| 81 | struct inet_diag_msg *r; | 79 | struct inet_diag_msg *r; |
| 82 | struct nlmsghdr *nlh; | 80 | struct nlmsghdr *nlh; |
| 83 | void *info = NULL; | 81 | void *info = NULL; |
| 84 | struct inet_diag_meminfo *minfo = NULL; | 82 | struct inet_diag_meminfo *minfo = NULL; |
| 85 | unsigned char *b = skb_tail_pointer(skb); | 83 | unsigned char *b = skb_tail_pointer(skb); |
| 86 | const struct inet_diag_handler *handler; | 84 | const struct inet_diag_handler *handler; |
| 85 | int ext = req->idiag_ext; | ||
| 87 | 86 | ||
| 88 | handler = inet_diag_table[unlh->nlmsg_type]; | 87 | handler = inet_diag_table[req->sdiag_protocol]; |
| 89 | BUG_ON(handler == NULL); | 88 | BUG_ON(handler == NULL); |
| 90 | 89 | ||
| 91 | nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r)); | 90 | nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r)); |
| @@ -97,25 +96,13 @@ static int inet_csk_diag_fill(struct sock *sk, | |||
| 97 | if (ext & (1 << (INET_DIAG_MEMINFO - 1))) | 96 | if (ext & (1 << (INET_DIAG_MEMINFO - 1))) |
| 98 | minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, sizeof(*minfo)); | 97 | minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, sizeof(*minfo)); |
| 99 | 98 | ||
| 100 | if (ext & (1 << (INET_DIAG_INFO - 1))) | ||
| 101 | info = INET_DIAG_PUT(skb, INET_DIAG_INFO, | ||
| 102 | handler->idiag_info_size); | ||
| 103 | |||
| 104 | if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) { | ||
| 105 | const size_t len = strlen(icsk->icsk_ca_ops->name); | ||
| 106 | |||
| 107 | strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1), | ||
| 108 | icsk->icsk_ca_ops->name); | ||
| 109 | } | ||
| 110 | |||
| 111 | r->idiag_family = sk->sk_family; | 99 | r->idiag_family = sk->sk_family; |
| 112 | r->idiag_state = sk->sk_state; | 100 | r->idiag_state = sk->sk_state; |
| 113 | r->idiag_timer = 0; | 101 | r->idiag_timer = 0; |
| 114 | r->idiag_retrans = 0; | 102 | r->idiag_retrans = 0; |
| 115 | 103 | ||
| 116 | r->id.idiag_if = sk->sk_bound_dev_if; | 104 | r->id.idiag_if = sk->sk_bound_dev_if; |
| 117 | r->id.idiag_cookie[0] = (u32)(unsigned long)sk; | 105 | sock_diag_save_cookie(sk, r->id.idiag_cookie); |
| 118 | r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); | ||
| 119 | 106 | ||
| 120 | r->id.idiag_sport = inet->inet_sport; | 107 | r->id.idiag_sport = inet->inet_sport; |
| 121 | r->id.idiag_dport = inet->inet_dport; | 108 | r->id.idiag_dport = inet->inet_dport; |
| @@ -128,20 +115,36 @@ static int inet_csk_diag_fill(struct sock *sk, | |||
| 128 | if (ext & (1 << (INET_DIAG_TOS - 1))) | 115 | if (ext & (1 << (INET_DIAG_TOS - 1))) |
| 129 | RTA_PUT_U8(skb, INET_DIAG_TOS, inet->tos); | 116 | RTA_PUT_U8(skb, INET_DIAG_TOS, inet->tos); |
| 130 | 117 | ||
| 131 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) | 118 | #if IS_ENABLED(CONFIG_IPV6) |
| 132 | if (r->idiag_family == AF_INET6) { | 119 | if (r->idiag_family == AF_INET6) { |
| 133 | const struct ipv6_pinfo *np = inet6_sk(sk); | 120 | const struct ipv6_pinfo *np = inet6_sk(sk); |
| 134 | 121 | ||
| 122 | *(struct in6_addr *)r->id.idiag_src = np->rcv_saddr; | ||
| 123 | *(struct in6_addr *)r->id.idiag_dst = np->daddr; | ||
| 135 | if (ext & (1 << (INET_DIAG_TCLASS - 1))) | 124 | if (ext & (1 << (INET_DIAG_TCLASS - 1))) |
| 136 | RTA_PUT_U8(skb, INET_DIAG_TCLASS, np->tclass); | 125 | RTA_PUT_U8(skb, INET_DIAG_TCLASS, np->tclass); |
| 137 | |||
| 138 | ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, | ||
| 139 | &np->rcv_saddr); | ||
| 140 | ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, | ||
| 141 | &np->daddr); | ||
| 142 | } | 126 | } |
| 143 | #endif | 127 | #endif |
| 144 | 128 | ||
| 129 | r->idiag_uid = sock_i_uid(sk); | ||
| 130 | r->idiag_inode = sock_i_ino(sk); | ||
| 131 | |||
| 132 | if (minfo) { | ||
| 133 | minfo->idiag_rmem = sk_rmem_alloc_get(sk); | ||
| 134 | minfo->idiag_wmem = sk->sk_wmem_queued; | ||
| 135 | minfo->idiag_fmem = sk->sk_forward_alloc; | ||
| 136 | minfo->idiag_tmem = sk_wmem_alloc_get(sk); | ||
| 137 | } | ||
| 138 | |||
| 139 | if (ext & (1 << (INET_DIAG_SKMEMINFO - 1))) | ||
| 140 | if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO)) | ||
| 141 | goto rtattr_failure; | ||
| 142 | |||
| 143 | if (icsk == NULL) { | ||
| 144 | r->idiag_rqueue = r->idiag_wqueue = 0; | ||
| 145 | goto out; | ||
| 146 | } | ||
| 147 | |||
| 145 | #define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ) | 148 | #define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ) |
| 146 | 149 | ||
| 147 | if (icsk->icsk_pending == ICSK_TIME_RETRANS) { | 150 | if (icsk->icsk_pending == ICSK_TIME_RETRANS) { |
| @@ -162,14 +165,14 @@ static int inet_csk_diag_fill(struct sock *sk, | |||
| 162 | } | 165 | } |
| 163 | #undef EXPIRES_IN_MS | 166 | #undef EXPIRES_IN_MS |
| 164 | 167 | ||
| 165 | r->idiag_uid = sock_i_uid(sk); | 168 | if (ext & (1 << (INET_DIAG_INFO - 1))) |
| 166 | r->idiag_inode = sock_i_ino(sk); | 169 | info = INET_DIAG_PUT(skb, INET_DIAG_INFO, sizeof(struct tcp_info)); |
| 167 | 170 | ||
| 168 | if (minfo) { | 171 | if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) { |
| 169 | minfo->idiag_rmem = sk_rmem_alloc_get(sk); | 172 | const size_t len = strlen(icsk->icsk_ca_ops->name); |
| 170 | minfo->idiag_wmem = sk->sk_wmem_queued; | 173 | |
| 171 | minfo->idiag_fmem = sk->sk_forward_alloc; | 174 | strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1), |
| 172 | minfo->idiag_tmem = sk_wmem_alloc_get(sk); | 175 | icsk->icsk_ca_ops->name); |
| 173 | } | 176 | } |
| 174 | 177 | ||
| 175 | handler->idiag_get_info(sk, r, info); | 178 | handler->idiag_get_info(sk, r, info); |
| @@ -178,6 +181,7 @@ static int inet_csk_diag_fill(struct sock *sk, | |||
| 178 | icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info) | 181 | icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info) |
| 179 | icsk->icsk_ca_ops->get_info(sk, ext, skb); | 182 | icsk->icsk_ca_ops->get_info(sk, ext, skb); |
| 180 | 183 | ||
| 184 | out: | ||
| 181 | nlh->nlmsg_len = skb_tail_pointer(skb) - b; | 185 | nlh->nlmsg_len = skb_tail_pointer(skb) - b; |
| 182 | return skb->len; | 186 | return skb->len; |
| 183 | 187 | ||
| @@ -186,10 +190,20 @@ nlmsg_failure: | |||
| 186 | nlmsg_trim(skb, b); | 190 | nlmsg_trim(skb, b); |
| 187 | return -EMSGSIZE; | 191 | return -EMSGSIZE; |
| 188 | } | 192 | } |
| 193 | EXPORT_SYMBOL_GPL(inet_sk_diag_fill); | ||
| 194 | |||
| 195 | static int inet_csk_diag_fill(struct sock *sk, | ||
| 196 | struct sk_buff *skb, struct inet_diag_req_v2 *req, | ||
| 197 | u32 pid, u32 seq, u16 nlmsg_flags, | ||
| 198 | const struct nlmsghdr *unlh) | ||
| 199 | { | ||
| 200 | return inet_sk_diag_fill(sk, inet_csk(sk), | ||
| 201 | skb, req, pid, seq, nlmsg_flags, unlh); | ||
| 202 | } | ||
| 189 | 203 | ||
| 190 | static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, | 204 | static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, |
| 191 | struct sk_buff *skb, int ext, u32 pid, | 205 | struct sk_buff *skb, struct inet_diag_req_v2 *req, |
| 192 | u32 seq, u16 nlmsg_flags, | 206 | u32 pid, u32 seq, u16 nlmsg_flags, |
| 193 | const struct nlmsghdr *unlh) | 207 | const struct nlmsghdr *unlh) |
| 194 | { | 208 | { |
| 195 | long tmo; | 209 | long tmo; |
| @@ -210,8 +224,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, | |||
| 210 | r->idiag_family = tw->tw_family; | 224 | r->idiag_family = tw->tw_family; |
| 211 | r->idiag_retrans = 0; | 225 | r->idiag_retrans = 0; |
| 212 | r->id.idiag_if = tw->tw_bound_dev_if; | 226 | r->id.idiag_if = tw->tw_bound_dev_if; |
| 213 | r->id.idiag_cookie[0] = (u32)(unsigned long)tw; | 227 | sock_diag_save_cookie(tw, r->id.idiag_cookie); |
| 214 | r->id.idiag_cookie[1] = (u32)(((unsigned long)tw >> 31) >> 1); | ||
| 215 | r->id.idiag_sport = tw->tw_sport; | 228 | r->id.idiag_sport = tw->tw_sport; |
| 216 | r->id.idiag_dport = tw->tw_dport; | 229 | r->id.idiag_dport = tw->tw_dport; |
| 217 | r->id.idiag_src[0] = tw->tw_rcv_saddr; | 230 | r->id.idiag_src[0] = tw->tw_rcv_saddr; |
| @@ -223,15 +236,13 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, | |||
| 223 | r->idiag_wqueue = 0; | 236 | r->idiag_wqueue = 0; |
| 224 | r->idiag_uid = 0; | 237 | r->idiag_uid = 0; |
| 225 | r->idiag_inode = 0; | 238 | r->idiag_inode = 0; |
| 226 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) | 239 | #if IS_ENABLED(CONFIG_IPV6) |
| 227 | if (tw->tw_family == AF_INET6) { | 240 | if (tw->tw_family == AF_INET6) { |
| 228 | const struct inet6_timewait_sock *tw6 = | 241 | const struct inet6_timewait_sock *tw6 = |
| 229 | inet6_twsk((struct sock *)tw); | 242 | inet6_twsk((struct sock *)tw); |
| 230 | 243 | ||
| 231 | ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, | 244 | *(struct in6_addr *)r->id.idiag_src = tw6->tw_v6_rcv_saddr; |
| 232 | &tw6->tw_v6_rcv_saddr); | 245 | *(struct in6_addr *)r->id.idiag_dst = tw6->tw_v6_daddr; |
| 233 | ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, | ||
| 234 | &tw6->tw_v6_daddr); | ||
| 235 | } | 246 | } |
| 236 | #endif | 247 | #endif |
| 237 | nlh->nlmsg_len = skb_tail_pointer(skb) - previous_tail; | 248 | nlh->nlmsg_len = skb_tail_pointer(skb) - previous_tail; |
| @@ -242,42 +253,31 @@ nlmsg_failure: | |||
| 242 | } | 253 | } |
| 243 | 254 | ||
| 244 | static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, | 255 | static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, |
| 245 | int ext, u32 pid, u32 seq, u16 nlmsg_flags, | 256 | struct inet_diag_req_v2 *r, u32 pid, u32 seq, u16 nlmsg_flags, |
| 246 | const struct nlmsghdr *unlh) | 257 | const struct nlmsghdr *unlh) |
| 247 | { | 258 | { |
| 248 | if (sk->sk_state == TCP_TIME_WAIT) | 259 | if (sk->sk_state == TCP_TIME_WAIT) |
| 249 | return inet_twsk_diag_fill((struct inet_timewait_sock *)sk, | 260 | return inet_twsk_diag_fill((struct inet_timewait_sock *)sk, |
| 250 | skb, ext, pid, seq, nlmsg_flags, | 261 | skb, r, pid, seq, nlmsg_flags, |
| 251 | unlh); | 262 | unlh); |
| 252 | return inet_csk_diag_fill(sk, skb, ext, pid, seq, nlmsg_flags, unlh); | 263 | return inet_csk_diag_fill(sk, skb, r, pid, seq, nlmsg_flags, unlh); |
| 253 | } | 264 | } |
| 254 | 265 | ||
| 255 | static int inet_diag_get_exact(struct sk_buff *in_skb, | 266 | int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, |
| 256 | const struct nlmsghdr *nlh) | 267 | const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req) |
| 257 | { | 268 | { |
| 258 | int err; | 269 | int err; |
| 259 | struct sock *sk; | 270 | struct sock *sk; |
| 260 | struct inet_diag_req *req = NLMSG_DATA(nlh); | ||
| 261 | struct sk_buff *rep; | 271 | struct sk_buff *rep; |
| 262 | struct inet_hashinfo *hashinfo; | ||
| 263 | const struct inet_diag_handler *handler; | ||
| 264 | 272 | ||
| 265 | handler = inet_diag_lock_handler(nlh->nlmsg_type); | ||
| 266 | if (IS_ERR(handler)) { | ||
| 267 | err = PTR_ERR(handler); | ||
| 268 | goto unlock; | ||
| 269 | } | ||
| 270 | |||
| 271 | hashinfo = handler->idiag_hashinfo; | ||
| 272 | err = -EINVAL; | 273 | err = -EINVAL; |
| 273 | 274 | if (req->sdiag_family == AF_INET) { | |
| 274 | if (req->idiag_family == AF_INET) { | ||
| 275 | sk = inet_lookup(&init_net, hashinfo, req->id.idiag_dst[0], | 275 | sk = inet_lookup(&init_net, hashinfo, req->id.idiag_dst[0], |
| 276 | req->id.idiag_dport, req->id.idiag_src[0], | 276 | req->id.idiag_dport, req->id.idiag_src[0], |
| 277 | req->id.idiag_sport, req->id.idiag_if); | 277 | req->id.idiag_sport, req->id.idiag_if); |
| 278 | } | 278 | } |
| 279 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) | 279 | #if IS_ENABLED(CONFIG_IPV6) |
| 280 | else if (req->idiag_family == AF_INET6) { | 280 | else if (req->sdiag_family == AF_INET6) { |
| 281 | sk = inet6_lookup(&init_net, hashinfo, | 281 | sk = inet6_lookup(&init_net, hashinfo, |
| 282 | (struct in6_addr *)req->id.idiag_dst, | 282 | (struct in6_addr *)req->id.idiag_dst, |
| 283 | req->id.idiag_dport, | 283 | req->id.idiag_dport, |
| @@ -287,29 +287,26 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, | |||
| 287 | } | 287 | } |
| 288 | #endif | 288 | #endif |
| 289 | else { | 289 | else { |
| 290 | goto unlock; | 290 | goto out_nosk; |
| 291 | } | 291 | } |
| 292 | 292 | ||
| 293 | err = -ENOENT; | 293 | err = -ENOENT; |
| 294 | if (sk == NULL) | 294 | if (sk == NULL) |
| 295 | goto unlock; | 295 | goto out_nosk; |
| 296 | 296 | ||
| 297 | err = -ESTALE; | 297 | err = sock_diag_check_cookie(sk, req->id.idiag_cookie); |
| 298 | if ((req->id.idiag_cookie[0] != INET_DIAG_NOCOOKIE || | 298 | if (err) |
| 299 | req->id.idiag_cookie[1] != INET_DIAG_NOCOOKIE) && | ||
| 300 | ((u32)(unsigned long)sk != req->id.idiag_cookie[0] || | ||
| 301 | (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.idiag_cookie[1])) | ||
| 302 | goto out; | 299 | goto out; |
| 303 | 300 | ||
| 304 | err = -ENOMEM; | 301 | err = -ENOMEM; |
| 305 | rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) + | 302 | rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) + |
| 306 | sizeof(struct inet_diag_meminfo) + | 303 | sizeof(struct inet_diag_meminfo) + |
| 307 | handler->idiag_info_size + 64)), | 304 | sizeof(struct tcp_info) + 64)), |
| 308 | GFP_KERNEL); | 305 | GFP_KERNEL); |
| 309 | if (!rep) | 306 | if (!rep) |
| 310 | goto out; | 307 | goto out; |
| 311 | 308 | ||
| 312 | err = sk_diag_fill(sk, rep, req->idiag_ext, | 309 | err = sk_diag_fill(sk, rep, req, |
| 313 | NETLINK_CB(in_skb).pid, | 310 | NETLINK_CB(in_skb).pid, |
| 314 | nlh->nlmsg_seq, 0, nlh); | 311 | nlh->nlmsg_seq, 0, nlh); |
| 315 | if (err < 0) { | 312 | if (err < 0) { |
| @@ -317,7 +314,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, | |||
| 317 | kfree_skb(rep); | 314 | kfree_skb(rep); |
| 318 | goto out; | 315 | goto out; |
| 319 | } | 316 | } |
| 320 | err = netlink_unicast(idiagnl, rep, NETLINK_CB(in_skb).pid, | 317 | err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid, |
| 321 | MSG_DONTWAIT); | 318 | MSG_DONTWAIT); |
| 322 | if (err > 0) | 319 | if (err > 0) |
| 323 | err = 0; | 320 | err = 0; |
| @@ -329,8 +326,25 @@ out: | |||
| 329 | else | 326 | else |
| 330 | sock_put(sk); | 327 | sock_put(sk); |
| 331 | } | 328 | } |
| 332 | unlock: | 329 | out_nosk: |
| 330 | return err; | ||
| 331 | } | ||
| 332 | EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); | ||
| 333 | |||
| 334 | static int inet_diag_get_exact(struct sk_buff *in_skb, | ||
| 335 | const struct nlmsghdr *nlh, | ||
| 336 | struct inet_diag_req_v2 *req) | ||
| 337 | { | ||
| 338 | const struct inet_diag_handler *handler; | ||
| 339 | int err; | ||
| 340 | |||
| 341 | handler = inet_diag_lock_handler(req->sdiag_protocol); | ||
| 342 | if (IS_ERR(handler)) | ||
| 343 | err = PTR_ERR(handler); | ||
| 344 | else | ||
| 345 | err = handler->dump_one(in_skb, nlh, req); | ||
| 333 | inet_diag_unlock_handler(handler); | 346 | inet_diag_unlock_handler(handler); |
| 347 | |||
| 334 | return err; | 348 | return err; |
| 335 | } | 349 | } |
| 336 | 350 | ||
| @@ -361,9 +375,12 @@ static int bitstring_match(const __be32 *a1, const __be32 *a2, int bits) | |||
| 361 | } | 375 | } |
| 362 | 376 | ||
| 363 | 377 | ||
| 364 | static int inet_diag_bc_run(const void *bc, int len, | 378 | static int inet_diag_bc_run(const struct nlattr *_bc, |
| 365 | const struct inet_diag_entry *entry) | 379 | const struct inet_diag_entry *entry) |
| 366 | { | 380 | { |
| 381 | const void *bc = nla_data(_bc); | ||
| 382 | int len = nla_len(_bc); | ||
| 383 | |||
| 367 | while (len > 0) { | 384 | while (len > 0) { |
| 368 | int yes = 1; | 385 | int yes = 1; |
| 369 | const struct inet_diag_bc_op *op = bc; | 386 | const struct inet_diag_bc_op *op = bc; |
| @@ -437,6 +454,35 @@ static int inet_diag_bc_run(const void *bc, int len, | |||
| 437 | return len == 0; | 454 | return len == 0; |
| 438 | } | 455 | } |
| 439 | 456 | ||
| 457 | int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk) | ||
| 458 | { | ||
| 459 | struct inet_diag_entry entry; | ||
| 460 | struct inet_sock *inet = inet_sk(sk); | ||
| 461 | |||
| 462 | if (bc == NULL) | ||
| 463 | return 1; | ||
| 464 | |||
| 465 | entry.family = sk->sk_family; | ||
| 466 | #if IS_ENABLED(CONFIG_IPV6) | ||
| 467 | if (entry.family == AF_INET6) { | ||
| 468 | struct ipv6_pinfo *np = inet6_sk(sk); | ||
| 469 | |||
| 470 | entry.saddr = np->rcv_saddr.s6_addr32; | ||
| 471 | entry.daddr = np->daddr.s6_addr32; | ||
| 472 | } else | ||
| 473 | #endif | ||
| 474 | { | ||
| 475 | entry.saddr = &inet->inet_rcv_saddr; | ||
| 476 | entry.daddr = &inet->inet_daddr; | ||
| 477 | } | ||
| 478 | entry.sport = inet->inet_num; | ||
| 479 | entry.dport = ntohs(inet->inet_dport); | ||
| 480 | entry.userlocks = sk->sk_userlocks; | ||
| 481 | |||
| 482 | return inet_diag_bc_run(bc, &entry); | ||
| 483 | } | ||
| 484 | EXPORT_SYMBOL_GPL(inet_diag_bc_sk); | ||
| 485 | |||
| 440 | static int valid_cc(const void *bc, int len, int cc) | 486 | static int valid_cc(const void *bc, int len, int cc) |
| 441 | { | 487 | { |
| 442 | while (len >= 0) { | 488 | while (len >= 0) { |
| @@ -493,57 +539,29 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) | |||
| 493 | 539 | ||
| 494 | static int inet_csk_diag_dump(struct sock *sk, | 540 | static int inet_csk_diag_dump(struct sock *sk, |
| 495 | struct sk_buff *skb, | 541 | struct sk_buff *skb, |
| 496 | struct netlink_callback *cb) | 542 | struct netlink_callback *cb, |
| 543 | struct inet_diag_req_v2 *r, | ||
| 544 | const struct nlattr *bc) | ||
| 497 | { | 545 | { |
| 498 | struct inet_diag_req *r = NLMSG_DATA(cb->nlh); | 546 | if (!inet_diag_bc_sk(bc, sk)) |
| 499 | 547 | return 0; | |
| 500 | if (nlmsg_attrlen(cb->nlh, sizeof(*r))) { | ||
| 501 | struct inet_diag_entry entry; | ||
| 502 | const struct nlattr *bc = nlmsg_find_attr(cb->nlh, | ||
| 503 | sizeof(*r), | ||
| 504 | INET_DIAG_REQ_BYTECODE); | ||
| 505 | struct inet_sock *inet = inet_sk(sk); | ||
| 506 | |||
| 507 | entry.family = sk->sk_family; | ||
| 508 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) | ||
| 509 | if (entry.family == AF_INET6) { | ||
| 510 | struct ipv6_pinfo *np = inet6_sk(sk); | ||
| 511 | |||
| 512 | entry.saddr = np->rcv_saddr.s6_addr32; | ||
| 513 | entry.daddr = np->daddr.s6_addr32; | ||
| 514 | } else | ||
| 515 | #endif | ||
| 516 | { | ||
| 517 | entry.saddr = &inet->inet_rcv_saddr; | ||
| 518 | entry.daddr = &inet->inet_daddr; | ||
| 519 | } | ||
| 520 | entry.sport = inet->inet_num; | ||
| 521 | entry.dport = ntohs(inet->inet_dport); | ||
| 522 | entry.userlocks = sk->sk_userlocks; | ||
| 523 | 548 | ||
| 524 | if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry)) | 549 | return inet_csk_diag_fill(sk, skb, r, |
| 525 | return 0; | ||
| 526 | } | ||
| 527 | |||
| 528 | return inet_csk_diag_fill(sk, skb, r->idiag_ext, | ||
| 529 | NETLINK_CB(cb->skb).pid, | 550 | NETLINK_CB(cb->skb).pid, |
| 530 | cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); | 551 | cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); |
| 531 | } | 552 | } |
| 532 | 553 | ||
| 533 | static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, | 554 | static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, |
| 534 | struct sk_buff *skb, | 555 | struct sk_buff *skb, |
| 535 | struct netlink_callback *cb) | 556 | struct netlink_callback *cb, |
| 557 | struct inet_diag_req_v2 *r, | ||
| 558 | const struct nlattr *bc) | ||
| 536 | { | 559 | { |
| 537 | struct inet_diag_req *r = NLMSG_DATA(cb->nlh); | 560 | if (bc != NULL) { |
| 538 | |||
| 539 | if (nlmsg_attrlen(cb->nlh, sizeof(*r))) { | ||
| 540 | struct inet_diag_entry entry; | 561 | struct inet_diag_entry entry; |
| 541 | const struct nlattr *bc = nlmsg_find_attr(cb->nlh, | ||
| 542 | sizeof(*r), | ||
| 543 | INET_DIAG_REQ_BYTECODE); | ||
| 544 | 562 | ||
| 545 | entry.family = tw->tw_family; | 563 | entry.family = tw->tw_family; |
| 546 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) | 564 | #if IS_ENABLED(CONFIG_IPV6) |
| 547 | if (tw->tw_family == AF_INET6) { | 565 | if (tw->tw_family == AF_INET6) { |
| 548 | struct inet6_timewait_sock *tw6 = | 566 | struct inet6_timewait_sock *tw6 = |
| 549 | inet6_twsk((struct sock *)tw); | 567 | inet6_twsk((struct sock *)tw); |
| @@ -559,11 +577,11 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, | |||
| 559 | entry.dport = ntohs(tw->tw_dport); | 577 | entry.dport = ntohs(tw->tw_dport); |
| 560 | entry.userlocks = 0; | 578 | entry.userlocks = 0; |
| 561 | 579 | ||
| 562 | if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry)) | 580 | if (!inet_diag_bc_run(bc, &entry)) |
| 563 | return 0; | 581 | return 0; |
| 564 | } | 582 | } |
| 565 | 583 | ||
| 566 | return inet_twsk_diag_fill(tw, skb, r->idiag_ext, | 584 | return inet_twsk_diag_fill(tw, skb, r, |
| 567 | NETLINK_CB(cb->skb).pid, | 585 | NETLINK_CB(cb->skb).pid, |
| 568 | cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); | 586 | cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); |
| 569 | } | 587 | } |
| @@ -589,8 +607,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, | |||
| 589 | r->idiag_retrans = req->retrans; | 607 | r->idiag_retrans = req->retrans; |
| 590 | 608 | ||
| 591 | r->id.idiag_if = sk->sk_bound_dev_if; | 609 | r->id.idiag_if = sk->sk_bound_dev_if; |
| 592 | r->id.idiag_cookie[0] = (u32)(unsigned long)req; | 610 | sock_diag_save_cookie(req, r->id.idiag_cookie); |
| 593 | r->id.idiag_cookie[1] = (u32)(((unsigned long)req >> 31) >> 1); | ||
| 594 | 611 | ||
| 595 | tmo = req->expires - jiffies; | 612 | tmo = req->expires - jiffies; |
| 596 | if (tmo < 0) | 613 | if (tmo < 0) |
| @@ -605,12 +622,10 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, | |||
| 605 | r->idiag_wqueue = 0; | 622 | r->idiag_wqueue = 0; |
| 606 | r->idiag_uid = sock_i_uid(sk); | 623 | r->idiag_uid = sock_i_uid(sk); |
| 607 | r->idiag_inode = 0; | 624 | r->idiag_inode = 0; |
| 608 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) | 625 | #if IS_ENABLED(CONFIG_IPV6) |
| 609 | if (r->idiag_family == AF_INET6) { | 626 | if (r->idiag_family == AF_INET6) { |
| 610 | ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, | 627 | *(struct in6_addr *)r->id.idiag_src = inet6_rsk(req)->loc_addr; |
| 611 | &inet6_rsk(req)->loc_addr); | 628 | *(struct in6_addr *)r->id.idiag_dst = inet6_rsk(req)->rmt_addr; |
| 612 | ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, | ||
| 613 | &inet6_rsk(req)->rmt_addr); | ||
| 614 | } | 629 | } |
| 615 | #endif | 630 | #endif |
| 616 | nlh->nlmsg_len = skb_tail_pointer(skb) - b; | 631 | nlh->nlmsg_len = skb_tail_pointer(skb) - b; |
| @@ -623,13 +638,13 @@ nlmsg_failure: | |||
| 623 | } | 638 | } |
| 624 | 639 | ||
| 625 | static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, | 640 | static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, |
| 626 | struct netlink_callback *cb) | 641 | struct netlink_callback *cb, |
| 642 | struct inet_diag_req_v2 *r, | ||
| 643 | const struct nlattr *bc) | ||
| 627 | { | 644 | { |
| 628 | struct inet_diag_entry entry; | 645 | struct inet_diag_entry entry; |
| 629 | struct inet_diag_req *r = NLMSG_DATA(cb->nlh); | ||
| 630 | struct inet_connection_sock *icsk = inet_csk(sk); | 646 | struct inet_connection_sock *icsk = inet_csk(sk); |
| 631 | struct listen_sock *lopt; | 647 | struct listen_sock *lopt; |
| 632 | const struct nlattr *bc = NULL; | ||
| 633 | struct inet_sock *inet = inet_sk(sk); | 648 | struct inet_sock *inet = inet_sk(sk); |
| 634 | int j, s_j; | 649 | int j, s_j; |
| 635 | int reqnum, s_reqnum; | 650 | int reqnum, s_reqnum; |
| @@ -649,9 +664,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, | |||
| 649 | if (!lopt || !lopt->qlen) | 664 | if (!lopt || !lopt->qlen) |
| 650 | goto out; | 665 | goto out; |
| 651 | 666 | ||
| 652 | if (nlmsg_attrlen(cb->nlh, sizeof(*r))) { | 667 | if (bc != NULL) { |
| 653 | bc = nlmsg_find_attr(cb->nlh, sizeof(*r), | ||
| 654 | INET_DIAG_REQ_BYTECODE); | ||
| 655 | entry.sport = inet->inet_num; | 668 | entry.sport = inet->inet_num; |
| 656 | entry.userlocks = sk->sk_userlocks; | 669 | entry.userlocks = sk->sk_userlocks; |
| 657 | } | 670 | } |
| @@ -671,21 +684,20 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, | |||
| 671 | 684 | ||
| 672 | if (bc) { | 685 | if (bc) { |
| 673 | entry.saddr = | 686 | entry.saddr = |
| 674 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) | 687 | #if IS_ENABLED(CONFIG_IPV6) |
| 675 | (entry.family == AF_INET6) ? | 688 | (entry.family == AF_INET6) ? |
| 676 | inet6_rsk(req)->loc_addr.s6_addr32 : | 689 | inet6_rsk(req)->loc_addr.s6_addr32 : |
| 677 | #endif | 690 | #endif |
| 678 | &ireq->loc_addr; | 691 | &ireq->loc_addr; |
| 679 | entry.daddr = | 692 | entry.daddr = |
| 680 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) | 693 | #if IS_ENABLED(CONFIG_IPV6) |
| 681 | (entry.family == AF_INET6) ? | 694 | (entry.family == AF_INET6) ? |
| 682 | inet6_rsk(req)->rmt_addr.s6_addr32 : | 695 | inet6_rsk(req)->rmt_addr.s6_addr32 : |
| 683 | #endif | 696 | #endif |
| 684 | &ireq->rmt_addr; | 697 | &ireq->rmt_addr; |
| 685 | entry.dport = ntohs(ireq->rmt_port); | 698 | entry.dport = ntohs(ireq->rmt_port); |
| 686 | 699 | ||
| 687 | if (!inet_diag_bc_run(nla_data(bc), | 700 | if (!inet_diag_bc_run(bc, &entry)) |
| 688 | nla_len(bc), &entry)) | ||
| 689 | continue; | 701 | continue; |
| 690 | } | 702 | } |
| 691 | 703 | ||
| @@ -708,19 +720,11 @@ out: | |||
| 708 | return err; | 720 | return err; |
| 709 | } | 721 | } |
| 710 | 722 | ||
| 711 | static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) | 723 | void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, |
| 724 | struct netlink_callback *cb, struct inet_diag_req_v2 *r, struct nlattr *bc) | ||
| 712 | { | 725 | { |
| 713 | int i, num; | 726 | int i, num; |
| 714 | int s_i, s_num; | 727 | int s_i, s_num; |
| 715 | struct inet_diag_req *r = NLMSG_DATA(cb->nlh); | ||
| 716 | const struct inet_diag_handler *handler; | ||
| 717 | struct inet_hashinfo *hashinfo; | ||
| 718 | |||
| 719 | handler = inet_diag_lock_handler(cb->nlh->nlmsg_type); | ||
| 720 | if (IS_ERR(handler)) | ||
| 721 | goto unlock; | ||
| 722 | |||
| 723 | hashinfo = handler->idiag_hashinfo; | ||
| 724 | 728 | ||
| 725 | s_i = cb->args[1]; | 729 | s_i = cb->args[1]; |
| 726 | s_num = num = cb->args[2]; | 730 | s_num = num = cb->args[2]; |
| @@ -745,6 +749,10 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
| 745 | continue; | 749 | continue; |
| 746 | } | 750 | } |
| 747 | 751 | ||
| 752 | if (r->sdiag_family != AF_UNSPEC && | ||
| 753 | sk->sk_family != r->sdiag_family) | ||
| 754 | goto next_listen; | ||
| 755 | |||
| 748 | if (r->id.idiag_sport != inet->inet_sport && | 756 | if (r->id.idiag_sport != inet->inet_sport && |
| 749 | r->id.idiag_sport) | 757 | r->id.idiag_sport) |
| 750 | goto next_listen; | 758 | goto next_listen; |
| @@ -754,7 +762,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
| 754 | cb->args[3] > 0) | 762 | cb->args[3] > 0) |
| 755 | goto syn_recv; | 763 | goto syn_recv; |
| 756 | 764 | ||
| 757 | if (inet_csk_diag_dump(sk, skb, cb) < 0) { | 765 | if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) { |
| 758 | spin_unlock_bh(&ilb->lock); | 766 | spin_unlock_bh(&ilb->lock); |
| 759 | goto done; | 767 | goto done; |
| 760 | } | 768 | } |
| @@ -763,7 +771,7 @@ syn_recv: | |||
| 763 | if (!(r->idiag_states & TCPF_SYN_RECV)) | 771 | if (!(r->idiag_states & TCPF_SYN_RECV)) |
| 764 | goto next_listen; | 772 | goto next_listen; |
| 765 | 773 | ||
| 766 | if (inet_diag_dump_reqs(skb, sk, cb) < 0) { | 774 | if (inet_diag_dump_reqs(skb, sk, cb, r, bc) < 0) { |
| 767 | spin_unlock_bh(&ilb->lock); | 775 | spin_unlock_bh(&ilb->lock); |
| 768 | goto done; | 776 | goto done; |
| 769 | } | 777 | } |
| @@ -785,7 +793,7 @@ skip_listen_ht: | |||
| 785 | } | 793 | } |
| 786 | 794 | ||
| 787 | if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV))) | 795 | if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV))) |
| 788 | goto unlock; | 796 | goto out; |
| 789 | 797 | ||
| 790 | for (i = s_i; i <= hashinfo->ehash_mask; i++) { | 798 | for (i = s_i; i <= hashinfo->ehash_mask; i++) { |
| 791 | struct inet_ehash_bucket *head = &hashinfo->ehash[i]; | 799 | struct inet_ehash_bucket *head = &hashinfo->ehash[i]; |
| @@ -810,13 +818,16 @@ skip_listen_ht: | |||
| 810 | goto next_normal; | 818 | goto next_normal; |
| 811 | if (!(r->idiag_states & (1 << sk->sk_state))) | 819 | if (!(r->idiag_states & (1 << sk->sk_state))) |
| 812 | goto next_normal; | 820 | goto next_normal; |
| 821 | if (r->sdiag_family != AF_UNSPEC && | ||
| 822 | sk->sk_family != r->sdiag_family) | ||
| 823 | goto next_normal; | ||
| 813 | if (r->id.idiag_sport != inet->inet_sport && | 824 | if (r->id.idiag_sport != inet->inet_sport && |
| 814 | r->id.idiag_sport) | 825 | r->id.idiag_sport) |
| 815 | goto next_normal; | 826 | goto next_normal; |
| 816 | if (r->id.idiag_dport != inet->inet_dport && | 827 | if (r->id.idiag_dport != inet->inet_dport && |
| 817 | r->id.idiag_dport) | 828 | r->id.idiag_dport) |
| 818 | goto next_normal; | 829 | goto next_normal; |
| 819 | if (inet_csk_diag_dump(sk, skb, cb) < 0) { | 830 | if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) { |
| 820 | spin_unlock_bh(lock); | 831 | spin_unlock_bh(lock); |
| 821 | goto done; | 832 | goto done; |
| 822 | } | 833 | } |
| @@ -832,13 +843,16 @@ next_normal: | |||
| 832 | 843 | ||
| 833 | if (num < s_num) | 844 | if (num < s_num) |
| 834 | goto next_dying; | 845 | goto next_dying; |
| 846 | if (r->sdiag_family != AF_UNSPEC && | ||
| 847 | tw->tw_family != r->sdiag_family) | ||
| 848 | goto next_dying; | ||
| 835 | if (r->id.idiag_sport != tw->tw_sport && | 849 | if (r->id.idiag_sport != tw->tw_sport && |
| 836 | r->id.idiag_sport) | 850 | r->id.idiag_sport) |
| 837 | goto next_dying; | 851 | goto next_dying; |
| 838 | if (r->id.idiag_dport != tw->tw_dport && | 852 | if (r->id.idiag_dport != tw->tw_dport && |
| 839 | r->id.idiag_dport) | 853 | r->id.idiag_dport) |
| 840 | goto next_dying; | 854 | goto next_dying; |
| 841 | if (inet_twsk_diag_dump(tw, skb, cb) < 0) { | 855 | if (inet_twsk_diag_dump(tw, skb, cb, r, bc) < 0) { |
| 842 | spin_unlock_bh(lock); | 856 | spin_unlock_bh(lock); |
| 843 | goto done; | 857 | goto done; |
| 844 | } | 858 | } |
| @@ -852,12 +866,82 @@ next_dying: | |||
| 852 | done: | 866 | done: |
| 853 | cb->args[1] = i; | 867 | cb->args[1] = i; |
| 854 | cb->args[2] = num; | 868 | cb->args[2] = num; |
| 855 | unlock: | 869 | out: |
| 870 | ; | ||
| 871 | } | ||
| 872 | EXPORT_SYMBOL_GPL(inet_diag_dump_icsk); | ||
| 873 | |||
| 874 | static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, | ||
| 875 | struct inet_diag_req_v2 *r, struct nlattr *bc) | ||
| 876 | { | ||
| 877 | const struct inet_diag_handler *handler; | ||
| 878 | |||
| 879 | handler = inet_diag_lock_handler(r->sdiag_protocol); | ||
| 880 | if (!IS_ERR(handler)) | ||
| 881 | handler->dump(skb, cb, r, bc); | ||
| 856 | inet_diag_unlock_handler(handler); | 882 | inet_diag_unlock_handler(handler); |
| 883 | |||
| 857 | return skb->len; | 884 | return skb->len; |
| 858 | } | 885 | } |
| 859 | 886 | ||
| 860 | static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | 887 | static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) |
| 888 | { | ||
| 889 | struct nlattr *bc = NULL; | ||
| 890 | int hdrlen = sizeof(struct inet_diag_req_v2); | ||
| 891 | |||
| 892 | if (nlmsg_attrlen(cb->nlh, hdrlen)) | ||
| 893 | bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE); | ||
| 894 | |||
| 895 | return __inet_diag_dump(skb, cb, (struct inet_diag_req_v2 *)NLMSG_DATA(cb->nlh), bc); | ||
| 896 | } | ||
| 897 | |||
| 898 | static inline int inet_diag_type2proto(int type) | ||
| 899 | { | ||
| 900 | switch (type) { | ||
| 901 | case TCPDIAG_GETSOCK: | ||
| 902 | return IPPROTO_TCP; | ||
| 903 | case DCCPDIAG_GETSOCK: | ||
| 904 | return IPPROTO_DCCP; | ||
| 905 | default: | ||
| 906 | return 0; | ||
| 907 | } | ||
| 908 | } | ||
| 909 | |||
| 910 | static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *cb) | ||
| 911 | { | ||
| 912 | struct inet_diag_req *rc = NLMSG_DATA(cb->nlh); | ||
| 913 | struct inet_diag_req_v2 req; | ||
| 914 | struct nlattr *bc = NULL; | ||
| 915 | int hdrlen = sizeof(struct inet_diag_req); | ||
| 916 | |||
| 917 | req.sdiag_family = AF_UNSPEC; /* compatibility */ | ||
| 918 | req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type); | ||
| 919 | req.idiag_ext = rc->idiag_ext; | ||
| 920 | req.idiag_states = rc->idiag_states; | ||
| 921 | req.id = rc->id; | ||
| 922 | |||
| 923 | if (nlmsg_attrlen(cb->nlh, hdrlen)) | ||
| 924 | bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE); | ||
| 925 | |||
| 926 | return __inet_diag_dump(skb, cb, &req, bc); | ||
| 927 | } | ||
| 928 | |||
| 929 | static int inet_diag_get_exact_compat(struct sk_buff *in_skb, | ||
| 930 | const struct nlmsghdr *nlh) | ||
| 931 | { | ||
| 932 | struct inet_diag_req *rc = NLMSG_DATA(nlh); | ||
| 933 | struct inet_diag_req_v2 req; | ||
| 934 | |||
| 935 | req.sdiag_family = rc->idiag_family; | ||
| 936 | req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type); | ||
| 937 | req.idiag_ext = rc->idiag_ext; | ||
| 938 | req.idiag_states = rc->idiag_states; | ||
| 939 | req.id = rc->id; | ||
| 940 | |||
| 941 | return inet_diag_get_exact(in_skb, nlh, &req); | ||
| 942 | } | ||
| 943 | |||
| 944 | static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) | ||
| 861 | { | 945 | { |
| 862 | int hdrlen = sizeof(struct inet_diag_req); | 946 | int hdrlen = sizeof(struct inet_diag_req); |
| 863 | 947 | ||
| @@ -877,28 +961,54 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
| 877 | return -EINVAL; | 961 | return -EINVAL; |
| 878 | } | 962 | } |
| 879 | 963 | ||
| 880 | return netlink_dump_start(idiagnl, skb, nlh, | 964 | return netlink_dump_start(sock_diag_nlsk, skb, nlh, |
| 881 | inet_diag_dump, NULL, 0); | 965 | inet_diag_dump_compat, NULL, 0); |
| 882 | } | 966 | } |
| 883 | 967 | ||
| 884 | return inet_diag_get_exact(skb, nlh); | 968 | return inet_diag_get_exact_compat(skb, nlh); |
| 885 | } | 969 | } |
| 886 | 970 | ||
| 887 | static DEFINE_MUTEX(inet_diag_mutex); | 971 | static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) |
| 888 | |||
| 889 | static void inet_diag_rcv(struct sk_buff *skb) | ||
| 890 | { | 972 | { |
| 891 | mutex_lock(&inet_diag_mutex); | 973 | int hdrlen = sizeof(struct inet_diag_req_v2); |
| 892 | netlink_rcv_skb(skb, &inet_diag_rcv_msg); | 974 | |
| 893 | mutex_unlock(&inet_diag_mutex); | 975 | if (nlmsg_len(h) < hdrlen) |
| 976 | return -EINVAL; | ||
| 977 | |||
| 978 | if (h->nlmsg_flags & NLM_F_DUMP) { | ||
| 979 | if (nlmsg_attrlen(h, hdrlen)) { | ||
| 980 | struct nlattr *attr; | ||
| 981 | attr = nlmsg_find_attr(h, hdrlen, | ||
| 982 | INET_DIAG_REQ_BYTECODE); | ||
| 983 | if (attr == NULL || | ||
| 984 | nla_len(attr) < sizeof(struct inet_diag_bc_op) || | ||
| 985 | inet_diag_bc_audit(nla_data(attr), nla_len(attr))) | ||
| 986 | return -EINVAL; | ||
| 987 | } | ||
| 988 | |||
| 989 | return netlink_dump_start(sock_diag_nlsk, skb, h, | ||
| 990 | inet_diag_dump, NULL, 0); | ||
| 991 | } | ||
| 992 | |||
| 993 | return inet_diag_get_exact(skb, h, (struct inet_diag_req_v2 *)NLMSG_DATA(h)); | ||
| 894 | } | 994 | } |
| 895 | 995 | ||
| 996 | static struct sock_diag_handler inet_diag_handler = { | ||
| 997 | .family = AF_INET, | ||
| 998 | .dump = inet_diag_handler_dump, | ||
| 999 | }; | ||
| 1000 | |||
| 1001 | static struct sock_diag_handler inet6_diag_handler = { | ||
| 1002 | .family = AF_INET6, | ||
| 1003 | .dump = inet_diag_handler_dump, | ||
| 1004 | }; | ||
| 1005 | |||
| 896 | int inet_diag_register(const struct inet_diag_handler *h) | 1006 | int inet_diag_register(const struct inet_diag_handler *h) |
| 897 | { | 1007 | { |
| 898 | const __u16 type = h->idiag_type; | 1008 | const __u16 type = h->idiag_type; |
| 899 | int err = -EINVAL; | 1009 | int err = -EINVAL; |
| 900 | 1010 | ||
| 901 | if (type >= INET_DIAG_GETSOCK_MAX) | 1011 | if (type >= IPPROTO_MAX) |
| 902 | goto out; | 1012 | goto out; |
| 903 | 1013 | ||
| 904 | mutex_lock(&inet_diag_table_mutex); | 1014 | mutex_lock(&inet_diag_table_mutex); |
| @@ -917,7 +1027,7 @@ void inet_diag_unregister(const struct inet_diag_handler *h) | |||
| 917 | { | 1027 | { |
| 918 | const __u16 type = h->idiag_type; | 1028 | const __u16 type = h->idiag_type; |
| 919 | 1029 | ||
| 920 | if (type >= INET_DIAG_GETSOCK_MAX) | 1030 | if (type >= IPPROTO_MAX) |
| 921 | return; | 1031 | return; |
| 922 | 1032 | ||
| 923 | mutex_lock(&inet_diag_table_mutex); | 1033 | mutex_lock(&inet_diag_table_mutex); |
| @@ -928,7 +1038,7 @@ EXPORT_SYMBOL_GPL(inet_diag_unregister); | |||
| 928 | 1038 | ||
| 929 | static int __init inet_diag_init(void) | 1039 | static int __init inet_diag_init(void) |
| 930 | { | 1040 | { |
| 931 | const int inet_diag_table_size = (INET_DIAG_GETSOCK_MAX * | 1041 | const int inet_diag_table_size = (IPPROTO_MAX * |
| 932 | sizeof(struct inet_diag_handler *)); | 1042 | sizeof(struct inet_diag_handler *)); |
| 933 | int err = -ENOMEM; | 1043 | int err = -ENOMEM; |
| 934 | 1044 | ||
| @@ -936,25 +1046,35 @@ static int __init inet_diag_init(void) | |||
| 936 | if (!inet_diag_table) | 1046 | if (!inet_diag_table) |
| 937 | goto out; | 1047 | goto out; |
| 938 | 1048 | ||
| 939 | idiagnl = netlink_kernel_create(&init_net, NETLINK_INET_DIAG, 0, | 1049 | err = sock_diag_register(&inet_diag_handler); |
| 940 | inet_diag_rcv, NULL, THIS_MODULE); | 1050 | if (err) |
| 941 | if (idiagnl == NULL) | 1051 | goto out_free_nl; |
| 942 | goto out_free_table; | 1052 | |
| 943 | err = 0; | 1053 | err = sock_diag_register(&inet6_diag_handler); |
| 1054 | if (err) | ||
| 1055 | goto out_free_inet; | ||
| 1056 | |||
| 1057 | sock_diag_register_inet_compat(inet_diag_rcv_msg_compat); | ||
| 944 | out: | 1058 | out: |
| 945 | return err; | 1059 | return err; |
| 946 | out_free_table: | 1060 | |
| 1061 | out_free_inet: | ||
| 1062 | sock_diag_unregister(&inet_diag_handler); | ||
| 1063 | out_free_nl: | ||
| 947 | kfree(inet_diag_table); | 1064 | kfree(inet_diag_table); |
| 948 | goto out; | 1065 | goto out; |
| 949 | } | 1066 | } |
| 950 | 1067 | ||
| 951 | static void __exit inet_diag_exit(void) | 1068 | static void __exit inet_diag_exit(void) |
| 952 | { | 1069 | { |
| 953 | netlink_kernel_release(idiagnl); | 1070 | sock_diag_unregister(&inet6_diag_handler); |
| 1071 | sock_diag_unregister(&inet_diag_handler); | ||
| 1072 | sock_diag_unregister_inet_compat(inet_diag_rcv_msg_compat); | ||
| 954 | kfree(inet_diag_table); | 1073 | kfree(inet_diag_table); |
| 955 | } | 1074 | } |
| 956 | 1075 | ||
| 957 | module_init(inet_diag_init); | 1076 | module_init(inet_diag_init); |
| 958 | module_exit(inet_diag_exit); | 1077 | module_exit(inet_diag_exit); |
| 959 | MODULE_LICENSE("GPL"); | 1078 | MODULE_LICENSE("GPL"); |
| 960 | MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_INET_DIAG); | 1079 | MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2 /* AF_INET */); |
| 1080 | MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10 /* AF_INET6 */); | ||
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 86f13c67ea85..d4d61b694fab 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include <linux/kernel.h> | 17 | #include <linux/kernel.h> |
| 18 | #include <linux/mm.h> | 18 | #include <linux/mm.h> |
| 19 | #include <linux/net.h> | 19 | #include <linux/net.h> |
| 20 | #include <linux/workqueue.h> | ||
| 20 | #include <net/ip.h> | 21 | #include <net/ip.h> |
| 21 | #include <net/inetpeer.h> | 22 | #include <net/inetpeer.h> |
| 22 | #include <net/secure_seq.h> | 23 | #include <net/secure_seq.h> |
| @@ -66,6 +67,11 @@ | |||
| 66 | 67 | ||
| 67 | static struct kmem_cache *peer_cachep __read_mostly; | 68 | static struct kmem_cache *peer_cachep __read_mostly; |
| 68 | 69 | ||
| 70 | static LIST_HEAD(gc_list); | ||
| 71 | static const int gc_delay = 60 * HZ; | ||
| 72 | static struct delayed_work gc_work; | ||
| 73 | static DEFINE_SPINLOCK(gc_lock); | ||
| 74 | |||
| 69 | #define node_height(x) x->avl_height | 75 | #define node_height(x) x->avl_height |
| 70 | 76 | ||
| 71 | #define peer_avl_empty ((struct inet_peer *)&peer_fake_node) | 77 | #define peer_avl_empty ((struct inet_peer *)&peer_fake_node) |
| @@ -102,6 +108,50 @@ int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries m | |||
| 102 | int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */ | 108 | int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */ |
| 103 | int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */ | 109 | int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */ |
| 104 | 110 | ||
| 111 | static void inetpeer_gc_worker(struct work_struct *work) | ||
| 112 | { | ||
| 113 | struct inet_peer *p, *n; | ||
| 114 | LIST_HEAD(list); | ||
| 115 | |||
| 116 | spin_lock_bh(&gc_lock); | ||
| 117 | list_replace_init(&gc_list, &list); | ||
| 118 | spin_unlock_bh(&gc_lock); | ||
| 119 | |||
| 120 | if (list_empty(&list)) | ||
| 121 | return; | ||
| 122 | |||
| 123 | list_for_each_entry_safe(p, n, &list, gc_list) { | ||
| 124 | |||
| 125 | if(need_resched()) | ||
| 126 | cond_resched(); | ||
| 127 | |||
| 128 | if (p->avl_left != peer_avl_empty) { | ||
| 129 | list_add_tail(&p->avl_left->gc_list, &list); | ||
| 130 | p->avl_left = peer_avl_empty; | ||
| 131 | } | ||
| 132 | |||
| 133 | if (p->avl_right != peer_avl_empty) { | ||
| 134 | list_add_tail(&p->avl_right->gc_list, &list); | ||
| 135 | p->avl_right = peer_avl_empty; | ||
| 136 | } | ||
| 137 | |||
| 138 | n = list_entry(p->gc_list.next, struct inet_peer, gc_list); | ||
| 139 | |||
| 140 | if (!atomic_read(&p->refcnt)) { | ||
| 141 | list_del(&p->gc_list); | ||
| 142 | kmem_cache_free(peer_cachep, p); | ||
| 143 | } | ||
| 144 | } | ||
| 145 | |||
| 146 | if (list_empty(&list)) | ||
| 147 | return; | ||
| 148 | |||
| 149 | spin_lock_bh(&gc_lock); | ||
| 150 | list_splice(&list, &gc_list); | ||
| 151 | spin_unlock_bh(&gc_lock); | ||
| 152 | |||
| 153 | schedule_delayed_work(&gc_work, gc_delay); | ||
| 154 | } | ||
| 105 | 155 | ||
| 106 | /* Called from ip_output.c:ip_init */ | 156 | /* Called from ip_output.c:ip_init */ |
| 107 | void __init inet_initpeers(void) | 157 | void __init inet_initpeers(void) |
| @@ -126,6 +176,7 @@ void __init inet_initpeers(void) | |||
| 126 | 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, | 176 | 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, |
| 127 | NULL); | 177 | NULL); |
| 128 | 178 | ||
| 179 | INIT_DELAYED_WORK_DEFERRABLE(&gc_work, inetpeer_gc_worker); | ||
| 129 | } | 180 | } |
| 130 | 181 | ||
| 131 | static int addr_compare(const struct inetpeer_addr *a, | 182 | static int addr_compare(const struct inetpeer_addr *a, |
| @@ -136,7 +187,7 @@ static int addr_compare(const struct inetpeer_addr *a, | |||
| 136 | for (i = 0; i < n; i++) { | 187 | for (i = 0; i < n; i++) { |
| 137 | if (a->addr.a6[i] == b->addr.a6[i]) | 188 | if (a->addr.a6[i] == b->addr.a6[i]) |
| 138 | continue; | 189 | continue; |
| 139 | if (a->addr.a6[i] < b->addr.a6[i]) | 190 | if ((__force u32)a->addr.a6[i] < (__force u32)b->addr.a6[i]) |
| 140 | return -1; | 191 | return -1; |
| 141 | return 1; | 192 | return 1; |
| 142 | } | 193 | } |
| @@ -448,7 +499,7 @@ relookup: | |||
| 448 | p->pmtu_expires = 0; | 499 | p->pmtu_expires = 0; |
| 449 | p->pmtu_orig = 0; | 500 | p->pmtu_orig = 0; |
| 450 | memset(&p->redirect_learned, 0, sizeof(p->redirect_learned)); | 501 | memset(&p->redirect_learned, 0, sizeof(p->redirect_learned)); |
| 451 | 502 | INIT_LIST_HEAD(&p->gc_list); | |
| 452 | 503 | ||
| 453 | /* Link the node. */ | 504 | /* Link the node. */ |
| 454 | link_to_pool(p, base); | 505 | link_to_pool(p, base); |
| @@ -508,3 +559,30 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout) | |||
| 508 | return rc; | 559 | return rc; |
| 509 | } | 560 | } |
| 510 | EXPORT_SYMBOL(inet_peer_xrlim_allow); | 561 | EXPORT_SYMBOL(inet_peer_xrlim_allow); |
| 562 | |||
| 563 | void inetpeer_invalidate_tree(int family) | ||
| 564 | { | ||
| 565 | struct inet_peer *old, *new, *prev; | ||
| 566 | struct inet_peer_base *base = family_to_base(family); | ||
| 567 | |||
| 568 | write_seqlock_bh(&base->lock); | ||
| 569 | |||
| 570 | old = base->root; | ||
| 571 | if (old == peer_avl_empty_rcu) | ||
| 572 | goto out; | ||
| 573 | |||
| 574 | new = peer_avl_empty_rcu; | ||
| 575 | |||
| 576 | prev = cmpxchg(&base->root, old, new); | ||
| 577 | if (prev == old) { | ||
| 578 | base->total = 0; | ||
| 579 | spin_lock(&gc_lock); | ||
| 580 | list_add_tail(&prev->gc_list, &gc_list); | ||
| 581 | spin_unlock(&gc_lock); | ||
| 582 | schedule_delayed_work(&gc_work, gc_delay); | ||
| 583 | } | ||
| 584 | |||
| 585 | out: | ||
| 586 | write_sequnlock_bh(&base->lock); | ||
| 587 | } | ||
| 588 | EXPORT_SYMBOL(inetpeer_invalidate_tree); | ||
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index fdaabf2f2b68..1f23a57aa9e6 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c | |||
| @@ -392,7 +392,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | |||
| 392 | /* Is this the final fragment? */ | 392 | /* Is this the final fragment? */ |
| 393 | if ((flags & IP_MF) == 0) { | 393 | if ((flags & IP_MF) == 0) { |
| 394 | /* If we already have some bits beyond end | 394 | /* If we already have some bits beyond end |
| 395 | * or have different end, the segment is corrrupted. | 395 | * or have different end, the segment is corrupted. |
| 396 | */ | 396 | */ |
| 397 | if (end < qp->q.len || | 397 | if (end < qp->q.len || |
| 398 | ((qp->q.last_in & INET_FRAG_LAST_IN) && end != qp->q.len)) | 398 | ((qp->q.last_in & INET_FRAG_LAST_IN) && end != qp->q.len)) |
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d55110e93120..38673d2860e2 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c | |||
| @@ -46,7 +46,7 @@ | |||
| 46 | #include <net/rtnetlink.h> | 46 | #include <net/rtnetlink.h> |
| 47 | #include <net/gre.h> | 47 | #include <net/gre.h> |
| 48 | 48 | ||
| 49 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 49 | #if IS_ENABLED(CONFIG_IPV6) |
| 50 | #include <net/ipv6.h> | 50 | #include <net/ipv6.h> |
| 51 | #include <net/ip6_fib.h> | 51 | #include <net/ip6_fib.h> |
| 52 | #include <net/ip6_route.h> | 52 | #include <net/ip6_route.h> |
| @@ -65,7 +65,7 @@ | |||
| 65 | it is infeasible task. The most general solutions would be | 65 | it is infeasible task. The most general solutions would be |
| 66 | to keep skb->encapsulation counter (sort of local ttl), | 66 | to keep skb->encapsulation counter (sort of local ttl), |
| 67 | and silently drop packet when it expires. It is a good | 67 | and silently drop packet when it expires. It is a good |
| 68 | solution, but it supposes maintaing new variable in ALL | 68 | solution, but it supposes maintaining new variable in ALL |
| 69 | skb, even if no tunneling is used. | 69 | skb, even if no tunneling is used. |
| 70 | 70 | ||
| 71 | Current solution: xmit_recursion breaks dead loops. This is a percpu | 71 | Current solution: xmit_recursion breaks dead loops. This is a percpu |
| @@ -91,14 +91,14 @@ | |||
| 91 | 91 | ||
| 92 | One of them is to parse packet trying to detect inner encapsulation | 92 | One of them is to parse packet trying to detect inner encapsulation |
| 93 | made by our node. It is difficult or even impossible, especially, | 93 | made by our node. It is difficult or even impossible, especially, |
| 94 | taking into account fragmentation. TO be short, tt is not solution at all. | 94 | taking into account fragmentation. TO be short, ttl is not solution at all. |
| 95 | 95 | ||
| 96 | Current solution: The solution was UNEXPECTEDLY SIMPLE. | 96 | Current solution: The solution was UNEXPECTEDLY SIMPLE. |
| 97 | We force DF flag on tunnels with preconfigured hop limit, | 97 | We force DF flag on tunnels with preconfigured hop limit, |
| 98 | that is ALL. :-) Well, it does not remove the problem completely, | 98 | that is ALL. :-) Well, it does not remove the problem completely, |
| 99 | but exponential growth of network traffic is changed to linear | 99 | but exponential growth of network traffic is changed to linear |
| 100 | (branches, that exceed pmtu are pruned) and tunnel mtu | 100 | (branches, that exceed pmtu are pruned) and tunnel mtu |
| 101 | fastly degrades to value <68, where looping stops. | 101 | rapidly degrades to value <68, where looping stops. |
| 102 | Yes, it is not good if there exists a router in the loop, | 102 | Yes, it is not good if there exists a router in the loop, |
| 103 | which does not force DF, even when encapsulating packets have DF set. | 103 | which does not force DF, even when encapsulating packets have DF set. |
| 104 | But it is not our problem! Nobody could accuse us, we made | 104 | But it is not our problem! Nobody could accuse us, we made |
| @@ -171,7 +171,7 @@ struct pcpu_tstats { | |||
| 171 | unsigned long rx_bytes; | 171 | unsigned long rx_bytes; |
| 172 | unsigned long tx_packets; | 172 | unsigned long tx_packets; |
| 173 | unsigned long tx_bytes; | 173 | unsigned long tx_bytes; |
| 174 | }; | 174 | } __attribute__((aligned(4*sizeof(unsigned long)))); |
| 175 | 175 | ||
| 176 | static struct net_device_stats *ipgre_get_stats(struct net_device *dev) | 176 | static struct net_device_stats *ipgre_get_stats(struct net_device *dev) |
| 177 | { | 177 | { |
| @@ -422,6 +422,10 @@ static struct ip_tunnel *ipgre_tunnel_locate(struct net *net, | |||
| 422 | if (register_netdevice(dev) < 0) | 422 | if (register_netdevice(dev) < 0) |
| 423 | goto failed_free; | 423 | goto failed_free; |
| 424 | 424 | ||
| 425 | /* Can use a lockless transmit, unless we generate output sequences */ | ||
| 426 | if (!(nt->parms.o_flags & GRE_SEQ)) | ||
| 427 | dev->features |= NETIF_F_LLTX; | ||
| 428 | |||
| 425 | dev_hold(dev); | 429 | dev_hold(dev); |
| 426 | ipgre_tunnel_link(ign, nt); | 430 | ipgre_tunnel_link(ign, nt); |
| 427 | return nt; | 431 | return nt; |
| @@ -453,8 +457,8 @@ static void ipgre_err(struct sk_buff *skb, u32 info) | |||
| 453 | GRE tunnels with enabled checksum. Tell them "thank you". | 457 | GRE tunnels with enabled checksum. Tell them "thank you". |
| 454 | 458 | ||
| 455 | Well, I wonder, rfc1812 was written by Cisco employee, | 459 | Well, I wonder, rfc1812 was written by Cisco employee, |
| 456 | what the hell these idiots break standrads established | 460 | what the hell these idiots break standards established |
| 457 | by themself??? | 461 | by themselves??? |
| 458 | */ | 462 | */ |
| 459 | 463 | ||
| 460 | const struct iphdr *iph = (const struct iphdr *)skb->data; | 464 | const struct iphdr *iph = (const struct iphdr *)skb->data; |
| @@ -729,9 +733,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
| 729 | if ((dst = rt->rt_gateway) == 0) | 733 | if ((dst = rt->rt_gateway) == 0) |
| 730 | goto tx_error_icmp; | 734 | goto tx_error_icmp; |
| 731 | } | 735 | } |
| 732 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 736 | #if IS_ENABLED(CONFIG_IPV6) |
| 733 | else if (skb->protocol == htons(ETH_P_IPV6)) { | 737 | else if (skb->protocol == htons(ETH_P_IPV6)) { |
| 734 | struct neighbour *neigh = dst_get_neighbour(skb_dst(skb)); | 738 | struct neighbour *neigh = dst_get_neighbour_noref(skb_dst(skb)); |
| 735 | const struct in6_addr *addr6; | 739 | const struct in6_addr *addr6; |
| 736 | int addr_type; | 740 | int addr_type; |
| 737 | 741 | ||
| @@ -799,7 +803,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
| 799 | goto tx_error; | 803 | goto tx_error; |
| 800 | } | 804 | } |
| 801 | } | 805 | } |
| 802 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 806 | #if IS_ENABLED(CONFIG_IPV6) |
| 803 | else if (skb->protocol == htons(ETH_P_IPV6)) { | 807 | else if (skb->protocol == htons(ETH_P_IPV6)) { |
| 804 | struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); | 808 | struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); |
| 805 | 809 | ||
| @@ -835,6 +839,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
| 835 | if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| | 839 | if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| |
| 836 | (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { | 840 | (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { |
| 837 | struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); | 841 | struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); |
| 842 | if (max_headroom > dev->needed_headroom) | ||
| 843 | dev->needed_headroom = max_headroom; | ||
| 838 | if (!new_skb) { | 844 | if (!new_skb) { |
| 839 | ip_rt_put(rt); | 845 | ip_rt_put(rt); |
| 840 | dev->stats.tx_dropped++; | 846 | dev->stats.tx_dropped++; |
| @@ -873,7 +879,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
| 873 | if ((iph->ttl = tiph->ttl) == 0) { | 879 | if ((iph->ttl = tiph->ttl) == 0) { |
| 874 | if (skb->protocol == htons(ETH_P_IP)) | 880 | if (skb->protocol == htons(ETH_P_IP)) |
| 875 | iph->ttl = old_iph->ttl; | 881 | iph->ttl = old_iph->ttl; |
| 876 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 882 | #if IS_ENABLED(CONFIG_IPV6) |
| 877 | else if (skb->protocol == htons(ETH_P_IPV6)) | 883 | else if (skb->protocol == htons(ETH_P_IPV6)) |
| 878 | iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit; | 884 | iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit; |
| 879 | #endif | 885 | #endif |
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 1e60f7679075..42dd1a90edea 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c | |||
| @@ -573,8 +573,8 @@ void ip_forward_options(struct sk_buff *skb) | |||
| 573 | } | 573 | } |
| 574 | if (srrptr + 3 <= srrspace) { | 574 | if (srrptr + 3 <= srrspace) { |
| 575 | opt->is_changed = 1; | 575 | opt->is_changed = 1; |
| 576 | ip_rt_get_source(&optptr[srrptr-1], skb, rt); | ||
| 577 | ip_hdr(skb)->daddr = opt->nexthop; | 576 | ip_hdr(skb)->daddr = opt->nexthop; |
| 577 | ip_rt_get_source(&optptr[srrptr-1], skb, rt); | ||
| 578 | optptr[2] = srrptr+4; | 578 | optptr[2] = srrptr+4; |
| 579 | } else if (net_ratelimit()) | 579 | } else if (net_ratelimit()) |
| 580 | printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n"); | 580 | printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n"); |
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 0bc95f3977d2..ff302bde8890 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
| @@ -206,7 +206,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) | |||
| 206 | } | 206 | } |
| 207 | 207 | ||
| 208 | rcu_read_lock(); | 208 | rcu_read_lock(); |
| 209 | neigh = dst_get_neighbour(dst); | 209 | neigh = dst_get_neighbour_noref(dst); |
| 210 | if (neigh) { | 210 | if (neigh) { |
| 211 | int res = neigh_output(neigh, skb); | 211 | int res = neigh_output(neigh, skb); |
| 212 | 212 | ||
| @@ -319,6 +319,20 @@ int ip_output(struct sk_buff *skb) | |||
| 319 | !(IPCB(skb)->flags & IPSKB_REROUTED)); | 319 | !(IPCB(skb)->flags & IPSKB_REROUTED)); |
| 320 | } | 320 | } |
| 321 | 321 | ||
| 322 | /* | ||
| 323 | * copy saddr and daddr, possibly using 64bit load/stores | ||
| 324 | * Equivalent to : | ||
| 325 | * iph->saddr = fl4->saddr; | ||
| 326 | * iph->daddr = fl4->daddr; | ||
| 327 | */ | ||
| 328 | static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4) | ||
| 329 | { | ||
| 330 | BUILD_BUG_ON(offsetof(typeof(*fl4), daddr) != | ||
| 331 | offsetof(typeof(*fl4), saddr) + sizeof(fl4->saddr)); | ||
| 332 | memcpy(&iph->saddr, &fl4->saddr, | ||
| 333 | sizeof(fl4->saddr) + sizeof(fl4->daddr)); | ||
| 334 | } | ||
| 335 | |||
| 322 | int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl) | 336 | int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl) |
| 323 | { | 337 | { |
| 324 | struct sock *sk = skb->sk; | 338 | struct sock *sk = skb->sk; |
| @@ -381,8 +395,8 @@ packet_routed: | |||
| 381 | iph->frag_off = 0; | 395 | iph->frag_off = 0; |
| 382 | iph->ttl = ip_select_ttl(inet, &rt->dst); | 396 | iph->ttl = ip_select_ttl(inet, &rt->dst); |
| 383 | iph->protocol = sk->sk_protocol; | 397 | iph->protocol = sk->sk_protocol; |
| 384 | iph->saddr = fl4->saddr; | 398 | ip_copy_addrs(iph, fl4); |
| 385 | iph->daddr = fl4->daddr; | 399 | |
| 386 | /* Transport layer set skb->h.foo itself. */ | 400 | /* Transport layer set skb->h.foo itself. */ |
| 387 | 401 | ||
| 388 | if (inet_opt && inet_opt->opt.optlen) { | 402 | if (inet_opt && inet_opt->opt.optlen) { |
| @@ -1337,8 +1351,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, | |||
| 1337 | ip_select_ident(iph, &rt->dst, sk); | 1351 | ip_select_ident(iph, &rt->dst, sk); |
| 1338 | iph->ttl = ttl; | 1352 | iph->ttl = ttl; |
| 1339 | iph->protocol = sk->sk_protocol; | 1353 | iph->protocol = sk->sk_protocol; |
| 1340 | iph->saddr = fl4->saddr; | 1354 | ip_copy_addrs(iph, fl4); |
| 1341 | iph->daddr = fl4->daddr; | ||
| 1342 | 1355 | ||
| 1343 | if (opt) { | 1356 | if (opt) { |
| 1344 | iph->ihl += opt->optlen>>2; | 1357 | iph->ihl += opt->optlen>>2; |
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 09ff51bf16a4..8aa87c19fa00 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c | |||
| @@ -37,7 +37,7 @@ | |||
| 37 | #include <net/route.h> | 37 | #include <net/route.h> |
| 38 | #include <net/xfrm.h> | 38 | #include <net/xfrm.h> |
| 39 | #include <net/compat.h> | 39 | #include <net/compat.h> |
| 40 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 40 | #if IS_ENABLED(CONFIG_IPV6) |
| 41 | #include <net/transp_v6.h> | 41 | #include <net/transp_v6.h> |
| 42 | #endif | 42 | #endif |
| 43 | 43 | ||
| @@ -55,20 +55,13 @@ | |||
| 55 | /* | 55 | /* |
| 56 | * SOL_IP control messages. | 56 | * SOL_IP control messages. |
| 57 | */ | 57 | */ |
| 58 | #define PKTINFO_SKB_CB(__skb) ((struct in_pktinfo *)((__skb)->cb)) | ||
| 58 | 59 | ||
| 59 | static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) | 60 | static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) |
| 60 | { | 61 | { |
| 61 | struct in_pktinfo info; | 62 | struct in_pktinfo info = *PKTINFO_SKB_CB(skb); |
| 62 | struct rtable *rt = skb_rtable(skb); | ||
| 63 | 63 | ||
| 64 | info.ipi_addr.s_addr = ip_hdr(skb)->daddr; | 64 | info.ipi_addr.s_addr = ip_hdr(skb)->daddr; |
| 65 | if (rt) { | ||
| 66 | info.ipi_ifindex = rt->rt_iif; | ||
| 67 | info.ipi_spec_dst.s_addr = rt->rt_spec_dst; | ||
| 68 | } else { | ||
| 69 | info.ipi_ifindex = 0; | ||
| 70 | info.ipi_spec_dst.s_addr = 0; | ||
| 71 | } | ||
| 72 | 65 | ||
| 73 | put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); | 66 | put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); |
| 74 | } | 67 | } |
| @@ -515,7 +508,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
| 515 | sock_owned_by_user(sk)); | 508 | sock_owned_by_user(sk)); |
| 516 | if (inet->is_icsk) { | 509 | if (inet->is_icsk) { |
| 517 | struct inet_connection_sock *icsk = inet_csk(sk); | 510 | struct inet_connection_sock *icsk = inet_csk(sk); |
| 518 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 511 | #if IS_ENABLED(CONFIG_IPV6) |
| 519 | if (sk->sk_family == PF_INET || | 512 | if (sk->sk_family == PF_INET || |
| 520 | (!((1 << sk->sk_state) & | 513 | (!((1 << sk->sk_state) & |
| 521 | (TCPF_LISTEN | TCPF_CLOSE)) && | 514 | (TCPF_LISTEN | TCPF_CLOSE)) && |
| @@ -526,7 +519,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
| 526 | if (opt) | 519 | if (opt) |
| 527 | icsk->icsk_ext_hdr_len += opt->opt.optlen; | 520 | icsk->icsk_ext_hdr_len += opt->opt.optlen; |
| 528 | icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); | 521 | icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); |
| 529 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 522 | #if IS_ENABLED(CONFIG_IPV6) |
| 530 | } | 523 | } |
| 531 | #endif | 524 | #endif |
| 532 | } | 525 | } |
| @@ -992,20 +985,28 @@ e_inval: | |||
| 992 | } | 985 | } |
| 993 | 986 | ||
| 994 | /** | 987 | /** |
| 995 | * ip_queue_rcv_skb - Queue an skb into sock receive queue | 988 | * ipv4_pktinfo_prepare - transfert some info from rtable to skb |
| 996 | * @sk: socket | 989 | * @sk: socket |
| 997 | * @skb: buffer | 990 | * @skb: buffer |
| 998 | * | 991 | * |
| 999 | * Queues an skb into socket receive queue. If IP_CMSG_PKTINFO option | 992 | * To support IP_CMSG_PKTINFO option, we store rt_iif and rt_spec_dst |
| 1000 | * is not set, we drop skb dst entry now, while dst cache line is hot. | 993 | * in skb->cb[] before dst drop. |
| 994 | * This way, receiver doesnt make cache line misses to read rtable. | ||
| 1001 | */ | 995 | */ |
| 1002 | int ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | 996 | void ipv4_pktinfo_prepare(struct sk_buff *skb) |
| 1003 | { | 997 | { |
| 1004 | if (!(inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO)) | 998 | struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); |
| 1005 | skb_dst_drop(skb); | 999 | const struct rtable *rt = skb_rtable(skb); |
| 1006 | return sock_queue_rcv_skb(sk, skb); | 1000 | |
| 1001 | if (rt) { | ||
| 1002 | pktinfo->ipi_ifindex = rt->rt_iif; | ||
| 1003 | pktinfo->ipi_spec_dst.s_addr = rt->rt_spec_dst; | ||
| 1004 | } else { | ||
| 1005 | pktinfo->ipi_ifindex = 0; | ||
| 1006 | pktinfo->ipi_spec_dst.s_addr = 0; | ||
| 1007 | } | ||
| 1008 | skb_dst_drop(skb); | ||
| 1007 | } | 1009 | } |
| 1008 | EXPORT_SYMBOL(ip_queue_rcv_skb); | ||
| 1009 | 1010 | ||
| 1010 | int ip_setsockopt(struct sock *sk, int level, | 1011 | int ip_setsockopt(struct sock *sk, int level, |
| 1011 | int optname, char __user *optval, unsigned int optlen) | 1012 | int optname, char __user *optval, unsigned int optlen) |
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 99ec116bef14..6e412a60a91f 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c | |||
| @@ -141,7 +141,7 @@ __be32 ic_servaddr = NONE; /* Boot server IP address */ | |||
| 141 | __be32 root_server_addr = NONE; /* Address of NFS server */ | 141 | __be32 root_server_addr = NONE; /* Address of NFS server */ |
| 142 | u8 root_server_path[256] = { 0, }; /* Path to mount as root */ | 142 | u8 root_server_path[256] = { 0, }; /* Path to mount as root */ |
| 143 | 143 | ||
| 144 | u32 ic_dev_xid; /* Device under configuration */ | 144 | __be32 ic_dev_xid; /* Device under configuration */ |
| 145 | 145 | ||
| 146 | /* vendor class identifier */ | 146 | /* vendor class identifier */ |
| 147 | static char vendor_class_identifier[253] __initdata; | 147 | static char vendor_class_identifier[253] __initdata; |
| @@ -767,13 +767,15 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d | |||
| 767 | struct sk_buff *skb; | 767 | struct sk_buff *skb; |
| 768 | struct bootp_pkt *b; | 768 | struct bootp_pkt *b; |
| 769 | struct iphdr *h; | 769 | struct iphdr *h; |
| 770 | int hlen = LL_RESERVED_SPACE(dev); | ||
| 771 | int tlen = dev->needed_tailroom; | ||
| 770 | 772 | ||
| 771 | /* Allocate packet */ | 773 | /* Allocate packet */ |
| 772 | skb = alloc_skb(sizeof(struct bootp_pkt) + LL_ALLOCATED_SPACE(dev) + 15, | 774 | skb = alloc_skb(sizeof(struct bootp_pkt) + hlen + tlen + 15, |
| 773 | GFP_KERNEL); | 775 | GFP_KERNEL); |
| 774 | if (!skb) | 776 | if (!skb) |
| 775 | return; | 777 | return; |
| 776 | skb_reserve(skb, LL_RESERVED_SPACE(dev)); | 778 | skb_reserve(skb, hlen); |
| 777 | b = (struct bootp_pkt *) skb_put(skb, sizeof(struct bootp_pkt)); | 779 | b = (struct bootp_pkt *) skb_put(skb, sizeof(struct bootp_pkt)); |
| 778 | memset(b, 0, sizeof(struct bootp_pkt)); | 780 | memset(b, 0, sizeof(struct bootp_pkt)); |
| 779 | 781 | ||
| @@ -826,8 +828,13 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d | |||
| 826 | skb->dev = dev; | 828 | skb->dev = dev; |
| 827 | skb->protocol = htons(ETH_P_IP); | 829 | skb->protocol = htons(ETH_P_IP); |
| 828 | if (dev_hard_header(skb, dev, ntohs(skb->protocol), | 830 | if (dev_hard_header(skb, dev, ntohs(skb->protocol), |
| 829 | dev->broadcast, dev->dev_addr, skb->len) < 0 || | 831 | dev->broadcast, dev->dev_addr, skb->len) < 0) { |
| 830 | dev_queue_xmit(skb) < 0) | 832 | kfree_skb(skb); |
| 833 | printk("E"); | ||
| 834 | return; | ||
| 835 | } | ||
| 836 | |||
| 837 | if (dev_queue_xmit(skb) < 0) | ||
| 831 | printk("E"); | 838 | printk("E"); |
| 832 | } | 839 | } |
| 833 | 840 | ||
| @@ -852,9 +859,9 @@ static int __init ic_bootp_string(char *dest, char *src, int len, int max) | |||
| 852 | */ | 859 | */ |
| 853 | static void __init ic_do_bootp_ext(u8 *ext) | 860 | static void __init ic_do_bootp_ext(u8 *ext) |
| 854 | { | 861 | { |
| 855 | u8 servers; | 862 | u8 servers; |
| 856 | int i; | 863 | int i; |
| 857 | u16 mtu; | 864 | __be16 mtu; |
| 858 | 865 | ||
| 859 | #ifdef IPCONFIG_DEBUG | 866 | #ifdef IPCONFIG_DEBUG |
| 860 | u8 *c; | 867 | u8 *c; |
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 0b2e7329abda..22a199315309 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c | |||
| @@ -148,7 +148,7 @@ struct pcpu_tstats { | |||
| 148 | unsigned long rx_bytes; | 148 | unsigned long rx_bytes; |
| 149 | unsigned long tx_packets; | 149 | unsigned long tx_packets; |
| 150 | unsigned long tx_bytes; | 150 | unsigned long tx_bytes; |
| 151 | }; | 151 | } __attribute__((aligned(4*sizeof(unsigned long)))); |
| 152 | 152 | ||
| 153 | static struct net_device_stats *ipip_get_stats(struct net_device *dev) | 153 | static struct net_device_stats *ipip_get_stats(struct net_device *dev) |
| 154 | { | 154 | { |
| @@ -231,7 +231,7 @@ static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t) | |||
| 231 | (iter = rtnl_dereference(*tp)) != NULL; | 231 | (iter = rtnl_dereference(*tp)) != NULL; |
| 232 | tp = &iter->next) { | 232 | tp = &iter->next) { |
| 233 | if (t == iter) { | 233 | if (t == iter) { |
| 234 | RCU_INIT_POINTER(*tp, t->next); | 234 | rcu_assign_pointer(*tp, t->next); |
| 235 | break; | 235 | break; |
| 236 | } | 236 | } |
| 237 | } | 237 | } |
| @@ -241,8 +241,8 @@ static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) | |||
| 241 | { | 241 | { |
| 242 | struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t); | 242 | struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t); |
| 243 | 243 | ||
| 244 | RCU_INIT_POINTER(t->next, rtnl_dereference(*tp)); | 244 | rcu_assign_pointer(t->next, rtnl_dereference(*tp)); |
| 245 | RCU_INIT_POINTER(*tp, t); | 245 | rcu_assign_pointer(*tp, t); |
| 246 | } | 246 | } |
| 247 | 247 | ||
| 248 | static struct ip_tunnel * ipip_tunnel_locate(struct net *net, | 248 | static struct ip_tunnel * ipip_tunnel_locate(struct net *net, |
| @@ -792,7 +792,7 @@ static int __net_init ipip_fb_tunnel_init(struct net_device *dev) | |||
| 792 | return -ENOMEM; | 792 | return -ENOMEM; |
| 793 | 793 | ||
| 794 | dev_hold(dev); | 794 | dev_hold(dev); |
| 795 | RCU_INIT_POINTER(ipn->tunnels_wc[0], tunnel); | 795 | rcu_assign_pointer(ipn->tunnels_wc[0], tunnel); |
| 796 | return 0; | 796 | return 0; |
| 797 | } | 797 | } |
| 798 | 798 | ||
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 76a7f07b38b6..7bc2db6db8d4 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c | |||
| @@ -1225,7 +1225,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi | |||
| 1225 | 1225 | ||
| 1226 | ret = ip_ra_control(sk, 1, mrtsock_destruct); | 1226 | ret = ip_ra_control(sk, 1, mrtsock_destruct); |
| 1227 | if (ret == 0) { | 1227 | if (ret == 0) { |
| 1228 | RCU_INIT_POINTER(mrt->mroute_sk, sk); | 1228 | rcu_assign_pointer(mrt->mroute_sk, sk); |
| 1229 | IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; | 1229 | IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; |
| 1230 | } | 1230 | } |
| 1231 | rtnl_unlock(); | 1231 | rtnl_unlock(); |
| @@ -1520,7 +1520,6 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v | |||
| 1520 | struct mr_table *mrt; | 1520 | struct mr_table *mrt; |
| 1521 | struct vif_device *v; | 1521 | struct vif_device *v; |
| 1522 | int ct; | 1522 | int ct; |
| 1523 | LIST_HEAD(list); | ||
| 1524 | 1523 | ||
| 1525 | if (event != NETDEV_UNREGISTER) | 1524 | if (event != NETDEV_UNREGISTER) |
| 1526 | return NOTIFY_DONE; | 1525 | return NOTIFY_DONE; |
| @@ -1529,10 +1528,9 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v | |||
| 1529 | v = &mrt->vif_table[0]; | 1528 | v = &mrt->vif_table[0]; |
| 1530 | for (ct = 0; ct < mrt->maxvif; ct++, v++) { | 1529 | for (ct = 0; ct < mrt->maxvif; ct++, v++) { |
| 1531 | if (v->dev == dev) | 1530 | if (v->dev == dev) |
| 1532 | vif_delete(mrt, ct, 1, &list); | 1531 | vif_delete(mrt, ct, 1, NULL); |
| 1533 | } | 1532 | } |
| 1534 | } | 1533 | } |
| 1535 | unregister_netdevice_many(&list); | ||
| 1536 | return NOTIFY_DONE; | 1534 | return NOTIFY_DONE; |
| 1537 | } | 1535 | } |
| 1538 | 1536 | ||
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index f19f2182894c..74dfc9e5211f 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig | |||
| @@ -27,7 +27,7 @@ config NF_CONNTRACK_IPV4 | |||
| 27 | 27 | ||
| 28 | config NF_CONNTRACK_PROC_COMPAT | 28 | config NF_CONNTRACK_PROC_COMPAT |
| 29 | bool "proc/sysctl compatibility with old connection tracking" | 29 | bool "proc/sysctl compatibility with old connection tracking" |
| 30 | depends on NF_CONNTRACK_IPV4 | 30 | depends on NF_CONNTRACK_PROCFS && NF_CONNTRACK_IPV4 |
| 31 | default y | 31 | default y |
| 32 | help | 32 | help |
| 33 | This option enables /proc and sysctl compatibility with the old | 33 | This option enables /proc and sysctl compatibility with the old |
| @@ -76,11 +76,21 @@ config IP_NF_MATCH_AH | |||
| 76 | config IP_NF_MATCH_ECN | 76 | config IP_NF_MATCH_ECN |
| 77 | tristate '"ecn" match support' | 77 | tristate '"ecn" match support' |
| 78 | depends on NETFILTER_ADVANCED | 78 | depends on NETFILTER_ADVANCED |
| 79 | help | 79 | select NETFILTER_XT_MATCH_ECN |
| 80 | This option adds a `ECN' match, which allows you to match against | 80 | ---help--- |
| 81 | the IPv4 and TCP header ECN fields. | 81 | This is a backwards-compat option for the user's convenience |
| 82 | (e.g. when running oldconfig). It selects | ||
| 83 | CONFIG_NETFILTER_XT_MATCH_ECN. | ||
| 84 | |||
| 85 | config IP_NF_MATCH_RPFILTER | ||
| 86 | tristate '"rpfilter" reverse path filter match support' | ||
| 87 | depends on NETFILTER_ADVANCED | ||
| 88 | ---help--- | ||
| 89 | This option allows you to match packets whose replies would | ||
| 90 | go out via the interface the packet came in. | ||
| 82 | 91 | ||
| 83 | To compile it as a module, choose M here. If unsure, say N. | 92 | To compile it as a module, choose M here. If unsure, say N. |
| 93 | The module will be called ipt_rpfilter. | ||
| 84 | 94 | ||
| 85 | config IP_NF_MATCH_TTL | 95 | config IP_NF_MATCH_TTL |
| 86 | tristate '"ttl" match support' | 96 | tristate '"ttl" match support' |
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index dca2082ec683..213a462b739b 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile | |||
| @@ -49,7 +49,7 @@ obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o | |||
| 49 | 49 | ||
| 50 | # matches | 50 | # matches |
| 51 | obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o | 51 | obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o |
| 52 | obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o | 52 | obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o |
| 53 | 53 | ||
| 54 | # targets | 54 | # targets |
| 55 | obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o | 55 | obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o |
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index e59aabd0eae4..94d45e1f8882 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c | |||
| @@ -404,6 +404,7 @@ __ipq_rcv_skb(struct sk_buff *skb) | |||
| 404 | int status, type, pid, flags; | 404 | int status, type, pid, flags; |
| 405 | unsigned int nlmsglen, skblen; | 405 | unsigned int nlmsglen, skblen; |
| 406 | struct nlmsghdr *nlh; | 406 | struct nlmsghdr *nlh; |
| 407 | bool enable_timestamp = false; | ||
| 407 | 408 | ||
| 408 | skblen = skb->len; | 409 | skblen = skb->len; |
| 409 | if (skblen < sizeof(*nlh)) | 410 | if (skblen < sizeof(*nlh)) |
| @@ -430,7 +431,7 @@ __ipq_rcv_skb(struct sk_buff *skb) | |||
| 430 | if (type <= IPQM_BASE) | 431 | if (type <= IPQM_BASE) |
| 431 | return; | 432 | return; |
| 432 | 433 | ||
| 433 | if (security_netlink_recv(skb, CAP_NET_ADMIN)) | 434 | if (!capable(CAP_NET_ADMIN)) |
| 434 | RCV_SKB_FAIL(-EPERM); | 435 | RCV_SKB_FAIL(-EPERM); |
| 435 | 436 | ||
| 436 | spin_lock_bh(&queue_lock); | 437 | spin_lock_bh(&queue_lock); |
| @@ -441,12 +442,13 @@ __ipq_rcv_skb(struct sk_buff *skb) | |||
| 441 | RCV_SKB_FAIL(-EBUSY); | 442 | RCV_SKB_FAIL(-EBUSY); |
| 442 | } | 443 | } |
| 443 | } else { | 444 | } else { |
| 444 | net_enable_timestamp(); | 445 | enable_timestamp = true; |
| 445 | peer_pid = pid; | 446 | peer_pid = pid; |
| 446 | } | 447 | } |
| 447 | 448 | ||
| 448 | spin_unlock_bh(&queue_lock); | 449 | spin_unlock_bh(&queue_lock); |
| 449 | 450 | if (enable_timestamp) | |
| 451 | net_enable_timestamp(); | ||
| 450 | status = ipq_receive_peer(NLMSG_DATA(nlh), type, | 452 | status = ipq_receive_peer(NLMSG_DATA(nlh), type, |
| 451 | nlmsglen - NLMSG_LENGTH(0)); | 453 | nlmsglen - NLMSG_LENGTH(0)); |
| 452 | if (status < 0) | 454 | if (status < 0) |
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 9931152a78b5..2f210c79dc87 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c | |||
| @@ -30,9 +30,9 @@ MODULE_DESCRIPTION("Xtables: automatic-address SNAT"); | |||
| 30 | /* FIXME: Multiple targets. --RR */ | 30 | /* FIXME: Multiple targets. --RR */ |
| 31 | static int masquerade_tg_check(const struct xt_tgchk_param *par) | 31 | static int masquerade_tg_check(const struct xt_tgchk_param *par) |
| 32 | { | 32 | { |
| 33 | const struct nf_nat_multi_range_compat *mr = par->targinfo; | 33 | const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; |
| 34 | 34 | ||
| 35 | if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { | 35 | if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) { |
| 36 | pr_debug("bad MAP_IPS.\n"); | 36 | pr_debug("bad MAP_IPS.\n"); |
| 37 | return -EINVAL; | 37 | return -EINVAL; |
| 38 | } | 38 | } |
| @@ -49,8 +49,8 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) | |||
| 49 | struct nf_conn *ct; | 49 | struct nf_conn *ct; |
| 50 | struct nf_conn_nat *nat; | 50 | struct nf_conn_nat *nat; |
| 51 | enum ip_conntrack_info ctinfo; | 51 | enum ip_conntrack_info ctinfo; |
| 52 | struct nf_nat_range newrange; | 52 | struct nf_nat_ipv4_range newrange; |
| 53 | const struct nf_nat_multi_range_compat *mr; | 53 | const struct nf_nat_ipv4_multi_range_compat *mr; |
| 54 | const struct rtable *rt; | 54 | const struct rtable *rt; |
| 55 | __be32 newsrc; | 55 | __be32 newsrc; |
| 56 | 56 | ||
| @@ -79,13 +79,13 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) | |||
| 79 | nat->masq_index = par->out->ifindex; | 79 | nat->masq_index = par->out->ifindex; |
| 80 | 80 | ||
| 81 | /* Transfer from original range. */ | 81 | /* Transfer from original range. */ |
| 82 | newrange = ((struct nf_nat_range) | 82 | newrange = ((struct nf_nat_ipv4_range) |
| 83 | { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS, | 83 | { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS, |
| 84 | newsrc, newsrc, | 84 | newsrc, newsrc, |
| 85 | mr->range[0].min, mr->range[0].max }); | 85 | mr->range[0].min, mr->range[0].max }); |
| 86 | 86 | ||
| 87 | /* Hand modified range to generic setup. */ | 87 | /* Hand modified range to generic setup. */ |
| 88 | return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_SRC); | 88 | return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); |
| 89 | } | 89 | } |
| 90 | 90 | ||
| 91 | static int | 91 | static int |
| @@ -139,7 +139,7 @@ static struct xt_target masquerade_tg_reg __read_mostly = { | |||
| 139 | .name = "MASQUERADE", | 139 | .name = "MASQUERADE", |
| 140 | .family = NFPROTO_IPV4, | 140 | .family = NFPROTO_IPV4, |
| 141 | .target = masquerade_tg, | 141 | .target = masquerade_tg, |
| 142 | .targetsize = sizeof(struct nf_nat_multi_range_compat), | 142 | .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), |
| 143 | .table = "nat", | 143 | .table = "nat", |
| 144 | .hooks = 1 << NF_INET_POST_ROUTING, | 144 | .hooks = 1 << NF_INET_POST_ROUTING, |
| 145 | .checkentry = masquerade_tg_check, | 145 | .checkentry = masquerade_tg_check, |
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 6cdb298f1035..b5bfbbabf70d 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c | |||
| @@ -24,9 +24,9 @@ MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets"); | |||
| 24 | 24 | ||
| 25 | static int netmap_tg_check(const struct xt_tgchk_param *par) | 25 | static int netmap_tg_check(const struct xt_tgchk_param *par) |
| 26 | { | 26 | { |
| 27 | const struct nf_nat_multi_range_compat *mr = par->targinfo; | 27 | const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; |
| 28 | 28 | ||
| 29 | if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) { | 29 | if (!(mr->range[0].flags & NF_NAT_RANGE_MAP_IPS)) { |
| 30 | pr_debug("bad MAP_IPS.\n"); | 30 | pr_debug("bad MAP_IPS.\n"); |
| 31 | return -EINVAL; | 31 | return -EINVAL; |
| 32 | } | 32 | } |
| @@ -43,8 +43,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par) | |||
| 43 | struct nf_conn *ct; | 43 | struct nf_conn *ct; |
| 44 | enum ip_conntrack_info ctinfo; | 44 | enum ip_conntrack_info ctinfo; |
| 45 | __be32 new_ip, netmask; | 45 | __be32 new_ip, netmask; |
| 46 | const struct nf_nat_multi_range_compat *mr = par->targinfo; | 46 | const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; |
| 47 | struct nf_nat_range newrange; | 47 | struct nf_nat_ipv4_range newrange; |
| 48 | 48 | ||
| 49 | NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || | 49 | NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || |
| 50 | par->hooknum == NF_INET_POST_ROUTING || | 50 | par->hooknum == NF_INET_POST_ROUTING || |
| @@ -61,8 +61,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par) | |||
| 61 | new_ip = ip_hdr(skb)->saddr & ~netmask; | 61 | new_ip = ip_hdr(skb)->saddr & ~netmask; |
| 62 | new_ip |= mr->range[0].min_ip & netmask; | 62 | new_ip |= mr->range[0].min_ip & netmask; |
| 63 | 63 | ||
| 64 | newrange = ((struct nf_nat_range) | 64 | newrange = ((struct nf_nat_ipv4_range) |
| 65 | { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS, | 65 | { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS, |
| 66 | new_ip, new_ip, | 66 | new_ip, new_ip, |
| 67 | mr->range[0].min, mr->range[0].max }); | 67 | mr->range[0].min, mr->range[0].max }); |
| 68 | 68 | ||
| @@ -74,7 +74,7 @@ static struct xt_target netmap_tg_reg __read_mostly = { | |||
| 74 | .name = "NETMAP", | 74 | .name = "NETMAP", |
| 75 | .family = NFPROTO_IPV4, | 75 | .family = NFPROTO_IPV4, |
| 76 | .target = netmap_tg, | 76 | .target = netmap_tg, |
| 77 | .targetsize = sizeof(struct nf_nat_multi_range_compat), | 77 | .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), |
| 78 | .table = "nat", | 78 | .table = "nat", |
| 79 | .hooks = (1 << NF_INET_PRE_ROUTING) | | 79 | .hooks = (1 << NF_INET_PRE_ROUTING) | |
| 80 | (1 << NF_INET_POST_ROUTING) | | 80 | (1 << NF_INET_POST_ROUTING) | |
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 18a0656505a0..7c0103a5203e 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c | |||
| @@ -28,9 +28,9 @@ MODULE_DESCRIPTION("Xtables: Connection redirection to localhost"); | |||
| 28 | /* FIXME: Take multiple ranges --RR */ | 28 | /* FIXME: Take multiple ranges --RR */ |
| 29 | static int redirect_tg_check(const struct xt_tgchk_param *par) | 29 | static int redirect_tg_check(const struct xt_tgchk_param *par) |
| 30 | { | 30 | { |
| 31 | const struct nf_nat_multi_range_compat *mr = par->targinfo; | 31 | const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; |
| 32 | 32 | ||
| 33 | if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { | 33 | if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) { |
| 34 | pr_debug("bad MAP_IPS.\n"); | 34 | pr_debug("bad MAP_IPS.\n"); |
| 35 | return -EINVAL; | 35 | return -EINVAL; |
| 36 | } | 36 | } |
| @@ -47,8 +47,8 @@ redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) | |||
| 47 | struct nf_conn *ct; | 47 | struct nf_conn *ct; |
| 48 | enum ip_conntrack_info ctinfo; | 48 | enum ip_conntrack_info ctinfo; |
| 49 | __be32 newdst; | 49 | __be32 newdst; |
| 50 | const struct nf_nat_multi_range_compat *mr = par->targinfo; | 50 | const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; |
| 51 | struct nf_nat_range newrange; | 51 | struct nf_nat_ipv4_range newrange; |
| 52 | 52 | ||
| 53 | NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || | 53 | NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || |
| 54 | par->hooknum == NF_INET_LOCAL_OUT); | 54 | par->hooknum == NF_INET_LOCAL_OUT); |
| @@ -76,20 +76,20 @@ redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) | |||
| 76 | } | 76 | } |
| 77 | 77 | ||
| 78 | /* Transfer from original range. */ | 78 | /* Transfer from original range. */ |
| 79 | newrange = ((struct nf_nat_range) | 79 | newrange = ((struct nf_nat_ipv4_range) |
| 80 | { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS, | 80 | { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS, |
| 81 | newdst, newdst, | 81 | newdst, newdst, |
| 82 | mr->range[0].min, mr->range[0].max }); | 82 | mr->range[0].min, mr->range[0].max }); |
| 83 | 83 | ||
| 84 | /* Hand modified range to generic setup. */ | 84 | /* Hand modified range to generic setup. */ |
| 85 | return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_DST); | 85 | return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); |
| 86 | } | 86 | } |
| 87 | 87 | ||
| 88 | static struct xt_target redirect_tg_reg __read_mostly = { | 88 | static struct xt_target redirect_tg_reg __read_mostly = { |
| 89 | .name = "REDIRECT", | 89 | .name = "REDIRECT", |
| 90 | .family = NFPROTO_IPV4, | 90 | .family = NFPROTO_IPV4, |
| 91 | .target = redirect_tg, | 91 | .target = redirect_tg, |
| 92 | .targetsize = sizeof(struct nf_nat_multi_range_compat), | 92 | .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), |
| 93 | .table = "nat", | 93 | .table = "nat", |
| 94 | .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), | 94 | .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), |
| 95 | .checkentry = redirect_tg_check, | 95 | .checkentry = redirect_tg_check, |
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index b5508151e547..ba5756d20165 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c | |||
| @@ -65,7 +65,7 @@ static unsigned int flushtimeout = 10; | |||
| 65 | module_param(flushtimeout, uint, 0600); | 65 | module_param(flushtimeout, uint, 0600); |
| 66 | MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)"); | 66 | MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)"); |
| 67 | 67 | ||
| 68 | static int nflog = 1; | 68 | static bool nflog = true; |
| 69 | module_param(nflog, bool, 0400); | 69 | module_param(nflog, bool, 0400); |
| 70 | MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); | 70 | MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); |
| 71 | 71 | ||
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c deleted file mode 100644 index 2b57e52c746c..000000000000 --- a/net/ipv4/netfilter/ipt_ecn.c +++ /dev/null | |||
| @@ -1,127 +0,0 @@ | |||
| 1 | /* IP tables module for matching the value of the IPv4 and TCP ECN bits | ||
| 2 | * | ||
| 3 | * (C) 2002 by Harald Welte <laforge@gnumonks.org> | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify | ||
| 6 | * it under the terms of the GNU General Public License version 2 as | ||
| 7 | * published by the Free Software Foundation. | ||
| 8 | */ | ||
| 9 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
| 10 | #include <linux/in.h> | ||
| 11 | #include <linux/ip.h> | ||
| 12 | #include <net/ip.h> | ||
| 13 | #include <linux/module.h> | ||
| 14 | #include <linux/skbuff.h> | ||
| 15 | #include <linux/tcp.h> | ||
| 16 | |||
| 17 | #include <linux/netfilter/x_tables.h> | ||
| 18 | #include <linux/netfilter_ipv4/ip_tables.h> | ||
| 19 | #include <linux/netfilter_ipv4/ipt_ecn.h> | ||
| 20 | |||
| 21 | MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); | ||
| 22 | MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag match for IPv4"); | ||
| 23 | MODULE_LICENSE("GPL"); | ||
| 24 | |||
| 25 | static inline bool match_ip(const struct sk_buff *skb, | ||
| 26 | const struct ipt_ecn_info *einfo) | ||
| 27 | { | ||
| 28 | return ((ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect) ^ | ||
| 29 | !!(einfo->invert & IPT_ECN_OP_MATCH_IP); | ||
| 30 | } | ||
| 31 | |||
| 32 | static inline bool match_tcp(const struct sk_buff *skb, | ||
| 33 | const struct ipt_ecn_info *einfo, | ||
| 34 | bool *hotdrop) | ||
| 35 | { | ||
| 36 | struct tcphdr _tcph; | ||
| 37 | const struct tcphdr *th; | ||
| 38 | |||
| 39 | /* In practice, TCP match does this, so can't fail. But let's | ||
| 40 | * be good citizens. | ||
| 41 | */ | ||
| 42 | th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); | ||
| 43 | if (th == NULL) { | ||
| 44 | *hotdrop = false; | ||
| 45 | return false; | ||
| 46 | } | ||
| 47 | |||
| 48 | if (einfo->operation & IPT_ECN_OP_MATCH_ECE) { | ||
| 49 | if (einfo->invert & IPT_ECN_OP_MATCH_ECE) { | ||
| 50 | if (th->ece == 1) | ||
| 51 | return false; | ||
| 52 | } else { | ||
| 53 | if (th->ece == 0) | ||
| 54 | return false; | ||
| 55 | } | ||
| 56 | } | ||
| 57 | |||
| 58 | if (einfo->operation & IPT_ECN_OP_MATCH_CWR) { | ||
| 59 | if (einfo->invert & IPT_ECN_OP_MATCH_CWR) { | ||
| 60 | if (th->cwr == 1) | ||
| 61 | return false; | ||
| 62 | } else { | ||
| 63 | if (th->cwr == 0) | ||
| 64 | return false; | ||
| 65 | } | ||
| 66 | } | ||
| 67 | |||
| 68 | return true; | ||
| 69 | } | ||
| 70 | |||
| 71 | static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par) | ||
| 72 | { | ||
| 73 | const struct ipt_ecn_info *info = par->matchinfo; | ||
| 74 | |||
| 75 | if (info->operation & IPT_ECN_OP_MATCH_IP) | ||
| 76 | if (!match_ip(skb, info)) | ||
| 77 | return false; | ||
| 78 | |||
| 79 | if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) { | ||
| 80 | if (!match_tcp(skb, info, &par->hotdrop)) | ||
| 81 | return false; | ||
| 82 | } | ||
| 83 | |||
| 84 | return true; | ||
| 85 | } | ||
| 86 | |||
| 87 | static int ecn_mt_check(const struct xt_mtchk_param *par) | ||
| 88 | { | ||
| 89 | const struct ipt_ecn_info *info = par->matchinfo; | ||
| 90 | const struct ipt_ip *ip = par->entryinfo; | ||
| 91 | |||
| 92 | if (info->operation & IPT_ECN_OP_MATCH_MASK) | ||
| 93 | return -EINVAL; | ||
| 94 | |||
| 95 | if (info->invert & IPT_ECN_OP_MATCH_MASK) | ||
| 96 | return -EINVAL; | ||
| 97 | |||
| 98 | if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) && | ||
| 99 | (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) { | ||
| 100 | pr_info("cannot match TCP bits in rule for non-tcp packets\n"); | ||
| 101 | return -EINVAL; | ||
| 102 | } | ||
| 103 | |||
| 104 | return 0; | ||
| 105 | } | ||
| 106 | |||
| 107 | static struct xt_match ecn_mt_reg __read_mostly = { | ||
| 108 | .name = "ecn", | ||
| 109 | .family = NFPROTO_IPV4, | ||
| 110 | .match = ecn_mt, | ||
| 111 | .matchsize = sizeof(struct ipt_ecn_info), | ||
| 112 | .checkentry = ecn_mt_check, | ||
| 113 | .me = THIS_MODULE, | ||
| 114 | }; | ||
| 115 | |||
| 116 | static int __init ecn_mt_init(void) | ||
| 117 | { | ||
| 118 | return xt_register_match(&ecn_mt_reg); | ||
| 119 | } | ||
| 120 | |||
| 121 | static void __exit ecn_mt_exit(void) | ||
| 122 | { | ||
| 123 | xt_unregister_match(&ecn_mt_reg); | ||
| 124 | } | ||
| 125 | |||
| 126 | module_init(ecn_mt_init); | ||
| 127 | module_exit(ecn_mt_exit); | ||
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c new file mode 100644 index 000000000000..31371be8174b --- /dev/null +++ b/net/ipv4/netfilter/ipt_rpfilter.c | |||
| @@ -0,0 +1,141 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2011 Florian Westphal <fw@strlen.de> | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License version 2 as | ||
| 6 | * published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * based on fib_frontend.c; Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> | ||
| 9 | */ | ||
| 10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
| 11 | #include <linux/module.h> | ||
| 12 | #include <linux/skbuff.h> | ||
| 13 | #include <linux/netdevice.h> | ||
| 14 | #include <linux/ip.h> | ||
| 15 | #include <net/ip.h> | ||
| 16 | #include <net/ip_fib.h> | ||
| 17 | #include <net/route.h> | ||
| 18 | |||
| 19 | #include <linux/netfilter/xt_rpfilter.h> | ||
| 20 | #include <linux/netfilter/x_tables.h> | ||
| 21 | |||
| 22 | MODULE_LICENSE("GPL"); | ||
| 23 | MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); | ||
| 24 | MODULE_DESCRIPTION("iptables: ipv4 reverse path filter match"); | ||
| 25 | |||
| 26 | /* don't try to find route from mcast/bcast/zeronet */ | ||
| 27 | static __be32 rpfilter_get_saddr(__be32 addr) | ||
| 28 | { | ||
| 29 | if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) || | ||
| 30 | ipv4_is_zeronet(addr)) | ||
| 31 | return 0; | ||
| 32 | return addr; | ||
| 33 | } | ||
| 34 | |||
| 35 | static bool rpfilter_lookup_reverse(struct flowi4 *fl4, | ||
| 36 | const struct net_device *dev, u8 flags) | ||
| 37 | { | ||
| 38 | struct fib_result res; | ||
| 39 | bool dev_match; | ||
| 40 | struct net *net = dev_net(dev); | ||
| 41 | int ret __maybe_unused; | ||
| 42 | |||
| 43 | if (fib_lookup(net, fl4, &res)) | ||
| 44 | return false; | ||
| 45 | |||
| 46 | if (res.type != RTN_UNICAST) { | ||
| 47 | if (res.type != RTN_LOCAL || !(flags & XT_RPFILTER_ACCEPT_LOCAL)) | ||
| 48 | return false; | ||
| 49 | } | ||
| 50 | dev_match = false; | ||
| 51 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | ||
| 52 | for (ret = 0; ret < res.fi->fib_nhs; ret++) { | ||
| 53 | struct fib_nh *nh = &res.fi->fib_nh[ret]; | ||
| 54 | |||
| 55 | if (nh->nh_dev == dev) { | ||
| 56 | dev_match = true; | ||
| 57 | break; | ||
| 58 | } | ||
| 59 | } | ||
| 60 | #else | ||
| 61 | if (FIB_RES_DEV(res) == dev) | ||
| 62 | dev_match = true; | ||
| 63 | #endif | ||
| 64 | if (dev_match || flags & XT_RPFILTER_LOOSE) | ||
| 65 | return FIB_RES_NH(res).nh_scope <= RT_SCOPE_HOST; | ||
| 66 | return dev_match; | ||
| 67 | } | ||
| 68 | |||
| 69 | static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) | ||
| 70 | { | ||
| 71 | const struct xt_rpfilter_info *info; | ||
| 72 | const struct iphdr *iph; | ||
| 73 | struct flowi4 flow; | ||
| 74 | bool invert; | ||
| 75 | |||
| 76 | info = par->matchinfo; | ||
| 77 | invert = info->flags & XT_RPFILTER_INVERT; | ||
| 78 | |||
| 79 | if (par->in->flags & IFF_LOOPBACK) | ||
| 80 | return true ^ invert; | ||
| 81 | |||
| 82 | iph = ip_hdr(skb); | ||
| 83 | if (ipv4_is_multicast(iph->daddr)) { | ||
| 84 | if (ipv4_is_zeronet(iph->saddr)) | ||
| 85 | return ipv4_is_local_multicast(iph->daddr) ^ invert; | ||
| 86 | flow.flowi4_iif = 0; | ||
| 87 | } else { | ||
| 88 | flow.flowi4_iif = dev_net(par->in)->loopback_dev->ifindex; | ||
| 89 | } | ||
| 90 | |||
| 91 | flow.daddr = iph->saddr; | ||
| 92 | flow.saddr = rpfilter_get_saddr(iph->daddr); | ||
| 93 | flow.flowi4_oif = 0; | ||
| 94 | flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; | ||
| 95 | flow.flowi4_tos = RT_TOS(iph->tos); | ||
| 96 | flow.flowi4_scope = RT_SCOPE_UNIVERSE; | ||
| 97 | |||
| 98 | return rpfilter_lookup_reverse(&flow, par->in, info->flags) ^ invert; | ||
| 99 | } | ||
| 100 | |||
| 101 | static int rpfilter_check(const struct xt_mtchk_param *par) | ||
| 102 | { | ||
| 103 | const struct xt_rpfilter_info *info = par->matchinfo; | ||
| 104 | unsigned int options = ~XT_RPFILTER_OPTION_MASK; | ||
| 105 | if (info->flags & options) { | ||
| 106 | pr_info("unknown options encountered"); | ||
| 107 | return -EINVAL; | ||
| 108 | } | ||
| 109 | |||
| 110 | if (strcmp(par->table, "mangle") != 0 && | ||
| 111 | strcmp(par->table, "raw") != 0) { | ||
| 112 | pr_info("match only valid in the \'raw\' " | ||
| 113 | "or \'mangle\' tables, not \'%s\'.\n", par->table); | ||
| 114 | return -EINVAL; | ||
| 115 | } | ||
| 116 | |||
| 117 | return 0; | ||
| 118 | } | ||
| 119 | |||
| 120 | static struct xt_match rpfilter_mt_reg __read_mostly = { | ||
| 121 | .name = "rpfilter", | ||
| 122 | .family = NFPROTO_IPV4, | ||
| 123 | .checkentry = rpfilter_check, | ||
| 124 | .match = rpfilter_mt, | ||
| 125 | .matchsize = sizeof(struct xt_rpfilter_info), | ||
| 126 | .hooks = (1 << NF_INET_PRE_ROUTING), | ||
| 127 | .me = THIS_MODULE | ||
| 128 | }; | ||
| 129 | |||
| 130 | static int __init rpfilter_mt_init(void) | ||
| 131 | { | ||
| 132 | return xt_register_match(&rpfilter_mt_reg); | ||
| 133 | } | ||
| 134 | |||
| 135 | static void __exit rpfilter_mt_exit(void) | ||
| 136 | { | ||
| 137 | xt_unregister_match(&rpfilter_mt_reg); | ||
| 138 | } | ||
| 139 | |||
| 140 | module_init(rpfilter_mt_init); | ||
| 141 | module_exit(rpfilter_mt_exit); | ||
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index c37641e819f2..0e58f09e59fb 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c | |||
| @@ -52,7 +52,7 @@ iptable_filter_hook(unsigned int hook, struct sk_buff *skb, | |||
| 52 | static struct nf_hook_ops *filter_ops __read_mostly; | 52 | static struct nf_hook_ops *filter_ops __read_mostly; |
| 53 | 53 | ||
| 54 | /* Default to forward because I got too much mail already. */ | 54 | /* Default to forward because I got too much mail already. */ |
| 55 | static int forward = NF_ACCEPT; | 55 | static bool forward = NF_ACCEPT; |
| 56 | module_param(forward, bool, 0000); | 56 | module_param(forward, bool, 0000); |
| 57 | 57 | ||
| 58 | static int __net_init iptable_filter_net_init(struct net *net) | 58 | static int __net_init iptable_filter_net_init(struct net *net) |
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 447bc5cfdc6c..a708933dc230 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c | |||
| @@ -30,7 +30,6 @@ | |||
| 30 | #include <net/netfilter/nf_nat_helper.h> | 30 | #include <net/netfilter/nf_nat_helper.h> |
| 31 | #include <net/netfilter/nf_conntrack_helper.h> | 31 | #include <net/netfilter/nf_conntrack_helper.h> |
| 32 | #include <net/netfilter/nf_conntrack_l3proto.h> | 32 | #include <net/netfilter/nf_conntrack_l3proto.h> |
| 33 | #include <net/netfilter/nf_conntrack_l4proto.h> | ||
| 34 | #include <net/netfilter/nf_conntrack_zones.h> | 33 | #include <net/netfilter/nf_conntrack_zones.h> |
| 35 | 34 | ||
| 36 | static DEFINE_SPINLOCK(nf_nat_lock); | 35 | static DEFINE_SPINLOCK(nf_nat_lock); |
| @@ -57,7 +56,7 @@ hash_by_src(const struct net *net, u16 zone, | |||
| 57 | /* Original src, to ensure we map it consistently if poss. */ | 56 | /* Original src, to ensure we map it consistently if poss. */ |
| 58 | hash = jhash_3words((__force u32)tuple->src.u3.ip, | 57 | hash = jhash_3words((__force u32)tuple->src.u3.ip, |
| 59 | (__force u32)tuple->src.u.all ^ zone, | 58 | (__force u32)tuple->src.u.all ^ zone, |
| 60 | tuple->dst.protonum, 0); | 59 | tuple->dst.protonum, nf_conntrack_hash_rnd); |
| 61 | return ((u64)hash * net->ipv4.nat_htable_size) >> 32; | 60 | return ((u64)hash * net->ipv4.nat_htable_size) >> 32; |
| 62 | } | 61 | } |
| 63 | 62 | ||
| @@ -82,14 +81,14 @@ EXPORT_SYMBOL(nf_nat_used_tuple); | |||
| 82 | * that meet the constraints of range. */ | 81 | * that meet the constraints of range. */ |
| 83 | static int | 82 | static int |
| 84 | in_range(const struct nf_conntrack_tuple *tuple, | 83 | in_range(const struct nf_conntrack_tuple *tuple, |
| 85 | const struct nf_nat_range *range) | 84 | const struct nf_nat_ipv4_range *range) |
| 86 | { | 85 | { |
| 87 | const struct nf_nat_protocol *proto; | 86 | const struct nf_nat_protocol *proto; |
| 88 | int ret = 0; | 87 | int ret = 0; |
| 89 | 88 | ||
| 90 | /* If we are supposed to map IPs, then we must be in the | 89 | /* If we are supposed to map IPs, then we must be in the |
| 91 | range specified, otherwise let this drag us onto a new src IP. */ | 90 | range specified, otherwise let this drag us onto a new src IP. */ |
| 92 | if (range->flags & IP_NAT_RANGE_MAP_IPS) { | 91 | if (range->flags & NF_NAT_RANGE_MAP_IPS) { |
| 93 | if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) || | 92 | if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) || |
| 94 | ntohl(tuple->src.u3.ip) > ntohl(range->max_ip)) | 93 | ntohl(tuple->src.u3.ip) > ntohl(range->max_ip)) |
| 95 | return 0; | 94 | return 0; |
| @@ -97,8 +96,8 @@ in_range(const struct nf_conntrack_tuple *tuple, | |||
| 97 | 96 | ||
| 98 | rcu_read_lock(); | 97 | rcu_read_lock(); |
| 99 | proto = __nf_nat_proto_find(tuple->dst.protonum); | 98 | proto = __nf_nat_proto_find(tuple->dst.protonum); |
| 100 | if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || | 99 | if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) || |
| 101 | proto->in_range(tuple, IP_NAT_MANIP_SRC, | 100 | proto->in_range(tuple, NF_NAT_MANIP_SRC, |
| 102 | &range->min, &range->max)) | 101 | &range->min, &range->max)) |
| 103 | ret = 1; | 102 | ret = 1; |
| 104 | rcu_read_unlock(); | 103 | rcu_read_unlock(); |
| @@ -123,7 +122,7 @@ static int | |||
| 123 | find_appropriate_src(struct net *net, u16 zone, | 122 | find_appropriate_src(struct net *net, u16 zone, |
| 124 | const struct nf_conntrack_tuple *tuple, | 123 | const struct nf_conntrack_tuple *tuple, |
| 125 | struct nf_conntrack_tuple *result, | 124 | struct nf_conntrack_tuple *result, |
| 126 | const struct nf_nat_range *range) | 125 | const struct nf_nat_ipv4_range *range) |
| 127 | { | 126 | { |
| 128 | unsigned int h = hash_by_src(net, zone, tuple); | 127 | unsigned int h = hash_by_src(net, zone, tuple); |
| 129 | const struct nf_conn_nat *nat; | 128 | const struct nf_conn_nat *nat; |
| @@ -157,7 +156,7 @@ find_appropriate_src(struct net *net, u16 zone, | |||
| 157 | */ | 156 | */ |
| 158 | static void | 157 | static void |
| 159 | find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, | 158 | find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, |
| 160 | const struct nf_nat_range *range, | 159 | const struct nf_nat_ipv4_range *range, |
| 161 | const struct nf_conn *ct, | 160 | const struct nf_conn *ct, |
| 162 | enum nf_nat_manip_type maniptype) | 161 | enum nf_nat_manip_type maniptype) |
| 163 | { | 162 | { |
| @@ -166,10 +165,10 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, | |||
| 166 | u_int32_t minip, maxip, j; | 165 | u_int32_t minip, maxip, j; |
| 167 | 166 | ||
| 168 | /* No IP mapping? Do nothing. */ | 167 | /* No IP mapping? Do nothing. */ |
| 169 | if (!(range->flags & IP_NAT_RANGE_MAP_IPS)) | 168 | if (!(range->flags & NF_NAT_RANGE_MAP_IPS)) |
| 170 | return; | 169 | return; |
| 171 | 170 | ||
| 172 | if (maniptype == IP_NAT_MANIP_SRC) | 171 | if (maniptype == NF_NAT_MANIP_SRC) |
| 173 | var_ipp = &tuple->src.u3.ip; | 172 | var_ipp = &tuple->src.u3.ip; |
| 174 | else | 173 | else |
| 175 | var_ipp = &tuple->dst.u3.ip; | 174 | var_ipp = &tuple->dst.u3.ip; |
| @@ -189,7 +188,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, | |||
| 189 | minip = ntohl(range->min_ip); | 188 | minip = ntohl(range->min_ip); |
| 190 | maxip = ntohl(range->max_ip); | 189 | maxip = ntohl(range->max_ip); |
| 191 | j = jhash_2words((__force u32)tuple->src.u3.ip, | 190 | j = jhash_2words((__force u32)tuple->src.u3.ip, |
| 192 | range->flags & IP_NAT_RANGE_PERSISTENT ? | 191 | range->flags & NF_NAT_RANGE_PERSISTENT ? |
| 193 | 0 : (__force u32)tuple->dst.u3.ip ^ zone, 0); | 192 | 0 : (__force u32)tuple->dst.u3.ip ^ zone, 0); |
| 194 | j = ((u64)j * (maxip - minip + 1)) >> 32; | 193 | j = ((u64)j * (maxip - minip + 1)) >> 32; |
| 195 | *var_ipp = htonl(minip + j); | 194 | *var_ipp = htonl(minip + j); |
| @@ -204,7 +203,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, | |||
| 204 | static void | 203 | static void |
| 205 | get_unique_tuple(struct nf_conntrack_tuple *tuple, | 204 | get_unique_tuple(struct nf_conntrack_tuple *tuple, |
| 206 | const struct nf_conntrack_tuple *orig_tuple, | 205 | const struct nf_conntrack_tuple *orig_tuple, |
| 207 | const struct nf_nat_range *range, | 206 | const struct nf_nat_ipv4_range *range, |
| 208 | struct nf_conn *ct, | 207 | struct nf_conn *ct, |
| 209 | enum nf_nat_manip_type maniptype) | 208 | enum nf_nat_manip_type maniptype) |
| 210 | { | 209 | { |
| @@ -219,8 +218,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
| 219 | This is only required for source (ie. NAT/masq) mappings. | 218 | This is only required for source (ie. NAT/masq) mappings. |
| 220 | So far, we don't do local source mappings, so multiple | 219 | So far, we don't do local source mappings, so multiple |
| 221 | manips not an issue. */ | 220 | manips not an issue. */ |
| 222 | if (maniptype == IP_NAT_MANIP_SRC && | 221 | if (maniptype == NF_NAT_MANIP_SRC && |
| 223 | !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { | 222 | !(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) { |
| 224 | /* try the original tuple first */ | 223 | /* try the original tuple first */ |
| 225 | if (in_range(orig_tuple, range)) { | 224 | if (in_range(orig_tuple, range)) { |
| 226 | if (!nf_nat_used_tuple(orig_tuple, ct)) { | 225 | if (!nf_nat_used_tuple(orig_tuple, ct)) { |
| @@ -247,8 +246,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
| 247 | proto = __nf_nat_proto_find(orig_tuple->dst.protonum); | 246 | proto = __nf_nat_proto_find(orig_tuple->dst.protonum); |
| 248 | 247 | ||
| 249 | /* Only bother mapping if it's not already in range and unique */ | 248 | /* Only bother mapping if it's not already in range and unique */ |
| 250 | if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { | 249 | if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) { |
| 251 | if (range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) { | 250 | if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { |
| 252 | if (proto->in_range(tuple, maniptype, &range->min, | 251 | if (proto->in_range(tuple, maniptype, &range->min, |
| 253 | &range->max) && | 252 | &range->max) && |
| 254 | (range->min.all == range->max.all || | 253 | (range->min.all == range->max.all || |
| @@ -267,7 +266,7 @@ out: | |||
| 267 | 266 | ||
| 268 | unsigned int | 267 | unsigned int |
| 269 | nf_nat_setup_info(struct nf_conn *ct, | 268 | nf_nat_setup_info(struct nf_conn *ct, |
| 270 | const struct nf_nat_range *range, | 269 | const struct nf_nat_ipv4_range *range, |
| 271 | enum nf_nat_manip_type maniptype) | 270 | enum nf_nat_manip_type maniptype) |
| 272 | { | 271 | { |
| 273 | struct net *net = nf_ct_net(ct); | 272 | struct net *net = nf_ct_net(ct); |
| @@ -284,8 +283,8 @@ nf_nat_setup_info(struct nf_conn *ct, | |||
| 284 | } | 283 | } |
| 285 | } | 284 | } |
| 286 | 285 | ||
| 287 | NF_CT_ASSERT(maniptype == IP_NAT_MANIP_SRC || | 286 | NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC || |
| 288 | maniptype == IP_NAT_MANIP_DST); | 287 | maniptype == NF_NAT_MANIP_DST); |
| 289 | BUG_ON(nf_nat_initialized(ct, maniptype)); | 288 | BUG_ON(nf_nat_initialized(ct, maniptype)); |
| 290 | 289 | ||
| 291 | /* What we've got will look like inverse of reply. Normally | 290 | /* What we've got will look like inverse of reply. Normally |
| @@ -306,19 +305,19 @@ nf_nat_setup_info(struct nf_conn *ct, | |||
| 306 | nf_conntrack_alter_reply(ct, &reply); | 305 | nf_conntrack_alter_reply(ct, &reply); |
| 307 | 306 | ||
| 308 | /* Non-atomic: we own this at the moment. */ | 307 | /* Non-atomic: we own this at the moment. */ |
| 309 | if (maniptype == IP_NAT_MANIP_SRC) | 308 | if (maniptype == NF_NAT_MANIP_SRC) |
| 310 | ct->status |= IPS_SRC_NAT; | 309 | ct->status |= IPS_SRC_NAT; |
| 311 | else | 310 | else |
| 312 | ct->status |= IPS_DST_NAT; | 311 | ct->status |= IPS_DST_NAT; |
| 313 | } | 312 | } |
| 314 | 313 | ||
| 315 | if (maniptype == IP_NAT_MANIP_SRC) { | 314 | if (maniptype == NF_NAT_MANIP_SRC) { |
| 316 | unsigned int srchash; | 315 | unsigned int srchash; |
| 317 | 316 | ||
| 318 | srchash = hash_by_src(net, nf_ct_zone(ct), | 317 | srchash = hash_by_src(net, nf_ct_zone(ct), |
| 319 | &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | 318 | &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); |
| 320 | spin_lock_bh(&nf_nat_lock); | 319 | spin_lock_bh(&nf_nat_lock); |
| 321 | /* nf_conntrack_alter_reply might re-allocate exntension aera */ | 320 | /* nf_conntrack_alter_reply might re-allocate extension area */ |
| 322 | nat = nfct_nat(ct); | 321 | nat = nfct_nat(ct); |
| 323 | nat->ct = ct; | 322 | nat->ct = ct; |
| 324 | hlist_add_head_rcu(&nat->bysource, | 323 | hlist_add_head_rcu(&nat->bysource, |
| @@ -327,7 +326,7 @@ nf_nat_setup_info(struct nf_conn *ct, | |||
| 327 | } | 326 | } |
| 328 | 327 | ||
| 329 | /* It's done. */ | 328 | /* It's done. */ |
| 330 | if (maniptype == IP_NAT_MANIP_DST) | 329 | if (maniptype == NF_NAT_MANIP_DST) |
| 331 | ct->status |= IPS_DST_NAT_DONE; | 330 | ct->status |= IPS_DST_NAT_DONE; |
| 332 | else | 331 | else |
| 333 | ct->status |= IPS_SRC_NAT_DONE; | 332 | ct->status |= IPS_SRC_NAT_DONE; |
| @@ -361,7 +360,7 @@ manip_pkt(u_int16_t proto, | |||
| 361 | 360 | ||
| 362 | iph = (void *)skb->data + iphdroff; | 361 | iph = (void *)skb->data + iphdroff; |
| 363 | 362 | ||
| 364 | if (maniptype == IP_NAT_MANIP_SRC) { | 363 | if (maniptype == NF_NAT_MANIP_SRC) { |
| 365 | csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); | 364 | csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); |
| 366 | iph->saddr = target->src.u3.ip; | 365 | iph->saddr = target->src.u3.ip; |
| 367 | } else { | 366 | } else { |
| @@ -381,7 +380,7 @@ unsigned int nf_nat_packet(struct nf_conn *ct, | |||
| 381 | unsigned long statusbit; | 380 | unsigned long statusbit; |
| 382 | enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum); | 381 | enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum); |
| 383 | 382 | ||
| 384 | if (mtype == IP_NAT_MANIP_SRC) | 383 | if (mtype == NF_NAT_MANIP_SRC) |
| 385 | statusbit = IPS_SRC_NAT; | 384 | statusbit = IPS_SRC_NAT; |
| 386 | else | 385 | else |
| 387 | statusbit = IPS_DST_NAT; | 386 | statusbit = IPS_DST_NAT; |
| @@ -414,8 +413,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, | |||
| 414 | struct icmphdr icmp; | 413 | struct icmphdr icmp; |
| 415 | struct iphdr ip; | 414 | struct iphdr ip; |
| 416 | } *inside; | 415 | } *inside; |
| 417 | const struct nf_conntrack_l4proto *l4proto; | 416 | struct nf_conntrack_tuple target; |
| 418 | struct nf_conntrack_tuple inner, target; | ||
| 419 | int hdrlen = ip_hdrlen(skb); | 417 | int hdrlen = ip_hdrlen(skb); |
| 420 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | 418 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); |
| 421 | unsigned long statusbit; | 419 | unsigned long statusbit; |
| @@ -447,7 +445,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, | |||
| 447 | return 0; | 445 | return 0; |
| 448 | } | 446 | } |
| 449 | 447 | ||
| 450 | if (manip == IP_NAT_MANIP_SRC) | 448 | if (manip == NF_NAT_MANIP_SRC) |
| 451 | statusbit = IPS_SRC_NAT; | 449 | statusbit = IPS_SRC_NAT; |
| 452 | else | 450 | else |
| 453 | statusbit = IPS_DST_NAT; | 451 | statusbit = IPS_DST_NAT; |
| @@ -463,16 +461,6 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, | |||
| 463 | "dir %s\n", skb, manip, | 461 | "dir %s\n", skb, manip, |
| 464 | dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); | 462 | dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); |
| 465 | 463 | ||
| 466 | /* rcu_read_lock()ed by nf_hook_slow */ | ||
| 467 | l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); | ||
| 468 | |||
| 469 | if (!nf_ct_get_tuple(skb, hdrlen + sizeof(struct icmphdr), | ||
| 470 | (hdrlen + | ||
| 471 | sizeof(struct icmphdr) + inside->ip.ihl * 4), | ||
| 472 | (u_int16_t)AF_INET, inside->ip.protocol, | ||
| 473 | &inner, l3proto, l4proto)) | ||
| 474 | return 0; | ||
| 475 | |||
| 476 | /* Change inner back to look like incoming packet. We do the | 464 | /* Change inner back to look like incoming packet. We do the |
| 477 | opposite manip on this hook to normal, because it might not | 465 | opposite manip on this hook to normal, because it might not |
| 478 | pass all hooks (locally-generated ICMP). Consider incoming | 466 | pass all hooks (locally-generated ICMP). Consider incoming |
| @@ -575,26 +563,6 @@ static struct nf_ct_ext_type nat_extend __read_mostly = { | |||
| 575 | #include <linux/netfilter/nfnetlink.h> | 563 | #include <linux/netfilter/nfnetlink.h> |
| 576 | #include <linux/netfilter/nfnetlink_conntrack.h> | 564 | #include <linux/netfilter/nfnetlink_conntrack.h> |
| 577 | 565 | ||
| 578 | static const struct nf_nat_protocol * | ||
| 579 | nf_nat_proto_find_get(u_int8_t protonum) | ||
| 580 | { | ||
| 581 | const struct nf_nat_protocol *p; | ||
| 582 | |||
| 583 | rcu_read_lock(); | ||
| 584 | p = __nf_nat_proto_find(protonum); | ||
| 585 | if (!try_module_get(p->me)) | ||
| 586 | p = &nf_nat_unknown_protocol; | ||
| 587 | rcu_read_unlock(); | ||
| 588 | |||
| 589 | return p; | ||
| 590 | } | ||
| 591 | |||
| 592 | static void | ||
| 593 | nf_nat_proto_put(const struct nf_nat_protocol *p) | ||
| 594 | { | ||
| 595 | module_put(p->me); | ||
| 596 | } | ||
| 597 | |||
| 598 | static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { | 566 | static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { |
| 599 | [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 }, | 567 | [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 }, |
| 600 | [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 }, | 568 | [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 }, |
| @@ -602,7 +570,7 @@ static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { | |||
| 602 | 570 | ||
| 603 | static int nfnetlink_parse_nat_proto(struct nlattr *attr, | 571 | static int nfnetlink_parse_nat_proto(struct nlattr *attr, |
| 604 | const struct nf_conn *ct, | 572 | const struct nf_conn *ct, |
| 605 | struct nf_nat_range *range) | 573 | struct nf_nat_ipv4_range *range) |
| 606 | { | 574 | { |
| 607 | struct nlattr *tb[CTA_PROTONAT_MAX+1]; | 575 | struct nlattr *tb[CTA_PROTONAT_MAX+1]; |
| 608 | const struct nf_nat_protocol *npt; | 576 | const struct nf_nat_protocol *npt; |
| @@ -612,21 +580,23 @@ static int nfnetlink_parse_nat_proto(struct nlattr *attr, | |||
| 612 | if (err < 0) | 580 | if (err < 0) |
| 613 | return err; | 581 | return err; |
| 614 | 582 | ||
| 615 | npt = nf_nat_proto_find_get(nf_ct_protonum(ct)); | 583 | rcu_read_lock(); |
| 584 | npt = __nf_nat_proto_find(nf_ct_protonum(ct)); | ||
| 616 | if (npt->nlattr_to_range) | 585 | if (npt->nlattr_to_range) |
| 617 | err = npt->nlattr_to_range(tb, range); | 586 | err = npt->nlattr_to_range(tb, range); |
| 618 | nf_nat_proto_put(npt); | 587 | rcu_read_unlock(); |
| 619 | return err; | 588 | return err; |
| 620 | } | 589 | } |
| 621 | 590 | ||
| 622 | static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { | 591 | static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { |
| 623 | [CTA_NAT_MINIP] = { .type = NLA_U32 }, | 592 | [CTA_NAT_MINIP] = { .type = NLA_U32 }, |
| 624 | [CTA_NAT_MAXIP] = { .type = NLA_U32 }, | 593 | [CTA_NAT_MAXIP] = { .type = NLA_U32 }, |
| 594 | [CTA_NAT_PROTO] = { .type = NLA_NESTED }, | ||
| 625 | }; | 595 | }; |
| 626 | 596 | ||
| 627 | static int | 597 | static int |
| 628 | nfnetlink_parse_nat(const struct nlattr *nat, | 598 | nfnetlink_parse_nat(const struct nlattr *nat, |
| 629 | const struct nf_conn *ct, struct nf_nat_range *range) | 599 | const struct nf_conn *ct, struct nf_nat_ipv4_range *range) |
| 630 | { | 600 | { |
| 631 | struct nlattr *tb[CTA_NAT_MAX+1]; | 601 | struct nlattr *tb[CTA_NAT_MAX+1]; |
| 632 | int err; | 602 | int err; |
| @@ -646,7 +616,7 @@ nfnetlink_parse_nat(const struct nlattr *nat, | |||
| 646 | range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]); | 616 | range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]); |
| 647 | 617 | ||
| 648 | if (range->min_ip) | 618 | if (range->min_ip) |
| 649 | range->flags |= IP_NAT_RANGE_MAP_IPS; | 619 | range->flags |= NF_NAT_RANGE_MAP_IPS; |
| 650 | 620 | ||
| 651 | if (!tb[CTA_NAT_PROTO]) | 621 | if (!tb[CTA_NAT_PROTO]) |
| 652 | return 0; | 622 | return 0; |
| @@ -663,7 +633,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, | |||
| 663 | enum nf_nat_manip_type manip, | 633 | enum nf_nat_manip_type manip, |
| 664 | const struct nlattr *attr) | 634 | const struct nlattr *attr) |
| 665 | { | 635 | { |
| 666 | struct nf_nat_range range; | 636 | struct nf_nat_ipv4_range range; |
| 667 | 637 | ||
| 668 | if (nfnetlink_parse_nat(attr, ct, &range) < 0) | 638 | if (nfnetlink_parse_nat(attr, ct, &range) < 0) |
| 669 | return -EINVAL; | 639 | return -EINVAL; |
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index b9a1136addbd..dc1dd912baf4 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c | |||
| @@ -398,7 +398,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, | |||
| 398 | static void ip_nat_q931_expect(struct nf_conn *new, | 398 | static void ip_nat_q931_expect(struct nf_conn *new, |
| 399 | struct nf_conntrack_expect *this) | 399 | struct nf_conntrack_expect *this) |
| 400 | { | 400 | { |
| 401 | struct nf_nat_range range; | 401 | struct nf_nat_ipv4_range range; |
| 402 | 402 | ||
| 403 | if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */ | 403 | if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */ |
| 404 | nf_nat_follow_master(new, this); | 404 | nf_nat_follow_master(new, this); |
| @@ -409,16 +409,16 @@ static void ip_nat_q931_expect(struct nf_conn *new, | |||
| 409 | BUG_ON(new->status & IPS_NAT_DONE_MASK); | 409 | BUG_ON(new->status & IPS_NAT_DONE_MASK); |
| 410 | 410 | ||
| 411 | /* Change src to where master sends to */ | 411 | /* Change src to where master sends to */ |
| 412 | range.flags = IP_NAT_RANGE_MAP_IPS; | 412 | range.flags = NF_NAT_RANGE_MAP_IPS; |
| 413 | range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip; | 413 | range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip; |
| 414 | nf_nat_setup_info(new, &range, IP_NAT_MANIP_SRC); | 414 | nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC); |
| 415 | 415 | ||
| 416 | /* For DST manip, map port here to where it's expected. */ | 416 | /* For DST manip, map port here to where it's expected. */ |
| 417 | range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); | 417 | range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); |
| 418 | range.min = range.max = this->saved_proto; | 418 | range.min = range.max = this->saved_proto; |
| 419 | range.min_ip = range.max_ip = | 419 | range.min_ip = range.max_ip = |
| 420 | new->master->tuplehash[!this->dir].tuple.src.u3.ip; | 420 | new->master->tuplehash[!this->dir].tuple.src.u3.ip; |
| 421 | nf_nat_setup_info(new, &range, IP_NAT_MANIP_DST); | 421 | nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST); |
| 422 | } | 422 | } |
| 423 | 423 | ||
| 424 | /****************************************************************************/ | 424 | /****************************************************************************/ |
| @@ -496,21 +496,21 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, | |||
| 496 | static void ip_nat_callforwarding_expect(struct nf_conn *new, | 496 | static void ip_nat_callforwarding_expect(struct nf_conn *new, |
| 497 | struct nf_conntrack_expect *this) | 497 | struct nf_conntrack_expect *this) |
| 498 | { | 498 | { |
| 499 | struct nf_nat_range range; | 499 | struct nf_nat_ipv4_range range; |
| 500 | 500 | ||
| 501 | /* This must be a fresh one. */ | 501 | /* This must be a fresh one. */ |
| 502 | BUG_ON(new->status & IPS_NAT_DONE_MASK); | 502 | BUG_ON(new->status & IPS_NAT_DONE_MASK); |
| 503 | 503 | ||
| 504 | /* Change src to where master sends to */ | 504 | /* Change src to where master sends to */ |
| 505 | range.flags = IP_NAT_RANGE_MAP_IPS; | 505 | range.flags = NF_NAT_RANGE_MAP_IPS; |
| 506 | range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip; | 506 | range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip; |
| 507 | nf_nat_setup_info(new, &range, IP_NAT_MANIP_SRC); | 507 | nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC); |
| 508 | 508 | ||
| 509 | /* For DST manip, map port here to where it's expected. */ | 509 | /* For DST manip, map port here to where it's expected. */ |
| 510 | range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); | 510 | range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); |
| 511 | range.min = range.max = this->saved_proto; | 511 | range.min = range.max = this->saved_proto; |
| 512 | range.min_ip = range.max_ip = this->saved_ip; | 512 | range.min_ip = range.max_ip = this->saved_ip; |
| 513 | nf_nat_setup_info(new, &range, IP_NAT_MANIP_DST); | 513 | nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST); |
| 514 | } | 514 | } |
| 515 | 515 | ||
| 516 | /****************************************************************************/ | 516 | /****************************************************************************/ |
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index ebc5f8894f99..af65958f6308 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c | |||
| @@ -253,12 +253,6 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb, | |||
| 253 | struct udphdr *udph; | 253 | struct udphdr *udph; |
| 254 | int datalen, oldlen; | 254 | int datalen, oldlen; |
| 255 | 255 | ||
| 256 | /* UDP helpers might accidentally mangle the wrong packet */ | ||
| 257 | iph = ip_hdr(skb); | ||
| 258 | if (skb->len < iph->ihl*4 + sizeof(*udph) + | ||
| 259 | match_offset + match_len) | ||
| 260 | return 0; | ||
| 261 | |||
| 262 | if (!skb_make_writable(skb, skb->len)) | 256 | if (!skb_make_writable(skb, skb->len)) |
| 263 | return 0; | 257 | return 0; |
| 264 | 258 | ||
| @@ -430,22 +424,22 @@ nf_nat_seq_adjust(struct sk_buff *skb, | |||
| 430 | void nf_nat_follow_master(struct nf_conn *ct, | 424 | void nf_nat_follow_master(struct nf_conn *ct, |
| 431 | struct nf_conntrack_expect *exp) | 425 | struct nf_conntrack_expect *exp) |
| 432 | { | 426 | { |
| 433 | struct nf_nat_range range; | 427 | struct nf_nat_ipv4_range range; |
| 434 | 428 | ||
| 435 | /* This must be a fresh one. */ | 429 | /* This must be a fresh one. */ |
| 436 | BUG_ON(ct->status & IPS_NAT_DONE_MASK); | 430 | BUG_ON(ct->status & IPS_NAT_DONE_MASK); |
| 437 | 431 | ||
| 438 | /* Change src to where master sends to */ | 432 | /* Change src to where master sends to */ |
| 439 | range.flags = IP_NAT_RANGE_MAP_IPS; | 433 | range.flags = NF_NAT_RANGE_MAP_IPS; |
| 440 | range.min_ip = range.max_ip | 434 | range.min_ip = range.max_ip |
| 441 | = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; | 435 | = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; |
| 442 | nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC); | 436 | nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); |
| 443 | 437 | ||
| 444 | /* For DST manip, map port here to where it's expected. */ | 438 | /* For DST manip, map port here to where it's expected. */ |
| 445 | range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); | 439 | range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); |
| 446 | range.min = range.max = exp->saved_proto; | 440 | range.min = range.max = exp->saved_proto; |
| 447 | range.min_ip = range.max_ip | 441 | range.min_ip = range.max_ip |
| 448 | = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip; | 442 | = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip; |
| 449 | nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST); | 443 | nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); |
| 450 | } | 444 | } |
| 451 | EXPORT_SYMBOL(nf_nat_follow_master); | 445 | EXPORT_SYMBOL(nf_nat_follow_master); |
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 3e8284ba46b8..c273d58980ae 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c | |||
| @@ -47,7 +47,7 @@ static void pptp_nat_expected(struct nf_conn *ct, | |||
| 47 | struct nf_conntrack_tuple t; | 47 | struct nf_conntrack_tuple t; |
| 48 | const struct nf_ct_pptp_master *ct_pptp_info; | 48 | const struct nf_ct_pptp_master *ct_pptp_info; |
| 49 | const struct nf_nat_pptp *nat_pptp_info; | 49 | const struct nf_nat_pptp *nat_pptp_info; |
| 50 | struct nf_nat_range range; | 50 | struct nf_nat_ipv4_range range; |
| 51 | 51 | ||
| 52 | ct_pptp_info = &nfct_help(master)->help.ct_pptp_info; | 52 | ct_pptp_info = &nfct_help(master)->help.ct_pptp_info; |
| 53 | nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info; | 53 | nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info; |
| @@ -88,24 +88,24 @@ static void pptp_nat_expected(struct nf_conn *ct, | |||
| 88 | BUG_ON(ct->status & IPS_NAT_DONE_MASK); | 88 | BUG_ON(ct->status & IPS_NAT_DONE_MASK); |
| 89 | 89 | ||
| 90 | /* Change src to where master sends to */ | 90 | /* Change src to where master sends to */ |
| 91 | range.flags = IP_NAT_RANGE_MAP_IPS; | 91 | range.flags = NF_NAT_RANGE_MAP_IPS; |
| 92 | range.min_ip = range.max_ip | 92 | range.min_ip = range.max_ip |
| 93 | = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; | 93 | = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; |
| 94 | if (exp->dir == IP_CT_DIR_ORIGINAL) { | 94 | if (exp->dir == IP_CT_DIR_ORIGINAL) { |
| 95 | range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED; | 95 | range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; |
| 96 | range.min = range.max = exp->saved_proto; | 96 | range.min = range.max = exp->saved_proto; |
| 97 | } | 97 | } |
| 98 | nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC); | 98 | nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); |
| 99 | 99 | ||
| 100 | /* For DST manip, map port here to where it's expected. */ | 100 | /* For DST manip, map port here to where it's expected. */ |
| 101 | range.flags = IP_NAT_RANGE_MAP_IPS; | 101 | range.flags = NF_NAT_RANGE_MAP_IPS; |
| 102 | range.min_ip = range.max_ip | 102 | range.min_ip = range.max_ip |
| 103 | = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip; | 103 | = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip; |
| 104 | if (exp->dir == IP_CT_DIR_REPLY) { | 104 | if (exp->dir == IP_CT_DIR_REPLY) { |
| 105 | range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED; | 105 | range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; |
| 106 | range.min = range.max = exp->saved_proto; | 106 | range.min = range.max = exp->saved_proto; |
| 107 | } | 107 | } |
| 108 | nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST); | 108 | nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); |
| 109 | } | 109 | } |
| 110 | 110 | ||
| 111 | /* outbound packets == from PNS to PAC */ | 111 | /* outbound packets == from PNS to PAC */ |
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c index a3d997618602..9993bc93e102 100644 --- a/net/ipv4/netfilter/nf_nat_proto_common.c +++ b/net/ipv4/netfilter/nf_nat_proto_common.c | |||
| @@ -26,7 +26,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple, | |||
| 26 | { | 26 | { |
| 27 | __be16 port; | 27 | __be16 port; |
| 28 | 28 | ||
| 29 | if (maniptype == IP_NAT_MANIP_SRC) | 29 | if (maniptype == NF_NAT_MANIP_SRC) |
| 30 | port = tuple->src.u.all; | 30 | port = tuple->src.u.all; |
| 31 | else | 31 | else |
| 32 | port = tuple->dst.u.all; | 32 | port = tuple->dst.u.all; |
| @@ -37,7 +37,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple, | |||
| 37 | EXPORT_SYMBOL_GPL(nf_nat_proto_in_range); | 37 | EXPORT_SYMBOL_GPL(nf_nat_proto_in_range); |
| 38 | 38 | ||
| 39 | void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, | 39 | void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, |
| 40 | const struct nf_nat_range *range, | 40 | const struct nf_nat_ipv4_range *range, |
| 41 | enum nf_nat_manip_type maniptype, | 41 | enum nf_nat_manip_type maniptype, |
| 42 | const struct nf_conn *ct, | 42 | const struct nf_conn *ct, |
| 43 | u_int16_t *rover) | 43 | u_int16_t *rover) |
| @@ -46,15 +46,15 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
| 46 | __be16 *portptr; | 46 | __be16 *portptr; |
| 47 | u_int16_t off; | 47 | u_int16_t off; |
| 48 | 48 | ||
| 49 | if (maniptype == IP_NAT_MANIP_SRC) | 49 | if (maniptype == NF_NAT_MANIP_SRC) |
| 50 | portptr = &tuple->src.u.all; | 50 | portptr = &tuple->src.u.all; |
| 51 | else | 51 | else |
| 52 | portptr = &tuple->dst.u.all; | 52 | portptr = &tuple->dst.u.all; |
| 53 | 53 | ||
| 54 | /* If no range specified... */ | 54 | /* If no range specified... */ |
| 55 | if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { | 55 | if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { |
| 56 | /* If it's dst rewrite, can't change port */ | 56 | /* If it's dst rewrite, can't change port */ |
| 57 | if (maniptype == IP_NAT_MANIP_DST) | 57 | if (maniptype == NF_NAT_MANIP_DST) |
| 58 | return; | 58 | return; |
| 59 | 59 | ||
| 60 | if (ntohs(*portptr) < 1024) { | 60 | if (ntohs(*portptr) < 1024) { |
| @@ -75,9 +75,9 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
| 75 | range_size = ntohs(range->max.all) - min + 1; | 75 | range_size = ntohs(range->max.all) - min + 1; |
| 76 | } | 76 | } |
| 77 | 77 | ||
| 78 | if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) | 78 | if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) |
| 79 | off = secure_ipv4_port_ephemeral(tuple->src.u3.ip, tuple->dst.u3.ip, | 79 | off = secure_ipv4_port_ephemeral(tuple->src.u3.ip, tuple->dst.u3.ip, |
| 80 | maniptype == IP_NAT_MANIP_SRC | 80 | maniptype == NF_NAT_MANIP_SRC |
| 81 | ? tuple->dst.u.all | 81 | ? tuple->dst.u.all |
| 82 | : tuple->src.u.all); | 82 | : tuple->src.u.all); |
| 83 | else | 83 | else |
| @@ -87,7 +87,7 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
| 87 | *portptr = htons(min + off % range_size); | 87 | *portptr = htons(min + off % range_size); |
| 88 | if (++i != range_size && nf_nat_used_tuple(tuple, ct)) | 88 | if (++i != range_size && nf_nat_used_tuple(tuple, ct)) |
| 89 | continue; | 89 | continue; |
| 90 | if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) | 90 | if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) |
| 91 | *rover = off; | 91 | *rover = off; |
| 92 | return; | 92 | return; |
| 93 | } | 93 | } |
| @@ -96,31 +96,19 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
| 96 | EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple); | 96 | EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple); |
| 97 | 97 | ||
| 98 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | 98 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) |
| 99 | int nf_nat_proto_range_to_nlattr(struct sk_buff *skb, | ||
| 100 | const struct nf_nat_range *range) | ||
| 101 | { | ||
| 102 | NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MIN, range->min.all); | ||
| 103 | NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MAX, range->max.all); | ||
| 104 | return 0; | ||
| 105 | |||
| 106 | nla_put_failure: | ||
| 107 | return -1; | ||
| 108 | } | ||
| 109 | EXPORT_SYMBOL_GPL(nf_nat_proto_nlattr_to_range); | ||
| 110 | |||
| 111 | int nf_nat_proto_nlattr_to_range(struct nlattr *tb[], | 99 | int nf_nat_proto_nlattr_to_range(struct nlattr *tb[], |
| 112 | struct nf_nat_range *range) | 100 | struct nf_nat_ipv4_range *range) |
| 113 | { | 101 | { |
| 114 | if (tb[CTA_PROTONAT_PORT_MIN]) { | 102 | if (tb[CTA_PROTONAT_PORT_MIN]) { |
| 115 | range->min.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]); | 103 | range->min.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]); |
| 116 | range->max.all = range->min.tcp.port; | 104 | range->max.all = range->min.tcp.port; |
| 117 | range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED; | 105 | range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; |
| 118 | } | 106 | } |
| 119 | if (tb[CTA_PROTONAT_PORT_MAX]) { | 107 | if (tb[CTA_PROTONAT_PORT_MAX]) { |
| 120 | range->max.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]); | 108 | range->max.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]); |
| 121 | range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED; | 109 | range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; |
| 122 | } | 110 | } |
| 123 | return 0; | 111 | return 0; |
| 124 | } | 112 | } |
| 125 | EXPORT_SYMBOL_GPL(nf_nat_proto_range_to_nlattr); | 113 | EXPORT_SYMBOL_GPL(nf_nat_proto_nlattr_to_range); |
| 126 | #endif | 114 | #endif |
diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c index 570faf2667b2..3f67138d187c 100644 --- a/net/ipv4/netfilter/nf_nat_proto_dccp.c +++ b/net/ipv4/netfilter/nf_nat_proto_dccp.c | |||
| @@ -24,7 +24,7 @@ static u_int16_t dccp_port_rover; | |||
| 24 | 24 | ||
| 25 | static void | 25 | static void |
| 26 | dccp_unique_tuple(struct nf_conntrack_tuple *tuple, | 26 | dccp_unique_tuple(struct nf_conntrack_tuple *tuple, |
| 27 | const struct nf_nat_range *range, | 27 | const struct nf_nat_ipv4_range *range, |
| 28 | enum nf_nat_manip_type maniptype, | 28 | enum nf_nat_manip_type maniptype, |
| 29 | const struct nf_conn *ct) | 29 | const struct nf_conn *ct) |
| 30 | { | 30 | { |
| @@ -54,7 +54,7 @@ dccp_manip_pkt(struct sk_buff *skb, | |||
| 54 | iph = (struct iphdr *)(skb->data + iphdroff); | 54 | iph = (struct iphdr *)(skb->data + iphdroff); |
| 55 | hdr = (struct dccp_hdr *)(skb->data + hdroff); | 55 | hdr = (struct dccp_hdr *)(skb->data + hdroff); |
| 56 | 56 | ||
| 57 | if (maniptype == IP_NAT_MANIP_SRC) { | 57 | if (maniptype == NF_NAT_MANIP_SRC) { |
| 58 | oldip = iph->saddr; | 58 | oldip = iph->saddr; |
| 59 | newip = tuple->src.u3.ip; | 59 | newip = tuple->src.u3.ip; |
| 60 | newport = tuple->src.u.dccp.port; | 60 | newport = tuple->src.u.dccp.port; |
| @@ -80,12 +80,10 @@ dccp_manip_pkt(struct sk_buff *skb, | |||
| 80 | 80 | ||
| 81 | static const struct nf_nat_protocol nf_nat_protocol_dccp = { | 81 | static const struct nf_nat_protocol nf_nat_protocol_dccp = { |
| 82 | .protonum = IPPROTO_DCCP, | 82 | .protonum = IPPROTO_DCCP, |
| 83 | .me = THIS_MODULE, | ||
| 84 | .manip_pkt = dccp_manip_pkt, | 83 | .manip_pkt = dccp_manip_pkt, |
| 85 | .in_range = nf_nat_proto_in_range, | 84 | .in_range = nf_nat_proto_in_range, |
| 86 | .unique_tuple = dccp_unique_tuple, | 85 | .unique_tuple = dccp_unique_tuple, |
| 87 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | 86 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) |
| 88 | .range_to_nlattr = nf_nat_proto_range_to_nlattr, | ||
| 89 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, | 87 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, |
| 90 | #endif | 88 | #endif |
| 91 | }; | 89 | }; |
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index bc8d83a31c73..46ba0b9ab985 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c | |||
| @@ -39,7 +39,7 @@ MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); | |||
| 39 | /* generate unique tuple ... */ | 39 | /* generate unique tuple ... */ |
| 40 | static void | 40 | static void |
| 41 | gre_unique_tuple(struct nf_conntrack_tuple *tuple, | 41 | gre_unique_tuple(struct nf_conntrack_tuple *tuple, |
| 42 | const struct nf_nat_range *range, | 42 | const struct nf_nat_ipv4_range *range, |
| 43 | enum nf_nat_manip_type maniptype, | 43 | enum nf_nat_manip_type maniptype, |
| 44 | const struct nf_conn *ct) | 44 | const struct nf_conn *ct) |
| 45 | { | 45 | { |
| @@ -52,12 +52,12 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
| 52 | if (!ct->master) | 52 | if (!ct->master) |
| 53 | return; | 53 | return; |
| 54 | 54 | ||
| 55 | if (maniptype == IP_NAT_MANIP_SRC) | 55 | if (maniptype == NF_NAT_MANIP_SRC) |
| 56 | keyptr = &tuple->src.u.gre.key; | 56 | keyptr = &tuple->src.u.gre.key; |
| 57 | else | 57 | else |
| 58 | keyptr = &tuple->dst.u.gre.key; | 58 | keyptr = &tuple->dst.u.gre.key; |
| 59 | 59 | ||
| 60 | if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { | 60 | if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { |
| 61 | pr_debug("%p: NATing GRE PPTP\n", ct); | 61 | pr_debug("%p: NATing GRE PPTP\n", ct); |
| 62 | min = 1; | 62 | min = 1; |
| 63 | range_size = 0xffff; | 63 | range_size = 0xffff; |
| @@ -99,7 +99,7 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, | |||
| 99 | 99 | ||
| 100 | /* we only have destination manip of a packet, since 'source key' | 100 | /* we only have destination manip of a packet, since 'source key' |
| 101 | * is not present in the packet itself */ | 101 | * is not present in the packet itself */ |
| 102 | if (maniptype != IP_NAT_MANIP_DST) | 102 | if (maniptype != NF_NAT_MANIP_DST) |
| 103 | return true; | 103 | return true; |
| 104 | switch (greh->version) { | 104 | switch (greh->version) { |
| 105 | case GRE_VERSION_1701: | 105 | case GRE_VERSION_1701: |
| @@ -119,12 +119,10 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, | |||
| 119 | 119 | ||
| 120 | static const struct nf_nat_protocol gre = { | 120 | static const struct nf_nat_protocol gre = { |
| 121 | .protonum = IPPROTO_GRE, | 121 | .protonum = IPPROTO_GRE, |
| 122 | .me = THIS_MODULE, | ||
| 123 | .manip_pkt = gre_manip_pkt, | 122 | .manip_pkt = gre_manip_pkt, |
| 124 | .in_range = nf_nat_proto_in_range, | 123 | .in_range = nf_nat_proto_in_range, |
| 125 | .unique_tuple = gre_unique_tuple, | 124 | .unique_tuple = gre_unique_tuple, |
| 126 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | 125 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) |
| 127 | .range_to_nlattr = nf_nat_proto_range_to_nlattr, | ||
| 128 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, | 126 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, |
| 129 | #endif | 127 | #endif |
| 130 | }; | 128 | }; |
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c index 9f4dc1235dc7..b35172851bae 100644 --- a/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c | |||
| @@ -30,7 +30,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple, | |||
| 30 | 30 | ||
| 31 | static void | 31 | static void |
| 32 | icmp_unique_tuple(struct nf_conntrack_tuple *tuple, | 32 | icmp_unique_tuple(struct nf_conntrack_tuple *tuple, |
| 33 | const struct nf_nat_range *range, | 33 | const struct nf_nat_ipv4_range *range, |
| 34 | enum nf_nat_manip_type maniptype, | 34 | enum nf_nat_manip_type maniptype, |
| 35 | const struct nf_conn *ct) | 35 | const struct nf_conn *ct) |
| 36 | { | 36 | { |
| @@ -40,7 +40,7 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
| 40 | 40 | ||
| 41 | range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1; | 41 | range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1; |
| 42 | /* If no range specified... */ | 42 | /* If no range specified... */ |
| 43 | if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) | 43 | if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) |
| 44 | range_size = 0xFFFF; | 44 | range_size = 0xFFFF; |
| 45 | 45 | ||
| 46 | for (i = 0; ; ++id) { | 46 | for (i = 0; ; ++id) { |
| @@ -74,12 +74,10 @@ icmp_manip_pkt(struct sk_buff *skb, | |||
| 74 | 74 | ||
| 75 | const struct nf_nat_protocol nf_nat_protocol_icmp = { | 75 | const struct nf_nat_protocol nf_nat_protocol_icmp = { |
| 76 | .protonum = IPPROTO_ICMP, | 76 | .protonum = IPPROTO_ICMP, |
| 77 | .me = THIS_MODULE, | ||
| 78 | .manip_pkt = icmp_manip_pkt, | 77 | .manip_pkt = icmp_manip_pkt, |
| 79 | .in_range = icmp_in_range, | 78 | .in_range = icmp_in_range, |
| 80 | .unique_tuple = icmp_unique_tuple, | 79 | .unique_tuple = icmp_unique_tuple, |
| 81 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | 80 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) |
| 82 | .range_to_nlattr = nf_nat_proto_range_to_nlattr, | ||
| 83 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, | 81 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, |
| 84 | #endif | 82 | #endif |
| 85 | }; | 83 | }; |
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c index bd5a80a62a5b..3cce9b6c1c29 100644 --- a/net/ipv4/netfilter/nf_nat_proto_sctp.c +++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c | |||
| @@ -19,7 +19,7 @@ static u_int16_t nf_sctp_port_rover; | |||
| 19 | 19 | ||
| 20 | static void | 20 | static void |
| 21 | sctp_unique_tuple(struct nf_conntrack_tuple *tuple, | 21 | sctp_unique_tuple(struct nf_conntrack_tuple *tuple, |
| 22 | const struct nf_nat_range *range, | 22 | const struct nf_nat_ipv4_range *range, |
| 23 | enum nf_nat_manip_type maniptype, | 23 | enum nf_nat_manip_type maniptype, |
| 24 | const struct nf_conn *ct) | 24 | const struct nf_conn *ct) |
| 25 | { | 25 | { |
| @@ -46,7 +46,7 @@ sctp_manip_pkt(struct sk_buff *skb, | |||
| 46 | iph = (struct iphdr *)(skb->data + iphdroff); | 46 | iph = (struct iphdr *)(skb->data + iphdroff); |
| 47 | hdr = (struct sctphdr *)(skb->data + hdroff); | 47 | hdr = (struct sctphdr *)(skb->data + hdroff); |
| 48 | 48 | ||
| 49 | if (maniptype == IP_NAT_MANIP_SRC) { | 49 | if (maniptype == NF_NAT_MANIP_SRC) { |
| 50 | /* Get rid of src ip and src pt */ | 50 | /* Get rid of src ip and src pt */ |
| 51 | oldip = iph->saddr; | 51 | oldip = iph->saddr; |
| 52 | newip = tuple->src.u3.ip; | 52 | newip = tuple->src.u3.ip; |
| @@ -70,12 +70,10 @@ sctp_manip_pkt(struct sk_buff *skb, | |||
| 70 | 70 | ||
| 71 | static const struct nf_nat_protocol nf_nat_protocol_sctp = { | 71 | static const struct nf_nat_protocol nf_nat_protocol_sctp = { |
| 72 | .protonum = IPPROTO_SCTP, | 72 | .protonum = IPPROTO_SCTP, |
| 73 | .me = THIS_MODULE, | ||
| 74 | .manip_pkt = sctp_manip_pkt, | 73 | .manip_pkt = sctp_manip_pkt, |
| 75 | .in_range = nf_nat_proto_in_range, | 74 | .in_range = nf_nat_proto_in_range, |
| 76 | .unique_tuple = sctp_unique_tuple, | 75 | .unique_tuple = sctp_unique_tuple, |
| 77 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | 76 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) |
| 78 | .range_to_nlattr = nf_nat_proto_range_to_nlattr, | ||
| 79 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, | 77 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, |
| 80 | #endif | 78 | #endif |
| 81 | }; | 79 | }; |
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c index 0d67bb80130f..9fb4b4e72bbf 100644 --- a/net/ipv4/netfilter/nf_nat_proto_tcp.c +++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c | |||
| @@ -23,7 +23,7 @@ static u_int16_t tcp_port_rover; | |||
| 23 | 23 | ||
| 24 | static void | 24 | static void |
| 25 | tcp_unique_tuple(struct nf_conntrack_tuple *tuple, | 25 | tcp_unique_tuple(struct nf_conntrack_tuple *tuple, |
| 26 | const struct nf_nat_range *range, | 26 | const struct nf_nat_ipv4_range *range, |
| 27 | enum nf_nat_manip_type maniptype, | 27 | enum nf_nat_manip_type maniptype, |
| 28 | const struct nf_conn *ct) | 28 | const struct nf_conn *ct) |
| 29 | { | 29 | { |
| @@ -55,7 +55,7 @@ tcp_manip_pkt(struct sk_buff *skb, | |||
| 55 | iph = (struct iphdr *)(skb->data + iphdroff); | 55 | iph = (struct iphdr *)(skb->data + iphdroff); |
| 56 | hdr = (struct tcphdr *)(skb->data + hdroff); | 56 | hdr = (struct tcphdr *)(skb->data + hdroff); |
| 57 | 57 | ||
| 58 | if (maniptype == IP_NAT_MANIP_SRC) { | 58 | if (maniptype == NF_NAT_MANIP_SRC) { |
| 59 | /* Get rid of src ip and src pt */ | 59 | /* Get rid of src ip and src pt */ |
| 60 | oldip = iph->saddr; | 60 | oldip = iph->saddr; |
| 61 | newip = tuple->src.u3.ip; | 61 | newip = tuple->src.u3.ip; |
| @@ -82,12 +82,10 @@ tcp_manip_pkt(struct sk_buff *skb, | |||
| 82 | 82 | ||
| 83 | const struct nf_nat_protocol nf_nat_protocol_tcp = { | 83 | const struct nf_nat_protocol nf_nat_protocol_tcp = { |
| 84 | .protonum = IPPROTO_TCP, | 84 | .protonum = IPPROTO_TCP, |
| 85 | .me = THIS_MODULE, | ||
| 86 | .manip_pkt = tcp_manip_pkt, | 85 | .manip_pkt = tcp_manip_pkt, |
| 87 | .in_range = nf_nat_proto_in_range, | 86 | .in_range = nf_nat_proto_in_range, |
| 88 | .unique_tuple = tcp_unique_tuple, | 87 | .unique_tuple = tcp_unique_tuple, |
| 89 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | 88 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) |
| 90 | .range_to_nlattr = nf_nat_proto_range_to_nlattr, | ||
| 91 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, | 89 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, |
| 92 | #endif | 90 | #endif |
| 93 | }; | 91 | }; |
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c index 0b1b8601cba7..9883336e628f 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udp.c +++ b/net/ipv4/netfilter/nf_nat_proto_udp.c | |||
| @@ -22,7 +22,7 @@ static u_int16_t udp_port_rover; | |||
| 22 | 22 | ||
| 23 | static void | 23 | static void |
| 24 | udp_unique_tuple(struct nf_conntrack_tuple *tuple, | 24 | udp_unique_tuple(struct nf_conntrack_tuple *tuple, |
| 25 | const struct nf_nat_range *range, | 25 | const struct nf_nat_ipv4_range *range, |
| 26 | enum nf_nat_manip_type maniptype, | 26 | enum nf_nat_manip_type maniptype, |
| 27 | const struct nf_conn *ct) | 27 | const struct nf_conn *ct) |
| 28 | { | 28 | { |
| @@ -47,7 +47,7 @@ udp_manip_pkt(struct sk_buff *skb, | |||
| 47 | iph = (struct iphdr *)(skb->data + iphdroff); | 47 | iph = (struct iphdr *)(skb->data + iphdroff); |
| 48 | hdr = (struct udphdr *)(skb->data + hdroff); | 48 | hdr = (struct udphdr *)(skb->data + hdroff); |
| 49 | 49 | ||
| 50 | if (maniptype == IP_NAT_MANIP_SRC) { | 50 | if (maniptype == NF_NAT_MANIP_SRC) { |
| 51 | /* Get rid of src ip and src pt */ | 51 | /* Get rid of src ip and src pt */ |
| 52 | oldip = iph->saddr; | 52 | oldip = iph->saddr; |
| 53 | newip = tuple->src.u3.ip; | 53 | newip = tuple->src.u3.ip; |
| @@ -73,12 +73,10 @@ udp_manip_pkt(struct sk_buff *skb, | |||
| 73 | 73 | ||
| 74 | const struct nf_nat_protocol nf_nat_protocol_udp = { | 74 | const struct nf_nat_protocol nf_nat_protocol_udp = { |
| 75 | .protonum = IPPROTO_UDP, | 75 | .protonum = IPPROTO_UDP, |
| 76 | .me = THIS_MODULE, | ||
| 77 | .manip_pkt = udp_manip_pkt, | 76 | .manip_pkt = udp_manip_pkt, |
| 78 | .in_range = nf_nat_proto_in_range, | 77 | .in_range = nf_nat_proto_in_range, |
| 79 | .unique_tuple = udp_unique_tuple, | 78 | .unique_tuple = udp_unique_tuple, |
| 80 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | 79 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) |
| 81 | .range_to_nlattr = nf_nat_proto_range_to_nlattr, | ||
| 82 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, | 80 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, |
| 83 | #endif | 81 | #endif |
| 84 | }; | 82 | }; |
diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c index f83ef23e2ab7..d24d10a7beb2 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udplite.c +++ b/net/ipv4/netfilter/nf_nat_proto_udplite.c | |||
| @@ -21,7 +21,7 @@ static u_int16_t udplite_port_rover; | |||
| 21 | 21 | ||
| 22 | static void | 22 | static void |
| 23 | udplite_unique_tuple(struct nf_conntrack_tuple *tuple, | 23 | udplite_unique_tuple(struct nf_conntrack_tuple *tuple, |
| 24 | const struct nf_nat_range *range, | 24 | const struct nf_nat_ipv4_range *range, |
| 25 | enum nf_nat_manip_type maniptype, | 25 | enum nf_nat_manip_type maniptype, |
| 26 | const struct nf_conn *ct) | 26 | const struct nf_conn *ct) |
| 27 | { | 27 | { |
| @@ -47,7 +47,7 @@ udplite_manip_pkt(struct sk_buff *skb, | |||
| 47 | iph = (struct iphdr *)(skb->data + iphdroff); | 47 | iph = (struct iphdr *)(skb->data + iphdroff); |
| 48 | hdr = (struct udphdr *)(skb->data + hdroff); | 48 | hdr = (struct udphdr *)(skb->data + hdroff); |
| 49 | 49 | ||
| 50 | if (maniptype == IP_NAT_MANIP_SRC) { | 50 | if (maniptype == NF_NAT_MANIP_SRC) { |
| 51 | /* Get rid of src ip and src pt */ | 51 | /* Get rid of src ip and src pt */ |
| 52 | oldip = iph->saddr; | 52 | oldip = iph->saddr; |
| 53 | newip = tuple->src.u3.ip; | 53 | newip = tuple->src.u3.ip; |
| @@ -72,12 +72,10 @@ udplite_manip_pkt(struct sk_buff *skb, | |||
| 72 | 72 | ||
| 73 | static const struct nf_nat_protocol nf_nat_protocol_udplite = { | 73 | static const struct nf_nat_protocol nf_nat_protocol_udplite = { |
| 74 | .protonum = IPPROTO_UDPLITE, | 74 | .protonum = IPPROTO_UDPLITE, |
| 75 | .me = THIS_MODULE, | ||
| 76 | .manip_pkt = udplite_manip_pkt, | 75 | .manip_pkt = udplite_manip_pkt, |
| 77 | .in_range = nf_nat_proto_in_range, | 76 | .in_range = nf_nat_proto_in_range, |
| 78 | .unique_tuple = udplite_unique_tuple, | 77 | .unique_tuple = udplite_unique_tuple, |
| 79 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | 78 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) |
| 80 | .range_to_nlattr = nf_nat_proto_range_to_nlattr, | ||
| 81 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, | 79 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, |
| 82 | #endif | 80 | #endif |
| 83 | }; | 81 | }; |
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c index a50f2bc1c732..e0afe8112b1c 100644 --- a/net/ipv4/netfilter/nf_nat_proto_unknown.c +++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c | |||
| @@ -27,7 +27,7 @@ static bool unknown_in_range(const struct nf_conntrack_tuple *tuple, | |||
| 27 | } | 27 | } |
| 28 | 28 | ||
| 29 | static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple, | 29 | static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple, |
| 30 | const struct nf_nat_range *range, | 30 | const struct nf_nat_ipv4_range *range, |
| 31 | enum nf_nat_manip_type maniptype, | 31 | enum nf_nat_manip_type maniptype, |
| 32 | const struct nf_conn *ct) | 32 | const struct nf_conn *ct) |
| 33 | { | 33 | { |
| @@ -46,7 +46,6 @@ unknown_manip_pkt(struct sk_buff *skb, | |||
| 46 | } | 46 | } |
| 47 | 47 | ||
| 48 | const struct nf_nat_protocol nf_nat_unknown_protocol = { | 48 | const struct nf_nat_protocol nf_nat_unknown_protocol = { |
| 49 | /* .me isn't set: getting a ref to this cannot fail. */ | ||
| 50 | .manip_pkt = unknown_manip_pkt, | 49 | .manip_pkt = unknown_manip_pkt, |
| 51 | .in_range = unknown_in_range, | 50 | .in_range = unknown_in_range, |
| 52 | .unique_tuple = unknown_unique_tuple, | 51 | .unique_tuple = unknown_unique_tuple, |
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 733c9abc1cbd..d2a9dc314e0e 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c | |||
| @@ -44,7 +44,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par) | |||
| 44 | { | 44 | { |
| 45 | struct nf_conn *ct; | 45 | struct nf_conn *ct; |
| 46 | enum ip_conntrack_info ctinfo; | 46 | enum ip_conntrack_info ctinfo; |
| 47 | const struct nf_nat_multi_range_compat *mr = par->targinfo; | 47 | const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; |
| 48 | 48 | ||
| 49 | NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING || | 49 | NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING || |
| 50 | par->hooknum == NF_INET_LOCAL_IN); | 50 | par->hooknum == NF_INET_LOCAL_IN); |
| @@ -56,7 +56,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par) | |||
| 56 | ctinfo == IP_CT_RELATED_REPLY)); | 56 | ctinfo == IP_CT_RELATED_REPLY)); |
| 57 | NF_CT_ASSERT(par->out != NULL); | 57 | NF_CT_ASSERT(par->out != NULL); |
| 58 | 58 | ||
| 59 | return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC); | 59 | return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_SRC); |
| 60 | } | 60 | } |
| 61 | 61 | ||
| 62 | static unsigned int | 62 | static unsigned int |
| @@ -64,7 +64,7 @@ ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par) | |||
| 64 | { | 64 | { |
| 65 | struct nf_conn *ct; | 65 | struct nf_conn *ct; |
| 66 | enum ip_conntrack_info ctinfo; | 66 | enum ip_conntrack_info ctinfo; |
| 67 | const struct nf_nat_multi_range_compat *mr = par->targinfo; | 67 | const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; |
| 68 | 68 | ||
| 69 | NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || | 69 | NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || |
| 70 | par->hooknum == NF_INET_LOCAL_OUT); | 70 | par->hooknum == NF_INET_LOCAL_OUT); |
| @@ -74,12 +74,12 @@ ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par) | |||
| 74 | /* Connection must be valid and new. */ | 74 | /* Connection must be valid and new. */ |
| 75 | NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); | 75 | NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); |
| 76 | 76 | ||
| 77 | return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST); | 77 | return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_DST); |
| 78 | } | 78 | } |
| 79 | 79 | ||
| 80 | static int ipt_snat_checkentry(const struct xt_tgchk_param *par) | 80 | static int ipt_snat_checkentry(const struct xt_tgchk_param *par) |
| 81 | { | 81 | { |
| 82 | const struct nf_nat_multi_range_compat *mr = par->targinfo; | 82 | const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; |
| 83 | 83 | ||
| 84 | /* Must be a valid range */ | 84 | /* Must be a valid range */ |
| 85 | if (mr->rangesize != 1) { | 85 | if (mr->rangesize != 1) { |
| @@ -91,7 +91,7 @@ static int ipt_snat_checkentry(const struct xt_tgchk_param *par) | |||
| 91 | 91 | ||
| 92 | static int ipt_dnat_checkentry(const struct xt_tgchk_param *par) | 92 | static int ipt_dnat_checkentry(const struct xt_tgchk_param *par) |
| 93 | { | 93 | { |
| 94 | const struct nf_nat_multi_range_compat *mr = par->targinfo; | 94 | const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; |
| 95 | 95 | ||
| 96 | /* Must be a valid range */ | 96 | /* Must be a valid range */ |
| 97 | if (mr->rangesize != 1) { | 97 | if (mr->rangesize != 1) { |
| @@ -105,13 +105,13 @@ static unsigned int | |||
| 105 | alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) | 105 | alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) |
| 106 | { | 106 | { |
| 107 | /* Force range to this IP; let proto decide mapping for | 107 | /* Force range to this IP; let proto decide mapping for |
| 108 | per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). | 108 | per-proto parts (hence not NF_NAT_RANGE_PROTO_SPECIFIED). |
| 109 | */ | 109 | */ |
| 110 | struct nf_nat_range range; | 110 | struct nf_nat_ipv4_range range; |
| 111 | 111 | ||
| 112 | range.flags = 0; | 112 | range.flags = 0; |
| 113 | pr_debug("Allocating NULL binding for %p (%pI4)\n", ct, | 113 | pr_debug("Allocating NULL binding for %p (%pI4)\n", ct, |
| 114 | HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC ? | 114 | HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? |
| 115 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip : | 115 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip : |
| 116 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip); | 116 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip); |
| 117 | 117 | ||
| @@ -140,7 +140,7 @@ int nf_nat_rule_find(struct sk_buff *skb, | |||
| 140 | static struct xt_target ipt_snat_reg __read_mostly = { | 140 | static struct xt_target ipt_snat_reg __read_mostly = { |
| 141 | .name = "SNAT", | 141 | .name = "SNAT", |
| 142 | .target = ipt_snat_target, | 142 | .target = ipt_snat_target, |
| 143 | .targetsize = sizeof(struct nf_nat_multi_range_compat), | 143 | .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), |
| 144 | .table = "nat", | 144 | .table = "nat", |
| 145 | .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN), | 145 | .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN), |
| 146 | .checkentry = ipt_snat_checkentry, | 146 | .checkentry = ipt_snat_checkentry, |
| @@ -150,7 +150,7 @@ static struct xt_target ipt_snat_reg __read_mostly = { | |||
| 150 | static struct xt_target ipt_dnat_reg __read_mostly = { | 150 | static struct xt_target ipt_dnat_reg __read_mostly = { |
| 151 | .name = "DNAT", | 151 | .name = "DNAT", |
| 152 | .target = ipt_dnat_target, | 152 | .target = ipt_dnat_target, |
| 153 | .targetsize = sizeof(struct nf_nat_multi_range_compat), | 153 | .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), |
| 154 | .table = "nat", | 154 | .table = "nat", |
| 155 | .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), | 155 | .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), |
| 156 | .checkentry = ipt_dnat_checkentry, | 156 | .checkentry = ipt_dnat_checkentry, |
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 78844d9208f1..d0319f96269f 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c | |||
| @@ -249,25 +249,25 @@ static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off) | |||
| 249 | static void ip_nat_sip_expected(struct nf_conn *ct, | 249 | static void ip_nat_sip_expected(struct nf_conn *ct, |
| 250 | struct nf_conntrack_expect *exp) | 250 | struct nf_conntrack_expect *exp) |
| 251 | { | 251 | { |
| 252 | struct nf_nat_range range; | 252 | struct nf_nat_ipv4_range range; |
| 253 | 253 | ||
| 254 | /* This must be a fresh one. */ | 254 | /* This must be a fresh one. */ |
| 255 | BUG_ON(ct->status & IPS_NAT_DONE_MASK); | 255 | BUG_ON(ct->status & IPS_NAT_DONE_MASK); |
| 256 | 256 | ||
| 257 | /* For DST manip, map port here to where it's expected. */ | 257 | /* For DST manip, map port here to where it's expected. */ |
| 258 | range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); | 258 | range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); |
| 259 | range.min = range.max = exp->saved_proto; | 259 | range.min = range.max = exp->saved_proto; |
| 260 | range.min_ip = range.max_ip = exp->saved_ip; | 260 | range.min_ip = range.max_ip = exp->saved_ip; |
| 261 | nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST); | 261 | nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); |
| 262 | 262 | ||
| 263 | /* Change src to where master sends to, but only if the connection | 263 | /* Change src to where master sends to, but only if the connection |
| 264 | * actually came from the same source. */ | 264 | * actually came from the same source. */ |
| 265 | if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == | 265 | if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == |
| 266 | ct->master->tuplehash[exp->dir].tuple.src.u3.ip) { | 266 | ct->master->tuplehash[exp->dir].tuple.src.u3.ip) { |
| 267 | range.flags = IP_NAT_RANGE_MAP_IPS; | 267 | range.flags = NF_NAT_RANGE_MAP_IPS; |
| 268 | range.min_ip = range.max_ip | 268 | range.min_ip = range.max_ip |
| 269 | = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; | 269 | = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; |
| 270 | nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC); | 270 | nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); |
| 271 | } | 271 | } |
| 272 | } | 272 | } |
| 273 | 273 | ||
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 92900482edea..3828a4229822 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c | |||
| @@ -137,7 +137,7 @@ nf_nat_fn(unsigned int hooknum, | |||
| 137 | return ret; | 137 | return ret; |
| 138 | } else | 138 | } else |
| 139 | pr_debug("Already setup manip %s for ct %p\n", | 139 | pr_debug("Already setup manip %s for ct %p\n", |
| 140 | maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST", | 140 | maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", |
| 141 | ct); | 141 | ct); |
| 142 | break; | 142 | break; |
| 143 | 143 | ||
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 43d4c3b22369..b072386cee21 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c | |||
| @@ -140,13 +140,14 @@ static void ping_v4_unhash(struct sock *sk) | |||
| 140 | write_lock_bh(&ping_table.lock); | 140 | write_lock_bh(&ping_table.lock); |
| 141 | hlist_nulls_del(&sk->sk_nulls_node); | 141 | hlist_nulls_del(&sk->sk_nulls_node); |
| 142 | sock_put(sk); | 142 | sock_put(sk); |
| 143 | isk->inet_num = isk->inet_sport = 0; | 143 | isk->inet_num = 0; |
| 144 | isk->inet_sport = 0; | ||
| 144 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); | 145 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); |
| 145 | write_unlock_bh(&ping_table.lock); | 146 | write_unlock_bh(&ping_table.lock); |
| 146 | } | 147 | } |
| 147 | } | 148 | } |
| 148 | 149 | ||
| 149 | static struct sock *ping_v4_lookup(struct net *net, u32 saddr, u32 daddr, | 150 | static struct sock *ping_v4_lookup(struct net *net, __be32 saddr, __be32 daddr, |
| 150 | u16 ident, int dif) | 151 | u16 ident, int dif) |
| 151 | { | 152 | { |
| 152 | struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident); | 153 | struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident); |
| @@ -154,15 +155,15 @@ static struct sock *ping_v4_lookup(struct net *net, u32 saddr, u32 daddr, | |||
| 154 | struct inet_sock *isk; | 155 | struct inet_sock *isk; |
| 155 | struct hlist_nulls_node *hnode; | 156 | struct hlist_nulls_node *hnode; |
| 156 | 157 | ||
| 157 | pr_debug("try to find: num = %d, daddr = %ld, dif = %d\n", | 158 | pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n", |
| 158 | (int)ident, (unsigned long)daddr, dif); | 159 | (int)ident, &daddr, dif); |
| 159 | read_lock_bh(&ping_table.lock); | 160 | read_lock_bh(&ping_table.lock); |
| 160 | 161 | ||
| 161 | ping_portaddr_for_each_entry(sk, hnode, hslot) { | 162 | ping_portaddr_for_each_entry(sk, hnode, hslot) { |
| 162 | isk = inet_sk(sk); | 163 | isk = inet_sk(sk); |
| 163 | 164 | ||
| 164 | pr_debug("found: %p: num = %d, daddr = %ld, dif = %d\n", sk, | 165 | pr_debug("found: %p: num = %d, daddr = %pI4, dif = %d\n", sk, |
| 165 | (int)isk->inet_num, (unsigned long)isk->inet_rcv_saddr, | 166 | (int)isk->inet_num, &isk->inet_rcv_saddr, |
| 166 | sk->sk_bound_dev_if); | 167 | sk->sk_bound_dev_if); |
| 167 | 168 | ||
| 168 | pr_debug("iterate\n"); | 169 | pr_debug("iterate\n"); |
| @@ -254,7 +255,7 @@ static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
| 254 | sk, addr->sin_addr.s_addr, ntohs(addr->sin_port)); | 255 | sk, addr->sin_addr.s_addr, ntohs(addr->sin_port)); |
| 255 | 256 | ||
| 256 | chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); | 257 | chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); |
| 257 | if (addr->sin_addr.s_addr == INADDR_ANY) | 258 | if (addr->sin_addr.s_addr == htonl(INADDR_ANY)) |
| 258 | chk_addr_ret = RTN_LOCAL; | 259 | chk_addr_ret = RTN_LOCAL; |
| 259 | 260 | ||
| 260 | if ((sysctl_ip_nonlocal_bind == 0 && | 261 | if ((sysctl_ip_nonlocal_bind == 0 && |
| @@ -278,9 +279,9 @@ static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
| 278 | goto out; | 279 | goto out; |
| 279 | } | 280 | } |
| 280 | 281 | ||
| 281 | pr_debug("after bind(): num = %d, daddr = %ld, dif = %d\n", | 282 | pr_debug("after bind(): num = %d, daddr = %pI4, dif = %d\n", |
| 282 | (int)isk->inet_num, | 283 | (int)isk->inet_num, |
| 283 | (unsigned long) isk->inet_rcv_saddr, | 284 | &isk->inet_rcv_saddr, |
| 284 | (int)sk->sk_bound_dev_if); | 285 | (int)sk->sk_bound_dev_if); |
| 285 | 286 | ||
| 286 | err = 0; | 287 | err = 0; |
| @@ -407,7 +408,7 @@ out: | |||
| 407 | struct pingfakehdr { | 408 | struct pingfakehdr { |
| 408 | struct icmphdr icmph; | 409 | struct icmphdr icmph; |
| 409 | struct iovec *iov; | 410 | struct iovec *iov; |
| 410 | u32 wcheck; | 411 | __wsum wcheck; |
| 411 | }; | 412 | }; |
| 412 | 413 | ||
| 413 | static int ping_getfrag(void *from, char * to, | 414 | static int ping_getfrag(void *from, char * to, |
| @@ -459,7 +460,7 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
| 459 | struct rtable *rt = NULL; | 460 | struct rtable *rt = NULL; |
| 460 | struct ip_options_data opt_copy; | 461 | struct ip_options_data opt_copy; |
| 461 | int free = 0; | 462 | int free = 0; |
| 462 | u32 saddr, daddr, faddr; | 463 | __be32 saddr, daddr, faddr; |
| 463 | u8 tos; | 464 | u8 tos; |
| 464 | int err; | 465 | int err; |
| 465 | 466 | ||
| @@ -629,6 +630,7 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
| 629 | 630 | ||
| 630 | pr_debug("ping_recvmsg(sk=%p,sk->num=%u)\n", isk, isk->inet_num); | 631 | pr_debug("ping_recvmsg(sk=%p,sk->num=%u)\n", isk, isk->inet_num); |
| 631 | 632 | ||
| 633 | err = -EOPNOTSUPP; | ||
| 632 | if (flags & MSG_OOB) | 634 | if (flags & MSG_OOB) |
| 633 | goto out; | 635 | goto out; |
| 634 | 636 | ||
| @@ -696,8 +698,8 @@ void ping_rcv(struct sk_buff *skb) | |||
| 696 | struct net *net = dev_net(skb->dev); | 698 | struct net *net = dev_net(skb->dev); |
| 697 | struct iphdr *iph = ip_hdr(skb); | 699 | struct iphdr *iph = ip_hdr(skb); |
| 698 | struct icmphdr *icmph = icmp_hdr(skb); | 700 | struct icmphdr *icmph = icmp_hdr(skb); |
| 699 | u32 saddr = iph->saddr; | 701 | __be32 saddr = iph->saddr; |
| 700 | u32 daddr = iph->daddr; | 702 | __be32 daddr = iph->daddr; |
| 701 | 703 | ||
| 702 | /* We assume the packet has already been checked by icmp_rcv */ | 704 | /* We assume the packet has already been checked by icmp_rcv */ |
| 703 | 705 | ||
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 466ea8bb7a4d..6afc807ee2ad 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
| @@ -56,17 +56,17 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) | |||
| 56 | 56 | ||
| 57 | local_bh_disable(); | 57 | local_bh_disable(); |
| 58 | orphans = percpu_counter_sum_positive(&tcp_orphan_count); | 58 | orphans = percpu_counter_sum_positive(&tcp_orphan_count); |
| 59 | sockets = percpu_counter_sum_positive(&tcp_sockets_allocated); | 59 | sockets = proto_sockets_allocated_sum_positive(&tcp_prot); |
| 60 | local_bh_enable(); | 60 | local_bh_enable(); |
| 61 | 61 | ||
| 62 | socket_seq_show(seq); | 62 | socket_seq_show(seq); |
| 63 | seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n", | 63 | seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n", |
| 64 | sock_prot_inuse_get(net, &tcp_prot), orphans, | 64 | sock_prot_inuse_get(net, &tcp_prot), orphans, |
| 65 | tcp_death_row.tw_count, sockets, | 65 | tcp_death_row.tw_count, sockets, |
| 66 | atomic_long_read(&tcp_memory_allocated)); | 66 | proto_memory_allocated(&tcp_prot)); |
| 67 | seq_printf(seq, "UDP: inuse %d mem %ld\n", | 67 | seq_printf(seq, "UDP: inuse %d mem %ld\n", |
| 68 | sock_prot_inuse_get(net, &udp_prot), | 68 | sock_prot_inuse_get(net, &udp_prot), |
| 69 | atomic_long_read(&udp_memory_allocated)); | 69 | proto_memory_allocated(&udp_prot)); |
| 70 | seq_printf(seq, "UDPLITE: inuse %d\n", | 70 | seq_printf(seq, "UDPLITE: inuse %d\n", |
| 71 | sock_prot_inuse_get(net, &udplite_prot)); | 71 | sock_prot_inuse_get(net, &udplite_prot)); |
| 72 | seq_printf(seq, "RAW: inuse %d\n", | 72 | seq_printf(seq, "RAW: inuse %d\n", |
| @@ -216,7 +216,6 @@ static const struct snmp_mib snmp4_net_list[] = { | |||
| 216 | SNMP_MIB_ITEM("TCPPartialUndo", LINUX_MIB_TCPPARTIALUNDO), | 216 | SNMP_MIB_ITEM("TCPPartialUndo", LINUX_MIB_TCPPARTIALUNDO), |
| 217 | SNMP_MIB_ITEM("TCPDSACKUndo", LINUX_MIB_TCPDSACKUNDO), | 217 | SNMP_MIB_ITEM("TCPDSACKUndo", LINUX_MIB_TCPDSACKUNDO), |
| 218 | SNMP_MIB_ITEM("TCPLossUndo", LINUX_MIB_TCPLOSSUNDO), | 218 | SNMP_MIB_ITEM("TCPLossUndo", LINUX_MIB_TCPLOSSUNDO), |
| 219 | SNMP_MIB_ITEM("TCPLoss", LINUX_MIB_TCPLOSS), | ||
| 220 | SNMP_MIB_ITEM("TCPLostRetransmit", LINUX_MIB_TCPLOSTRETRANSMIT), | 219 | SNMP_MIB_ITEM("TCPLostRetransmit", LINUX_MIB_TCPLOSTRETRANSMIT), |
| 221 | SNMP_MIB_ITEM("TCPRenoFailures", LINUX_MIB_TCPRENOFAILURES), | 220 | SNMP_MIB_ITEM("TCPRenoFailures", LINUX_MIB_TCPRENOFAILURES), |
| 222 | SNMP_MIB_ITEM("TCPSackFailures", LINUX_MIB_TCPSACKFAILURES), | 221 | SNMP_MIB_ITEM("TCPSackFailures", LINUX_MIB_TCPSACKFAILURES), |
| @@ -288,7 +287,7 @@ static void icmpmsg_put(struct seq_file *seq) | |||
| 288 | 287 | ||
| 289 | count = 0; | 288 | count = 0; |
| 290 | for (i = 0; i < ICMPMSG_MIB_MAX; i++) { | 289 | for (i = 0; i < ICMPMSG_MIB_MAX; i++) { |
| 291 | val = snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, i); | 290 | val = atomic_long_read(&net->mib.icmpmsg_statistics->mibs[i]); |
| 292 | if (val) { | 291 | if (val) { |
| 293 | type[count] = i; | 292 | type[count] = i; |
| 294 | vals[count++] = val; | 293 | vals[count++] = val; |
| @@ -307,6 +306,7 @@ static void icmp_put(struct seq_file *seq) | |||
| 307 | { | 306 | { |
| 308 | int i; | 307 | int i; |
| 309 | struct net *net = seq->private; | 308 | struct net *net = seq->private; |
| 309 | atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs; | ||
| 310 | 310 | ||
| 311 | seq_puts(seq, "\nIcmp: InMsgs InErrors"); | 311 | seq_puts(seq, "\nIcmp: InMsgs InErrors"); |
| 312 | for (i=0; icmpmibmap[i].name != NULL; i++) | 312 | for (i=0; icmpmibmap[i].name != NULL; i++) |
| @@ -319,15 +319,13 @@ static void icmp_put(struct seq_file *seq) | |||
| 319 | snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS)); | 319 | snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS)); |
| 320 | for (i=0; icmpmibmap[i].name != NULL; i++) | 320 | for (i=0; icmpmibmap[i].name != NULL; i++) |
| 321 | seq_printf(seq, " %lu", | 321 | seq_printf(seq, " %lu", |
| 322 | snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, | 322 | atomic_long_read(ptr + icmpmibmap[i].index)); |
| 323 | icmpmibmap[i].index)); | ||
| 324 | seq_printf(seq, " %lu %lu", | 323 | seq_printf(seq, " %lu %lu", |
| 325 | snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), | 324 | snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), |
| 326 | snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); | 325 | snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); |
| 327 | for (i=0; icmpmibmap[i].name != NULL; i++) | 326 | for (i=0; icmpmibmap[i].name != NULL; i++) |
| 328 | seq_printf(seq, " %lu", | 327 | seq_printf(seq, " %lu", |
| 329 | snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, | 328 | atomic_long_read(ptr + (icmpmibmap[i].index | 0x100))); |
| 330 | icmpmibmap[i].index | 0x100)); | ||
| 331 | } | 329 | } |
| 332 | 330 | ||
| 333 | /* | 331 | /* |
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 007e2eb769d3..3ccda5ae8a27 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
| @@ -292,7 +292,8 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb) | |||
| 292 | { | 292 | { |
| 293 | /* Charge it to the socket. */ | 293 | /* Charge it to the socket. */ |
| 294 | 294 | ||
| 295 | if (ip_queue_rcv_skb(sk, skb) < 0) { | 295 | ipv4_pktinfo_prepare(skb); |
| 296 | if (sock_queue_rcv_skb(sk, skb) < 0) { | ||
| 296 | kfree_skb(skb); | 297 | kfree_skb(skb); |
| 297 | return NET_RX_DROP; | 298 | return NET_RX_DROP; |
| 298 | } | 299 | } |
| @@ -327,6 +328,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, | |||
| 327 | unsigned int iphlen; | 328 | unsigned int iphlen; |
| 328 | int err; | 329 | int err; |
| 329 | struct rtable *rt = *rtp; | 330 | struct rtable *rt = *rtp; |
| 331 | int hlen, tlen; | ||
| 330 | 332 | ||
| 331 | if (length > rt->dst.dev->mtu) { | 333 | if (length > rt->dst.dev->mtu) { |
| 332 | ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, | 334 | ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, |
| @@ -336,12 +338,14 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, | |||
| 336 | if (flags&MSG_PROBE) | 338 | if (flags&MSG_PROBE) |
| 337 | goto out; | 339 | goto out; |
| 338 | 340 | ||
| 341 | hlen = LL_RESERVED_SPACE(rt->dst.dev); | ||
| 342 | tlen = rt->dst.dev->needed_tailroom; | ||
| 339 | skb = sock_alloc_send_skb(sk, | 343 | skb = sock_alloc_send_skb(sk, |
| 340 | length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15, | 344 | length + hlen + tlen + 15, |
| 341 | flags & MSG_DONTWAIT, &err); | 345 | flags & MSG_DONTWAIT, &err); |
| 342 | if (skb == NULL) | 346 | if (skb == NULL) |
| 343 | goto error; | 347 | goto error; |
| 344 | skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev)); | 348 | skb_reserve(skb, hlen); |
| 345 | 349 | ||
| 346 | skb->priority = sk->sk_priority; | 350 | skb->priority = sk->sk_priority; |
| 347 | skb->mark = sk->sk_mark; | 351 | skb->mark = sk->sk_mark; |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 94cdbc55ca7e..019774796174 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
| @@ -109,7 +109,6 @@ | |||
| 109 | #ifdef CONFIG_SYSCTL | 109 | #ifdef CONFIG_SYSCTL |
| 110 | #include <linux/sysctl.h> | 110 | #include <linux/sysctl.h> |
| 111 | #endif | 111 | #endif |
| 112 | #include <net/atmclip.h> | ||
| 113 | #include <net/secure_seq.h> | 112 | #include <net/secure_seq.h> |
| 114 | 113 | ||
| 115 | #define RT_FL_TOS(oldflp4) \ | 114 | #define RT_FL_TOS(oldflp4) \ |
| @@ -133,7 +132,6 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; | |||
| 133 | static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; | 132 | static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; |
| 134 | static int ip_rt_min_advmss __read_mostly = 256; | 133 | static int ip_rt_min_advmss __read_mostly = 256; |
| 135 | static int rt_chain_length_max __read_mostly = 20; | 134 | static int rt_chain_length_max __read_mostly = 20; |
| 136 | static int redirect_genid; | ||
| 137 | 135 | ||
| 138 | static struct delayed_work expires_work; | 136 | static struct delayed_work expires_work; |
| 139 | static unsigned long expires_ljiffies; | 137 | static unsigned long expires_ljiffies; |
| @@ -425,7 +423,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) | |||
| 425 | int len, HHUptod; | 423 | int len, HHUptod; |
| 426 | 424 | ||
| 427 | rcu_read_lock(); | 425 | rcu_read_lock(); |
| 428 | n = dst_get_neighbour(&r->dst); | 426 | n = dst_get_neighbour_noref(&r->dst); |
| 429 | HHUptod = (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0; | 427 | HHUptod = (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0; |
| 430 | rcu_read_unlock(); | 428 | rcu_read_unlock(); |
| 431 | 429 | ||
| @@ -938,7 +936,7 @@ static void rt_cache_invalidate(struct net *net) | |||
| 938 | 936 | ||
| 939 | get_random_bytes(&shuffle, sizeof(shuffle)); | 937 | get_random_bytes(&shuffle, sizeof(shuffle)); |
| 940 | atomic_add(shuffle + 1U, &net->ipv4.rt_genid); | 938 | atomic_add(shuffle + 1U, &net->ipv4.rt_genid); |
| 941 | redirect_genid++; | 939 | inetpeer_invalidate_tree(AF_INET); |
| 942 | } | 940 | } |
| 943 | 941 | ||
| 944 | /* | 942 | /* |
| @@ -1115,23 +1113,18 @@ static int slow_chain_length(const struct rtable *head) | |||
| 1115 | 1113 | ||
| 1116 | static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr) | 1114 | static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr) |
| 1117 | { | 1115 | { |
| 1118 | struct neigh_table *tbl = &arp_tbl; | ||
| 1119 | static const __be32 inaddr_any = 0; | 1116 | static const __be32 inaddr_any = 0; |
| 1120 | struct net_device *dev = dst->dev; | 1117 | struct net_device *dev = dst->dev; |
| 1121 | const __be32 *pkey = daddr; | 1118 | const __be32 *pkey = daddr; |
| 1122 | struct neighbour *n; | 1119 | struct neighbour *n; |
| 1123 | 1120 | ||
| 1124 | #if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) | ||
| 1125 | if (dev->type == ARPHRD_ATM) | ||
| 1126 | tbl = clip_tbl_hook; | ||
| 1127 | #endif | ||
| 1128 | if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) | 1121 | if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) |
| 1129 | pkey = &inaddr_any; | 1122 | pkey = &inaddr_any; |
| 1130 | 1123 | ||
| 1131 | n = __ipv4_neigh_lookup(tbl, dev, *(__force u32 *)pkey); | 1124 | n = __ipv4_neigh_lookup(&arp_tbl, dev, *(__force u32 *)pkey); |
| 1132 | if (n) | 1125 | if (n) |
| 1133 | return n; | 1126 | return n; |
| 1134 | return neigh_create(tbl, pkey, dev); | 1127 | return neigh_create(&arp_tbl, pkey, dev); |
| 1135 | } | 1128 | } |
| 1136 | 1129 | ||
| 1137 | static int rt_bind_neighbour(struct rtable *rt) | 1130 | static int rt_bind_neighbour(struct rtable *rt) |
| @@ -1491,10 +1484,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
| 1491 | 1484 | ||
| 1492 | peer = rt->peer; | 1485 | peer = rt->peer; |
| 1493 | if (peer) { | 1486 | if (peer) { |
| 1494 | if (peer->redirect_learned.a4 != new_gw || | 1487 | if (peer->redirect_learned.a4 != new_gw) { |
| 1495 | peer->redirect_genid != redirect_genid) { | ||
| 1496 | peer->redirect_learned.a4 = new_gw; | 1488 | peer->redirect_learned.a4 = new_gw; |
| 1497 | peer->redirect_genid = redirect_genid; | ||
| 1498 | atomic_inc(&__rt_peer_genid); | 1489 | atomic_inc(&__rt_peer_genid); |
| 1499 | } | 1490 | } |
| 1500 | check_peer_redir(&rt->dst, peer); | 1491 | check_peer_redir(&rt->dst, peer); |
| @@ -1799,8 +1790,6 @@ static void ipv4_validate_peer(struct rtable *rt) | |||
| 1799 | if (peer) { | 1790 | if (peer) { |
| 1800 | check_peer_pmtu(&rt->dst, peer); | 1791 | check_peer_pmtu(&rt->dst, peer); |
| 1801 | 1792 | ||
| 1802 | if (peer->redirect_genid != redirect_genid) | ||
| 1803 | peer->redirect_learned.a4 = 0; | ||
| 1804 | if (peer->redirect_learned.a4 && | 1793 | if (peer->redirect_learned.a4 && |
| 1805 | peer->redirect_learned.a4 != rt->rt_gateway) | 1794 | peer->redirect_learned.a4 != rt->rt_gateway) |
| 1806 | check_peer_redir(&rt->dst, peer); | 1795 | check_peer_redir(&rt->dst, peer); |
| @@ -1964,8 +1953,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, | |||
| 1964 | dst_init_metrics(&rt->dst, peer->metrics, false); | 1953 | dst_init_metrics(&rt->dst, peer->metrics, false); |
| 1965 | 1954 | ||
| 1966 | check_peer_pmtu(&rt->dst, peer); | 1955 | check_peer_pmtu(&rt->dst, peer); |
| 1967 | if (peer->redirect_genid != redirect_genid) | 1956 | |
| 1968 | peer->redirect_learned.a4 = 0; | ||
| 1969 | if (peer->redirect_learned.a4 && | 1957 | if (peer->redirect_learned.a4 && |
| 1970 | peer->redirect_learned.a4 != rt->rt_gateway) { | 1958 | peer->redirect_learned.a4 != rt->rt_gateway) { |
| 1971 | rt->rt_gateway = peer->redirect_learned.a4; | 1959 | rt->rt_gateway = peer->redirect_learned.a4; |
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 90f6544c13e2..eab2a7fb15d1 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
| @@ -245,7 +245,7 @@ bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok) | |||
| 245 | if (!sysctl_tcp_timestamps) | 245 | if (!sysctl_tcp_timestamps) |
| 246 | return false; | 246 | return false; |
| 247 | 247 | ||
| 248 | tcp_opt->sack_ok = (options >> 4) & 0x1; | 248 | tcp_opt->sack_ok = (options & (1 << 4)) ? TCP_SACK_SEEN : 0; |
| 249 | *ecn_ok = (options >> 5) & 1; | 249 | *ecn_ok = (options >> 5) & 1; |
| 250 | if (*ecn_ok && !sysctl_tcp_ecn) | 250 | if (*ecn_ok && !sysctl_tcp_ecn) |
| 251 | return false; | 251 | return false; |
| @@ -278,6 +278,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
| 278 | struct rtable *rt; | 278 | struct rtable *rt; |
| 279 | __u8 rcv_wscale; | 279 | __u8 rcv_wscale; |
| 280 | bool ecn_ok = false; | 280 | bool ecn_ok = false; |
| 281 | struct flowi4 fl4; | ||
| 281 | 282 | ||
| 282 | if (!sysctl_tcp_syncookies || !th->ack || th->rst) | 283 | if (!sysctl_tcp_syncookies || !th->ack || th->rst) |
| 283 | goto out; | 284 | goto out; |
| @@ -346,20 +347,16 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
| 346 | * hasn't changed since we received the original syn, but I see | 347 | * hasn't changed since we received the original syn, but I see |
| 347 | * no easy way to do this. | 348 | * no easy way to do this. |
| 348 | */ | 349 | */ |
| 349 | { | 350 | flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), |
| 350 | struct flowi4 fl4; | 351 | RT_SCOPE_UNIVERSE, IPPROTO_TCP, |
| 351 | 352 | inet_sk_flowi_flags(sk), | |
| 352 | flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), | 353 | (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, |
| 353 | RT_SCOPE_UNIVERSE, IPPROTO_TCP, | 354 | ireq->loc_addr, th->source, th->dest); |
| 354 | inet_sk_flowi_flags(sk), | 355 | security_req_classify_flow(req, flowi4_to_flowi(&fl4)); |
| 355 | (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, | 356 | rt = ip_route_output_key(sock_net(sk), &fl4); |
| 356 | ireq->loc_addr, th->source, th->dest); | 357 | if (IS_ERR(rt)) { |
| 357 | security_req_classify_flow(req, flowi4_to_flowi(&fl4)); | 358 | reqsk_free(req); |
| 358 | rt = ip_route_output_key(sock_net(sk), &fl4); | 359 | goto out; |
| 359 | if (IS_ERR(rt)) { | ||
| 360 | reqsk_free(req); | ||
| 361 | goto out; | ||
| 362 | } | ||
| 363 | } | 360 | } |
| 364 | 361 | ||
| 365 | /* Try to redo what tcp_v4_send_synack did. */ | 362 | /* Try to redo what tcp_v4_send_synack did. */ |
| @@ -373,5 +370,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
| 373 | ireq->rcv_wscale = rcv_wscale; | 370 | ireq->rcv_wscale = rcv_wscale; |
| 374 | 371 | ||
| 375 | ret = get_cookie_sock(sk, skb, req, &rt->dst); | 372 | ret = get_cookie_sock(sk, skb, req, &rt->dst); |
| 373 | /* ip_queue_xmit() depends on our flow being setup | ||
| 374 | * Normal sockets get it right from inet_csk_route_child_sock() | ||
| 375 | */ | ||
| 376 | if (ret) | ||
| 377 | inet_sk(ret)->cork.fl.u.ip4 = fl4; | ||
| 376 | out: return ret; | 378 | out: return ret; |
| 377 | } | 379 | } |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 69fd7201129a..7a7724da9bff 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
| 15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
| 16 | #include <linux/nsproxy.h> | 16 | #include <linux/nsproxy.h> |
| 17 | #include <linux/swap.h> | ||
| 17 | #include <net/snmp.h> | 18 | #include <net/snmp.h> |
| 18 | #include <net/icmp.h> | 19 | #include <net/icmp.h> |
| 19 | #include <net/ip.h> | 20 | #include <net/ip.h> |
| @@ -23,6 +24,7 @@ | |||
| 23 | #include <net/cipso_ipv4.h> | 24 | #include <net/cipso_ipv4.h> |
| 24 | #include <net/inet_frag.h> | 25 | #include <net/inet_frag.h> |
| 25 | #include <net/ping.h> | 26 | #include <net/ping.h> |
| 27 | #include <net/tcp_memcontrol.h> | ||
| 26 | 28 | ||
| 27 | static int zero; | 29 | static int zero; |
| 28 | static int tcp_retr1_max = 255; | 30 | static int tcp_retr1_max = 255; |
| @@ -73,7 +75,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, | |||
| 73 | } | 75 | } |
| 74 | 76 | ||
| 75 | 77 | ||
| 76 | void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high) | 78 | static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high) |
| 77 | { | 79 | { |
| 78 | gid_t *data = table->data; | 80 | gid_t *data = table->data; |
| 79 | unsigned seq; | 81 | unsigned seq; |
| @@ -86,7 +88,7 @@ void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t | |||
| 86 | } | 88 | } |
| 87 | 89 | ||
| 88 | /* Update system visible IP port range */ | 90 | /* Update system visible IP port range */ |
| 89 | static void set_ping_group_range(struct ctl_table *table, int range[2]) | 91 | static void set_ping_group_range(struct ctl_table *table, gid_t range[2]) |
| 90 | { | 92 | { |
| 91 | gid_t *data = table->data; | 93 | gid_t *data = table->data; |
| 92 | write_seqlock(&sysctl_local_ports.lock); | 94 | write_seqlock(&sysctl_local_ports.lock); |
| @@ -174,6 +176,49 @@ static int proc_allowed_congestion_control(ctl_table *ctl, | |||
| 174 | return ret; | 176 | return ret; |
| 175 | } | 177 | } |
| 176 | 178 | ||
| 179 | static int ipv4_tcp_mem(ctl_table *ctl, int write, | ||
| 180 | void __user *buffer, size_t *lenp, | ||
| 181 | loff_t *ppos) | ||
| 182 | { | ||
| 183 | int ret; | ||
| 184 | unsigned long vec[3]; | ||
| 185 | struct net *net = current->nsproxy->net_ns; | ||
| 186 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM | ||
| 187 | struct mem_cgroup *memcg; | ||
| 188 | #endif | ||
| 189 | |||
| 190 | ctl_table tmp = { | ||
| 191 | .data = &vec, | ||
| 192 | .maxlen = sizeof(vec), | ||
| 193 | .mode = ctl->mode, | ||
| 194 | }; | ||
| 195 | |||
| 196 | if (!write) { | ||
| 197 | ctl->data = &net->ipv4.sysctl_tcp_mem; | ||
| 198 | return proc_doulongvec_minmax(ctl, write, buffer, lenp, ppos); | ||
| 199 | } | ||
| 200 | |||
| 201 | ret = proc_doulongvec_minmax(&tmp, write, buffer, lenp, ppos); | ||
| 202 | if (ret) | ||
| 203 | return ret; | ||
| 204 | |||
| 205 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM | ||
| 206 | rcu_read_lock(); | ||
| 207 | memcg = mem_cgroup_from_task(current); | ||
| 208 | |||
| 209 | tcp_prot_mem(memcg, vec[0], 0); | ||
| 210 | tcp_prot_mem(memcg, vec[1], 1); | ||
| 211 | tcp_prot_mem(memcg, vec[2], 2); | ||
| 212 | rcu_read_unlock(); | ||
| 213 | #endif | ||
| 214 | |||
| 215 | net->ipv4.sysctl_tcp_mem[0] = vec[0]; | ||
| 216 | net->ipv4.sysctl_tcp_mem[1] = vec[1]; | ||
| 217 | net->ipv4.sysctl_tcp_mem[2] = vec[2]; | ||
| 218 | |||
| 219 | return 0; | ||
| 220 | } | ||
| 221 | |||
| 177 | static struct ctl_table ipv4_table[] = { | 222 | static struct ctl_table ipv4_table[] = { |
| 178 | { | 223 | { |
| 179 | .procname = "tcp_timestamps", | 224 | .procname = "tcp_timestamps", |
| @@ -433,13 +478,6 @@ static struct ctl_table ipv4_table[] = { | |||
| 433 | .proc_handler = proc_dointvec | 478 | .proc_handler = proc_dointvec |
| 434 | }, | 479 | }, |
| 435 | { | 480 | { |
| 436 | .procname = "tcp_mem", | ||
| 437 | .data = &sysctl_tcp_mem, | ||
| 438 | .maxlen = sizeof(sysctl_tcp_mem), | ||
| 439 | .mode = 0644, | ||
| 440 | .proc_handler = proc_doulongvec_minmax | ||
| 441 | }, | ||
| 442 | { | ||
| 443 | .procname = "tcp_wmem", | 481 | .procname = "tcp_wmem", |
| 444 | .data = &sysctl_tcp_wmem, | 482 | .data = &sysctl_tcp_wmem, |
| 445 | .maxlen = sizeof(sysctl_tcp_wmem), | 483 | .maxlen = sizeof(sysctl_tcp_wmem), |
| @@ -721,6 +759,12 @@ static struct ctl_table ipv4_net_table[] = { | |||
| 721 | .mode = 0644, | 759 | .mode = 0644, |
| 722 | .proc_handler = ipv4_ping_group_range, | 760 | .proc_handler = ipv4_ping_group_range, |
| 723 | }, | 761 | }, |
| 762 | { | ||
| 763 | .procname = "tcp_mem", | ||
| 764 | .maxlen = sizeof(init_net.ipv4.sysctl_tcp_mem), | ||
| 765 | .mode = 0644, | ||
| 766 | .proc_handler = ipv4_tcp_mem, | ||
| 767 | }, | ||
| 724 | { } | 768 | { } |
| 725 | }; | 769 | }; |
| 726 | 770 | ||
| @@ -769,6 +813,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) | |||
| 769 | 813 | ||
| 770 | net->ipv4.sysctl_rt_cache_rebuild_count = 4; | 814 | net->ipv4.sysctl_rt_cache_rebuild_count = 4; |
| 771 | 815 | ||
| 816 | tcp_init_mem(net); | ||
| 817 | |||
| 772 | net->ipv4.ipv4_hdr = register_net_sysctl_table(net, | 818 | net->ipv4.ipv4_hdr = register_net_sysctl_table(net, |
| 773 | net_ipv4_ctl_path, table); | 819 | net_ipv4_ctl_path, table); |
| 774 | if (net->ipv4.ipv4_hdr == NULL) | 820 | if (net->ipv4.ipv4_hdr == NULL) |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 34f5db1e1c8b..22ef5f9fd2ff 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
| @@ -282,11 +282,9 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; | |||
| 282 | struct percpu_counter tcp_orphan_count; | 282 | struct percpu_counter tcp_orphan_count; |
| 283 | EXPORT_SYMBOL_GPL(tcp_orphan_count); | 283 | EXPORT_SYMBOL_GPL(tcp_orphan_count); |
| 284 | 284 | ||
| 285 | long sysctl_tcp_mem[3] __read_mostly; | ||
| 286 | int sysctl_tcp_wmem[3] __read_mostly; | 285 | int sysctl_tcp_wmem[3] __read_mostly; |
| 287 | int sysctl_tcp_rmem[3] __read_mostly; | 286 | int sysctl_tcp_rmem[3] __read_mostly; |
| 288 | 287 | ||
| 289 | EXPORT_SYMBOL(sysctl_tcp_mem); | ||
| 290 | EXPORT_SYMBOL(sysctl_tcp_rmem); | 288 | EXPORT_SYMBOL(sysctl_tcp_rmem); |
| 291 | EXPORT_SYMBOL(sysctl_tcp_wmem); | 289 | EXPORT_SYMBOL(sysctl_tcp_wmem); |
| 292 | 290 | ||
| @@ -888,18 +886,18 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, | |||
| 888 | } | 886 | } |
| 889 | EXPORT_SYMBOL(tcp_sendpage); | 887 | EXPORT_SYMBOL(tcp_sendpage); |
| 890 | 888 | ||
| 891 | #define TCP_PAGE(sk) (sk->sk_sndmsg_page) | 889 | static inline int select_size(const struct sock *sk, bool sg) |
| 892 | #define TCP_OFF(sk) (sk->sk_sndmsg_off) | ||
| 893 | |||
| 894 | static inline int select_size(const struct sock *sk, int sg) | ||
| 895 | { | 890 | { |
| 896 | const struct tcp_sock *tp = tcp_sk(sk); | 891 | const struct tcp_sock *tp = tcp_sk(sk); |
| 897 | int tmp = tp->mss_cache; | 892 | int tmp = tp->mss_cache; |
| 898 | 893 | ||
| 899 | if (sg) { | 894 | if (sg) { |
| 900 | if (sk_can_gso(sk)) | 895 | if (sk_can_gso(sk)) { |
| 901 | tmp = 0; | 896 | /* Small frames wont use a full page: |
| 902 | else { | 897 | * Payload will immediately follow tcp header. |
| 898 | */ | ||
| 899 | tmp = SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER); | ||
| 900 | } else { | ||
| 903 | int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER); | 901 | int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER); |
| 904 | 902 | ||
| 905 | if (tmp >= pgbreak && | 903 | if (tmp >= pgbreak && |
| @@ -917,9 +915,9 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
| 917 | struct iovec *iov; | 915 | struct iovec *iov; |
| 918 | struct tcp_sock *tp = tcp_sk(sk); | 916 | struct tcp_sock *tp = tcp_sk(sk); |
| 919 | struct sk_buff *skb; | 917 | struct sk_buff *skb; |
| 920 | int iovlen, flags; | 918 | int iovlen, flags, err, copied; |
| 921 | int mss_now, size_goal; | 919 | int mss_now, size_goal; |
| 922 | int sg, err, copied; | 920 | bool sg; |
| 923 | long timeo; | 921 | long timeo; |
| 924 | 922 | ||
| 925 | lock_sock(sk); | 923 | lock_sock(sk); |
| @@ -946,7 +944,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
| 946 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) | 944 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) |
| 947 | goto out_err; | 945 | goto out_err; |
| 948 | 946 | ||
| 949 | sg = sk->sk_route_caps & NETIF_F_SG; | 947 | sg = !!(sk->sk_route_caps & NETIF_F_SG); |
| 950 | 948 | ||
| 951 | while (--iovlen >= 0) { | 949 | while (--iovlen >= 0) { |
| 952 | size_t seglen = iov->iov_len; | 950 | size_t seglen = iov->iov_len; |
| @@ -1005,8 +1003,13 @@ new_segment: | |||
| 1005 | } else { | 1003 | } else { |
| 1006 | int merge = 0; | 1004 | int merge = 0; |
| 1007 | int i = skb_shinfo(skb)->nr_frags; | 1005 | int i = skb_shinfo(skb)->nr_frags; |
| 1008 | struct page *page = TCP_PAGE(sk); | 1006 | struct page *page = sk->sk_sndmsg_page; |
| 1009 | int off = TCP_OFF(sk); | 1007 | int off; |
| 1008 | |||
| 1009 | if (page && page_count(page) == 1) | ||
| 1010 | sk->sk_sndmsg_off = 0; | ||
| 1011 | |||
| 1012 | off = sk->sk_sndmsg_off; | ||
| 1010 | 1013 | ||
| 1011 | if (skb_can_coalesce(skb, i, page, off) && | 1014 | if (skb_can_coalesce(skb, i, page, off) && |
| 1012 | off != PAGE_SIZE) { | 1015 | off != PAGE_SIZE) { |
| @@ -1023,7 +1026,7 @@ new_segment: | |||
| 1023 | } else if (page) { | 1026 | } else if (page) { |
| 1024 | if (off == PAGE_SIZE) { | 1027 | if (off == PAGE_SIZE) { |
| 1025 | put_page(page); | 1028 | put_page(page); |
| 1026 | TCP_PAGE(sk) = page = NULL; | 1029 | sk->sk_sndmsg_page = page = NULL; |
| 1027 | off = 0; | 1030 | off = 0; |
| 1028 | } | 1031 | } |
| 1029 | } else | 1032 | } else |
| @@ -1049,9 +1052,9 @@ new_segment: | |||
| 1049 | /* If this page was new, give it to the | 1052 | /* If this page was new, give it to the |
| 1050 | * socket so it does not get leaked. | 1053 | * socket so it does not get leaked. |
| 1051 | */ | 1054 | */ |
| 1052 | if (!TCP_PAGE(sk)) { | 1055 | if (!sk->sk_sndmsg_page) { |
| 1053 | TCP_PAGE(sk) = page; | 1056 | sk->sk_sndmsg_page = page; |
| 1054 | TCP_OFF(sk) = 0; | 1057 | sk->sk_sndmsg_off = 0; |
| 1055 | } | 1058 | } |
| 1056 | goto do_error; | 1059 | goto do_error; |
| 1057 | } | 1060 | } |
| @@ -1061,15 +1064,15 @@ new_segment: | |||
| 1061 | skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); | 1064 | skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); |
| 1062 | } else { | 1065 | } else { |
| 1063 | skb_fill_page_desc(skb, i, page, off, copy); | 1066 | skb_fill_page_desc(skb, i, page, off, copy); |
| 1064 | if (TCP_PAGE(sk)) { | 1067 | if (sk->sk_sndmsg_page) { |
| 1065 | get_page(page); | 1068 | get_page(page); |
| 1066 | } else if (off + copy < PAGE_SIZE) { | 1069 | } else if (off + copy < PAGE_SIZE) { |
| 1067 | get_page(page); | 1070 | get_page(page); |
| 1068 | TCP_PAGE(sk) = page; | 1071 | sk->sk_sndmsg_page = page; |
| 1069 | } | 1072 | } |
| 1070 | } | 1073 | } |
| 1071 | 1074 | ||
| 1072 | TCP_OFF(sk) = off + copy; | 1075 | sk->sk_sndmsg_off = off + copy; |
| 1073 | } | 1076 | } |
| 1074 | 1077 | ||
| 1075 | if (!copied) | 1078 | if (!copied) |
| @@ -1873,6 +1876,20 @@ void tcp_shutdown(struct sock *sk, int how) | |||
| 1873 | } | 1876 | } |
| 1874 | EXPORT_SYMBOL(tcp_shutdown); | 1877 | EXPORT_SYMBOL(tcp_shutdown); |
| 1875 | 1878 | ||
| 1879 | bool tcp_check_oom(struct sock *sk, int shift) | ||
| 1880 | { | ||
| 1881 | bool too_many_orphans, out_of_socket_memory; | ||
| 1882 | |||
| 1883 | too_many_orphans = tcp_too_many_orphans(sk, shift); | ||
| 1884 | out_of_socket_memory = tcp_out_of_memory(sk); | ||
| 1885 | |||
| 1886 | if (too_many_orphans && net_ratelimit()) | ||
| 1887 | pr_info("TCP: too many orphaned sockets\n"); | ||
| 1888 | if (out_of_socket_memory && net_ratelimit()) | ||
| 1889 | pr_info("TCP: out of memory -- consider tuning tcp_mem\n"); | ||
| 1890 | return too_many_orphans || out_of_socket_memory; | ||
| 1891 | } | ||
| 1892 | |||
| 1876 | void tcp_close(struct sock *sk, long timeout) | 1893 | void tcp_close(struct sock *sk, long timeout) |
| 1877 | { | 1894 | { |
| 1878 | struct sk_buff *skb; | 1895 | struct sk_buff *skb; |
| @@ -2012,10 +2029,7 @@ adjudge_to_death: | |||
| 2012 | } | 2029 | } |
| 2013 | if (sk->sk_state != TCP_CLOSE) { | 2030 | if (sk->sk_state != TCP_CLOSE) { |
| 2014 | sk_mem_reclaim(sk); | 2031 | sk_mem_reclaim(sk); |
| 2015 | if (tcp_too_many_orphans(sk, 0)) { | 2032 | if (tcp_check_oom(sk, 0)) { |
| 2016 | if (net_ratelimit()) | ||
| 2017 | printk(KERN_INFO "TCP: too many of orphaned " | ||
| 2018 | "sockets\n"); | ||
| 2019 | tcp_set_state(sk, TCP_CLOSE); | 2033 | tcp_set_state(sk, TCP_CLOSE); |
| 2020 | tcp_send_active_reset(sk, GFP_ATOMIC); | 2034 | tcp_send_active_reset(sk, GFP_ATOMIC); |
| 2021 | NET_INC_STATS_BH(sock_net(sk), | 2035 | NET_INC_STATS_BH(sock_net(sk), |
| @@ -2653,7 +2667,8 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, | |||
| 2653 | EXPORT_SYMBOL(compat_tcp_getsockopt); | 2667 | EXPORT_SYMBOL(compat_tcp_getsockopt); |
| 2654 | #endif | 2668 | #endif |
| 2655 | 2669 | ||
| 2656 | struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features) | 2670 | struct sk_buff *tcp_tso_segment(struct sk_buff *skb, |
| 2671 | netdev_features_t features) | ||
| 2657 | { | 2672 | { |
| 2658 | struct sk_buff *segs = ERR_PTR(-EINVAL); | 2673 | struct sk_buff *segs = ERR_PTR(-EINVAL); |
| 2659 | struct tcphdr *th; | 2674 | struct tcphdr *th; |
| @@ -3212,11 +3227,21 @@ static int __init set_thash_entries(char *str) | |||
| 3212 | } | 3227 | } |
| 3213 | __setup("thash_entries=", set_thash_entries); | 3228 | __setup("thash_entries=", set_thash_entries); |
| 3214 | 3229 | ||
| 3230 | void tcp_init_mem(struct net *net) | ||
| 3231 | { | ||
| 3232 | unsigned long limit = nr_free_buffer_pages() / 8; | ||
| 3233 | limit = max(limit, 128UL); | ||
| 3234 | net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3; | ||
| 3235 | net->ipv4.sysctl_tcp_mem[1] = limit; | ||
| 3236 | net->ipv4.sysctl_tcp_mem[2] = net->ipv4.sysctl_tcp_mem[0] * 2; | ||
| 3237 | } | ||
| 3238 | |||
| 3215 | void __init tcp_init(void) | 3239 | void __init tcp_init(void) |
| 3216 | { | 3240 | { |
| 3217 | struct sk_buff *skb = NULL; | 3241 | struct sk_buff *skb = NULL; |
| 3218 | unsigned long limit; | 3242 | unsigned long limit; |
| 3219 | int i, max_share, cnt; | 3243 | int max_share, cnt; |
| 3244 | unsigned int i; | ||
| 3220 | unsigned long jiffy = jiffies; | 3245 | unsigned long jiffy = jiffies; |
| 3221 | 3246 | ||
| 3222 | BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); | 3247 | BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); |
| @@ -3259,7 +3284,7 @@ void __init tcp_init(void) | |||
| 3259 | &tcp_hashinfo.bhash_size, | 3284 | &tcp_hashinfo.bhash_size, |
| 3260 | NULL, | 3285 | NULL, |
| 3261 | 64 * 1024); | 3286 | 64 * 1024); |
| 3262 | tcp_hashinfo.bhash_size = 1 << tcp_hashinfo.bhash_size; | 3287 | tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size; |
| 3263 | for (i = 0; i < tcp_hashinfo.bhash_size; i++) { | 3288 | for (i = 0; i < tcp_hashinfo.bhash_size; i++) { |
| 3264 | spin_lock_init(&tcp_hashinfo.bhash[i].lock); | 3289 | spin_lock_init(&tcp_hashinfo.bhash[i].lock); |
| 3265 | INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain); | 3290 | INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain); |
| @@ -3272,14 +3297,10 @@ void __init tcp_init(void) | |||
| 3272 | sysctl_tcp_max_orphans = cnt / 2; | 3297 | sysctl_tcp_max_orphans = cnt / 2; |
| 3273 | sysctl_max_syn_backlog = max(128, cnt / 256); | 3298 | sysctl_max_syn_backlog = max(128, cnt / 256); |
| 3274 | 3299 | ||
| 3275 | limit = nr_free_buffer_pages() / 8; | 3300 | tcp_init_mem(&init_net); |
| 3276 | limit = max(limit, 128UL); | ||
| 3277 | sysctl_tcp_mem[0] = limit / 4 * 3; | ||
| 3278 | sysctl_tcp_mem[1] = limit; | ||
| 3279 | sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2; | ||
| 3280 | |||
| 3281 | /* Set per-socket limits to no more than 1/128 the pressure threshold */ | 3301 | /* Set per-socket limits to no more than 1/128 the pressure threshold */ |
| 3282 | limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7); | 3302 | limit = nr_free_buffer_pages() << (PAGE_SHIFT - 10); |
| 3303 | limit = max(limit, 128UL); | ||
| 3283 | max_share = min(4UL*1024*1024, limit); | 3304 | max_share = min(4UL*1024*1024, limit); |
| 3284 | 3305 | ||
| 3285 | sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; | 3306 | sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; |
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 6187eb4d1dcf..f45e1c242440 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c | |||
| @@ -63,7 +63,6 @@ static inline void bictcp_reset(struct bictcp *ca) | |||
| 63 | { | 63 | { |
| 64 | ca->cnt = 0; | 64 | ca->cnt = 0; |
| 65 | ca->last_max_cwnd = 0; | 65 | ca->last_max_cwnd = 0; |
| 66 | ca->loss_cwnd = 0; | ||
| 67 | ca->last_cwnd = 0; | 66 | ca->last_cwnd = 0; |
| 68 | ca->last_time = 0; | 67 | ca->last_time = 0; |
| 69 | ca->epoch_start = 0; | 68 | ca->epoch_start = 0; |
| @@ -72,7 +71,11 @@ static inline void bictcp_reset(struct bictcp *ca) | |||
| 72 | 71 | ||
| 73 | static void bictcp_init(struct sock *sk) | 72 | static void bictcp_init(struct sock *sk) |
| 74 | { | 73 | { |
| 75 | bictcp_reset(inet_csk_ca(sk)); | 74 | struct bictcp *ca = inet_csk_ca(sk); |
| 75 | |||
| 76 | bictcp_reset(ca); | ||
| 77 | ca->loss_cwnd = 0; | ||
| 78 | |||
| 76 | if (initial_ssthresh) | 79 | if (initial_ssthresh) |
| 77 | tcp_sk(sk)->snd_ssthresh = initial_ssthresh; | 80 | tcp_sk(sk)->snd_ssthresh = initial_ssthresh; |
| 78 | } | 81 | } |
| @@ -127,7 +130,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | |||
| 127 | } | 130 | } |
| 128 | 131 | ||
| 129 | /* if in slow start or link utilization is very low */ | 132 | /* if in slow start or link utilization is very low */ |
| 130 | if (ca->loss_cwnd == 0) { | 133 | if (ca->last_max_cwnd == 0) { |
| 131 | if (ca->cnt > 20) /* increase cwnd 5% per RTT */ | 134 | if (ca->cnt > 20) /* increase cwnd 5% per RTT */ |
| 132 | ca->cnt = 20; | 135 | ca->cnt = 20; |
| 133 | } | 136 | } |
| @@ -185,7 +188,7 @@ static u32 bictcp_undo_cwnd(struct sock *sk) | |||
| 185 | { | 188 | { |
| 186 | const struct tcp_sock *tp = tcp_sk(sk); | 189 | const struct tcp_sock *tp = tcp_sk(sk); |
| 187 | const struct bictcp *ca = inet_csk_ca(sk); | 190 | const struct bictcp *ca = inet_csk_ca(sk); |
| 188 | return max(tp->snd_cwnd, ca->last_max_cwnd); | 191 | return max(tp->snd_cwnd, ca->loss_cwnd); |
| 189 | } | 192 | } |
| 190 | 193 | ||
| 191 | static void bictcp_state(struct sock *sk, u8 new_state) | 194 | static void bictcp_state(struct sock *sk, u8 new_state) |
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 850c737e08e2..fc6d475f488f 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c | |||
| @@ -292,7 +292,7 @@ int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) | |||
| 292 | left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && | 292 | left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && |
| 293 | left * tp->mss_cache < sk->sk_gso_max_size) | 293 | left * tp->mss_cache < sk->sk_gso_max_size) |
| 294 | return 1; | 294 | return 1; |
| 295 | return left <= tcp_max_burst(tp); | 295 | return left <= tcp_max_tso_deferred_mss(tp); |
| 296 | } | 296 | } |
| 297 | EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited); | 297 | EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited); |
| 298 | 298 | ||
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index f376b05cca81..a9077f441cb2 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c | |||
| @@ -107,7 +107,6 @@ static inline void bictcp_reset(struct bictcp *ca) | |||
| 107 | { | 107 | { |
| 108 | ca->cnt = 0; | 108 | ca->cnt = 0; |
| 109 | ca->last_max_cwnd = 0; | 109 | ca->last_max_cwnd = 0; |
| 110 | ca->loss_cwnd = 0; | ||
| 111 | ca->last_cwnd = 0; | 110 | ca->last_cwnd = 0; |
| 112 | ca->last_time = 0; | 111 | ca->last_time = 0; |
| 113 | ca->bic_origin_point = 0; | 112 | ca->bic_origin_point = 0; |
| @@ -142,7 +141,10 @@ static inline void bictcp_hystart_reset(struct sock *sk) | |||
| 142 | 141 | ||
| 143 | static void bictcp_init(struct sock *sk) | 142 | static void bictcp_init(struct sock *sk) |
| 144 | { | 143 | { |
| 145 | bictcp_reset(inet_csk_ca(sk)); | 144 | struct bictcp *ca = inet_csk_ca(sk); |
| 145 | |||
| 146 | bictcp_reset(ca); | ||
| 147 | ca->loss_cwnd = 0; | ||
| 146 | 148 | ||
| 147 | if (hystart) | 149 | if (hystart) |
| 148 | bictcp_hystart_reset(sk); | 150 | bictcp_hystart_reset(sk); |
| @@ -275,7 +277,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | |||
| 275 | * The initial growth of cubic function may be too conservative | 277 | * The initial growth of cubic function may be too conservative |
| 276 | * when the available bandwidth is still unknown. | 278 | * when the available bandwidth is still unknown. |
| 277 | */ | 279 | */ |
| 278 | if (ca->loss_cwnd == 0 && ca->cnt > 20) | 280 | if (ca->last_max_cwnd == 0 && ca->cnt > 20) |
| 279 | ca->cnt = 20; /* increase cwnd 5% per RTT */ | 281 | ca->cnt = 20; /* increase cwnd 5% per RTT */ |
| 280 | 282 | ||
| 281 | /* TCP Friendly */ | 283 | /* TCP Friendly */ |
| @@ -342,7 +344,7 @@ static u32 bictcp_undo_cwnd(struct sock *sk) | |||
| 342 | { | 344 | { |
| 343 | struct bictcp *ca = inet_csk_ca(sk); | 345 | struct bictcp *ca = inet_csk_ca(sk); |
| 344 | 346 | ||
| 345 | return max(tcp_sk(sk)->snd_cwnd, ca->last_max_cwnd); | 347 | return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); |
| 346 | } | 348 | } |
| 347 | 349 | ||
| 348 | static void bictcp_state(struct sock *sk, u8 new_state) | 350 | static void bictcp_state(struct sock *sk, u8 new_state) |
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 939edb3b8e4d..ed3f2ad42e0f 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c | |||
| @@ -34,11 +34,23 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, | |||
| 34 | tcp_get_info(sk, info); | 34 | tcp_get_info(sk, info); |
| 35 | } | 35 | } |
| 36 | 36 | ||
| 37 | static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, | ||
| 38 | struct inet_diag_req_v2 *r, struct nlattr *bc) | ||
| 39 | { | ||
| 40 | inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r, bc); | ||
| 41 | } | ||
| 42 | |||
| 43 | static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, | ||
| 44 | struct inet_diag_req_v2 *req) | ||
| 45 | { | ||
| 46 | return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req); | ||
| 47 | } | ||
| 48 | |||
| 37 | static const struct inet_diag_handler tcp_diag_handler = { | 49 | static const struct inet_diag_handler tcp_diag_handler = { |
| 38 | .idiag_hashinfo = &tcp_hashinfo, | 50 | .dump = tcp_diag_dump, |
| 51 | .dump_one = tcp_diag_dump_one, | ||
| 39 | .idiag_get_info = tcp_diag_get_info, | 52 | .idiag_get_info = tcp_diag_get_info, |
| 40 | .idiag_type = TCPDIAG_GETSOCK, | 53 | .idiag_type = IPPROTO_TCP, |
| 41 | .idiag_info_size = sizeof(struct tcp_info), | ||
| 42 | }; | 54 | }; |
| 43 | 55 | ||
| 44 | static int __init tcp_diag_init(void) | 56 | static int __init tcp_diag_init(void) |
| @@ -54,4 +66,4 @@ static void __exit tcp_diag_exit(void) | |||
| 54 | module_init(tcp_diag_init); | 66 | module_init(tcp_diag_init); |
| 55 | module_exit(tcp_diag_exit); | 67 | module_exit(tcp_diag_exit); |
| 56 | MODULE_LICENSE("GPL"); | 68 | MODULE_LICENSE("GPL"); |
| 57 | MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_INET_DIAG, TCPDIAG_GETSOCK); | 69 | MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-6 /* AF_INET - IPPROTO_TCP */); |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 52b5c2d0ecd0..b5e315f13641 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
| @@ -105,7 +105,6 @@ int sysctl_tcp_abc __read_mostly; | |||
| 105 | #define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */ | 105 | #define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */ |
| 106 | #define FLAG_DATA_SACKED 0x20 /* New SACK. */ | 106 | #define FLAG_DATA_SACKED 0x20 /* New SACK. */ |
| 107 | #define FLAG_ECE 0x40 /* ECE in this ACK */ | 107 | #define FLAG_ECE 0x40 /* ECE in this ACK */ |
| 108 | #define FLAG_DATA_LOST 0x80 /* SACK detected data lossage. */ | ||
| 109 | #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ | 108 | #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ |
| 110 | #define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ | 109 | #define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ |
| 111 | #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ | 110 | #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ |
| @@ -322,7 +321,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) | |||
| 322 | /* Check #1 */ | 321 | /* Check #1 */ |
| 323 | if (tp->rcv_ssthresh < tp->window_clamp && | 322 | if (tp->rcv_ssthresh < tp->window_clamp && |
| 324 | (int)tp->rcv_ssthresh < tcp_space(sk) && | 323 | (int)tp->rcv_ssthresh < tcp_space(sk) && |
| 325 | !tcp_memory_pressure) { | 324 | !sk_under_memory_pressure(sk)) { |
| 326 | int incr; | 325 | int incr; |
| 327 | 326 | ||
| 328 | /* Check #2. Increase window, if skb with such overhead | 327 | /* Check #2. Increase window, if skb with such overhead |
| @@ -411,8 +410,8 @@ static void tcp_clamp_window(struct sock *sk) | |||
| 411 | 410 | ||
| 412 | if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && | 411 | if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && |
| 413 | !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && | 412 | !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && |
| 414 | !tcp_memory_pressure && | 413 | !sk_under_memory_pressure(sk) && |
| 415 | atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { | 414 | sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) { |
| 416 | sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), | 415 | sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), |
| 417 | sysctl_tcp_rmem[2]); | 416 | sysctl_tcp_rmem[2]); |
| 418 | } | 417 | } |
| @@ -865,13 +864,13 @@ static void tcp_disable_fack(struct tcp_sock *tp) | |||
| 865 | /* RFC3517 uses different metric in lost marker => reset on change */ | 864 | /* RFC3517 uses different metric in lost marker => reset on change */ |
| 866 | if (tcp_is_fack(tp)) | 865 | if (tcp_is_fack(tp)) |
| 867 | tp->lost_skb_hint = NULL; | 866 | tp->lost_skb_hint = NULL; |
| 868 | tp->rx_opt.sack_ok &= ~2; | 867 | tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED; |
| 869 | } | 868 | } |
| 870 | 869 | ||
| 871 | /* Take a notice that peer is sending D-SACKs */ | 870 | /* Take a notice that peer is sending D-SACKs */ |
| 872 | static void tcp_dsack_seen(struct tcp_sock *tp) | 871 | static void tcp_dsack_seen(struct tcp_sock *tp) |
| 873 | { | 872 | { |
| 874 | tp->rx_opt.sack_ok |= 4; | 873 | tp->rx_opt.sack_ok |= TCP_DSACK_SEEN; |
| 875 | } | 874 | } |
| 876 | 875 | ||
| 877 | /* Initialize metrics on socket. */ | 876 | /* Initialize metrics on socket. */ |
| @@ -1040,13 +1039,11 @@ static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, | |||
| 1040 | * These 6 states form finite state machine, controlled by the following events: | 1039 | * These 6 states form finite state machine, controlled by the following events: |
| 1041 | * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue()) | 1040 | * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue()) |
| 1042 | * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue()) | 1041 | * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue()) |
| 1043 | * 3. Loss detection event of one of three flavors: | 1042 | * 3. Loss detection event of two flavors: |
| 1044 | * A. Scoreboard estimator decided the packet is lost. | 1043 | * A. Scoreboard estimator decided the packet is lost. |
| 1045 | * A'. Reno "three dupacks" marks head of queue lost. | 1044 | * A'. Reno "three dupacks" marks head of queue lost. |
| 1046 | * A''. Its FACK modfication, head until snd.fack is lost. | 1045 | * A''. Its FACK modification, head until snd.fack is lost. |
| 1047 | * B. SACK arrives sacking data transmitted after never retransmitted | 1046 | * B. SACK arrives sacking SND.NXT at the moment, when the |
| 1048 | * hole was sent out. | ||
| 1049 | * C. SACK arrives sacking SND.NXT at the moment, when the | ||
| 1050 | * segment was retransmitted. | 1047 | * segment was retransmitted. |
| 1051 | * 4. D-SACK added new rule: D-SACK changes any tag to S. | 1048 | * 4. D-SACK added new rule: D-SACK changes any tag to S. |
| 1052 | * | 1049 | * |
| @@ -1153,7 +1150,7 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack, | |||
| 1153 | } | 1150 | } |
| 1154 | 1151 | ||
| 1155 | /* Check for lost retransmit. This superb idea is borrowed from "ratehalving". | 1152 | /* Check for lost retransmit. This superb idea is borrowed from "ratehalving". |
| 1156 | * Event "C". Later note: FACK people cheated me again 8), we have to account | 1153 | * Event "B". Later note: FACK people cheated me again 8), we have to account |
| 1157 | * for reordering! Ugly, but should help. | 1154 | * for reordering! Ugly, but should help. |
| 1158 | * | 1155 | * |
| 1159 | * Search retransmitted skbs from write_queue that were sent when snd_nxt was | 1156 | * Search retransmitted skbs from write_queue that were sent when snd_nxt was |
| @@ -1310,25 +1307,26 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, | |||
| 1310 | return in_sack; | 1307 | return in_sack; |
| 1311 | } | 1308 | } |
| 1312 | 1309 | ||
| 1313 | static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk, | 1310 | /* Mark the given newly-SACKed range as such, adjusting counters and hints. */ |
| 1314 | struct tcp_sacktag_state *state, | 1311 | static u8 tcp_sacktag_one(struct sock *sk, |
| 1312 | struct tcp_sacktag_state *state, u8 sacked, | ||
| 1313 | u32 start_seq, u32 end_seq, | ||
| 1315 | int dup_sack, int pcount) | 1314 | int dup_sack, int pcount) |
| 1316 | { | 1315 | { |
| 1317 | struct tcp_sock *tp = tcp_sk(sk); | 1316 | struct tcp_sock *tp = tcp_sk(sk); |
| 1318 | u8 sacked = TCP_SKB_CB(skb)->sacked; | ||
| 1319 | int fack_count = state->fack_count; | 1317 | int fack_count = state->fack_count; |
| 1320 | 1318 | ||
| 1321 | /* Account D-SACK for retransmitted packet. */ | 1319 | /* Account D-SACK for retransmitted packet. */ |
| 1322 | if (dup_sack && (sacked & TCPCB_RETRANS)) { | 1320 | if (dup_sack && (sacked & TCPCB_RETRANS)) { |
| 1323 | if (tp->undo_marker && tp->undo_retrans && | 1321 | if (tp->undo_marker && tp->undo_retrans && |
| 1324 | after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) | 1322 | after(end_seq, tp->undo_marker)) |
| 1325 | tp->undo_retrans--; | 1323 | tp->undo_retrans--; |
| 1326 | if (sacked & TCPCB_SACKED_ACKED) | 1324 | if (sacked & TCPCB_SACKED_ACKED) |
| 1327 | state->reord = min(fack_count, state->reord); | 1325 | state->reord = min(fack_count, state->reord); |
| 1328 | } | 1326 | } |
| 1329 | 1327 | ||
| 1330 | /* Nothing to do; acked frame is about to be dropped (was ACKed). */ | 1328 | /* Nothing to do; acked frame is about to be dropped (was ACKed). */ |
| 1331 | if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) | 1329 | if (!after(end_seq, tp->snd_una)) |
| 1332 | return sacked; | 1330 | return sacked; |
| 1333 | 1331 | ||
| 1334 | if (!(sacked & TCPCB_SACKED_ACKED)) { | 1332 | if (!(sacked & TCPCB_SACKED_ACKED)) { |
| @@ -1347,13 +1345,13 @@ static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk, | |||
| 1347 | /* New sack for not retransmitted frame, | 1345 | /* New sack for not retransmitted frame, |
| 1348 | * which was in hole. It is reordering. | 1346 | * which was in hole. It is reordering. |
| 1349 | */ | 1347 | */ |
| 1350 | if (before(TCP_SKB_CB(skb)->seq, | 1348 | if (before(start_seq, |
| 1351 | tcp_highest_sack_seq(tp))) | 1349 | tcp_highest_sack_seq(tp))) |
| 1352 | state->reord = min(fack_count, | 1350 | state->reord = min(fack_count, |
| 1353 | state->reord); | 1351 | state->reord); |
| 1354 | 1352 | ||
| 1355 | /* SACK enhanced F-RTO (RFC4138; Appendix B) */ | 1353 | /* SACK enhanced F-RTO (RFC4138; Appendix B) */ |
| 1356 | if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) | 1354 | if (!after(end_seq, tp->frto_highmark)) |
| 1357 | state->flag |= FLAG_ONLY_ORIG_SACKED; | 1355 | state->flag |= FLAG_ONLY_ORIG_SACKED; |
| 1358 | } | 1356 | } |
| 1359 | 1357 | ||
| @@ -1371,8 +1369,7 @@ static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk, | |||
| 1371 | 1369 | ||
| 1372 | /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ | 1370 | /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ |
| 1373 | if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) && | 1371 | if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) && |
| 1374 | before(TCP_SKB_CB(skb)->seq, | 1372 | before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq)) |
| 1375 | TCP_SKB_CB(tp->lost_skb_hint)->seq)) | ||
| 1376 | tp->lost_cnt_hint += pcount; | 1373 | tp->lost_cnt_hint += pcount; |
| 1377 | 1374 | ||
| 1378 | if (fack_count > tp->fackets_out) | 1375 | if (fack_count > tp->fackets_out) |
| @@ -1391,6 +1388,9 @@ static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk, | |||
| 1391 | return sacked; | 1388 | return sacked; |
| 1392 | } | 1389 | } |
| 1393 | 1390 | ||
| 1391 | /* Shift newly-SACKed bytes from this skb to the immediately previous | ||
| 1392 | * already-SACKed sk_buff. Mark the newly-SACKed bytes as such. | ||
| 1393 | */ | ||
| 1394 | static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, | 1394 | static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, |
| 1395 | struct tcp_sacktag_state *state, | 1395 | struct tcp_sacktag_state *state, |
| 1396 | unsigned int pcount, int shifted, int mss, | 1396 | unsigned int pcount, int shifted, int mss, |
| @@ -1398,9 +1398,20 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, | |||
| 1398 | { | 1398 | { |
| 1399 | struct tcp_sock *tp = tcp_sk(sk); | 1399 | struct tcp_sock *tp = tcp_sk(sk); |
| 1400 | struct sk_buff *prev = tcp_write_queue_prev(sk, skb); | 1400 | struct sk_buff *prev = tcp_write_queue_prev(sk, skb); |
| 1401 | u32 start_seq = TCP_SKB_CB(skb)->seq; /* start of newly-SACKed */ | ||
| 1402 | u32 end_seq = start_seq + shifted; /* end of newly-SACKed */ | ||
| 1401 | 1403 | ||
| 1402 | BUG_ON(!pcount); | 1404 | BUG_ON(!pcount); |
| 1403 | 1405 | ||
| 1406 | /* Adjust counters and hints for the newly sacked sequence | ||
| 1407 | * range but discard the return value since prev is already | ||
| 1408 | * marked. We must tag the range first because the seq | ||
| 1409 | * advancement below implicitly advances | ||
| 1410 | * tcp_highest_sack_seq() when skb is highest_sack. | ||
| 1411 | */ | ||
| 1412 | tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, | ||
| 1413 | start_seq, end_seq, dup_sack, pcount); | ||
| 1414 | |||
| 1404 | if (skb == tp->lost_skb_hint) | 1415 | if (skb == tp->lost_skb_hint) |
| 1405 | tp->lost_cnt_hint += pcount; | 1416 | tp->lost_cnt_hint += pcount; |
| 1406 | 1417 | ||
| @@ -1427,9 +1438,6 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, | |||
| 1427 | skb_shinfo(skb)->gso_type = 0; | 1438 | skb_shinfo(skb)->gso_type = 0; |
| 1428 | } | 1439 | } |
| 1429 | 1440 | ||
| 1430 | /* We discard results */ | ||
| 1431 | tcp_sacktag_one(skb, sk, state, dup_sack, pcount); | ||
| 1432 | |||
| 1433 | /* Difference in this won't matter, both ACKed by the same cumul. ACK */ | 1441 | /* Difference in this won't matter, both ACKed by the same cumul. ACK */ |
| 1434 | TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); | 1442 | TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); |
| 1435 | 1443 | ||
| @@ -1577,6 +1585,10 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, | |||
| 1577 | } | 1585 | } |
| 1578 | } | 1586 | } |
| 1579 | 1587 | ||
| 1588 | /* tcp_sacktag_one() won't SACK-tag ranges below snd_una */ | ||
| 1589 | if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una)) | ||
| 1590 | goto fallback; | ||
| 1591 | |||
| 1580 | if (!skb_shift(prev, skb, len)) | 1592 | if (!skb_shift(prev, skb, len)) |
| 1581 | goto fallback; | 1593 | goto fallback; |
| 1582 | if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack)) | 1594 | if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack)) |
| @@ -1667,10 +1679,14 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, | |||
| 1667 | break; | 1679 | break; |
| 1668 | 1680 | ||
| 1669 | if (in_sack) { | 1681 | if (in_sack) { |
| 1670 | TCP_SKB_CB(skb)->sacked = tcp_sacktag_one(skb, sk, | 1682 | TCP_SKB_CB(skb)->sacked = |
| 1671 | state, | 1683 | tcp_sacktag_one(sk, |
| 1672 | dup_sack, | 1684 | state, |
| 1673 | tcp_skb_pcount(skb)); | 1685 | TCP_SKB_CB(skb)->sacked, |
| 1686 | TCP_SKB_CB(skb)->seq, | ||
| 1687 | TCP_SKB_CB(skb)->end_seq, | ||
| 1688 | dup_sack, | ||
| 1689 | tcp_skb_pcount(skb)); | ||
| 1674 | 1690 | ||
| 1675 | if (!before(TCP_SKB_CB(skb)->seq, | 1691 | if (!before(TCP_SKB_CB(skb)->seq, |
| 1676 | tcp_highest_sack_seq(tp))) | 1692 | tcp_highest_sack_seq(tp))) |
| @@ -1844,10 +1860,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, | |||
| 1844 | if (found_dup_sack && ((i + 1) == first_sack_index)) | 1860 | if (found_dup_sack && ((i + 1) == first_sack_index)) |
| 1845 | next_dup = &sp[i + 1]; | 1861 | next_dup = &sp[i + 1]; |
| 1846 | 1862 | ||
| 1847 | /* Event "B" in the comment above. */ | ||
| 1848 | if (after(end_seq, tp->high_seq)) | ||
| 1849 | state.flag |= FLAG_DATA_LOST; | ||
| 1850 | |||
| 1851 | /* Skip too early cached blocks */ | 1863 | /* Skip too early cached blocks */ |
| 1852 | while (tcp_sack_cache_ok(tp, cache) && | 1864 | while (tcp_sack_cache_ok(tp, cache) && |
| 1853 | !before(start_seq, cache->end_seq)) | 1865 | !before(start_seq, cache->end_seq)) |
| @@ -2515,8 +2527,11 @@ static void tcp_timeout_skbs(struct sock *sk) | |||
| 2515 | tcp_verify_left_out(tp); | 2527 | tcp_verify_left_out(tp); |
| 2516 | } | 2528 | } |
| 2517 | 2529 | ||
| 2518 | /* Mark head of queue up as lost. With RFC3517 SACK, the packets is | 2530 | /* Detect loss in event "A" above by marking head of queue up as lost. |
| 2519 | * is against sacked "cnt", otherwise it's against facked "cnt" | 2531 | * For FACK or non-SACK(Reno) senders, the first "packets" number of segments |
| 2532 | * are considered lost. For RFC3517 SACK, a segment is considered lost if it | ||
| 2533 | * has at least tp->reordering SACKed seqments above it; "packets" refers to | ||
| 2534 | * the maximum SACKed segments to pass before reaching this limit. | ||
| 2520 | */ | 2535 | */ |
| 2521 | static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) | 2536 | static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) |
| 2522 | { | 2537 | { |
| @@ -2525,6 +2540,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) | |||
| 2525 | int cnt, oldcnt; | 2540 | int cnt, oldcnt; |
| 2526 | int err; | 2541 | int err; |
| 2527 | unsigned int mss; | 2542 | unsigned int mss; |
| 2543 | /* Use SACK to deduce losses of new sequences sent during recovery */ | ||
| 2544 | const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq; | ||
| 2528 | 2545 | ||
| 2529 | WARN_ON(packets > tp->packets_out); | 2546 | WARN_ON(packets > tp->packets_out); |
| 2530 | if (tp->lost_skb_hint) { | 2547 | if (tp->lost_skb_hint) { |
| @@ -2546,7 +2563,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) | |||
| 2546 | tp->lost_skb_hint = skb; | 2563 | tp->lost_skb_hint = skb; |
| 2547 | tp->lost_cnt_hint = cnt; | 2564 | tp->lost_cnt_hint = cnt; |
| 2548 | 2565 | ||
| 2549 | if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq)) | 2566 | if (after(TCP_SKB_CB(skb)->end_seq, loss_high)) |
| 2550 | break; | 2567 | break; |
| 2551 | 2568 | ||
| 2552 | oldcnt = cnt; | 2569 | oldcnt = cnt; |
| @@ -2556,6 +2573,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) | |||
| 2556 | 2573 | ||
| 2557 | if (cnt > packets) { | 2574 | if (cnt > packets) { |
| 2558 | if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) || | 2575 | if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) || |
| 2576 | (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) || | ||
| 2559 | (oldcnt >= packets)) | 2577 | (oldcnt >= packets)) |
| 2560 | break; | 2578 | break; |
| 2561 | 2579 | ||
| @@ -2663,7 +2681,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) | |||
| 2663 | tp->snd_ssthresh, tp->prior_ssthresh, | 2681 | tp->snd_ssthresh, tp->prior_ssthresh, |
| 2664 | tp->packets_out); | 2682 | tp->packets_out); |
| 2665 | } | 2683 | } |
| 2666 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 2684 | #if IS_ENABLED(CONFIG_IPV6) |
| 2667 | else if (sk->sk_family == AF_INET6) { | 2685 | else if (sk->sk_family == AF_INET6) { |
| 2668 | struct ipv6_pinfo *np = inet6_sk(sk); | 2686 | struct ipv6_pinfo *np = inet6_sk(sk); |
| 2669 | printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", | 2687 | printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", |
| @@ -2858,7 +2876,7 @@ static void tcp_try_keep_open(struct sock *sk) | |||
| 2858 | struct tcp_sock *tp = tcp_sk(sk); | 2876 | struct tcp_sock *tp = tcp_sk(sk); |
| 2859 | int state = TCP_CA_Open; | 2877 | int state = TCP_CA_Open; |
| 2860 | 2878 | ||
| 2861 | if (tcp_left_out(tp) || tcp_any_retrans_done(sk) || tp->undo_marker) | 2879 | if (tcp_left_out(tp) || tcp_any_retrans_done(sk)) |
| 2862 | state = TCP_CA_Disorder; | 2880 | state = TCP_CA_Disorder; |
| 2863 | 2881 | ||
| 2864 | if (inet_csk(sk)->icsk_ca_state != state) { | 2882 | if (inet_csk(sk)->icsk_ca_state != state) { |
| @@ -2881,7 +2899,8 @@ static void tcp_try_to_open(struct sock *sk, int flag) | |||
| 2881 | 2899 | ||
| 2882 | if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { | 2900 | if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { |
| 2883 | tcp_try_keep_open(sk); | 2901 | tcp_try_keep_open(sk); |
| 2884 | tcp_moderate_cwnd(tp); | 2902 | if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open) |
| 2903 | tcp_moderate_cwnd(tp); | ||
| 2885 | } else { | 2904 | } else { |
| 2886 | tcp_cwnd_down(sk, flag); | 2905 | tcp_cwnd_down(sk, flag); |
| 2887 | } | 2906 | } |
| @@ -3009,11 +3028,11 @@ static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked, | |||
| 3009 | * tcp_xmit_retransmit_queue(). | 3028 | * tcp_xmit_retransmit_queue(). |
| 3010 | */ | 3029 | */ |
| 3011 | static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | 3030 | static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, |
| 3012 | int newly_acked_sacked, int flag) | 3031 | int newly_acked_sacked, bool is_dupack, |
| 3032 | int flag) | ||
| 3013 | { | 3033 | { |
| 3014 | struct inet_connection_sock *icsk = inet_csk(sk); | 3034 | struct inet_connection_sock *icsk = inet_csk(sk); |
| 3015 | struct tcp_sock *tp = tcp_sk(sk); | 3035 | struct tcp_sock *tp = tcp_sk(sk); |
| 3016 | int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); | ||
| 3017 | int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && | 3036 | int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && |
| 3018 | (tcp_fackets_out(tp) > tp->reordering)); | 3037 | (tcp_fackets_out(tp) > tp->reordering)); |
| 3019 | int fast_rexmit = 0, mib_idx; | 3038 | int fast_rexmit = 0, mib_idx; |
| @@ -3032,19 +3051,10 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
| 3032 | if (tcp_check_sack_reneging(sk, flag)) | 3051 | if (tcp_check_sack_reneging(sk, flag)) |
| 3033 | return; | 3052 | return; |
| 3034 | 3053 | ||
| 3035 | /* C. Process data loss notification, provided it is valid. */ | 3054 | /* C. Check consistency of the current state. */ |
| 3036 | if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) && | ||
| 3037 | before(tp->snd_una, tp->high_seq) && | ||
| 3038 | icsk->icsk_ca_state != TCP_CA_Open && | ||
| 3039 | tp->fackets_out > tp->reordering) { | ||
| 3040 | tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0); | ||
| 3041 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS); | ||
| 3042 | } | ||
| 3043 | |||
| 3044 | /* D. Check consistency of the current state. */ | ||
| 3045 | tcp_verify_left_out(tp); | 3055 | tcp_verify_left_out(tp); |
| 3046 | 3056 | ||
| 3047 | /* E. Check state exit conditions. State can be terminated | 3057 | /* D. Check state exit conditions. State can be terminated |
| 3048 | * when high_seq is ACKed. */ | 3058 | * when high_seq is ACKed. */ |
| 3049 | if (icsk->icsk_ca_state == TCP_CA_Open) { | 3059 | if (icsk->icsk_ca_state == TCP_CA_Open) { |
| 3050 | WARN_ON(tp->retrans_out != 0); | 3060 | WARN_ON(tp->retrans_out != 0); |
| @@ -3066,17 +3076,6 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
| 3066 | } | 3076 | } |
| 3067 | break; | 3077 | break; |
| 3068 | 3078 | ||
| 3069 | case TCP_CA_Disorder: | ||
| 3070 | tcp_try_undo_dsack(sk); | ||
| 3071 | if (!tp->undo_marker || | ||
| 3072 | /* For SACK case do not Open to allow to undo | ||
| 3073 | * catching for all duplicate ACKs. */ | ||
| 3074 | tcp_is_reno(tp) || tp->snd_una != tp->high_seq) { | ||
| 3075 | tp->undo_marker = 0; | ||
| 3076 | tcp_set_ca_state(sk, TCP_CA_Open); | ||
| 3077 | } | ||
| 3078 | break; | ||
| 3079 | |||
| 3080 | case TCP_CA_Recovery: | 3079 | case TCP_CA_Recovery: |
| 3081 | if (tcp_is_reno(tp)) | 3080 | if (tcp_is_reno(tp)) |
| 3082 | tcp_reset_reno_sack(tp); | 3081 | tcp_reset_reno_sack(tp); |
| @@ -3087,7 +3086,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
| 3087 | } | 3086 | } |
| 3088 | } | 3087 | } |
| 3089 | 3088 | ||
| 3090 | /* F. Process state. */ | 3089 | /* E. Process state. */ |
| 3091 | switch (icsk->icsk_ca_state) { | 3090 | switch (icsk->icsk_ca_state) { |
| 3092 | case TCP_CA_Recovery: | 3091 | case TCP_CA_Recovery: |
| 3093 | if (!(flag & FLAG_SND_UNA_ADVANCED)) { | 3092 | if (!(flag & FLAG_SND_UNA_ADVANCED)) { |
| @@ -3117,7 +3116,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
| 3117 | tcp_add_reno_sack(sk); | 3116 | tcp_add_reno_sack(sk); |
| 3118 | } | 3117 | } |
| 3119 | 3118 | ||
| 3120 | if (icsk->icsk_ca_state == TCP_CA_Disorder) | 3119 | if (icsk->icsk_ca_state <= TCP_CA_Disorder) |
| 3121 | tcp_try_undo_dsack(sk); | 3120 | tcp_try_undo_dsack(sk); |
| 3122 | 3121 | ||
| 3123 | if (!tcp_time_to_recover(sk)) { | 3122 | if (!tcp_time_to_recover(sk)) { |
| @@ -3681,10 +3680,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
| 3681 | u32 prior_snd_una = tp->snd_una; | 3680 | u32 prior_snd_una = tp->snd_una; |
| 3682 | u32 ack_seq = TCP_SKB_CB(skb)->seq; | 3681 | u32 ack_seq = TCP_SKB_CB(skb)->seq; |
| 3683 | u32 ack = TCP_SKB_CB(skb)->ack_seq; | 3682 | u32 ack = TCP_SKB_CB(skb)->ack_seq; |
| 3683 | bool is_dupack = false; | ||
| 3684 | u32 prior_in_flight; | 3684 | u32 prior_in_flight; |
| 3685 | u32 prior_fackets; | 3685 | u32 prior_fackets; |
| 3686 | int prior_packets; | 3686 | int prior_packets; |
| 3687 | int prior_sacked = tp->sacked_out; | 3687 | int prior_sacked = tp->sacked_out; |
| 3688 | int pkts_acked = 0; | ||
| 3688 | int newly_acked_sacked = 0; | 3689 | int newly_acked_sacked = 0; |
| 3689 | int frto_cwnd = 0; | 3690 | int frto_cwnd = 0; |
| 3690 | 3691 | ||
| @@ -3757,6 +3758,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
| 3757 | /* See if we can take anything off of the retransmit queue. */ | 3758 | /* See if we can take anything off of the retransmit queue. */ |
| 3758 | flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); | 3759 | flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); |
| 3759 | 3760 | ||
| 3761 | pkts_acked = prior_packets - tp->packets_out; | ||
| 3760 | newly_acked_sacked = (prior_packets - prior_sacked) - | 3762 | newly_acked_sacked = (prior_packets - prior_sacked) - |
| 3761 | (tp->packets_out - tp->sacked_out); | 3763 | (tp->packets_out - tp->sacked_out); |
| 3762 | 3764 | ||
| @@ -3771,8 +3773,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
| 3771 | if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && | 3773 | if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && |
| 3772 | tcp_may_raise_cwnd(sk, flag)) | 3774 | tcp_may_raise_cwnd(sk, flag)) |
| 3773 | tcp_cong_avoid(sk, ack, prior_in_flight); | 3775 | tcp_cong_avoid(sk, ack, prior_in_flight); |
| 3774 | tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, | 3776 | is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); |
| 3775 | newly_acked_sacked, flag); | 3777 | tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, |
| 3778 | is_dupack, flag); | ||
| 3776 | } else { | 3779 | } else { |
| 3777 | if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) | 3780 | if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) |
| 3778 | tcp_cong_avoid(sk, ack, prior_in_flight); | 3781 | tcp_cong_avoid(sk, ack, prior_in_flight); |
| @@ -3784,6 +3787,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
| 3784 | return 1; | 3787 | return 1; |
| 3785 | 3788 | ||
| 3786 | no_queue: | 3789 | no_queue: |
| 3790 | /* If data was DSACKed, see if we can undo a cwnd reduction. */ | ||
| 3791 | if (flag & FLAG_DSACKING_ACK) | ||
| 3792 | tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, | ||
| 3793 | is_dupack, flag); | ||
| 3787 | /* If this ack opens up a zero window, clear backoff. It was | 3794 | /* If this ack opens up a zero window, clear backoff. It was |
| 3788 | * being used to time the probes, and is probably far higher than | 3795 | * being used to time the probes, and is probably far higher than |
| 3789 | * it needs to be for normal retransmission. | 3796 | * it needs to be for normal retransmission. |
| @@ -3797,10 +3804,14 @@ invalid_ack: | |||
| 3797 | return -1; | 3804 | return -1; |
| 3798 | 3805 | ||
| 3799 | old_ack: | 3806 | old_ack: |
| 3807 | /* If data was SACKed, tag it and see if we should send more data. | ||
| 3808 | * If data was DSACKed, see if we can undo a cwnd reduction. | ||
| 3809 | */ | ||
| 3800 | if (TCP_SKB_CB(skb)->sacked) { | 3810 | if (TCP_SKB_CB(skb)->sacked) { |
| 3801 | tcp_sacktag_write_queue(sk, skb, prior_snd_una); | 3811 | flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); |
| 3802 | if (icsk->icsk_ca_state == TCP_CA_Open) | 3812 | newly_acked_sacked = tp->sacked_out - prior_sacked; |
| 3803 | tcp_try_keep_open(sk); | 3813 | tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, |
| 3814 | is_dupack, flag); | ||
| 3804 | } | 3815 | } |
| 3805 | 3816 | ||
| 3806 | SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt); | 3817 | SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt); |
| @@ -3876,7 +3887,7 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o | |||
| 3876 | case TCPOPT_SACK_PERM: | 3887 | case TCPOPT_SACK_PERM: |
| 3877 | if (opsize == TCPOLEN_SACK_PERM && th->syn && | 3888 | if (opsize == TCPOLEN_SACK_PERM && th->syn && |
| 3878 | !estab && sysctl_tcp_sack) { | 3889 | !estab && sysctl_tcp_sack) { |
| 3879 | opt_rx->sack_ok = 1; | 3890 | opt_rx->sack_ok = TCP_SACK_SEEN; |
| 3880 | tcp_sack_reset(opt_rx); | 3891 | tcp_sack_reset(opt_rx); |
| 3881 | } | 3892 | } |
| 3882 | break; | 3893 | break; |
| @@ -4864,7 +4875,7 @@ static int tcp_prune_queue(struct sock *sk) | |||
| 4864 | 4875 | ||
| 4865 | if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) | 4876 | if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) |
| 4866 | tcp_clamp_window(sk); | 4877 | tcp_clamp_window(sk); |
| 4867 | else if (tcp_memory_pressure) | 4878 | else if (sk_under_memory_pressure(sk)) |
| 4868 | tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); | 4879 | tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); |
| 4869 | 4880 | ||
| 4870 | tcp_collapse_ofo_queue(sk); | 4881 | tcp_collapse_ofo_queue(sk); |
| @@ -4930,11 +4941,11 @@ static int tcp_should_expand_sndbuf(const struct sock *sk) | |||
| 4930 | return 0; | 4941 | return 0; |
| 4931 | 4942 | ||
| 4932 | /* If we are under global TCP memory pressure, do not expand. */ | 4943 | /* If we are under global TCP memory pressure, do not expand. */ |
| 4933 | if (tcp_memory_pressure) | 4944 | if (sk_under_memory_pressure(sk)) |
| 4934 | return 0; | 4945 | return 0; |
| 4935 | 4946 | ||
| 4936 | /* If we are under soft global TCP memory pressure, do not expand. */ | 4947 | /* If we are under soft global TCP memory pressure, do not expand. */ |
| 4937 | if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0]) | 4948 | if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0)) |
| 4938 | return 0; | 4949 | return 0; |
| 4939 | 4950 | ||
| 4940 | /* If we filled the congestion window, do not expand. */ | 4951 | /* If we filled the congestion window, do not expand. */ |
| @@ -5809,6 +5820,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
| 5809 | goto discard; | 5820 | goto discard; |
| 5810 | 5821 | ||
| 5811 | if (th->syn) { | 5822 | if (th->syn) { |
| 5823 | if (th->fin) | ||
| 5824 | goto discard; | ||
| 5812 | if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) | 5825 | if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) |
| 5813 | return 1; | 5826 | return 1; |
| 5814 | 5827 | ||
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a9db4b1a2215..fd54c5f8a255 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
| @@ -73,6 +73,7 @@ | |||
| 73 | #include <net/xfrm.h> | 73 | #include <net/xfrm.h> |
| 74 | #include <net/netdma.h> | 74 | #include <net/netdma.h> |
| 75 | #include <net/secure_seq.h> | 75 | #include <net/secure_seq.h> |
| 76 | #include <net/tcp_memcontrol.h> | ||
| 76 | 77 | ||
| 77 | #include <linux/inet.h> | 78 | #include <linux/inet.h> |
| 78 | #include <linux/ipv6.h> | 79 | #include <linux/ipv6.h> |
| @@ -630,7 +631,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) | |||
| 630 | arg.iov[0].iov_len = sizeof(rep.th); | 631 | arg.iov[0].iov_len = sizeof(rep.th); |
| 631 | 632 | ||
| 632 | #ifdef CONFIG_TCP_MD5SIG | 633 | #ifdef CONFIG_TCP_MD5SIG |
| 633 | key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL; | 634 | key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL; |
| 634 | if (key) { | 635 | if (key) { |
| 635 | rep.opt[0] = htonl((TCPOPT_NOP << 24) | | 636 | rep.opt[0] = htonl((TCPOPT_NOP << 24) | |
| 636 | (TCPOPT_NOP << 16) | | 637 | (TCPOPT_NOP << 16) | |
| @@ -650,6 +651,11 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) | |||
| 650 | arg.iov[0].iov_len, IPPROTO_TCP, 0); | 651 | arg.iov[0].iov_len, IPPROTO_TCP, 0); |
| 651 | arg.csumoffset = offsetof(struct tcphdr, check) / 2; | 652 | arg.csumoffset = offsetof(struct tcphdr, check) / 2; |
| 652 | arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; | 653 | arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; |
| 654 | /* When socket is gone, all binding information is lost. | ||
| 655 | * routing might fail in this case. using iif for oif to | ||
| 656 | * make sure we can deliver it | ||
| 657 | */ | ||
| 658 | arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb); | ||
| 653 | 659 | ||
| 654 | net = dev_net(skb_dst(skb)->dev); | 660 | net = dev_net(skb_dst(skb)->dev); |
| 655 | arg.tos = ip_hdr(skb)->tos; | 661 | arg.tos = ip_hdr(skb)->tos; |
| @@ -1460,9 +1466,13 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
| 1460 | inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; | 1466 | inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; |
| 1461 | newinet->inet_id = newtp->write_seq ^ jiffies; | 1467 | newinet->inet_id = newtp->write_seq ^ jiffies; |
| 1462 | 1468 | ||
| 1463 | if (!dst && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL) | 1469 | if (!dst) { |
| 1464 | goto put_and_exit; | 1470 | dst = inet_csk_route_child_sock(sk, newsk, req); |
| 1465 | 1471 | if (!dst) | |
| 1472 | goto put_and_exit; | ||
| 1473 | } else { | ||
| 1474 | /* syncookie case : see end of cookie_v4_check() */ | ||
| 1475 | } | ||
| 1466 | sk_setup_caps(newsk, dst); | 1476 | sk_setup_caps(newsk, dst); |
| 1467 | 1477 | ||
| 1468 | tcp_mtup_init(newsk); | 1478 | tcp_mtup_init(newsk); |
| @@ -1511,6 +1521,7 @@ exit: | |||
| 1511 | return NULL; | 1521 | return NULL; |
| 1512 | put_and_exit: | 1522 | put_and_exit: |
| 1513 | tcp_clear_xmit_timers(newsk); | 1523 | tcp_clear_xmit_timers(newsk); |
| 1524 | tcp_cleanup_congestion_control(newsk); | ||
| 1514 | bh_unlock_sock(newsk); | 1525 | bh_unlock_sock(newsk); |
| 1515 | sock_put(newsk); | 1526 | sock_put(newsk); |
| 1516 | goto exit; | 1527 | goto exit; |
| @@ -1916,7 +1927,8 @@ static int tcp_v4_init_sock(struct sock *sk) | |||
| 1916 | sk->sk_rcvbuf = sysctl_tcp_rmem[1]; | 1927 | sk->sk_rcvbuf = sysctl_tcp_rmem[1]; |
| 1917 | 1928 | ||
| 1918 | local_bh_disable(); | 1929 | local_bh_disable(); |
| 1919 | percpu_counter_inc(&tcp_sockets_allocated); | 1930 | sock_update_memcg(sk); |
| 1931 | sk_sockets_allocated_inc(sk); | ||
| 1920 | local_bh_enable(); | 1932 | local_bh_enable(); |
| 1921 | 1933 | ||
| 1922 | return 0; | 1934 | return 0; |
| @@ -1972,7 +1984,8 @@ void tcp_v4_destroy_sock(struct sock *sk) | |||
| 1972 | tp->cookie_values = NULL; | 1984 | tp->cookie_values = NULL; |
| 1973 | } | 1985 | } |
| 1974 | 1986 | ||
| 1975 | percpu_counter_dec(&tcp_sockets_allocated); | 1987 | sk_sockets_allocated_dec(sk); |
| 1988 | sock_release_memcg(sk); | ||
| 1976 | } | 1989 | } |
| 1977 | EXPORT_SYMBOL(tcp_v4_destroy_sock); | 1990 | EXPORT_SYMBOL(tcp_v4_destroy_sock); |
| 1978 | 1991 | ||
| @@ -2619,7 +2632,6 @@ struct proto tcp_prot = { | |||
| 2619 | .orphan_count = &tcp_orphan_count, | 2632 | .orphan_count = &tcp_orphan_count, |
| 2620 | .memory_allocated = &tcp_memory_allocated, | 2633 | .memory_allocated = &tcp_memory_allocated, |
| 2621 | .memory_pressure = &tcp_memory_pressure, | 2634 | .memory_pressure = &tcp_memory_pressure, |
| 2622 | .sysctl_mem = sysctl_tcp_mem, | ||
| 2623 | .sysctl_wmem = sysctl_tcp_wmem, | 2635 | .sysctl_wmem = sysctl_tcp_wmem, |
| 2624 | .sysctl_rmem = sysctl_tcp_rmem, | 2636 | .sysctl_rmem = sysctl_tcp_rmem, |
| 2625 | .max_header = MAX_TCP_HEADER, | 2637 | .max_header = MAX_TCP_HEADER, |
| @@ -2633,10 +2645,14 @@ struct proto tcp_prot = { | |||
| 2633 | .compat_setsockopt = compat_tcp_setsockopt, | 2645 | .compat_setsockopt = compat_tcp_setsockopt, |
| 2634 | .compat_getsockopt = compat_tcp_getsockopt, | 2646 | .compat_getsockopt = compat_tcp_getsockopt, |
| 2635 | #endif | 2647 | #endif |
| 2648 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM | ||
| 2649 | .init_cgroup = tcp_init_cgroup, | ||
| 2650 | .destroy_cgroup = tcp_destroy_cgroup, | ||
| 2651 | .proto_cgroup = tcp_proto_cgroup, | ||
| 2652 | #endif | ||
| 2636 | }; | 2653 | }; |
| 2637 | EXPORT_SYMBOL(tcp_prot); | 2654 | EXPORT_SYMBOL(tcp_prot); |
| 2638 | 2655 | ||
| 2639 | |||
| 2640 | static int __net_init tcp_sk_init(struct net *net) | 2656 | static int __net_init tcp_sk_init(struct net *net) |
| 2641 | { | 2657 | { |
| 2642 | return inet_ctl_sock_create(&net->ipv4.tcp_sock, | 2658 | return inet_ctl_sock_create(&net->ipv4.tcp_sock, |
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c new file mode 100644 index 000000000000..49978788a9dc --- /dev/null +++ b/net/ipv4/tcp_memcontrol.c | |||
| @@ -0,0 +1,272 @@ | |||
| 1 | #include <net/tcp.h> | ||
| 2 | #include <net/tcp_memcontrol.h> | ||
| 3 | #include <net/sock.h> | ||
| 4 | #include <net/ip.h> | ||
| 5 | #include <linux/nsproxy.h> | ||
| 6 | #include <linux/memcontrol.h> | ||
| 7 | #include <linux/module.h> | ||
| 8 | |||
| 9 | static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft); | ||
| 10 | static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft, | ||
| 11 | const char *buffer); | ||
| 12 | static int tcp_cgroup_reset(struct cgroup *cont, unsigned int event); | ||
| 13 | |||
| 14 | static struct cftype tcp_files[] = { | ||
| 15 | { | ||
| 16 | .name = "kmem.tcp.limit_in_bytes", | ||
| 17 | .write_string = tcp_cgroup_write, | ||
| 18 | .read_u64 = tcp_cgroup_read, | ||
| 19 | .private = RES_LIMIT, | ||
| 20 | }, | ||
| 21 | { | ||
| 22 | .name = "kmem.tcp.usage_in_bytes", | ||
| 23 | .read_u64 = tcp_cgroup_read, | ||
| 24 | .private = RES_USAGE, | ||
| 25 | }, | ||
| 26 | { | ||
| 27 | .name = "kmem.tcp.failcnt", | ||
| 28 | .private = RES_FAILCNT, | ||
| 29 | .trigger = tcp_cgroup_reset, | ||
| 30 | .read_u64 = tcp_cgroup_read, | ||
| 31 | }, | ||
| 32 | { | ||
| 33 | .name = "kmem.tcp.max_usage_in_bytes", | ||
| 34 | .private = RES_MAX_USAGE, | ||
| 35 | .trigger = tcp_cgroup_reset, | ||
| 36 | .read_u64 = tcp_cgroup_read, | ||
| 37 | }, | ||
| 38 | }; | ||
| 39 | |||
| 40 | static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto) | ||
| 41 | { | ||
| 42 | return container_of(cg_proto, struct tcp_memcontrol, cg_proto); | ||
| 43 | } | ||
| 44 | |||
| 45 | static void memcg_tcp_enter_memory_pressure(struct sock *sk) | ||
| 46 | { | ||
| 47 | if (sk->sk_cgrp->memory_pressure) | ||
| 48 | *sk->sk_cgrp->memory_pressure = 1; | ||
| 49 | } | ||
| 50 | EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure); | ||
| 51 | |||
| 52 | int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss) | ||
| 53 | { | ||
| 54 | /* | ||
| 55 | * The root cgroup does not use res_counters, but rather, | ||
| 56 | * rely on the data already collected by the network | ||
| 57 | * subsystem | ||
| 58 | */ | ||
| 59 | struct res_counter *res_parent = NULL; | ||
| 60 | struct cg_proto *cg_proto, *parent_cg; | ||
| 61 | struct tcp_memcontrol *tcp; | ||
| 62 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); | ||
| 63 | struct mem_cgroup *parent = parent_mem_cgroup(memcg); | ||
| 64 | struct net *net = current->nsproxy->net_ns; | ||
| 65 | |||
| 66 | cg_proto = tcp_prot.proto_cgroup(memcg); | ||
| 67 | if (!cg_proto) | ||
| 68 | goto create_files; | ||
| 69 | |||
| 70 | tcp = tcp_from_cgproto(cg_proto); | ||
| 71 | |||
| 72 | tcp->tcp_prot_mem[0] = net->ipv4.sysctl_tcp_mem[0]; | ||
| 73 | tcp->tcp_prot_mem[1] = net->ipv4.sysctl_tcp_mem[1]; | ||
| 74 | tcp->tcp_prot_mem[2] = net->ipv4.sysctl_tcp_mem[2]; | ||
| 75 | tcp->tcp_memory_pressure = 0; | ||
| 76 | |||
| 77 | parent_cg = tcp_prot.proto_cgroup(parent); | ||
| 78 | if (parent_cg) | ||
| 79 | res_parent = parent_cg->memory_allocated; | ||
| 80 | |||
| 81 | res_counter_init(&tcp->tcp_memory_allocated, res_parent); | ||
| 82 | percpu_counter_init(&tcp->tcp_sockets_allocated, 0); | ||
| 83 | |||
| 84 | cg_proto->enter_memory_pressure = memcg_tcp_enter_memory_pressure; | ||
| 85 | cg_proto->memory_pressure = &tcp->tcp_memory_pressure; | ||
| 86 | cg_proto->sysctl_mem = tcp->tcp_prot_mem; | ||
| 87 | cg_proto->memory_allocated = &tcp->tcp_memory_allocated; | ||
| 88 | cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated; | ||
| 89 | cg_proto->memcg = memcg; | ||
| 90 | |||
| 91 | create_files: | ||
| 92 | return cgroup_add_files(cgrp, ss, tcp_files, | ||
| 93 | ARRAY_SIZE(tcp_files)); | ||
| 94 | } | ||
| 95 | EXPORT_SYMBOL(tcp_init_cgroup); | ||
| 96 | |||
| 97 | void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss) | ||
| 98 | { | ||
| 99 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); | ||
| 100 | struct cg_proto *cg_proto; | ||
| 101 | struct tcp_memcontrol *tcp; | ||
| 102 | u64 val; | ||
| 103 | |||
| 104 | cg_proto = tcp_prot.proto_cgroup(memcg); | ||
| 105 | if (!cg_proto) | ||
| 106 | return; | ||
| 107 | |||
| 108 | tcp = tcp_from_cgproto(cg_proto); | ||
| 109 | percpu_counter_destroy(&tcp->tcp_sockets_allocated); | ||
| 110 | |||
| 111 | val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); | ||
| 112 | |||
| 113 | if (val != RESOURCE_MAX) | ||
| 114 | jump_label_dec(&memcg_socket_limit_enabled); | ||
| 115 | } | ||
| 116 | EXPORT_SYMBOL(tcp_destroy_cgroup); | ||
| 117 | |||
| 118 | static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) | ||
| 119 | { | ||
| 120 | struct net *net = current->nsproxy->net_ns; | ||
| 121 | struct tcp_memcontrol *tcp; | ||
| 122 | struct cg_proto *cg_proto; | ||
| 123 | u64 old_lim; | ||
| 124 | int i; | ||
| 125 | int ret; | ||
| 126 | |||
| 127 | cg_proto = tcp_prot.proto_cgroup(memcg); | ||
| 128 | if (!cg_proto) | ||
| 129 | return -EINVAL; | ||
| 130 | |||
| 131 | if (val > RESOURCE_MAX) | ||
| 132 | val = RESOURCE_MAX; | ||
| 133 | |||
| 134 | tcp = tcp_from_cgproto(cg_proto); | ||
| 135 | |||
| 136 | old_lim = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); | ||
| 137 | ret = res_counter_set_limit(&tcp->tcp_memory_allocated, val); | ||
| 138 | if (ret) | ||
| 139 | return ret; | ||
| 140 | |||
| 141 | for (i = 0; i < 3; i++) | ||
| 142 | tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT, | ||
| 143 | net->ipv4.sysctl_tcp_mem[i]); | ||
| 144 | |||
| 145 | if (val == RESOURCE_MAX && old_lim != RESOURCE_MAX) | ||
| 146 | jump_label_dec(&memcg_socket_limit_enabled); | ||
| 147 | else if (old_lim == RESOURCE_MAX && val != RESOURCE_MAX) | ||
| 148 | jump_label_inc(&memcg_socket_limit_enabled); | ||
| 149 | |||
| 150 | return 0; | ||
| 151 | } | ||
| 152 | |||
| 153 | static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft, | ||
| 154 | const char *buffer) | ||
| 155 | { | ||
| 156 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | ||
| 157 | unsigned long long val; | ||
| 158 | int ret = 0; | ||
| 159 | |||
| 160 | switch (cft->private) { | ||
| 161 | case RES_LIMIT: | ||
| 162 | /* see memcontrol.c */ | ||
| 163 | ret = res_counter_memparse_write_strategy(buffer, &val); | ||
| 164 | if (ret) | ||
| 165 | break; | ||
| 166 | ret = tcp_update_limit(memcg, val); | ||
| 167 | break; | ||
| 168 | default: | ||
| 169 | ret = -EINVAL; | ||
| 170 | break; | ||
| 171 | } | ||
| 172 | return ret; | ||
| 173 | } | ||
| 174 | |||
| 175 | static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val) | ||
| 176 | { | ||
| 177 | struct tcp_memcontrol *tcp; | ||
| 178 | struct cg_proto *cg_proto; | ||
| 179 | |||
| 180 | cg_proto = tcp_prot.proto_cgroup(memcg); | ||
| 181 | if (!cg_proto) | ||
| 182 | return default_val; | ||
| 183 | |||
| 184 | tcp = tcp_from_cgproto(cg_proto); | ||
| 185 | return res_counter_read_u64(&tcp->tcp_memory_allocated, type); | ||
| 186 | } | ||
| 187 | |||
| 188 | static u64 tcp_read_usage(struct mem_cgroup *memcg) | ||
| 189 | { | ||
| 190 | struct tcp_memcontrol *tcp; | ||
| 191 | struct cg_proto *cg_proto; | ||
| 192 | |||
| 193 | cg_proto = tcp_prot.proto_cgroup(memcg); | ||
| 194 | if (!cg_proto) | ||
| 195 | return atomic_long_read(&tcp_memory_allocated) << PAGE_SHIFT; | ||
| 196 | |||
| 197 | tcp = tcp_from_cgproto(cg_proto); | ||
| 198 | return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE); | ||
| 199 | } | ||
| 200 | |||
| 201 | static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft) | ||
| 202 | { | ||
| 203 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | ||
| 204 | u64 val; | ||
| 205 | |||
| 206 | switch (cft->private) { | ||
| 207 | case RES_LIMIT: | ||
| 208 | val = tcp_read_stat(memcg, RES_LIMIT, RESOURCE_MAX); | ||
| 209 | break; | ||
| 210 | case RES_USAGE: | ||
| 211 | val = tcp_read_usage(memcg); | ||
| 212 | break; | ||
| 213 | case RES_FAILCNT: | ||
| 214 | case RES_MAX_USAGE: | ||
| 215 | val = tcp_read_stat(memcg, cft->private, 0); | ||
| 216 | break; | ||
| 217 | default: | ||
| 218 | BUG(); | ||
| 219 | } | ||
| 220 | return val; | ||
| 221 | } | ||
| 222 | |||
| 223 | static int tcp_cgroup_reset(struct cgroup *cont, unsigned int event) | ||
| 224 | { | ||
| 225 | struct mem_cgroup *memcg; | ||
| 226 | struct tcp_memcontrol *tcp; | ||
| 227 | struct cg_proto *cg_proto; | ||
| 228 | |||
| 229 | memcg = mem_cgroup_from_cont(cont); | ||
| 230 | cg_proto = tcp_prot.proto_cgroup(memcg); | ||
| 231 | if (!cg_proto) | ||
| 232 | return 0; | ||
| 233 | tcp = tcp_from_cgproto(cg_proto); | ||
| 234 | |||
| 235 | switch (event) { | ||
| 236 | case RES_MAX_USAGE: | ||
| 237 | res_counter_reset_max(&tcp->tcp_memory_allocated); | ||
| 238 | break; | ||
| 239 | case RES_FAILCNT: | ||
| 240 | res_counter_reset_failcnt(&tcp->tcp_memory_allocated); | ||
| 241 | break; | ||
| 242 | } | ||
| 243 | |||
| 244 | return 0; | ||
| 245 | } | ||
| 246 | |||
| 247 | unsigned long long tcp_max_memory(const struct mem_cgroup *memcg) | ||
| 248 | { | ||
| 249 | struct tcp_memcontrol *tcp; | ||
| 250 | struct cg_proto *cg_proto; | ||
| 251 | |||
| 252 | cg_proto = tcp_prot.proto_cgroup((struct mem_cgroup *)memcg); | ||
| 253 | if (!cg_proto) | ||
| 254 | return 0; | ||
| 255 | |||
| 256 | tcp = tcp_from_cgproto(cg_proto); | ||
| 257 | return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); | ||
| 258 | } | ||
| 259 | |||
| 260 | void tcp_prot_mem(struct mem_cgroup *memcg, long val, int idx) | ||
| 261 | { | ||
| 262 | struct tcp_memcontrol *tcp; | ||
| 263 | struct cg_proto *cg_proto; | ||
| 264 | |||
| 265 | cg_proto = tcp_prot.proto_cgroup(memcg); | ||
| 266 | if (!cg_proto) | ||
| 267 | return; | ||
| 268 | |||
| 269 | tcp = tcp_from_cgproto(cg_proto); | ||
| 270 | |||
| 271 | tcp->tcp_prot_mem[idx] = val; | ||
| 272 | } | ||
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 66363b689ad6..550e755747e0 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
| @@ -336,15 +336,15 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) | |||
| 336 | tcptw->tw_ts_recent = tp->rx_opt.ts_recent; | 336 | tcptw->tw_ts_recent = tp->rx_opt.ts_recent; |
| 337 | tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; | 337 | tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; |
| 338 | 338 | ||
| 339 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 339 | #if IS_ENABLED(CONFIG_IPV6) |
| 340 | if (tw->tw_family == PF_INET6) { | 340 | if (tw->tw_family == PF_INET6) { |
| 341 | struct ipv6_pinfo *np = inet6_sk(sk); | 341 | struct ipv6_pinfo *np = inet6_sk(sk); |
| 342 | struct inet6_timewait_sock *tw6; | 342 | struct inet6_timewait_sock *tw6; |
| 343 | 343 | ||
| 344 | tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot); | 344 | tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot); |
| 345 | tw6 = inet6_twsk((struct sock *)tw); | 345 | tw6 = inet6_twsk((struct sock *)tw); |
| 346 | ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr); | 346 | tw6->tw_v6_daddr = np->daddr; |
| 347 | ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr); | 347 | tw6->tw_v6_rcv_saddr = np->rcv_saddr; |
| 348 | tw->tw_tclass = np->tclass; | 348 | tw->tw_tclass = np->tclass; |
| 349 | tw->tw_ipv6only = np->ipv6only; | 349 | tw->tw_ipv6only = np->ipv6only; |
| 350 | } | 350 | } |
| @@ -425,7 +425,7 @@ static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, | |||
| 425 | */ | 425 | */ |
| 426 | struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb) | 426 | struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb) |
| 427 | { | 427 | { |
| 428 | struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC); | 428 | struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC); |
| 429 | 429 | ||
| 430 | if (newsk != NULL) { | 430 | if (newsk != NULL) { |
| 431 | const struct inet_request_sock *ireq = inet_rsk(req); | 431 | const struct inet_request_sock *ireq = inet_rsk(req); |
| @@ -495,7 +495,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
| 495 | newtp->frto_counter = 0; | 495 | newtp->frto_counter = 0; |
| 496 | newtp->frto_highmark = 0; | 496 | newtp->frto_highmark = 0; |
| 497 | 497 | ||
| 498 | newicsk->icsk_ca_ops = &tcp_init_congestion_ops; | 498 | if (newicsk->icsk_ca_ops != &tcp_init_congestion_ops && |
| 499 | !try_module_get(newicsk->icsk_ca_ops->owner)) | ||
| 500 | newicsk->icsk_ca_ops = &tcp_init_congestion_ops; | ||
| 499 | 501 | ||
| 500 | tcp_set_ca_state(newsk, TCP_CA_Open); | 502 | tcp_set_ca_state(newsk, TCP_CA_Open); |
| 501 | tcp_init_xmit_timers(newsk); | 503 | tcp_init_xmit_timers(newsk); |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 63170e297540..4ff3b6dc74fc 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
| @@ -1093,6 +1093,13 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) | |||
| 1093 | { | 1093 | { |
| 1094 | int i, k, eat; | 1094 | int i, k, eat; |
| 1095 | 1095 | ||
| 1096 | eat = min_t(int, len, skb_headlen(skb)); | ||
| 1097 | if (eat) { | ||
| 1098 | __skb_pull(skb, eat); | ||
| 1099 | len -= eat; | ||
| 1100 | if (!len) | ||
| 1101 | return; | ||
| 1102 | } | ||
| 1096 | eat = len; | 1103 | eat = len; |
| 1097 | k = 0; | 1104 | k = 0; |
| 1098 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { | 1105 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { |
| @@ -1124,11 +1131,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) | |||
| 1124 | if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) | 1131 | if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) |
| 1125 | return -ENOMEM; | 1132 | return -ENOMEM; |
| 1126 | 1133 | ||
| 1127 | /* If len == headlen, we avoid __skb_pull to preserve alignment. */ | 1134 | __pskb_trim_head(skb, len); |
| 1128 | if (unlikely(len < skb_headlen(skb))) | ||
| 1129 | __skb_pull(skb, len); | ||
| 1130 | else | ||
| 1131 | __pskb_trim_head(skb, len - skb_headlen(skb)); | ||
| 1132 | 1135 | ||
| 1133 | TCP_SKB_CB(skb)->seq += len; | 1136 | TCP_SKB_CB(skb)->seq += len; |
| 1134 | skb->ip_summed = CHECKSUM_PARTIAL; | 1137 | skb->ip_summed = CHECKSUM_PARTIAL; |
| @@ -1138,11 +1141,9 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) | |||
| 1138 | sk_mem_uncharge(sk, len); | 1141 | sk_mem_uncharge(sk, len); |
| 1139 | sock_set_flag(sk, SOCK_QUEUE_SHRUNK); | 1142 | sock_set_flag(sk, SOCK_QUEUE_SHRUNK); |
| 1140 | 1143 | ||
| 1141 | /* Any change of skb->len requires recalculation of tso | 1144 | /* Any change of skb->len requires recalculation of tso factor. */ |
| 1142 | * factor and mss. | ||
| 1143 | */ | ||
| 1144 | if (tcp_skb_pcount(skb) > 1) | 1145 | if (tcp_skb_pcount(skb) > 1) |
| 1145 | tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk)); | 1146 | tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb)); |
| 1146 | 1147 | ||
| 1147 | return 0; | 1148 | return 0; |
| 1148 | } | 1149 | } |
| @@ -1581,7 +1582,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) | |||
| 1581 | * frame, so if we have space for more than 3 frames | 1582 | * frame, so if we have space for more than 3 frames |
| 1582 | * then send now. | 1583 | * then send now. |
| 1583 | */ | 1584 | */ |
| 1584 | if (limit > tcp_max_burst(tp) * tp->mss_cache) | 1585 | if (limit > tcp_max_tso_deferred_mss(tp) * tp->mss_cache) |
| 1585 | goto send_now; | 1586 | goto send_now; |
| 1586 | } | 1587 | } |
| 1587 | 1588 | ||
| @@ -1919,7 +1920,7 @@ u32 __tcp_select_window(struct sock *sk) | |||
| 1919 | if (free_space < (full_space >> 1)) { | 1920 | if (free_space < (full_space >> 1)) { |
| 1920 | icsk->icsk_ack.quick = 0; | 1921 | icsk->icsk_ack.quick = 0; |
| 1921 | 1922 | ||
| 1922 | if (tcp_memory_pressure) | 1923 | if (sk_under_memory_pressure(sk)) |
| 1923 | tp->rcv_ssthresh = min(tp->rcv_ssthresh, | 1924 | tp->rcv_ssthresh = min(tp->rcv_ssthresh, |
| 1924 | 4U * tp->advmss); | 1925 | 4U * tp->advmss); |
| 1925 | 1926 | ||
| @@ -2147,7 +2148,15 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
| 2147 | */ | 2148 | */ |
| 2148 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2149 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
| 2149 | 2150 | ||
| 2150 | err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); | 2151 | /* make sure skb->data is aligned on arches that require it */ |
| 2152 | if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) { | ||
| 2153 | struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, | ||
| 2154 | GFP_ATOMIC); | ||
| 2155 | err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : | ||
| 2156 | -ENOBUFS; | ||
| 2157 | } else { | ||
| 2158 | err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); | ||
| 2159 | } | ||
| 2151 | 2160 | ||
| 2152 | if (err == 0) { | 2161 | if (err == 0) { |
| 2153 | /* Update global TCP statistics. */ | 2162 | /* Update global TCP statistics. */ |
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 2e0f0af76c19..cd2e0723266d 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
| @@ -77,10 +77,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset) | |||
| 77 | if (sk->sk_err_soft) | 77 | if (sk->sk_err_soft) |
| 78 | shift++; | 78 | shift++; |
| 79 | 79 | ||
| 80 | if (tcp_too_many_orphans(sk, shift)) { | 80 | if (tcp_check_oom(sk, shift)) { |
| 81 | if (net_ratelimit()) | ||
| 82 | printk(KERN_INFO "Out of socket memory\n"); | ||
| 83 | |||
| 84 | /* Catch exceptional cases, when connection requires reset. | 81 | /* Catch exceptional cases, when connection requires reset. |
| 85 | * 1. Last segment was sent recently. */ | 82 | * 1. Last segment was sent recently. */ |
| 86 | if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN || | 83 | if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN || |
| @@ -171,13 +168,13 @@ static int tcp_write_timeout(struct sock *sk) | |||
| 171 | { | 168 | { |
| 172 | struct inet_connection_sock *icsk = inet_csk(sk); | 169 | struct inet_connection_sock *icsk = inet_csk(sk); |
| 173 | int retry_until; | 170 | int retry_until; |
| 174 | bool do_reset, syn_set = 0; | 171 | bool do_reset, syn_set = false; |
| 175 | 172 | ||
| 176 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { | 173 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { |
| 177 | if (icsk->icsk_retransmits) | 174 | if (icsk->icsk_retransmits) |
| 178 | dst_negative_advice(sk); | 175 | dst_negative_advice(sk); |
| 179 | retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; | 176 | retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; |
| 180 | syn_set = 1; | 177 | syn_set = true; |
| 181 | } else { | 178 | } else { |
| 182 | if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) { | 179 | if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) { |
| 183 | /* Black hole detection */ | 180 | /* Black hole detection */ |
| @@ -261,7 +258,7 @@ static void tcp_delack_timer(unsigned long data) | |||
| 261 | } | 258 | } |
| 262 | 259 | ||
| 263 | out: | 260 | out: |
| 264 | if (tcp_memory_pressure) | 261 | if (sk_under_memory_pressure(sk)) |
| 265 | sk_mem_reclaim(sk); | 262 | sk_mem_reclaim(sk); |
| 266 | out_unlock: | 263 | out_unlock: |
| 267 | bh_unlock_sock(sk); | 264 | bh_unlock_sock(sk); |
| @@ -340,7 +337,7 @@ void tcp_retransmit_timer(struct sock *sk) | |||
| 340 | &inet->inet_daddr, ntohs(inet->inet_dport), | 337 | &inet->inet_daddr, ntohs(inet->inet_dport), |
| 341 | inet->inet_num, tp->snd_una, tp->snd_nxt); | 338 | inet->inet_num, tp->snd_una, tp->snd_nxt); |
| 342 | } | 339 | } |
| 343 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 340 | #if IS_ENABLED(CONFIG_IPV6) |
| 344 | else if (sk->sk_family == AF_INET6) { | 341 | else if (sk->sk_family == AF_INET6) { |
| 345 | struct ipv6_pinfo *np = inet6_sk(sk); | 342 | struct ipv6_pinfo *np = inet6_sk(sk); |
| 346 | LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", | 343 | LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", |
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c index ac3b3ee4b07c..01775983b997 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c | |||
| @@ -105,7 +105,7 @@ drop: | |||
| 105 | return 0; | 105 | return 0; |
| 106 | } | 106 | } |
| 107 | 107 | ||
| 108 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 108 | #if IS_ENABLED(CONFIG_IPV6) |
| 109 | static int tunnel64_rcv(struct sk_buff *skb) | 109 | static int tunnel64_rcv(struct sk_buff *skb) |
| 110 | { | 110 | { |
| 111 | struct xfrm_tunnel *handler; | 111 | struct xfrm_tunnel *handler; |
| @@ -134,7 +134,7 @@ static void tunnel4_err(struct sk_buff *skb, u32 info) | |||
| 134 | break; | 134 | break; |
| 135 | } | 135 | } |
| 136 | 136 | ||
| 137 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 137 | #if IS_ENABLED(CONFIG_IPV6) |
| 138 | static void tunnel64_err(struct sk_buff *skb, u32 info) | 138 | static void tunnel64_err(struct sk_buff *skb, u32 info) |
| 139 | { | 139 | { |
| 140 | struct xfrm_tunnel *handler; | 140 | struct xfrm_tunnel *handler; |
| @@ -152,7 +152,7 @@ static const struct net_protocol tunnel4_protocol = { | |||
| 152 | .netns_ok = 1, | 152 | .netns_ok = 1, |
| 153 | }; | 153 | }; |
| 154 | 154 | ||
| 155 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 155 | #if IS_ENABLED(CONFIG_IPV6) |
| 156 | static const struct net_protocol tunnel64_protocol = { | 156 | static const struct net_protocol tunnel64_protocol = { |
| 157 | .handler = tunnel64_rcv, | 157 | .handler = tunnel64_rcv, |
| 158 | .err_handler = tunnel64_err, | 158 | .err_handler = tunnel64_err, |
| @@ -167,7 +167,7 @@ static int __init tunnel4_init(void) | |||
| 167 | printk(KERN_ERR "tunnel4 init: can't add protocol\n"); | 167 | printk(KERN_ERR "tunnel4 init: can't add protocol\n"); |
| 168 | return -EAGAIN; | 168 | return -EAGAIN; |
| 169 | } | 169 | } |
| 170 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 170 | #if IS_ENABLED(CONFIG_IPV6) |
| 171 | if (inet_add_protocol(&tunnel64_protocol, IPPROTO_IPV6)) { | 171 | if (inet_add_protocol(&tunnel64_protocol, IPPROTO_IPV6)) { |
| 172 | printk(KERN_ERR "tunnel64 init: can't add protocol\n"); | 172 | printk(KERN_ERR "tunnel64 init: can't add protocol\n"); |
| 173 | inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP); | 173 | inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP); |
| @@ -179,7 +179,7 @@ static int __init tunnel4_init(void) | |||
| 179 | 179 | ||
| 180 | static void __exit tunnel4_fini(void) | 180 | static void __exit tunnel4_fini(void) |
| 181 | { | 181 | { |
| 182 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 182 | #if IS_ENABLED(CONFIG_IPV6) |
| 183 | if (inet_del_protocol(&tunnel64_protocol, IPPROTO_IPV6)) | 183 | if (inet_del_protocol(&tunnel64_protocol, IPPROTO_IPV6)) |
| 184 | printk(KERN_ERR "tunnel64 close: can't remove protocol\n"); | 184 | printk(KERN_ERR "tunnel64 close: can't remove protocol\n"); |
| 185 | #endif | 185 | #endif |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5a65eeac1d29..5d075b5f70fc 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
| @@ -445,7 +445,7 @@ exact_match: | |||
| 445 | /* UDP is nearly always wildcards out the wazoo, it makes no sense to try | 445 | /* UDP is nearly always wildcards out the wazoo, it makes no sense to try |
| 446 | * harder than this. -DaveM | 446 | * harder than this. -DaveM |
| 447 | */ | 447 | */ |
| 448 | static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, | 448 | struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, |
| 449 | __be16 sport, __be32 daddr, __be16 dport, | 449 | __be16 sport, __be32 daddr, __be16 dport, |
| 450 | int dif, struct udp_table *udptable) | 450 | int dif, struct udp_table *udptable) |
| 451 | { | 451 | { |
| @@ -512,6 +512,7 @@ begin: | |||
| 512 | rcu_read_unlock(); | 512 | rcu_read_unlock(); |
| 513 | return result; | 513 | return result; |
| 514 | } | 514 | } |
| 515 | EXPORT_SYMBOL_GPL(__udp4_lib_lookup); | ||
| 515 | 516 | ||
| 516 | static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, | 517 | static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, |
| 517 | __be16 sport, __be16 dport, | 518 | __be16 sport, __be16 dport, |
| @@ -1358,7 +1359,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
| 1358 | if (inet_sk(sk)->inet_daddr) | 1359 | if (inet_sk(sk)->inet_daddr) |
| 1359 | sock_rps_save_rxhash(sk, skb); | 1360 | sock_rps_save_rxhash(sk, skb); |
| 1360 | 1361 | ||
| 1361 | rc = ip_queue_rcv_skb(sk, skb); | 1362 | rc = sock_queue_rcv_skb(sk, skb); |
| 1362 | if (rc < 0) { | 1363 | if (rc < 0) { |
| 1363 | int is_udplite = IS_UDPLITE(sk); | 1364 | int is_udplite = IS_UDPLITE(sk); |
| 1364 | 1365 | ||
| @@ -1474,6 +1475,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
| 1474 | 1475 | ||
| 1475 | rc = 0; | 1476 | rc = 0; |
| 1476 | 1477 | ||
| 1478 | ipv4_pktinfo_prepare(skb); | ||
| 1477 | bh_lock_sock(sk); | 1479 | bh_lock_sock(sk); |
| 1478 | if (!sock_owned_by_user(sk)) | 1480 | if (!sock_owned_by_user(sk)) |
| 1479 | rc = __udp_queue_rcv_skb(sk, skb); | 1481 | rc = __udp_queue_rcv_skb(sk, skb); |
| @@ -2247,7 +2249,8 @@ int udp4_ufo_send_check(struct sk_buff *skb) | |||
| 2247 | return 0; | 2249 | return 0; |
| 2248 | } | 2250 | } |
| 2249 | 2251 | ||
| 2250 | struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features) | 2252 | struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, |
| 2253 | netdev_features_t features) | ||
| 2251 | { | 2254 | { |
| 2252 | struct sk_buff *segs = ERR_PTR(-EINVAL); | 2255 | struct sk_buff *segs = ERR_PTR(-EINVAL); |
| 2253 | unsigned int mss; | 2256 | unsigned int mss; |
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c new file mode 100644 index 000000000000..8a949f19deb6 --- /dev/null +++ b/net/ipv4/udp_diag.c | |||
| @@ -0,0 +1,200 @@ | |||
| 1 | /* | ||
| 2 | * udp_diag.c Module for monitoring UDP transport protocols sockets. | ||
| 3 | * | ||
| 4 | * Authors: Pavel Emelyanov, <xemul@parallels.com> | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public License | ||
| 8 | * as published by the Free Software Foundation; either version | ||
| 9 | * 2 of the License, or (at your option) any later version. | ||
| 10 | */ | ||
| 11 | |||
| 12 | |||
| 13 | #include <linux/module.h> | ||
| 14 | #include <linux/inet_diag.h> | ||
| 15 | #include <linux/udp.h> | ||
| 16 | #include <net/udp.h> | ||
| 17 | #include <net/udplite.h> | ||
| 18 | #include <linux/sock_diag.h> | ||
| 19 | |||
| 20 | static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, | ||
| 21 | struct netlink_callback *cb, struct inet_diag_req_v2 *req, | ||
| 22 | struct nlattr *bc) | ||
| 23 | { | ||
| 24 | if (!inet_diag_bc_sk(bc, sk)) | ||
| 25 | return 0; | ||
| 26 | |||
| 27 | return inet_sk_diag_fill(sk, NULL, skb, req, NETLINK_CB(cb->skb).pid, | ||
| 28 | cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); | ||
| 29 | } | ||
| 30 | |||
| 31 | static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, | ||
| 32 | const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req) | ||
| 33 | { | ||
| 34 | int err = -EINVAL; | ||
| 35 | struct sock *sk; | ||
| 36 | struct sk_buff *rep; | ||
| 37 | |||
| 38 | if (req->sdiag_family == AF_INET) | ||
| 39 | sk = __udp4_lib_lookup(&init_net, | ||
| 40 | req->id.idiag_src[0], req->id.idiag_sport, | ||
| 41 | req->id.idiag_dst[0], req->id.idiag_dport, | ||
| 42 | req->id.idiag_if, tbl); | ||
| 43 | #if IS_ENABLED(CONFIG_IPV6) | ||
| 44 | else if (req->sdiag_family == AF_INET6) | ||
| 45 | sk = __udp6_lib_lookup(&init_net, | ||
| 46 | (struct in6_addr *)req->id.idiag_src, | ||
| 47 | req->id.idiag_sport, | ||
| 48 | (struct in6_addr *)req->id.idiag_dst, | ||
| 49 | req->id.idiag_dport, | ||
| 50 | req->id.idiag_if, tbl); | ||
| 51 | #endif | ||
| 52 | else | ||
| 53 | goto out_nosk; | ||
| 54 | |||
| 55 | err = -ENOENT; | ||
| 56 | if (sk == NULL) | ||
| 57 | goto out_nosk; | ||
| 58 | |||
| 59 | err = sock_diag_check_cookie(sk, req->id.idiag_cookie); | ||
| 60 | if (err) | ||
| 61 | goto out; | ||
| 62 | |||
| 63 | err = -ENOMEM; | ||
| 64 | rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) + | ||
| 65 | sizeof(struct inet_diag_meminfo) + | ||
| 66 | 64)), GFP_KERNEL); | ||
| 67 | if (!rep) | ||
| 68 | goto out; | ||
| 69 | |||
| 70 | err = inet_sk_diag_fill(sk, NULL, rep, req, | ||
| 71 | NETLINK_CB(in_skb).pid, | ||
| 72 | nlh->nlmsg_seq, 0, nlh); | ||
| 73 | if (err < 0) { | ||
| 74 | WARN_ON(err == -EMSGSIZE); | ||
| 75 | kfree_skb(rep); | ||
| 76 | goto out; | ||
| 77 | } | ||
| 78 | err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid, | ||
| 79 | MSG_DONTWAIT); | ||
| 80 | if (err > 0) | ||
| 81 | err = 0; | ||
| 82 | out: | ||
| 83 | if (sk) | ||
| 84 | sock_put(sk); | ||
| 85 | out_nosk: | ||
| 86 | return err; | ||
| 87 | } | ||
| 88 | |||
| 89 | static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlink_callback *cb, | ||
| 90 | struct inet_diag_req_v2 *r, struct nlattr *bc) | ||
| 91 | { | ||
| 92 | int num, s_num, slot, s_slot; | ||
| 93 | |||
| 94 | s_slot = cb->args[0]; | ||
| 95 | num = s_num = cb->args[1]; | ||
| 96 | |||
| 97 | for (slot = s_slot; slot <= table->mask; num = s_num = 0, slot++) { | ||
| 98 | struct sock *sk; | ||
| 99 | struct hlist_nulls_node *node; | ||
| 100 | struct udp_hslot *hslot = &table->hash[slot]; | ||
| 101 | |||
| 102 | if (hlist_nulls_empty(&hslot->head)) | ||
| 103 | continue; | ||
| 104 | |||
| 105 | spin_lock_bh(&hslot->lock); | ||
| 106 | sk_nulls_for_each(sk, node, &hslot->head) { | ||
| 107 | struct inet_sock *inet = inet_sk(sk); | ||
| 108 | |||
| 109 | if (num < s_num) | ||
| 110 | goto next; | ||
| 111 | if (!(r->idiag_states & (1 << sk->sk_state))) | ||
| 112 | goto next; | ||
| 113 | if (r->sdiag_family != AF_UNSPEC && | ||
| 114 | sk->sk_family != r->sdiag_family) | ||
| 115 | goto next; | ||
| 116 | if (r->id.idiag_sport != inet->inet_sport && | ||
| 117 | r->id.idiag_sport) | ||
| 118 | goto next; | ||
| 119 | if (r->id.idiag_dport != inet->inet_dport && | ||
| 120 | r->id.idiag_dport) | ||
| 121 | goto next; | ||
| 122 | |||
| 123 | if (sk_diag_dump(sk, skb, cb, r, bc) < 0) { | ||
| 124 | spin_unlock_bh(&hslot->lock); | ||
| 125 | goto done; | ||
| 126 | } | ||
| 127 | next: | ||
| 128 | num++; | ||
| 129 | } | ||
| 130 | spin_unlock_bh(&hslot->lock); | ||
| 131 | } | ||
| 132 | done: | ||
| 133 | cb->args[0] = slot; | ||
| 134 | cb->args[1] = num; | ||
| 135 | } | ||
| 136 | |||
| 137 | static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, | ||
| 138 | struct inet_diag_req_v2 *r, struct nlattr *bc) | ||
| 139 | { | ||
| 140 | udp_dump(&udp_table, skb, cb, r, bc); | ||
| 141 | } | ||
| 142 | |||
| 143 | static int udp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, | ||
| 144 | struct inet_diag_req_v2 *req) | ||
| 145 | { | ||
| 146 | return udp_dump_one(&udp_table, in_skb, nlh, req); | ||
| 147 | } | ||
| 148 | |||
| 149 | static const struct inet_diag_handler udp_diag_handler = { | ||
| 150 | .dump = udp_diag_dump, | ||
| 151 | .dump_one = udp_diag_dump_one, | ||
| 152 | .idiag_type = IPPROTO_UDP, | ||
| 153 | }; | ||
| 154 | |||
| 155 | static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, | ||
| 156 | struct inet_diag_req_v2 *r, struct nlattr *bc) | ||
| 157 | { | ||
| 158 | udp_dump(&udplite_table, skb, cb, r, bc); | ||
| 159 | } | ||
| 160 | |||
| 161 | static int udplite_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, | ||
| 162 | struct inet_diag_req_v2 *req) | ||
| 163 | { | ||
| 164 | return udp_dump_one(&udplite_table, in_skb, nlh, req); | ||
| 165 | } | ||
| 166 | |||
| 167 | static const struct inet_diag_handler udplite_diag_handler = { | ||
| 168 | .dump = udplite_diag_dump, | ||
| 169 | .dump_one = udplite_diag_dump_one, | ||
| 170 | .idiag_type = IPPROTO_UDPLITE, | ||
| 171 | }; | ||
| 172 | |||
| 173 | static int __init udp_diag_init(void) | ||
| 174 | { | ||
| 175 | int err; | ||
| 176 | |||
| 177 | err = inet_diag_register(&udp_diag_handler); | ||
| 178 | if (err) | ||
| 179 | goto out; | ||
| 180 | err = inet_diag_register(&udplite_diag_handler); | ||
| 181 | if (err) | ||
| 182 | goto out_lite; | ||
| 183 | out: | ||
| 184 | return err; | ||
| 185 | out_lite: | ||
| 186 | inet_diag_unregister(&udp_diag_handler); | ||
| 187 | goto out; | ||
| 188 | } | ||
| 189 | |||
| 190 | static void __exit udp_diag_exit(void) | ||
| 191 | { | ||
| 192 | inet_diag_unregister(&udplite_diag_handler); | ||
| 193 | inet_diag_unregister(&udp_diag_handler); | ||
| 194 | } | ||
| 195 | |||
| 196 | module_init(udp_diag_init); | ||
| 197 | module_exit(udp_diag_exit); | ||
| 198 | MODULE_LICENSE("GPL"); | ||
| 199 | MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-17 /* AF_INET - IPPROTO_UDP */); | ||
| 200 | MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-136 /* AF_INET - IPPROTO_UDPLITE */); | ||
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c index 63418185f524..e3db3f915114 100644 --- a/net/ipv4/xfrm4_mode_beet.c +++ b/net/ipv4/xfrm4_mode_beet.c | |||
| @@ -110,10 +110,7 @@ static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb) | |||
| 110 | 110 | ||
| 111 | skb_push(skb, sizeof(*iph)); | 111 | skb_push(skb, sizeof(*iph)); |
| 112 | skb_reset_network_header(skb); | 112 | skb_reset_network_header(skb); |
| 113 | 113 | skb_mac_header_rebuild(skb); | |
| 114 | memmove(skb->data - skb->mac_len, skb_mac_header(skb), | ||
| 115 | skb->mac_len); | ||
| 116 | skb_set_mac_header(skb, -skb->mac_len); | ||
| 117 | 114 | ||
| 118 | xfrm4_beet_make_header(skb); | 115 | xfrm4_beet_make_header(skb); |
| 119 | 116 | ||
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 534972e114ac..ed4bf11ef9f4 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c | |||
| @@ -66,7 +66,6 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) | |||
| 66 | 66 | ||
| 67 | static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) | 67 | static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) |
| 68 | { | 68 | { |
| 69 | const unsigned char *old_mac; | ||
| 70 | int err = -EINVAL; | 69 | int err = -EINVAL; |
| 71 | 70 | ||
| 72 | if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP) | 71 | if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP) |
| @@ -84,10 +83,9 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) | |||
| 84 | if (!(x->props.flags & XFRM_STATE_NOECN)) | 83 | if (!(x->props.flags & XFRM_STATE_NOECN)) |
| 85 | ipip_ecn_decapsulate(skb); | 84 | ipip_ecn_decapsulate(skb); |
| 86 | 85 | ||
| 87 | old_mac = skb_mac_header(skb); | ||
| 88 | skb_set_mac_header(skb, -skb->mac_len); | ||
| 89 | memmove(skb_mac_header(skb), old_mac, skb->mac_len); | ||
| 90 | skb_reset_network_header(skb); | 86 | skb_reset_network_header(skb); |
| 87 | skb_mac_header_rebuild(skb); | ||
| 88 | |||
| 91 | err = 0; | 89 | err = 0; |
| 92 | 90 | ||
| 93 | out: | 91 | out: |
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 82806455e859..9247d9d70e9d 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c | |||
| @@ -64,7 +64,7 @@ static struct xfrm_tunnel xfrm_tunnel_handler __read_mostly = { | |||
| 64 | .priority = 2, | 64 | .priority = 2, |
| 65 | }; | 65 | }; |
| 66 | 66 | ||
| 67 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 67 | #if IS_ENABLED(CONFIG_IPV6) |
| 68 | static struct xfrm_tunnel xfrm64_tunnel_handler __read_mostly = { | 68 | static struct xfrm_tunnel xfrm64_tunnel_handler __read_mostly = { |
| 69 | .handler = xfrm_tunnel_rcv, | 69 | .handler = xfrm_tunnel_rcv, |
| 70 | .err_handler = xfrm_tunnel_err, | 70 | .err_handler = xfrm_tunnel_err, |
| @@ -84,7 +84,7 @@ static int __init ipip_init(void) | |||
| 84 | xfrm_unregister_type(&ipip_type, AF_INET); | 84 | xfrm_unregister_type(&ipip_type, AF_INET); |
| 85 | return -EAGAIN; | 85 | return -EAGAIN; |
| 86 | } | 86 | } |
| 87 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 87 | #if IS_ENABLED(CONFIG_IPV6) |
| 88 | if (xfrm4_tunnel_register(&xfrm64_tunnel_handler, AF_INET6)) { | 88 | if (xfrm4_tunnel_register(&xfrm64_tunnel_handler, AF_INET6)) { |
| 89 | printk(KERN_INFO "ipip init: can't add xfrm handler for AF_INET6\n"); | 89 | printk(KERN_INFO "ipip init: can't add xfrm handler for AF_INET6\n"); |
| 90 | xfrm4_tunnel_deregister(&xfrm_tunnel_handler, AF_INET); | 90 | xfrm4_tunnel_deregister(&xfrm_tunnel_handler, AF_INET); |
| @@ -97,7 +97,7 @@ static int __init ipip_init(void) | |||
| 97 | 97 | ||
| 98 | static void __exit ipip_fini(void) | 98 | static void __exit ipip_fini(void) |
| 99 | { | 99 | { |
| 100 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 100 | #if IS_ENABLED(CONFIG_IPV6) |
| 101 | if (xfrm4_tunnel_deregister(&xfrm64_tunnel_handler, AF_INET6)) | 101 | if (xfrm4_tunnel_deregister(&xfrm64_tunnel_handler, AF_INET6)) |
| 102 | printk(KERN_INFO "ipip close: can't remove xfrm handler for AF_INET6\n"); | 102 | printk(KERN_INFO "ipip close: can't remove xfrm handler for AF_INET6\n"); |
| 103 | #endif | 103 | #endif |
