aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Kconfig4
-rw-r--r--net/ipv4/Makefile2
-rw-r--r--net/ipv4/af_inet.c13
-rw-r--r--net/ipv4/arp.c40
-rw-r--r--net/ipv4/fib_rules.c1
-rw-r--r--net/ipv4/fib_trie.c1
-rw-r--r--net/ipv4/igmp.c17
-rw-r--r--net/ipv4/inet_connection_sock.c19
-rw-r--r--net/ipv4/inet_diag.c470
-rw-r--r--net/ipv4/ip_gre.c14
-rw-r--r--net/ipv4/ip_output.c23
-rw-r--r--net/ipv4/ip_sockglue.c41
-rw-r--r--net/ipv4/ipconfig.c15
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv4/ipmr.c4
-rw-r--r--net/ipv4/netfilter/Kconfig18
-rw-r--r--net/ipv4/netfilter/Makefile2
-rw-r--r--net/ipv4/netfilter/ip_queue.c6
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c16
-rw-r--r--net/ipv4/netfilter/ipt_NETMAP.c14
-rw-r--r--net/ipv4/netfilter/ipt_REDIRECT.c16
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c2
-rw-r--r--net/ipv4/netfilter/ipt_ecn.c127
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c141
-rw-r--r--net/ipv4/netfilter/iptable_filter.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c96
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c20
-rw-r--r--net/ipv4/netfilter/nf_nat_helper.c16
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c14
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_common.c36
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_dccp.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_gre.c10
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_icmp.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_sctp.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_tcp.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_udp.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_udplite.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_unknown.c3
-rw-r--r--net/ipv4/netfilter/nf_nat_rule.c22
-rw-r--r--net/ipv4/netfilter/nf_nat_sip.c10
-rw-r--r--net/ipv4/netfilter/nf_nat_standalone.c2
-rw-r--r--net/ipv4/proc.c15
-rw-r--r--net/ipv4/raw.c10
-rw-r--r--net/ipv4/route.c12
-rw-r--r--net/ipv4/syncookies.c2
-rw-r--r--net/ipv4/sysctl_net_ipv4.c69
-rw-r--r--net/ipv4/tcp.c57
-rw-r--r--net/ipv4/tcp_cong.c2
-rw-r--r--net/ipv4/tcp_diag.c20
-rw-r--r--net/ipv4/tcp_input.c66
-rw-r--r--net/ipv4/tcp_ipv4.c15
-rw-r--r--net/ipv4/tcp_memcontrol.c272
-rw-r--r--net/ipv4/tcp_minisocks.c12
-rw-r--r--net/ipv4/tcp_output.c27
-rw-r--r--net/ipv4/tcp_timer.c8
-rw-r--r--net/ipv4/tunnel4.c10
-rw-r--r--net/ipv4/udp.c9
-rw-r--r--net/ipv4/udp_diag.c201
-rw-r--r--net/ipv4/xfrm4_tunnel.c6
59 files changed, 1386 insertions, 702 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index cbb505ba9324..1a8f93bd2d4f 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -409,6 +409,10 @@ config INET_TCP_DIAG
409 depends on INET_DIAG 409 depends on INET_DIAG
410 def_tristate INET_DIAG 410 def_tristate INET_DIAG
411 411
412config INET_UDP_DIAG
413 depends on INET_DIAG
414 def_tristate INET_DIAG && IPV6
415
412menuconfig TCP_CONG_ADVANCED 416menuconfig TCP_CONG_ADVANCED
413 bool "TCP: advanced congestion control" 417 bool "TCP: advanced congestion control"
414 ---help--- 418 ---help---
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index f2dc69cffb57..ff75d3bbcd6a 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_IP_PNP) += ipconfig.o
34obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/ 34obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/
35obj-$(CONFIG_INET_DIAG) += inet_diag.o 35obj-$(CONFIG_INET_DIAG) += inet_diag.o
36obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o 36obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
37obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o
37obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o 38obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o
38obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o 39obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
39obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o 40obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o
@@ -47,6 +48,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
47obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o 48obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
48obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o 49obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
49obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o 50obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
51obj-$(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) += tcp_memcontrol.o
50obj-$(CONFIG_NETLABEL) += cipso_ipv4.o 52obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
51 53
52obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ 54obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1b5096a9875a..f7b5670744f0 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1250,7 +1250,8 @@ out:
1250 return err; 1250 return err;
1251} 1251}
1252 1252
1253static struct sk_buff *inet_gso_segment(struct sk_buff *skb, u32 features) 1253static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
1254 netdev_features_t features)
1254{ 1255{
1255 struct sk_buff *segs = ERR_PTR(-EINVAL); 1256 struct sk_buff *segs = ERR_PTR(-EINVAL);
1256 struct iphdr *iph; 1257 struct iphdr *iph;
@@ -1572,9 +1573,9 @@ static __net_init int ipv4_mib_init_net(struct net *net)
1572 sizeof(struct icmp_mib), 1573 sizeof(struct icmp_mib),
1573 __alignof__(struct icmp_mib)) < 0) 1574 __alignof__(struct icmp_mib)) < 0)
1574 goto err_icmp_mib; 1575 goto err_icmp_mib;
1575 if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics, 1576 net->mib.icmpmsg_statistics = kzalloc(sizeof(struct icmpmsg_mib),
1576 sizeof(struct icmpmsg_mib), 1577 GFP_KERNEL);
1577 __alignof__(struct icmpmsg_mib)) < 0) 1578 if (!net->mib.icmpmsg_statistics)
1578 goto err_icmpmsg_mib; 1579 goto err_icmpmsg_mib;
1579 1580
1580 tcp_mib_init(net); 1581 tcp_mib_init(net);
@@ -1598,7 +1599,7 @@ err_tcp_mib:
1598 1599
1599static __net_exit void ipv4_mib_exit_net(struct net *net) 1600static __net_exit void ipv4_mib_exit_net(struct net *net)
1600{ 1601{
1601 snmp_mib_free((void __percpu **)net->mib.icmpmsg_statistics); 1602 kfree(net->mib.icmpmsg_statistics);
1602 snmp_mib_free((void __percpu **)net->mib.icmp_statistics); 1603 snmp_mib_free((void __percpu **)net->mib.icmp_statistics);
1603 snmp_mib_free((void __percpu **)net->mib.udplite_statistics); 1604 snmp_mib_free((void __percpu **)net->mib.udplite_statistics);
1604 snmp_mib_free((void __percpu **)net->mib.udp_statistics); 1605 snmp_mib_free((void __percpu **)net->mib.udp_statistics);
@@ -1671,6 +1672,8 @@ static int __init inet_init(void)
1671 ip_static_sysctl_init(); 1672 ip_static_sysctl_init();
1672#endif 1673#endif
1673 1674
1675 tcp_prot.sysctl_mem = init_net.ipv4.sysctl_tcp_mem;
1676
1674 /* 1677 /*
1675 * Add all the base protocols. 1678 * Add all the base protocols.
1676 */ 1679 */
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 96a164aa1367..59402be133f0 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -112,11 +112,6 @@
112#include <net/arp.h> 112#include <net/arp.h>
113#include <net/ax25.h> 113#include <net/ax25.h>
114#include <net/netrom.h> 114#include <net/netrom.h>
115#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
116#include <net/atmclip.h>
117struct neigh_table *clip_tbl_hook;
118EXPORT_SYMBOL(clip_tbl_hook);
119#endif
120 115
121#include <asm/system.h> 116#include <asm/system.h>
122#include <linux/uaccess.h> 117#include <linux/uaccess.h>
@@ -126,7 +121,7 @@ EXPORT_SYMBOL(clip_tbl_hook);
126/* 121/*
127 * Interface to generic neighbour cache. 122 * Interface to generic neighbour cache.
128 */ 123 */
129static u32 arp_hash(const void *pkey, const struct net_device *dev, __u32 rnd); 124static u32 arp_hash(const void *pkey, const struct net_device *dev, __u32 *hash_rnd);
130static int arp_constructor(struct neighbour *neigh); 125static int arp_constructor(struct neighbour *neigh);
131static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb); 126static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb);
132static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb); 127static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb);
@@ -164,7 +159,6 @@ static const struct neigh_ops arp_broken_ops = {
164 159
165struct neigh_table arp_tbl = { 160struct neigh_table arp_tbl = {
166 .family = AF_INET, 161 .family = AF_INET,
167 .entry_size = sizeof(struct neighbour) + 4,
168 .key_len = 4, 162 .key_len = 4,
169 .hash = arp_hash, 163 .hash = arp_hash,
170 .constructor = arp_constructor, 164 .constructor = arp_constructor,
@@ -177,7 +171,7 @@ struct neigh_table arp_tbl = {
177 .gc_staletime = 60 * HZ, 171 .gc_staletime = 60 * HZ,
178 .reachable_time = 30 * HZ, 172 .reachable_time = 30 * HZ,
179 .delay_probe_time = 5 * HZ, 173 .delay_probe_time = 5 * HZ,
180 .queue_len = 3, 174 .queue_len_bytes = 64*1024,
181 .ucast_probes = 3, 175 .ucast_probes = 3,
182 .mcast_probes = 3, 176 .mcast_probes = 3,
183 .anycast_delay = 1 * HZ, 177 .anycast_delay = 1 * HZ,
@@ -221,9 +215,9 @@ int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir)
221 215
222static u32 arp_hash(const void *pkey, 216static u32 arp_hash(const void *pkey,
223 const struct net_device *dev, 217 const struct net_device *dev,
224 __u32 hash_rnd) 218 __u32 *hash_rnd)
225{ 219{
226 return arp_hashfn(*(u32 *)pkey, dev, hash_rnd); 220 return arp_hashfn(*(u32 *)pkey, dev, *hash_rnd);
227} 221}
228 222
229static int arp_constructor(struct neighbour *neigh) 223static int arp_constructor(struct neighbour *neigh)
@@ -283,9 +277,9 @@ static int arp_constructor(struct neighbour *neigh)
283 default: 277 default:
284 break; 278 break;
285 case ARPHRD_ROSE: 279 case ARPHRD_ROSE:
286#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) 280#if IS_ENABLED(CONFIG_AX25)
287 case ARPHRD_AX25: 281 case ARPHRD_AX25:
288#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE) 282#if IS_ENABLED(CONFIG_NETROM)
289 case ARPHRD_NETROM: 283 case ARPHRD_NETROM:
290#endif 284#endif
291 neigh->ops = &arp_broken_ops; 285 neigh->ops = &arp_broken_ops;
@@ -592,16 +586,18 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
592 struct sk_buff *skb; 586 struct sk_buff *skb;
593 struct arphdr *arp; 587 struct arphdr *arp;
594 unsigned char *arp_ptr; 588 unsigned char *arp_ptr;
589 int hlen = LL_RESERVED_SPACE(dev);
590 int tlen = dev->needed_tailroom;
595 591
596 /* 592 /*
597 * Allocate a buffer 593 * Allocate a buffer
598 */ 594 */
599 595
600 skb = alloc_skb(arp_hdr_len(dev) + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); 596 skb = alloc_skb(arp_hdr_len(dev) + hlen + tlen, GFP_ATOMIC);
601 if (skb == NULL) 597 if (skb == NULL)
602 return NULL; 598 return NULL;
603 599
604 skb_reserve(skb, LL_RESERVED_SPACE(dev)); 600 skb_reserve(skb, hlen);
605 skb_reset_network_header(skb); 601 skb_reset_network_header(skb);
606 arp = (struct arphdr *) skb_put(skb, arp_hdr_len(dev)); 602 arp = (struct arphdr *) skb_put(skb, arp_hdr_len(dev));
607 skb->dev = dev; 603 skb->dev = dev;
@@ -633,13 +629,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
633 arp->ar_pro = htons(ETH_P_IP); 629 arp->ar_pro = htons(ETH_P_IP);
634 break; 630 break;
635 631
636#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) 632#if IS_ENABLED(CONFIG_AX25)
637 case ARPHRD_AX25: 633 case ARPHRD_AX25:
638 arp->ar_hrd = htons(ARPHRD_AX25); 634 arp->ar_hrd = htons(ARPHRD_AX25);
639 arp->ar_pro = htons(AX25_P_IP); 635 arp->ar_pro = htons(AX25_P_IP);
640 break; 636 break;
641 637
642#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE) 638#if IS_ENABLED(CONFIG_NETROM)
643 case ARPHRD_NETROM: 639 case ARPHRD_NETROM:
644 arp->ar_hrd = htons(ARPHRD_NETROM); 640 arp->ar_hrd = htons(ARPHRD_NETROM);
645 arp->ar_pro = htons(AX25_P_IP); 641 arp->ar_pro = htons(AX25_P_IP);
@@ -647,13 +643,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
647#endif 643#endif
648#endif 644#endif
649 645
650#if defined(CONFIG_FDDI) || defined(CONFIG_FDDI_MODULE) 646#if IS_ENABLED(CONFIG_FDDI)
651 case ARPHRD_FDDI: 647 case ARPHRD_FDDI:
652 arp->ar_hrd = htons(ARPHRD_ETHER); 648 arp->ar_hrd = htons(ARPHRD_ETHER);
653 arp->ar_pro = htons(ETH_P_IP); 649 arp->ar_pro = htons(ETH_P_IP);
654 break; 650 break;
655#endif 651#endif
656#if defined(CONFIG_TR) || defined(CONFIG_TR_MODULE) 652#if IS_ENABLED(CONFIG_TR)
657 case ARPHRD_IEEE802_TR: 653 case ARPHRD_IEEE802_TR:
658 arp->ar_hrd = htons(ARPHRD_IEEE802); 654 arp->ar_hrd = htons(ARPHRD_IEEE802);
659 arp->ar_pro = htons(ETH_P_IP); 655 arp->ar_pro = htons(ETH_P_IP);
@@ -1040,7 +1036,7 @@ static int arp_req_set(struct net *net, struct arpreq *r,
1040 return -EINVAL; 1036 return -EINVAL;
1041 } 1037 }
1042 switch (dev->type) { 1038 switch (dev->type) {
1043#if defined(CONFIG_FDDI) || defined(CONFIG_FDDI_MODULE) 1039#if IS_ENABLED(CONFIG_FDDI)
1044 case ARPHRD_FDDI: 1040 case ARPHRD_FDDI:
1045 /* 1041 /*
1046 * According to RFC 1390, FDDI devices should accept ARP 1042 * According to RFC 1390, FDDI devices should accept ARP
@@ -1286,7 +1282,7 @@ void __init arp_init(void)
1286} 1282}
1287 1283
1288#ifdef CONFIG_PROC_FS 1284#ifdef CONFIG_PROC_FS
1289#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) 1285#if IS_ENABLED(CONFIG_AX25)
1290 1286
1291/* ------------------------------------------------------------------------ */ 1287/* ------------------------------------------------------------------------ */
1292/* 1288/*
@@ -1334,7 +1330,7 @@ static void arp_format_neigh_entry(struct seq_file *seq,
1334 1330
1335 read_lock(&n->lock); 1331 read_lock(&n->lock);
1336 /* Convert hardware address to XX:XX:XX:XX ... form. */ 1332 /* Convert hardware address to XX:XX:XX:XX ... form. */
1337#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) 1333#if IS_ENABLED(CONFIG_AX25)
1338 if (hatype == ARPHRD_AX25 || hatype == ARPHRD_NETROM) 1334 if (hatype == ARPHRD_AX25 || hatype == ARPHRD_NETROM)
1339 ax2asc2((ax25_address *)n->ha, hbuffer); 1335 ax2asc2((ax25_address *)n->ha, hbuffer);
1340 else { 1336 else {
@@ -1347,7 +1343,7 @@ static void arp_format_neigh_entry(struct seq_file *seq,
1347 if (k != 0) 1343 if (k != 0)
1348 --k; 1344 --k;
1349 hbuffer[k] = 0; 1345 hbuffer[k] = 0;
1350#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) 1346#if IS_ENABLED(CONFIG_AX25)
1351 } 1347 }
1352#endif 1348#endif
1353 sprintf(tbuf, "%pI4", n->primary_key); 1349 sprintf(tbuf, "%pI4", n->primary_key);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 46339ba7a2d3..799fc790b3cf 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -67,6 +67,7 @@ int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res)
67 67
68 return err; 68 return err;
69} 69}
70EXPORT_SYMBOL_GPL(fib_lookup);
70 71
71static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, 72static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
72 int flags, struct fib_lookup_arg *arg) 73 int flags, struct fib_lookup_arg *arg)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 37b671185c81..d04b13ae18fe 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1607,6 +1607,7 @@ found:
1607 rcu_read_unlock(); 1607 rcu_read_unlock();
1608 return ret; 1608 return ret;
1609} 1609}
1610EXPORT_SYMBOL_GPL(fib_table_lookup);
1610 1611
1611/* 1612/*
1612 * Remove the leaf and return parent. 1613 * Remove the leaf and return parent.
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index b2ca095cb9da..fa057d105bef 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -304,9 +304,11 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
304 struct igmpv3_report *pig; 304 struct igmpv3_report *pig;
305 struct net *net = dev_net(dev); 305 struct net *net = dev_net(dev);
306 struct flowi4 fl4; 306 struct flowi4 fl4;
307 int hlen = LL_RESERVED_SPACE(dev);
308 int tlen = dev->needed_tailroom;
307 309
308 while (1) { 310 while (1) {
309 skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), 311 skb = alloc_skb(size + hlen + tlen,
310 GFP_ATOMIC | __GFP_NOWARN); 312 GFP_ATOMIC | __GFP_NOWARN);
311 if (skb) 313 if (skb)
312 break; 314 break;
@@ -327,7 +329,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
327 skb_dst_set(skb, &rt->dst); 329 skb_dst_set(skb, &rt->dst);
328 skb->dev = dev; 330 skb->dev = dev;
329 331
330 skb_reserve(skb, LL_RESERVED_SPACE(dev)); 332 skb_reserve(skb, hlen);
331 333
332 skb_reset_network_header(skb); 334 skb_reset_network_header(skb);
333 pip = ip_hdr(skb); 335 pip = ip_hdr(skb);
@@ -647,6 +649,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
647 __be32 group = pmc ? pmc->multiaddr : 0; 649 __be32 group = pmc ? pmc->multiaddr : 0;
648 struct flowi4 fl4; 650 struct flowi4 fl4;
649 __be32 dst; 651 __be32 dst;
652 int hlen, tlen;
650 653
651 if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) 654 if (type == IGMPV3_HOST_MEMBERSHIP_REPORT)
652 return igmpv3_send_report(in_dev, pmc); 655 return igmpv3_send_report(in_dev, pmc);
@@ -661,7 +664,9 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
661 if (IS_ERR(rt)) 664 if (IS_ERR(rt))
662 return -1; 665 return -1;
663 666
664 skb = alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); 667 hlen = LL_RESERVED_SPACE(dev);
668 tlen = dev->needed_tailroom;
669 skb = alloc_skb(IGMP_SIZE + hlen + tlen, GFP_ATOMIC);
665 if (skb == NULL) { 670 if (skb == NULL) {
666 ip_rt_put(rt); 671 ip_rt_put(rt);
667 return -1; 672 return -1;
@@ -669,7 +674,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
669 674
670 skb_dst_set(skb, &rt->dst); 675 skb_dst_set(skb, &rt->dst);
671 676
672 skb_reserve(skb, LL_RESERVED_SPACE(dev)); 677 skb_reserve(skb, hlen);
673 678
674 skb_reset_network_header(skb); 679 skb_reset_network_header(skb);
675 iph = ip_hdr(skb); 680 iph = ip_hdr(skb);
@@ -1574,7 +1579,7 @@ out_unlock:
1574 * Add multicast single-source filter to the interface list 1579 * Add multicast single-source filter to the interface list
1575 */ 1580 */
1576static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode, 1581static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode,
1577 __be32 *psfsrc, int delta) 1582 __be32 *psfsrc)
1578{ 1583{
1579 struct ip_sf_list *psf, *psf_prev; 1584 struct ip_sf_list *psf, *psf_prev;
1580 1585
@@ -1709,7 +1714,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
1709 pmc->sfcount[sfmode]++; 1714 pmc->sfcount[sfmode]++;
1710 err = 0; 1715 err = 0;
1711 for (i=0; i<sfcount; i++) { 1716 for (i=0; i<sfcount; i++) {
1712 err = ip_mc_add1_src(pmc, sfmode, &psfsrc[i], delta); 1717 err = ip_mc_add1_src(pmc, sfmode, &psfsrc[i]);
1713 if (err) 1718 if (err)
1714 break; 1719 break;
1715 } 1720 }
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index c14d88ad348d..2e4e24476c4c 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -418,7 +418,7 @@ static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport,
418 return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1); 418 return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1);
419} 419}
420 420
421#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 421#if IS_ENABLED(CONFIG_IPV6)
422#define AF_INET_FAMILY(fam) ((fam) == AF_INET) 422#define AF_INET_FAMILY(fam) ((fam) == AF_INET)
423#else 423#else
424#define AF_INET_FAMILY(fam) 1 424#define AF_INET_FAMILY(fam) 1
@@ -588,10 +588,19 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
588} 588}
589EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); 589EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune);
590 590
591struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, 591/**
592 const gfp_t priority) 592 * inet_csk_clone_lock - clone an inet socket, and lock its clone
593 * @sk: the socket to clone
594 * @req: request_sock
595 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
596 *
597 * Caller must unlock socket even in error path (bh_unlock_sock(newsk))
598 */
599struct sock *inet_csk_clone_lock(const struct sock *sk,
600 const struct request_sock *req,
601 const gfp_t priority)
593{ 602{
594 struct sock *newsk = sk_clone(sk, priority); 603 struct sock *newsk = sk_clone_lock(sk, priority);
595 604
596 if (newsk != NULL) { 605 if (newsk != NULL) {
597 struct inet_connection_sock *newicsk = inet_csk(newsk); 606 struct inet_connection_sock *newicsk = inet_csk(newsk);
@@ -615,7 +624,7 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
615 } 624 }
616 return newsk; 625 return newsk;
617} 626}
618EXPORT_SYMBOL_GPL(inet_csk_clone); 627EXPORT_SYMBOL_GPL(inet_csk_clone_lock);
619 628
620/* 629/*
621 * At this point, there should be no process reference to this 630 * At this point, there should be no process reference to this
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ccee270a9b65..2240a8e8c44d 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -33,6 +33,7 @@
33#include <linux/stddef.h> 33#include <linux/stddef.h>
34 34
35#include <linux/inet_diag.h> 35#include <linux/inet_diag.h>
36#include <linux/sock_diag.h>
36 37
37static const struct inet_diag_handler **inet_diag_table; 38static const struct inet_diag_handler **inet_diag_table;
38 39
@@ -45,24 +46,22 @@ struct inet_diag_entry {
45 u16 userlocks; 46 u16 userlocks;
46}; 47};
47 48
48static struct sock *idiagnl;
49
50#define INET_DIAG_PUT(skb, attrtype, attrlen) \ 49#define INET_DIAG_PUT(skb, attrtype, attrlen) \
51 RTA_DATA(__RTA_PUT(skb, attrtype, attrlen)) 50 RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
52 51
53static DEFINE_MUTEX(inet_diag_table_mutex); 52static DEFINE_MUTEX(inet_diag_table_mutex);
54 53
55static const struct inet_diag_handler *inet_diag_lock_handler(int type) 54static const struct inet_diag_handler *inet_diag_lock_handler(int proto)
56{ 55{
57 if (!inet_diag_table[type]) 56 if (!inet_diag_table[proto])
58 request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, 57 request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
59 NETLINK_INET_DIAG, type); 58 NETLINK_SOCK_DIAG, AF_INET, proto);
60 59
61 mutex_lock(&inet_diag_table_mutex); 60 mutex_lock(&inet_diag_table_mutex);
62 if (!inet_diag_table[type]) 61 if (!inet_diag_table[proto])
63 return ERR_PTR(-ENOENT); 62 return ERR_PTR(-ENOENT);
64 63
65 return inet_diag_table[type]; 64 return inet_diag_table[proto];
66} 65}
67 66
68static inline void inet_diag_unlock_handler( 67static inline void inet_diag_unlock_handler(
@@ -71,21 +70,21 @@ static inline void inet_diag_unlock_handler(
71 mutex_unlock(&inet_diag_table_mutex); 70 mutex_unlock(&inet_diag_table_mutex);
72} 71}
73 72
74static int inet_csk_diag_fill(struct sock *sk, 73int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
75 struct sk_buff *skb, 74 struct sk_buff *skb, struct inet_diag_req *req,
76 int ext, u32 pid, u32 seq, u16 nlmsg_flags, 75 u32 pid, u32 seq, u16 nlmsg_flags,
77 const struct nlmsghdr *unlh) 76 const struct nlmsghdr *unlh)
78{ 77{
79 const struct inet_sock *inet = inet_sk(sk); 78 const struct inet_sock *inet = inet_sk(sk);
80 const struct inet_connection_sock *icsk = inet_csk(sk);
81 struct inet_diag_msg *r; 79 struct inet_diag_msg *r;
82 struct nlmsghdr *nlh; 80 struct nlmsghdr *nlh;
83 void *info = NULL; 81 void *info = NULL;
84 struct inet_diag_meminfo *minfo = NULL; 82 struct inet_diag_meminfo *minfo = NULL;
85 unsigned char *b = skb_tail_pointer(skb); 83 unsigned char *b = skb_tail_pointer(skb);
86 const struct inet_diag_handler *handler; 84 const struct inet_diag_handler *handler;
85 int ext = req->idiag_ext;
87 86
88 handler = inet_diag_table[unlh->nlmsg_type]; 87 handler = inet_diag_table[req->sdiag_protocol];
89 BUG_ON(handler == NULL); 88 BUG_ON(handler == NULL);
90 89
91 nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r)); 90 nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r));
@@ -97,25 +96,13 @@ static int inet_csk_diag_fill(struct sock *sk,
97 if (ext & (1 << (INET_DIAG_MEMINFO - 1))) 96 if (ext & (1 << (INET_DIAG_MEMINFO - 1)))
98 minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, sizeof(*minfo)); 97 minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, sizeof(*minfo));
99 98
100 if (ext & (1 << (INET_DIAG_INFO - 1)))
101 info = INET_DIAG_PUT(skb, INET_DIAG_INFO,
102 handler->idiag_info_size);
103
104 if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) {
105 const size_t len = strlen(icsk->icsk_ca_ops->name);
106
107 strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1),
108 icsk->icsk_ca_ops->name);
109 }
110
111 r->idiag_family = sk->sk_family; 99 r->idiag_family = sk->sk_family;
112 r->idiag_state = sk->sk_state; 100 r->idiag_state = sk->sk_state;
113 r->idiag_timer = 0; 101 r->idiag_timer = 0;
114 r->idiag_retrans = 0; 102 r->idiag_retrans = 0;
115 103
116 r->id.idiag_if = sk->sk_bound_dev_if; 104 r->id.idiag_if = sk->sk_bound_dev_if;
117 r->id.idiag_cookie[0] = (u32)(unsigned long)sk; 105 sock_diag_save_cookie(sk, r->id.idiag_cookie);
118 r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
119 106
120 r->id.idiag_sport = inet->inet_sport; 107 r->id.idiag_sport = inet->inet_sport;
121 r->id.idiag_dport = inet->inet_dport; 108 r->id.idiag_dport = inet->inet_dport;
@@ -128,20 +115,36 @@ static int inet_csk_diag_fill(struct sock *sk,
128 if (ext & (1 << (INET_DIAG_TOS - 1))) 115 if (ext & (1 << (INET_DIAG_TOS - 1)))
129 RTA_PUT_U8(skb, INET_DIAG_TOS, inet->tos); 116 RTA_PUT_U8(skb, INET_DIAG_TOS, inet->tos);
130 117
131#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 118#if IS_ENABLED(CONFIG_IPV6)
132 if (r->idiag_family == AF_INET6) { 119 if (r->idiag_family == AF_INET6) {
133 const struct ipv6_pinfo *np = inet6_sk(sk); 120 const struct ipv6_pinfo *np = inet6_sk(sk);
134 121
122 *(struct in6_addr *)r->id.idiag_src = np->rcv_saddr;
123 *(struct in6_addr *)r->id.idiag_dst = np->daddr;
135 if (ext & (1 << (INET_DIAG_TCLASS - 1))) 124 if (ext & (1 << (INET_DIAG_TCLASS - 1)))
136 RTA_PUT_U8(skb, INET_DIAG_TCLASS, np->tclass); 125 RTA_PUT_U8(skb, INET_DIAG_TCLASS, np->tclass);
137
138 ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
139 &np->rcv_saddr);
140 ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
141 &np->daddr);
142 } 126 }
143#endif 127#endif
144 128
129 r->idiag_uid = sock_i_uid(sk);
130 r->idiag_inode = sock_i_ino(sk);
131
132 if (minfo) {
133 minfo->idiag_rmem = sk_rmem_alloc_get(sk);
134 minfo->idiag_wmem = sk->sk_wmem_queued;
135 minfo->idiag_fmem = sk->sk_forward_alloc;
136 minfo->idiag_tmem = sk_wmem_alloc_get(sk);
137 }
138
139 if (ext & (1 << (INET_DIAG_SKMEMINFO - 1)))
140 if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO))
141 goto rtattr_failure;
142
143 if (icsk == NULL) {
144 r->idiag_rqueue = r->idiag_wqueue = 0;
145 goto out;
146 }
147
145#define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ) 148#define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ)
146 149
147 if (icsk->icsk_pending == ICSK_TIME_RETRANS) { 150 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
@@ -162,14 +165,14 @@ static int inet_csk_diag_fill(struct sock *sk,
162 } 165 }
163#undef EXPIRES_IN_MS 166#undef EXPIRES_IN_MS
164 167
165 r->idiag_uid = sock_i_uid(sk); 168 if (ext & (1 << (INET_DIAG_INFO - 1)))
166 r->idiag_inode = sock_i_ino(sk); 169 info = INET_DIAG_PUT(skb, INET_DIAG_INFO, sizeof(struct tcp_info));
167 170
168 if (minfo) { 171 if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) {
169 minfo->idiag_rmem = sk_rmem_alloc_get(sk); 172 const size_t len = strlen(icsk->icsk_ca_ops->name);
170 minfo->idiag_wmem = sk->sk_wmem_queued; 173
171 minfo->idiag_fmem = sk->sk_forward_alloc; 174 strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1),
172 minfo->idiag_tmem = sk_wmem_alloc_get(sk); 175 icsk->icsk_ca_ops->name);
173 } 176 }
174 177
175 handler->idiag_get_info(sk, r, info); 178 handler->idiag_get_info(sk, r, info);
@@ -178,6 +181,7 @@ static int inet_csk_diag_fill(struct sock *sk,
178 icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info) 181 icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info)
179 icsk->icsk_ca_ops->get_info(sk, ext, skb); 182 icsk->icsk_ca_ops->get_info(sk, ext, skb);
180 183
184out:
181 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 185 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
182 return skb->len; 186 return skb->len;
183 187
@@ -186,10 +190,20 @@ nlmsg_failure:
186 nlmsg_trim(skb, b); 190 nlmsg_trim(skb, b);
187 return -EMSGSIZE; 191 return -EMSGSIZE;
188} 192}
193EXPORT_SYMBOL_GPL(inet_sk_diag_fill);
194
195static int inet_csk_diag_fill(struct sock *sk,
196 struct sk_buff *skb, struct inet_diag_req *req,
197 u32 pid, u32 seq, u16 nlmsg_flags,
198 const struct nlmsghdr *unlh)
199{
200 return inet_sk_diag_fill(sk, inet_csk(sk),
201 skb, req, pid, seq, nlmsg_flags, unlh);
202}
189 203
190static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, 204static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
191 struct sk_buff *skb, int ext, u32 pid, 205 struct sk_buff *skb, struct inet_diag_req *req,
192 u32 seq, u16 nlmsg_flags, 206 u32 pid, u32 seq, u16 nlmsg_flags,
193 const struct nlmsghdr *unlh) 207 const struct nlmsghdr *unlh)
194{ 208{
195 long tmo; 209 long tmo;
@@ -210,8 +224,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
210 r->idiag_family = tw->tw_family; 224 r->idiag_family = tw->tw_family;
211 r->idiag_retrans = 0; 225 r->idiag_retrans = 0;
212 r->id.idiag_if = tw->tw_bound_dev_if; 226 r->id.idiag_if = tw->tw_bound_dev_if;
213 r->id.idiag_cookie[0] = (u32)(unsigned long)tw; 227 sock_diag_save_cookie(tw, r->id.idiag_cookie);
214 r->id.idiag_cookie[1] = (u32)(((unsigned long)tw >> 31) >> 1);
215 r->id.idiag_sport = tw->tw_sport; 228 r->id.idiag_sport = tw->tw_sport;
216 r->id.idiag_dport = tw->tw_dport; 229 r->id.idiag_dport = tw->tw_dport;
217 r->id.idiag_src[0] = tw->tw_rcv_saddr; 230 r->id.idiag_src[0] = tw->tw_rcv_saddr;
@@ -223,15 +236,13 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
223 r->idiag_wqueue = 0; 236 r->idiag_wqueue = 0;
224 r->idiag_uid = 0; 237 r->idiag_uid = 0;
225 r->idiag_inode = 0; 238 r->idiag_inode = 0;
226#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 239#if IS_ENABLED(CONFIG_IPV6)
227 if (tw->tw_family == AF_INET6) { 240 if (tw->tw_family == AF_INET6) {
228 const struct inet6_timewait_sock *tw6 = 241 const struct inet6_timewait_sock *tw6 =
229 inet6_twsk((struct sock *)tw); 242 inet6_twsk((struct sock *)tw);
230 243
231 ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, 244 *(struct in6_addr *)r->id.idiag_src = tw6->tw_v6_rcv_saddr;
232 &tw6->tw_v6_rcv_saddr); 245 *(struct in6_addr *)r->id.idiag_dst = tw6->tw_v6_daddr;
233 ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
234 &tw6->tw_v6_daddr);
235 } 246 }
236#endif 247#endif
237 nlh->nlmsg_len = skb_tail_pointer(skb) - previous_tail; 248 nlh->nlmsg_len = skb_tail_pointer(skb) - previous_tail;
@@ -242,42 +253,31 @@ nlmsg_failure:
242} 253}
243 254
244static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, 255static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
245 int ext, u32 pid, u32 seq, u16 nlmsg_flags, 256 struct inet_diag_req *r, u32 pid, u32 seq, u16 nlmsg_flags,
246 const struct nlmsghdr *unlh) 257 const struct nlmsghdr *unlh)
247{ 258{
248 if (sk->sk_state == TCP_TIME_WAIT) 259 if (sk->sk_state == TCP_TIME_WAIT)
249 return inet_twsk_diag_fill((struct inet_timewait_sock *)sk, 260 return inet_twsk_diag_fill((struct inet_timewait_sock *)sk,
250 skb, ext, pid, seq, nlmsg_flags, 261 skb, r, pid, seq, nlmsg_flags,
251 unlh); 262 unlh);
252 return inet_csk_diag_fill(sk, skb, ext, pid, seq, nlmsg_flags, unlh); 263 return inet_csk_diag_fill(sk, skb, r, pid, seq, nlmsg_flags, unlh);
253} 264}
254 265
255static int inet_diag_get_exact(struct sk_buff *in_skb, 266int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb,
256 const struct nlmsghdr *nlh) 267 const struct nlmsghdr *nlh, struct inet_diag_req *req)
257{ 268{
258 int err; 269 int err;
259 struct sock *sk; 270 struct sock *sk;
260 struct inet_diag_req *req = NLMSG_DATA(nlh);
261 struct sk_buff *rep; 271 struct sk_buff *rep;
262 struct inet_hashinfo *hashinfo;
263 const struct inet_diag_handler *handler;
264 272
265 handler = inet_diag_lock_handler(nlh->nlmsg_type);
266 if (IS_ERR(handler)) {
267 err = PTR_ERR(handler);
268 goto unlock;
269 }
270
271 hashinfo = handler->idiag_hashinfo;
272 err = -EINVAL; 273 err = -EINVAL;
273 274 if (req->sdiag_family == AF_INET) {
274 if (req->idiag_family == AF_INET) {
275 sk = inet_lookup(&init_net, hashinfo, req->id.idiag_dst[0], 275 sk = inet_lookup(&init_net, hashinfo, req->id.idiag_dst[0],
276 req->id.idiag_dport, req->id.idiag_src[0], 276 req->id.idiag_dport, req->id.idiag_src[0],
277 req->id.idiag_sport, req->id.idiag_if); 277 req->id.idiag_sport, req->id.idiag_if);
278 } 278 }
279#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 279#if IS_ENABLED(CONFIG_IPV6)
280 else if (req->idiag_family == AF_INET6) { 280 else if (req->sdiag_family == AF_INET6) {
281 sk = inet6_lookup(&init_net, hashinfo, 281 sk = inet6_lookup(&init_net, hashinfo,
282 (struct in6_addr *)req->id.idiag_dst, 282 (struct in6_addr *)req->id.idiag_dst,
283 req->id.idiag_dport, 283 req->id.idiag_dport,
@@ -287,29 +287,26 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
287 } 287 }
288#endif 288#endif
289 else { 289 else {
290 goto unlock; 290 goto out_nosk;
291 } 291 }
292 292
293 err = -ENOENT; 293 err = -ENOENT;
294 if (sk == NULL) 294 if (sk == NULL)
295 goto unlock; 295 goto out_nosk;
296 296
297 err = -ESTALE; 297 err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
298 if ((req->id.idiag_cookie[0] != INET_DIAG_NOCOOKIE || 298 if (err)
299 req->id.idiag_cookie[1] != INET_DIAG_NOCOOKIE) &&
300 ((u32)(unsigned long)sk != req->id.idiag_cookie[0] ||
301 (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.idiag_cookie[1]))
302 goto out; 299 goto out;
303 300
304 err = -ENOMEM; 301 err = -ENOMEM;
305 rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) + 302 rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) +
306 sizeof(struct inet_diag_meminfo) + 303 sizeof(struct inet_diag_meminfo) +
307 handler->idiag_info_size + 64)), 304 sizeof(struct tcp_info) + 64)),
308 GFP_KERNEL); 305 GFP_KERNEL);
309 if (!rep) 306 if (!rep)
310 goto out; 307 goto out;
311 308
312 err = sk_diag_fill(sk, rep, req->idiag_ext, 309 err = sk_diag_fill(sk, rep, req,
313 NETLINK_CB(in_skb).pid, 310 NETLINK_CB(in_skb).pid,
314 nlh->nlmsg_seq, 0, nlh); 311 nlh->nlmsg_seq, 0, nlh);
315 if (err < 0) { 312 if (err < 0) {
@@ -317,7 +314,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
317 kfree_skb(rep); 314 kfree_skb(rep);
318 goto out; 315 goto out;
319 } 316 }
320 err = netlink_unicast(idiagnl, rep, NETLINK_CB(in_skb).pid, 317 err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid,
321 MSG_DONTWAIT); 318 MSG_DONTWAIT);
322 if (err > 0) 319 if (err > 0)
323 err = 0; 320 err = 0;
@@ -329,8 +326,25 @@ out:
329 else 326 else
330 sock_put(sk); 327 sock_put(sk);
331 } 328 }
332unlock: 329out_nosk:
330 return err;
331}
332EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
333
334static int inet_diag_get_exact(struct sk_buff *in_skb,
335 const struct nlmsghdr *nlh,
336 struct inet_diag_req *req)
337{
338 const struct inet_diag_handler *handler;
339 int err;
340
341 handler = inet_diag_lock_handler(req->sdiag_protocol);
342 if (IS_ERR(handler))
343 err = PTR_ERR(handler);
344 else
345 err = handler->dump_one(in_skb, nlh, req);
333 inet_diag_unlock_handler(handler); 346 inet_diag_unlock_handler(handler);
347
334 return err; 348 return err;
335} 349}
336 350
@@ -361,9 +375,12 @@ static int bitstring_match(const __be32 *a1, const __be32 *a2, int bits)
361} 375}
362 376
363 377
364static int inet_diag_bc_run(const void *bc, int len, 378static int inet_diag_bc_run(const struct nlattr *_bc,
365 const struct inet_diag_entry *entry) 379 const struct inet_diag_entry *entry)
366{ 380{
381 const void *bc = nla_data(_bc);
382 int len = nla_len(_bc);
383
367 while (len > 0) { 384 while (len > 0) {
368 int yes = 1; 385 int yes = 1;
369 const struct inet_diag_bc_op *op = bc; 386 const struct inet_diag_bc_op *op = bc;
@@ -437,6 +454,35 @@ static int inet_diag_bc_run(const void *bc, int len,
437 return len == 0; 454 return len == 0;
438} 455}
439 456
457int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
458{
459 struct inet_diag_entry entry;
460 struct inet_sock *inet = inet_sk(sk);
461
462 if (bc == NULL)
463 return 1;
464
465 entry.family = sk->sk_family;
466#if IS_ENABLED(CONFIG_IPV6)
467 if (entry.family == AF_INET6) {
468 struct ipv6_pinfo *np = inet6_sk(sk);
469
470 entry.saddr = np->rcv_saddr.s6_addr32;
471 entry.daddr = np->daddr.s6_addr32;
472 } else
473#endif
474 {
475 entry.saddr = &inet->inet_rcv_saddr;
476 entry.daddr = &inet->inet_daddr;
477 }
478 entry.sport = inet->inet_num;
479 entry.dport = ntohs(inet->inet_dport);
480 entry.userlocks = sk->sk_userlocks;
481
482 return inet_diag_bc_run(bc, &entry);
483}
484EXPORT_SYMBOL_GPL(inet_diag_bc_sk);
485
440static int valid_cc(const void *bc, int len, int cc) 486static int valid_cc(const void *bc, int len, int cc)
441{ 487{
442 while (len >= 0) { 488 while (len >= 0) {
@@ -493,57 +539,29 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
493 539
494static int inet_csk_diag_dump(struct sock *sk, 540static int inet_csk_diag_dump(struct sock *sk,
495 struct sk_buff *skb, 541 struct sk_buff *skb,
496 struct netlink_callback *cb) 542 struct netlink_callback *cb,
543 struct inet_diag_req *r,
544 const struct nlattr *bc)
497{ 545{
498 struct inet_diag_req *r = NLMSG_DATA(cb->nlh); 546 if (!inet_diag_bc_sk(bc, sk))
499 547 return 0;
500 if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
501 struct inet_diag_entry entry;
502 const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
503 sizeof(*r),
504 INET_DIAG_REQ_BYTECODE);
505 struct inet_sock *inet = inet_sk(sk);
506
507 entry.family = sk->sk_family;
508#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
509 if (entry.family == AF_INET6) {
510 struct ipv6_pinfo *np = inet6_sk(sk);
511
512 entry.saddr = np->rcv_saddr.s6_addr32;
513 entry.daddr = np->daddr.s6_addr32;
514 } else
515#endif
516 {
517 entry.saddr = &inet->inet_rcv_saddr;
518 entry.daddr = &inet->inet_daddr;
519 }
520 entry.sport = inet->inet_num;
521 entry.dport = ntohs(inet->inet_dport);
522 entry.userlocks = sk->sk_userlocks;
523 548
524 if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry)) 549 return inet_csk_diag_fill(sk, skb, r,
525 return 0;
526 }
527
528 return inet_csk_diag_fill(sk, skb, r->idiag_ext,
529 NETLINK_CB(cb->skb).pid, 550 NETLINK_CB(cb->skb).pid,
530 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); 551 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
531} 552}
532 553
533static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, 554static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
534 struct sk_buff *skb, 555 struct sk_buff *skb,
535 struct netlink_callback *cb) 556 struct netlink_callback *cb,
557 struct inet_diag_req *r,
558 const struct nlattr *bc)
536{ 559{
537 struct inet_diag_req *r = NLMSG_DATA(cb->nlh); 560 if (bc != NULL) {
538
539 if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
540 struct inet_diag_entry entry; 561 struct inet_diag_entry entry;
541 const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
542 sizeof(*r),
543 INET_DIAG_REQ_BYTECODE);
544 562
545 entry.family = tw->tw_family; 563 entry.family = tw->tw_family;
546#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 564#if IS_ENABLED(CONFIG_IPV6)
547 if (tw->tw_family == AF_INET6) { 565 if (tw->tw_family == AF_INET6) {
548 struct inet6_timewait_sock *tw6 = 566 struct inet6_timewait_sock *tw6 =
549 inet6_twsk((struct sock *)tw); 567 inet6_twsk((struct sock *)tw);
@@ -559,11 +577,11 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
559 entry.dport = ntohs(tw->tw_dport); 577 entry.dport = ntohs(tw->tw_dport);
560 entry.userlocks = 0; 578 entry.userlocks = 0;
561 579
562 if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry)) 580 if (!inet_diag_bc_run(bc, &entry))
563 return 0; 581 return 0;
564 } 582 }
565 583
566 return inet_twsk_diag_fill(tw, skb, r->idiag_ext, 584 return inet_twsk_diag_fill(tw, skb, r,
567 NETLINK_CB(cb->skb).pid, 585 NETLINK_CB(cb->skb).pid,
568 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); 586 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
569} 587}
@@ -589,8 +607,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
589 r->idiag_retrans = req->retrans; 607 r->idiag_retrans = req->retrans;
590 608
591 r->id.idiag_if = sk->sk_bound_dev_if; 609 r->id.idiag_if = sk->sk_bound_dev_if;
592 r->id.idiag_cookie[0] = (u32)(unsigned long)req; 610 sock_diag_save_cookie(req, r->id.idiag_cookie);
593 r->id.idiag_cookie[1] = (u32)(((unsigned long)req >> 31) >> 1);
594 611
595 tmo = req->expires - jiffies; 612 tmo = req->expires - jiffies;
596 if (tmo < 0) 613 if (tmo < 0)
@@ -605,12 +622,10 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
605 r->idiag_wqueue = 0; 622 r->idiag_wqueue = 0;
606 r->idiag_uid = sock_i_uid(sk); 623 r->idiag_uid = sock_i_uid(sk);
607 r->idiag_inode = 0; 624 r->idiag_inode = 0;
608#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 625#if IS_ENABLED(CONFIG_IPV6)
609 if (r->idiag_family == AF_INET6) { 626 if (r->idiag_family == AF_INET6) {
610 ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, 627 *(struct in6_addr *)r->id.idiag_src = inet6_rsk(req)->loc_addr;
611 &inet6_rsk(req)->loc_addr); 628 *(struct in6_addr *)r->id.idiag_dst = inet6_rsk(req)->rmt_addr;
612 ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
613 &inet6_rsk(req)->rmt_addr);
614 } 629 }
615#endif 630#endif
616 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 631 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
@@ -623,13 +638,13 @@ nlmsg_failure:
623} 638}
624 639
625static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, 640static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
626 struct netlink_callback *cb) 641 struct netlink_callback *cb,
642 struct inet_diag_req *r,
643 const struct nlattr *bc)
627{ 644{
628 struct inet_diag_entry entry; 645 struct inet_diag_entry entry;
629 struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
630 struct inet_connection_sock *icsk = inet_csk(sk); 646 struct inet_connection_sock *icsk = inet_csk(sk);
631 struct listen_sock *lopt; 647 struct listen_sock *lopt;
632 const struct nlattr *bc = NULL;
633 struct inet_sock *inet = inet_sk(sk); 648 struct inet_sock *inet = inet_sk(sk);
634 int j, s_j; 649 int j, s_j;
635 int reqnum, s_reqnum; 650 int reqnum, s_reqnum;
@@ -649,9 +664,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
649 if (!lopt || !lopt->qlen) 664 if (!lopt || !lopt->qlen)
650 goto out; 665 goto out;
651 666
652 if (nlmsg_attrlen(cb->nlh, sizeof(*r))) { 667 if (bc != NULL) {
653 bc = nlmsg_find_attr(cb->nlh, sizeof(*r),
654 INET_DIAG_REQ_BYTECODE);
655 entry.sport = inet->inet_num; 668 entry.sport = inet->inet_num;
656 entry.userlocks = sk->sk_userlocks; 669 entry.userlocks = sk->sk_userlocks;
657 } 670 }
@@ -671,21 +684,20 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
671 684
672 if (bc) { 685 if (bc) {
673 entry.saddr = 686 entry.saddr =
674#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 687#if IS_ENABLED(CONFIG_IPV6)
675 (entry.family == AF_INET6) ? 688 (entry.family == AF_INET6) ?
676 inet6_rsk(req)->loc_addr.s6_addr32 : 689 inet6_rsk(req)->loc_addr.s6_addr32 :
677#endif 690#endif
678 &ireq->loc_addr; 691 &ireq->loc_addr;
679 entry.daddr = 692 entry.daddr =
680#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 693#if IS_ENABLED(CONFIG_IPV6)
681 (entry.family == AF_INET6) ? 694 (entry.family == AF_INET6) ?
682 inet6_rsk(req)->rmt_addr.s6_addr32 : 695 inet6_rsk(req)->rmt_addr.s6_addr32 :
683#endif 696#endif
684 &ireq->rmt_addr; 697 &ireq->rmt_addr;
685 entry.dport = ntohs(ireq->rmt_port); 698 entry.dport = ntohs(ireq->rmt_port);
686 699
687 if (!inet_diag_bc_run(nla_data(bc), 700 if (!inet_diag_bc_run(bc, &entry))
688 nla_len(bc), &entry))
689 continue; 701 continue;
690 } 702 }
691 703
@@ -708,19 +720,11 @@ out:
708 return err; 720 return err;
709} 721}
710 722
711static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) 723void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
724 struct netlink_callback *cb, struct inet_diag_req *r, struct nlattr *bc)
712{ 725{
713 int i, num; 726 int i, num;
714 int s_i, s_num; 727 int s_i, s_num;
715 struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
716 const struct inet_diag_handler *handler;
717 struct inet_hashinfo *hashinfo;
718
719 handler = inet_diag_lock_handler(cb->nlh->nlmsg_type);
720 if (IS_ERR(handler))
721 goto unlock;
722
723 hashinfo = handler->idiag_hashinfo;
724 728
725 s_i = cb->args[1]; 729 s_i = cb->args[1];
726 s_num = num = cb->args[2]; 730 s_num = num = cb->args[2];
@@ -745,6 +749,10 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
745 continue; 749 continue;
746 } 750 }
747 751
752 if (r->sdiag_family != AF_UNSPEC &&
753 sk->sk_family != r->sdiag_family)
754 goto next_listen;
755
748 if (r->id.idiag_sport != inet->inet_sport && 756 if (r->id.idiag_sport != inet->inet_sport &&
749 r->id.idiag_sport) 757 r->id.idiag_sport)
750 goto next_listen; 758 goto next_listen;
@@ -754,7 +762,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
754 cb->args[3] > 0) 762 cb->args[3] > 0)
755 goto syn_recv; 763 goto syn_recv;
756 764
757 if (inet_csk_diag_dump(sk, skb, cb) < 0) { 765 if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) {
758 spin_unlock_bh(&ilb->lock); 766 spin_unlock_bh(&ilb->lock);
759 goto done; 767 goto done;
760 } 768 }
@@ -763,7 +771,7 @@ syn_recv:
763 if (!(r->idiag_states & TCPF_SYN_RECV)) 771 if (!(r->idiag_states & TCPF_SYN_RECV))
764 goto next_listen; 772 goto next_listen;
765 773
766 if (inet_diag_dump_reqs(skb, sk, cb) < 0) { 774 if (inet_diag_dump_reqs(skb, sk, cb, r, bc) < 0) {
767 spin_unlock_bh(&ilb->lock); 775 spin_unlock_bh(&ilb->lock);
768 goto done; 776 goto done;
769 } 777 }
@@ -785,7 +793,7 @@ skip_listen_ht:
785 } 793 }
786 794
787 if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV))) 795 if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV)))
788 goto unlock; 796 goto out;
789 797
790 for (i = s_i; i <= hashinfo->ehash_mask; i++) { 798 for (i = s_i; i <= hashinfo->ehash_mask; i++) {
791 struct inet_ehash_bucket *head = &hashinfo->ehash[i]; 799 struct inet_ehash_bucket *head = &hashinfo->ehash[i];
@@ -810,13 +818,16 @@ skip_listen_ht:
810 goto next_normal; 818 goto next_normal;
811 if (!(r->idiag_states & (1 << sk->sk_state))) 819 if (!(r->idiag_states & (1 << sk->sk_state)))
812 goto next_normal; 820 goto next_normal;
821 if (r->sdiag_family != AF_UNSPEC &&
822 sk->sk_family != r->sdiag_family)
823 goto next_normal;
813 if (r->id.idiag_sport != inet->inet_sport && 824 if (r->id.idiag_sport != inet->inet_sport &&
814 r->id.idiag_sport) 825 r->id.idiag_sport)
815 goto next_normal; 826 goto next_normal;
816 if (r->id.idiag_dport != inet->inet_dport && 827 if (r->id.idiag_dport != inet->inet_dport &&
817 r->id.idiag_dport) 828 r->id.idiag_dport)
818 goto next_normal; 829 goto next_normal;
819 if (inet_csk_diag_dump(sk, skb, cb) < 0) { 830 if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) {
820 spin_unlock_bh(lock); 831 spin_unlock_bh(lock);
821 goto done; 832 goto done;
822 } 833 }
@@ -832,13 +843,16 @@ next_normal:
832 843
833 if (num < s_num) 844 if (num < s_num)
834 goto next_dying; 845 goto next_dying;
846 if (r->sdiag_family != AF_UNSPEC &&
847 tw->tw_family != r->sdiag_family)
848 goto next_dying;
835 if (r->id.idiag_sport != tw->tw_sport && 849 if (r->id.idiag_sport != tw->tw_sport &&
836 r->id.idiag_sport) 850 r->id.idiag_sport)
837 goto next_dying; 851 goto next_dying;
838 if (r->id.idiag_dport != tw->tw_dport && 852 if (r->id.idiag_dport != tw->tw_dport &&
839 r->id.idiag_dport) 853 r->id.idiag_dport)
840 goto next_dying; 854 goto next_dying;
841 if (inet_twsk_diag_dump(tw, skb, cb) < 0) { 855 if (inet_twsk_diag_dump(tw, skb, cb, r, bc) < 0) {
842 spin_unlock_bh(lock); 856 spin_unlock_bh(lock);
843 goto done; 857 goto done;
844 } 858 }
@@ -852,15 +866,85 @@ next_dying:
852done: 866done:
853 cb->args[1] = i; 867 cb->args[1] = i;
854 cb->args[2] = num; 868 cb->args[2] = num;
855unlock: 869out:
870 ;
871}
872EXPORT_SYMBOL_GPL(inet_diag_dump_icsk);
873
874static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
875 struct inet_diag_req *r, struct nlattr *bc)
876{
877 const struct inet_diag_handler *handler;
878
879 handler = inet_diag_lock_handler(r->sdiag_protocol);
880 if (!IS_ERR(handler))
881 handler->dump(skb, cb, r, bc);
856 inet_diag_unlock_handler(handler); 882 inet_diag_unlock_handler(handler);
883
857 return skb->len; 884 return skb->len;
858} 885}
859 886
860static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) 887static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
861{ 888{
889 struct nlattr *bc = NULL;
862 int hdrlen = sizeof(struct inet_diag_req); 890 int hdrlen = sizeof(struct inet_diag_req);
863 891
892 if (nlmsg_attrlen(cb->nlh, hdrlen))
893 bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE);
894
895 return __inet_diag_dump(skb, cb, (struct inet_diag_req *)NLMSG_DATA(cb->nlh), bc);
896}
897
898static inline int inet_diag_type2proto(int type)
899{
900 switch (type) {
901 case TCPDIAG_GETSOCK:
902 return IPPROTO_TCP;
903 case DCCPDIAG_GETSOCK:
904 return IPPROTO_DCCP;
905 default:
906 return 0;
907 }
908}
909
910static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *cb)
911{
912 struct inet_diag_req_compat *rc = NLMSG_DATA(cb->nlh);
913 struct inet_diag_req req;
914 struct nlattr *bc = NULL;
915 int hdrlen = sizeof(struct inet_diag_req_compat);
916
917 req.sdiag_family = AF_UNSPEC; /* compatibility */
918 req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type);
919 req.idiag_ext = rc->idiag_ext;
920 req.idiag_states = rc->idiag_states;
921 req.id = rc->id;
922
923 if (nlmsg_attrlen(cb->nlh, hdrlen))
924 bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE);
925
926 return __inet_diag_dump(skb, cb, &req, bc);
927}
928
929static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
930 const struct nlmsghdr *nlh)
931{
932 struct inet_diag_req_compat *rc = NLMSG_DATA(nlh);
933 struct inet_diag_req req;
934
935 req.sdiag_family = rc->idiag_family;
936 req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type);
937 req.idiag_ext = rc->idiag_ext;
938 req.idiag_states = rc->idiag_states;
939 req.id = rc->id;
940
941 return inet_diag_get_exact(in_skb, nlh, &req);
942}
943
944static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
945{
946 int hdrlen = sizeof(struct inet_diag_req_compat);
947
864 if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX || 948 if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX ||
865 nlmsg_len(nlh) < hdrlen) 949 nlmsg_len(nlh) < hdrlen)
866 return -EINVAL; 950 return -EINVAL;
@@ -877,28 +961,54 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
877 return -EINVAL; 961 return -EINVAL;
878 } 962 }
879 963
880 return netlink_dump_start(idiagnl, skb, nlh, 964 return netlink_dump_start(sock_diag_nlsk, skb, nlh,
881 inet_diag_dump, NULL, 0); 965 inet_diag_dump_compat, NULL, 0);
882 } 966 }
883 967
884 return inet_diag_get_exact(skb, nlh); 968 return inet_diag_get_exact_compat(skb, nlh);
885} 969}
886 970
887static DEFINE_MUTEX(inet_diag_mutex); 971static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
888
889static void inet_diag_rcv(struct sk_buff *skb)
890{ 972{
891 mutex_lock(&inet_diag_mutex); 973 int hdrlen = sizeof(struct inet_diag_req);
892 netlink_rcv_skb(skb, &inet_diag_rcv_msg); 974
893 mutex_unlock(&inet_diag_mutex); 975 if (nlmsg_len(h) < hdrlen)
976 return -EINVAL;
977
978 if (h->nlmsg_flags & NLM_F_DUMP) {
979 if (nlmsg_attrlen(h, hdrlen)) {
980 struct nlattr *attr;
981 attr = nlmsg_find_attr(h, hdrlen,
982 INET_DIAG_REQ_BYTECODE);
983 if (attr == NULL ||
984 nla_len(attr) < sizeof(struct inet_diag_bc_op) ||
985 inet_diag_bc_audit(nla_data(attr), nla_len(attr)))
986 return -EINVAL;
987 }
988
989 return netlink_dump_start(sock_diag_nlsk, skb, h,
990 inet_diag_dump, NULL, 0);
991 }
992
993 return inet_diag_get_exact(skb, h, (struct inet_diag_req *)NLMSG_DATA(h));
894} 994}
895 995
996static struct sock_diag_handler inet_diag_handler = {
997 .family = AF_INET,
998 .dump = inet_diag_handler_dump,
999};
1000
1001static struct sock_diag_handler inet6_diag_handler = {
1002 .family = AF_INET6,
1003 .dump = inet_diag_handler_dump,
1004};
1005
896int inet_diag_register(const struct inet_diag_handler *h) 1006int inet_diag_register(const struct inet_diag_handler *h)
897{ 1007{
898 const __u16 type = h->idiag_type; 1008 const __u16 type = h->idiag_type;
899 int err = -EINVAL; 1009 int err = -EINVAL;
900 1010
901 if (type >= INET_DIAG_GETSOCK_MAX) 1011 if (type >= IPPROTO_MAX)
902 goto out; 1012 goto out;
903 1013
904 mutex_lock(&inet_diag_table_mutex); 1014 mutex_lock(&inet_diag_table_mutex);
@@ -917,7 +1027,7 @@ void inet_diag_unregister(const struct inet_diag_handler *h)
917{ 1027{
918 const __u16 type = h->idiag_type; 1028 const __u16 type = h->idiag_type;
919 1029
920 if (type >= INET_DIAG_GETSOCK_MAX) 1030 if (type >= IPPROTO_MAX)
921 return; 1031 return;
922 1032
923 mutex_lock(&inet_diag_table_mutex); 1033 mutex_lock(&inet_diag_table_mutex);
@@ -928,7 +1038,7 @@ EXPORT_SYMBOL_GPL(inet_diag_unregister);
928 1038
929static int __init inet_diag_init(void) 1039static int __init inet_diag_init(void)
930{ 1040{
931 const int inet_diag_table_size = (INET_DIAG_GETSOCK_MAX * 1041 const int inet_diag_table_size = (IPPROTO_MAX *
932 sizeof(struct inet_diag_handler *)); 1042 sizeof(struct inet_diag_handler *));
933 int err = -ENOMEM; 1043 int err = -ENOMEM;
934 1044
@@ -936,25 +1046,35 @@ static int __init inet_diag_init(void)
936 if (!inet_diag_table) 1046 if (!inet_diag_table)
937 goto out; 1047 goto out;
938 1048
939 idiagnl = netlink_kernel_create(&init_net, NETLINK_INET_DIAG, 0, 1049 err = sock_diag_register(&inet_diag_handler);
940 inet_diag_rcv, NULL, THIS_MODULE); 1050 if (err)
941 if (idiagnl == NULL) 1051 goto out_free_nl;
942 goto out_free_table; 1052
943 err = 0; 1053 err = sock_diag_register(&inet6_diag_handler);
1054 if (err)
1055 goto out_free_inet;
1056
1057 sock_diag_register_inet_compat(inet_diag_rcv_msg_compat);
944out: 1058out:
945 return err; 1059 return err;
946out_free_table: 1060
1061out_free_inet:
1062 sock_diag_unregister(&inet_diag_handler);
1063out_free_nl:
947 kfree(inet_diag_table); 1064 kfree(inet_diag_table);
948 goto out; 1065 goto out;
949} 1066}
950 1067
951static void __exit inet_diag_exit(void) 1068static void __exit inet_diag_exit(void)
952{ 1069{
953 netlink_kernel_release(idiagnl); 1070 sock_diag_unregister(&inet6_diag_handler);
1071 sock_diag_unregister(&inet_diag_handler);
1072 sock_diag_unregister_inet_compat(inet_diag_rcv_msg_compat);
954 kfree(inet_diag_table); 1073 kfree(inet_diag_table);
955} 1074}
956 1075
957module_init(inet_diag_init); 1076module_init(inet_diag_init);
958module_exit(inet_diag_exit); 1077module_exit(inet_diag_exit);
959MODULE_LICENSE("GPL"); 1078MODULE_LICENSE("GPL");
960MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_INET_DIAG); 1079MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2 /* AF_INET */);
1080MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10 /* AF_INET6 */);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index d55110e93120..2b53a1f7abf6 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -46,7 +46,7 @@
46#include <net/rtnetlink.h> 46#include <net/rtnetlink.h>
47#include <net/gre.h> 47#include <net/gre.h>
48 48
49#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 49#if IS_ENABLED(CONFIG_IPV6)
50#include <net/ipv6.h> 50#include <net/ipv6.h>
51#include <net/ip6_fib.h> 51#include <net/ip6_fib.h>
52#include <net/ip6_route.h> 52#include <net/ip6_route.h>
@@ -171,7 +171,7 @@ struct pcpu_tstats {
171 unsigned long rx_bytes; 171 unsigned long rx_bytes;
172 unsigned long tx_packets; 172 unsigned long tx_packets;
173 unsigned long tx_bytes; 173 unsigned long tx_bytes;
174}; 174} __attribute__((aligned(4*sizeof(unsigned long))));
175 175
176static struct net_device_stats *ipgre_get_stats(struct net_device *dev) 176static struct net_device_stats *ipgre_get_stats(struct net_device *dev)
177{ 177{
@@ -729,9 +729,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
729 if ((dst = rt->rt_gateway) == 0) 729 if ((dst = rt->rt_gateway) == 0)
730 goto tx_error_icmp; 730 goto tx_error_icmp;
731 } 731 }
732#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 732#if IS_ENABLED(CONFIG_IPV6)
733 else if (skb->protocol == htons(ETH_P_IPV6)) { 733 else if (skb->protocol == htons(ETH_P_IPV6)) {
734 struct neighbour *neigh = dst_get_neighbour(skb_dst(skb)); 734 struct neighbour *neigh = dst_get_neighbour_noref(skb_dst(skb));
735 const struct in6_addr *addr6; 735 const struct in6_addr *addr6;
736 int addr_type; 736 int addr_type;
737 737
@@ -799,7 +799,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
799 goto tx_error; 799 goto tx_error;
800 } 800 }
801 } 801 }
802#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 802#if IS_ENABLED(CONFIG_IPV6)
803 else if (skb->protocol == htons(ETH_P_IPV6)) { 803 else if (skb->protocol == htons(ETH_P_IPV6)) {
804 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); 804 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
805 805
@@ -835,6 +835,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
835 if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| 835 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
836 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { 836 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
837 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); 837 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
838 if (max_headroom > dev->needed_headroom)
839 dev->needed_headroom = max_headroom;
838 if (!new_skb) { 840 if (!new_skb) {
839 ip_rt_put(rt); 841 ip_rt_put(rt);
840 dev->stats.tx_dropped++; 842 dev->stats.tx_dropped++;
@@ -873,7 +875,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
873 if ((iph->ttl = tiph->ttl) == 0) { 875 if ((iph->ttl = tiph->ttl) == 0) {
874 if (skb->protocol == htons(ETH_P_IP)) 876 if (skb->protocol == htons(ETH_P_IP))
875 iph->ttl = old_iph->ttl; 877 iph->ttl = old_iph->ttl;
876#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 878#if IS_ENABLED(CONFIG_IPV6)
877 else if (skb->protocol == htons(ETH_P_IPV6)) 879 else if (skb->protocol == htons(ETH_P_IPV6))
878 iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit; 880 iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
879#endif 881#endif
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 0bc95f3977d2..ff302bde8890 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -206,7 +206,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
206 } 206 }
207 207
208 rcu_read_lock(); 208 rcu_read_lock();
209 neigh = dst_get_neighbour(dst); 209 neigh = dst_get_neighbour_noref(dst);
210 if (neigh) { 210 if (neigh) {
211 int res = neigh_output(neigh, skb); 211 int res = neigh_output(neigh, skb);
212 212
@@ -319,6 +319,20 @@ int ip_output(struct sk_buff *skb)
319 !(IPCB(skb)->flags & IPSKB_REROUTED)); 319 !(IPCB(skb)->flags & IPSKB_REROUTED));
320} 320}
321 321
322/*
323 * copy saddr and daddr, possibly using 64bit load/stores
324 * Equivalent to :
325 * iph->saddr = fl4->saddr;
326 * iph->daddr = fl4->daddr;
327 */
328static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4)
329{
330 BUILD_BUG_ON(offsetof(typeof(*fl4), daddr) !=
331 offsetof(typeof(*fl4), saddr) + sizeof(fl4->saddr));
332 memcpy(&iph->saddr, &fl4->saddr,
333 sizeof(fl4->saddr) + sizeof(fl4->daddr));
334}
335
322int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl) 336int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl)
323{ 337{
324 struct sock *sk = skb->sk; 338 struct sock *sk = skb->sk;
@@ -381,8 +395,8 @@ packet_routed:
381 iph->frag_off = 0; 395 iph->frag_off = 0;
382 iph->ttl = ip_select_ttl(inet, &rt->dst); 396 iph->ttl = ip_select_ttl(inet, &rt->dst);
383 iph->protocol = sk->sk_protocol; 397 iph->protocol = sk->sk_protocol;
384 iph->saddr = fl4->saddr; 398 ip_copy_addrs(iph, fl4);
385 iph->daddr = fl4->daddr; 399
386 /* Transport layer set skb->h.foo itself. */ 400 /* Transport layer set skb->h.foo itself. */
387 401
388 if (inet_opt && inet_opt->opt.optlen) { 402 if (inet_opt && inet_opt->opt.optlen) {
@@ -1337,8 +1351,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
1337 ip_select_ident(iph, &rt->dst, sk); 1351 ip_select_ident(iph, &rt->dst, sk);
1338 iph->ttl = ttl; 1352 iph->ttl = ttl;
1339 iph->protocol = sk->sk_protocol; 1353 iph->protocol = sk->sk_protocol;
1340 iph->saddr = fl4->saddr; 1354 ip_copy_addrs(iph, fl4);
1341 iph->daddr = fl4->daddr;
1342 1355
1343 if (opt) { 1356 if (opt) {
1344 iph->ihl += opt->optlen>>2; 1357 iph->ihl += opt->optlen>>2;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 09ff51bf16a4..8aa87c19fa00 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -37,7 +37,7 @@
37#include <net/route.h> 37#include <net/route.h>
38#include <net/xfrm.h> 38#include <net/xfrm.h>
39#include <net/compat.h> 39#include <net/compat.h>
40#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 40#if IS_ENABLED(CONFIG_IPV6)
41#include <net/transp_v6.h> 41#include <net/transp_v6.h>
42#endif 42#endif
43 43
@@ -55,20 +55,13 @@
55/* 55/*
56 * SOL_IP control messages. 56 * SOL_IP control messages.
57 */ 57 */
58#define PKTINFO_SKB_CB(__skb) ((struct in_pktinfo *)((__skb)->cb))
58 59
59static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) 60static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
60{ 61{
61 struct in_pktinfo info; 62 struct in_pktinfo info = *PKTINFO_SKB_CB(skb);
62 struct rtable *rt = skb_rtable(skb);
63 63
64 info.ipi_addr.s_addr = ip_hdr(skb)->daddr; 64 info.ipi_addr.s_addr = ip_hdr(skb)->daddr;
65 if (rt) {
66 info.ipi_ifindex = rt->rt_iif;
67 info.ipi_spec_dst.s_addr = rt->rt_spec_dst;
68 } else {
69 info.ipi_ifindex = 0;
70 info.ipi_spec_dst.s_addr = 0;
71 }
72 65
73 put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); 66 put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
74} 67}
@@ -515,7 +508,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
515 sock_owned_by_user(sk)); 508 sock_owned_by_user(sk));
516 if (inet->is_icsk) { 509 if (inet->is_icsk) {
517 struct inet_connection_sock *icsk = inet_csk(sk); 510 struct inet_connection_sock *icsk = inet_csk(sk);
518#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 511#if IS_ENABLED(CONFIG_IPV6)
519 if (sk->sk_family == PF_INET || 512 if (sk->sk_family == PF_INET ||
520 (!((1 << sk->sk_state) & 513 (!((1 << sk->sk_state) &
521 (TCPF_LISTEN | TCPF_CLOSE)) && 514 (TCPF_LISTEN | TCPF_CLOSE)) &&
@@ -526,7 +519,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
526 if (opt) 519 if (opt)
527 icsk->icsk_ext_hdr_len += opt->opt.optlen; 520 icsk->icsk_ext_hdr_len += opt->opt.optlen;
528 icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); 521 icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
529#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 522#if IS_ENABLED(CONFIG_IPV6)
530 } 523 }
531#endif 524#endif
532 } 525 }
@@ -992,20 +985,28 @@ e_inval:
992} 985}
993 986
994/** 987/**
995 * ip_queue_rcv_skb - Queue an skb into sock receive queue 988 * ipv4_pktinfo_prepare - transfert some info from rtable to skb
996 * @sk: socket 989 * @sk: socket
997 * @skb: buffer 990 * @skb: buffer
998 * 991 *
999 * Queues an skb into socket receive queue. If IP_CMSG_PKTINFO option 992 * To support IP_CMSG_PKTINFO option, we store rt_iif and rt_spec_dst
1000 * is not set, we drop skb dst entry now, while dst cache line is hot. 993 * in skb->cb[] before dst drop.
994 * This way, receiver doesnt make cache line misses to read rtable.
1001 */ 995 */
1002int ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 996void ipv4_pktinfo_prepare(struct sk_buff *skb)
1003{ 997{
1004 if (!(inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO)) 998 struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
1005 skb_dst_drop(skb); 999 const struct rtable *rt = skb_rtable(skb);
1006 return sock_queue_rcv_skb(sk, skb); 1000
1001 if (rt) {
1002 pktinfo->ipi_ifindex = rt->rt_iif;
1003 pktinfo->ipi_spec_dst.s_addr = rt->rt_spec_dst;
1004 } else {
1005 pktinfo->ipi_ifindex = 0;
1006 pktinfo->ipi_spec_dst.s_addr = 0;
1007 }
1008 skb_dst_drop(skb);
1007} 1009}
1008EXPORT_SYMBOL(ip_queue_rcv_skb);
1009 1010
1010int ip_setsockopt(struct sock *sk, int level, 1011int ip_setsockopt(struct sock *sk, int level,
1011 int optname, char __user *optval, unsigned int optlen) 1012 int optname, char __user *optval, unsigned int optlen)
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 99ec116bef14..7e4ec9fc2cef 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -767,13 +767,15 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
767 struct sk_buff *skb; 767 struct sk_buff *skb;
768 struct bootp_pkt *b; 768 struct bootp_pkt *b;
769 struct iphdr *h; 769 struct iphdr *h;
770 int hlen = LL_RESERVED_SPACE(dev);
771 int tlen = dev->needed_tailroom;
770 772
771 /* Allocate packet */ 773 /* Allocate packet */
772 skb = alloc_skb(sizeof(struct bootp_pkt) + LL_ALLOCATED_SPACE(dev) + 15, 774 skb = alloc_skb(sizeof(struct bootp_pkt) + hlen + tlen + 15,
773 GFP_KERNEL); 775 GFP_KERNEL);
774 if (!skb) 776 if (!skb)
775 return; 777 return;
776 skb_reserve(skb, LL_RESERVED_SPACE(dev)); 778 skb_reserve(skb, hlen);
777 b = (struct bootp_pkt *) skb_put(skb, sizeof(struct bootp_pkt)); 779 b = (struct bootp_pkt *) skb_put(skb, sizeof(struct bootp_pkt));
778 memset(b, 0, sizeof(struct bootp_pkt)); 780 memset(b, 0, sizeof(struct bootp_pkt));
779 781
@@ -826,8 +828,13 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
826 skb->dev = dev; 828 skb->dev = dev;
827 skb->protocol = htons(ETH_P_IP); 829 skb->protocol = htons(ETH_P_IP);
828 if (dev_hard_header(skb, dev, ntohs(skb->protocol), 830 if (dev_hard_header(skb, dev, ntohs(skb->protocol),
829 dev->broadcast, dev->dev_addr, skb->len) < 0 || 831 dev->broadcast, dev->dev_addr, skb->len) < 0) {
830 dev_queue_xmit(skb) < 0) 832 kfree_skb(skb);
833 printk("E");
834 return;
835 }
836
837 if (dev_queue_xmit(skb) < 0)
831 printk("E"); 838 printk("E");
832} 839}
833 840
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 0b2e7329abda..413ed1ba7a5a 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -148,7 +148,7 @@ struct pcpu_tstats {
148 unsigned long rx_bytes; 148 unsigned long rx_bytes;
149 unsigned long tx_packets; 149 unsigned long tx_packets;
150 unsigned long tx_bytes; 150 unsigned long tx_bytes;
151}; 151} __attribute__((aligned(4*sizeof(unsigned long))));
152 152
153static struct net_device_stats *ipip_get_stats(struct net_device *dev) 153static struct net_device_stats *ipip_get_stats(struct net_device *dev)
154{ 154{
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 76a7f07b38b6..8e54490ee3f4 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1520,7 +1520,6 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
1520 struct mr_table *mrt; 1520 struct mr_table *mrt;
1521 struct vif_device *v; 1521 struct vif_device *v;
1522 int ct; 1522 int ct;
1523 LIST_HEAD(list);
1524 1523
1525 if (event != NETDEV_UNREGISTER) 1524 if (event != NETDEV_UNREGISTER)
1526 return NOTIFY_DONE; 1525 return NOTIFY_DONE;
@@ -1529,10 +1528,9 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
1529 v = &mrt->vif_table[0]; 1528 v = &mrt->vif_table[0];
1530 for (ct = 0; ct < mrt->maxvif; ct++, v++) { 1529 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1531 if (v->dev == dev) 1530 if (v->dev == dev)
1532 vif_delete(mrt, ct, 1, &list); 1531 vif_delete(mrt, ct, 1, NULL);
1533 } 1532 }
1534 } 1533 }
1535 unregister_netdevice_many(&list);
1536 return NOTIFY_DONE; 1534 return NOTIFY_DONE;
1537} 1535}
1538 1536
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index f19f2182894c..74dfc9e5211f 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -27,7 +27,7 @@ config NF_CONNTRACK_IPV4
27 27
28config NF_CONNTRACK_PROC_COMPAT 28config NF_CONNTRACK_PROC_COMPAT
29 bool "proc/sysctl compatibility with old connection tracking" 29 bool "proc/sysctl compatibility with old connection tracking"
30 depends on NF_CONNTRACK_IPV4 30 depends on NF_CONNTRACK_PROCFS && NF_CONNTRACK_IPV4
31 default y 31 default y
32 help 32 help
33 This option enables /proc and sysctl compatibility with the old 33 This option enables /proc and sysctl compatibility with the old
@@ -76,11 +76,21 @@ config IP_NF_MATCH_AH
76config IP_NF_MATCH_ECN 76config IP_NF_MATCH_ECN
77 tristate '"ecn" match support' 77 tristate '"ecn" match support'
78 depends on NETFILTER_ADVANCED 78 depends on NETFILTER_ADVANCED
79 help 79 select NETFILTER_XT_MATCH_ECN
80 This option adds a `ECN' match, which allows you to match against 80 ---help---
81 the IPv4 and TCP header ECN fields. 81 This is a backwards-compat option for the user's convenience
82 (e.g. when running oldconfig). It selects
83 CONFIG_NETFILTER_XT_MATCH_ECN.
84
85config IP_NF_MATCH_RPFILTER
86 tristate '"rpfilter" reverse path filter match support'
87 depends on NETFILTER_ADVANCED
88 ---help---
89 This option allows you to match packets whose replies would
90 go out via the interface the packet came in.
82 91
83 To compile it as a module, choose M here. If unsure, say N. 92 To compile it as a module, choose M here. If unsure, say N.
93 The module will be called ipt_rpfilter.
84 94
85config IP_NF_MATCH_TTL 95config IP_NF_MATCH_TTL
86 tristate '"ttl" match support' 96 tristate '"ttl" match support'
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index dca2082ec683..213a462b739b 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -49,7 +49,7 @@ obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
49 49
50# matches 50# matches
51obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o 51obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
52obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o 52obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o
53 53
54# targets 54# targets
55obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o 55obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index e59aabd0eae4..a057fe64debd 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -404,6 +404,7 @@ __ipq_rcv_skb(struct sk_buff *skb)
404 int status, type, pid, flags; 404 int status, type, pid, flags;
405 unsigned int nlmsglen, skblen; 405 unsigned int nlmsglen, skblen;
406 struct nlmsghdr *nlh; 406 struct nlmsghdr *nlh;
407 bool enable_timestamp = false;
407 408
408 skblen = skb->len; 409 skblen = skb->len;
409 if (skblen < sizeof(*nlh)) 410 if (skblen < sizeof(*nlh))
@@ -441,12 +442,13 @@ __ipq_rcv_skb(struct sk_buff *skb)
441 RCV_SKB_FAIL(-EBUSY); 442 RCV_SKB_FAIL(-EBUSY);
442 } 443 }
443 } else { 444 } else {
444 net_enable_timestamp(); 445 enable_timestamp = true;
445 peer_pid = pid; 446 peer_pid = pid;
446 } 447 }
447 448
448 spin_unlock_bh(&queue_lock); 449 spin_unlock_bh(&queue_lock);
449 450 if (enable_timestamp)
451 net_enable_timestamp();
450 status = ipq_receive_peer(NLMSG_DATA(nlh), type, 452 status = ipq_receive_peer(NLMSG_DATA(nlh), type,
451 nlmsglen - NLMSG_LENGTH(0)); 453 nlmsglen - NLMSG_LENGTH(0));
452 if (status < 0) 454 if (status < 0)
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 9931152a78b5..2f210c79dc87 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -30,9 +30,9 @@ MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
30/* FIXME: Multiple targets. --RR */ 30/* FIXME: Multiple targets. --RR */
31static int masquerade_tg_check(const struct xt_tgchk_param *par) 31static int masquerade_tg_check(const struct xt_tgchk_param *par)
32{ 32{
33 const struct nf_nat_multi_range_compat *mr = par->targinfo; 33 const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
34 34
35 if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { 35 if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) {
36 pr_debug("bad MAP_IPS.\n"); 36 pr_debug("bad MAP_IPS.\n");
37 return -EINVAL; 37 return -EINVAL;
38 } 38 }
@@ -49,8 +49,8 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
49 struct nf_conn *ct; 49 struct nf_conn *ct;
50 struct nf_conn_nat *nat; 50 struct nf_conn_nat *nat;
51 enum ip_conntrack_info ctinfo; 51 enum ip_conntrack_info ctinfo;
52 struct nf_nat_range newrange; 52 struct nf_nat_ipv4_range newrange;
53 const struct nf_nat_multi_range_compat *mr; 53 const struct nf_nat_ipv4_multi_range_compat *mr;
54 const struct rtable *rt; 54 const struct rtable *rt;
55 __be32 newsrc; 55 __be32 newsrc;
56 56
@@ -79,13 +79,13 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
79 nat->masq_index = par->out->ifindex; 79 nat->masq_index = par->out->ifindex;
80 80
81 /* Transfer from original range. */ 81 /* Transfer from original range. */
82 newrange = ((struct nf_nat_range) 82 newrange = ((struct nf_nat_ipv4_range)
83 { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS, 83 { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS,
84 newsrc, newsrc, 84 newsrc, newsrc,
85 mr->range[0].min, mr->range[0].max }); 85 mr->range[0].min, mr->range[0].max });
86 86
87 /* Hand modified range to generic setup. */ 87 /* Hand modified range to generic setup. */
88 return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_SRC); 88 return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
89} 89}
90 90
91static int 91static int
@@ -139,7 +139,7 @@ static struct xt_target masquerade_tg_reg __read_mostly = {
139 .name = "MASQUERADE", 139 .name = "MASQUERADE",
140 .family = NFPROTO_IPV4, 140 .family = NFPROTO_IPV4,
141 .target = masquerade_tg, 141 .target = masquerade_tg,
142 .targetsize = sizeof(struct nf_nat_multi_range_compat), 142 .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
143 .table = "nat", 143 .table = "nat",
144 .hooks = 1 << NF_INET_POST_ROUTING, 144 .hooks = 1 << NF_INET_POST_ROUTING,
145 .checkentry = masquerade_tg_check, 145 .checkentry = masquerade_tg_check,
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 6cdb298f1035..b5bfbbabf70d 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -24,9 +24,9 @@ MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets");
24 24
25static int netmap_tg_check(const struct xt_tgchk_param *par) 25static int netmap_tg_check(const struct xt_tgchk_param *par)
26{ 26{
27 const struct nf_nat_multi_range_compat *mr = par->targinfo; 27 const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
28 28
29 if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) { 29 if (!(mr->range[0].flags & NF_NAT_RANGE_MAP_IPS)) {
30 pr_debug("bad MAP_IPS.\n"); 30 pr_debug("bad MAP_IPS.\n");
31 return -EINVAL; 31 return -EINVAL;
32 } 32 }
@@ -43,8 +43,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par)
43 struct nf_conn *ct; 43 struct nf_conn *ct;
44 enum ip_conntrack_info ctinfo; 44 enum ip_conntrack_info ctinfo;
45 __be32 new_ip, netmask; 45 __be32 new_ip, netmask;
46 const struct nf_nat_multi_range_compat *mr = par->targinfo; 46 const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
47 struct nf_nat_range newrange; 47 struct nf_nat_ipv4_range newrange;
48 48
49 NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || 49 NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
50 par->hooknum == NF_INET_POST_ROUTING || 50 par->hooknum == NF_INET_POST_ROUTING ||
@@ -61,8 +61,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par)
61 new_ip = ip_hdr(skb)->saddr & ~netmask; 61 new_ip = ip_hdr(skb)->saddr & ~netmask;
62 new_ip |= mr->range[0].min_ip & netmask; 62 new_ip |= mr->range[0].min_ip & netmask;
63 63
64 newrange = ((struct nf_nat_range) 64 newrange = ((struct nf_nat_ipv4_range)
65 { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS, 65 { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS,
66 new_ip, new_ip, 66 new_ip, new_ip,
67 mr->range[0].min, mr->range[0].max }); 67 mr->range[0].min, mr->range[0].max });
68 68
@@ -74,7 +74,7 @@ static struct xt_target netmap_tg_reg __read_mostly = {
74 .name = "NETMAP", 74 .name = "NETMAP",
75 .family = NFPROTO_IPV4, 75 .family = NFPROTO_IPV4,
76 .target = netmap_tg, 76 .target = netmap_tg,
77 .targetsize = sizeof(struct nf_nat_multi_range_compat), 77 .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
78 .table = "nat", 78 .table = "nat",
79 .hooks = (1 << NF_INET_PRE_ROUTING) | 79 .hooks = (1 << NF_INET_PRE_ROUTING) |
80 (1 << NF_INET_POST_ROUTING) | 80 (1 << NF_INET_POST_ROUTING) |
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 18a0656505a0..7c0103a5203e 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -28,9 +28,9 @@ MODULE_DESCRIPTION("Xtables: Connection redirection to localhost");
28/* FIXME: Take multiple ranges --RR */ 28/* FIXME: Take multiple ranges --RR */
29static int redirect_tg_check(const struct xt_tgchk_param *par) 29static int redirect_tg_check(const struct xt_tgchk_param *par)
30{ 30{
31 const struct nf_nat_multi_range_compat *mr = par->targinfo; 31 const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
32 32
33 if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { 33 if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) {
34 pr_debug("bad MAP_IPS.\n"); 34 pr_debug("bad MAP_IPS.\n");
35 return -EINVAL; 35 return -EINVAL;
36 } 36 }
@@ -47,8 +47,8 @@ redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
47 struct nf_conn *ct; 47 struct nf_conn *ct;
48 enum ip_conntrack_info ctinfo; 48 enum ip_conntrack_info ctinfo;
49 __be32 newdst; 49 __be32 newdst;
50 const struct nf_nat_multi_range_compat *mr = par->targinfo; 50 const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
51 struct nf_nat_range newrange; 51 struct nf_nat_ipv4_range newrange;
52 52
53 NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || 53 NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
54 par->hooknum == NF_INET_LOCAL_OUT); 54 par->hooknum == NF_INET_LOCAL_OUT);
@@ -76,20 +76,20 @@ redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
76 } 76 }
77 77
78 /* Transfer from original range. */ 78 /* Transfer from original range. */
79 newrange = ((struct nf_nat_range) 79 newrange = ((struct nf_nat_ipv4_range)
80 { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS, 80 { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS,
81 newdst, newdst, 81 newdst, newdst,
82 mr->range[0].min, mr->range[0].max }); 82 mr->range[0].min, mr->range[0].max });
83 83
84 /* Hand modified range to generic setup. */ 84 /* Hand modified range to generic setup. */
85 return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_DST); 85 return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST);
86} 86}
87 87
88static struct xt_target redirect_tg_reg __read_mostly = { 88static struct xt_target redirect_tg_reg __read_mostly = {
89 .name = "REDIRECT", 89 .name = "REDIRECT",
90 .family = NFPROTO_IPV4, 90 .family = NFPROTO_IPV4,
91 .target = redirect_tg, 91 .target = redirect_tg,
92 .targetsize = sizeof(struct nf_nat_multi_range_compat), 92 .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
93 .table = "nat", 93 .table = "nat",
94 .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), 94 .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
95 .checkentry = redirect_tg_check, 95 .checkentry = redirect_tg_check,
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index b5508151e547..ba5756d20165 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -65,7 +65,7 @@ static unsigned int flushtimeout = 10;
65module_param(flushtimeout, uint, 0600); 65module_param(flushtimeout, uint, 0600);
66MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)"); 66MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)");
67 67
68static int nflog = 1; 68static bool nflog = true;
69module_param(nflog, bool, 0400); 69module_param(nflog, bool, 0400);
70MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); 70MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
71 71
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
deleted file mode 100644
index 2b57e52c746c..000000000000
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ /dev/null
@@ -1,127 +0,0 @@
1/* IP tables module for matching the value of the IPv4 and TCP ECN bits
2 *
3 * (C) 2002 by Harald Welte <laforge@gnumonks.org>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10#include <linux/in.h>
11#include <linux/ip.h>
12#include <net/ip.h>
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/tcp.h>
16
17#include <linux/netfilter/x_tables.h>
18#include <linux/netfilter_ipv4/ip_tables.h>
19#include <linux/netfilter_ipv4/ipt_ecn.h>
20
21MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
22MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag match for IPv4");
23MODULE_LICENSE("GPL");
24
25static inline bool match_ip(const struct sk_buff *skb,
26 const struct ipt_ecn_info *einfo)
27{
28 return ((ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect) ^
29 !!(einfo->invert & IPT_ECN_OP_MATCH_IP);
30}
31
32static inline bool match_tcp(const struct sk_buff *skb,
33 const struct ipt_ecn_info *einfo,
34 bool *hotdrop)
35{
36 struct tcphdr _tcph;
37 const struct tcphdr *th;
38
39 /* In practice, TCP match does this, so can't fail. But let's
40 * be good citizens.
41 */
42 th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
43 if (th == NULL) {
44 *hotdrop = false;
45 return false;
46 }
47
48 if (einfo->operation & IPT_ECN_OP_MATCH_ECE) {
49 if (einfo->invert & IPT_ECN_OP_MATCH_ECE) {
50 if (th->ece == 1)
51 return false;
52 } else {
53 if (th->ece == 0)
54 return false;
55 }
56 }
57
58 if (einfo->operation & IPT_ECN_OP_MATCH_CWR) {
59 if (einfo->invert & IPT_ECN_OP_MATCH_CWR) {
60 if (th->cwr == 1)
61 return false;
62 } else {
63 if (th->cwr == 0)
64 return false;
65 }
66 }
67
68 return true;
69}
70
71static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par)
72{
73 const struct ipt_ecn_info *info = par->matchinfo;
74
75 if (info->operation & IPT_ECN_OP_MATCH_IP)
76 if (!match_ip(skb, info))
77 return false;
78
79 if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
80 if (!match_tcp(skb, info, &par->hotdrop))
81 return false;
82 }
83
84 return true;
85}
86
87static int ecn_mt_check(const struct xt_mtchk_param *par)
88{
89 const struct ipt_ecn_info *info = par->matchinfo;
90 const struct ipt_ip *ip = par->entryinfo;
91
92 if (info->operation & IPT_ECN_OP_MATCH_MASK)
93 return -EINVAL;
94
95 if (info->invert & IPT_ECN_OP_MATCH_MASK)
96 return -EINVAL;
97
98 if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) &&
99 (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) {
100 pr_info("cannot match TCP bits in rule for non-tcp packets\n");
101 return -EINVAL;
102 }
103
104 return 0;
105}
106
107static struct xt_match ecn_mt_reg __read_mostly = {
108 .name = "ecn",
109 .family = NFPROTO_IPV4,
110 .match = ecn_mt,
111 .matchsize = sizeof(struct ipt_ecn_info),
112 .checkentry = ecn_mt_check,
113 .me = THIS_MODULE,
114};
115
116static int __init ecn_mt_init(void)
117{
118 return xt_register_match(&ecn_mt_reg);
119}
120
121static void __exit ecn_mt_exit(void)
122{
123 xt_unregister_match(&ecn_mt_reg);
124}
125
126module_init(ecn_mt_init);
127module_exit(ecn_mt_exit);
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
new file mode 100644
index 000000000000..31371be8174b
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -0,0 +1,141 @@
1/*
2 * Copyright (c) 2011 Florian Westphal <fw@strlen.de>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * based on fib_frontend.c; Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 */
10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11#include <linux/module.h>
12#include <linux/skbuff.h>
13#include <linux/netdevice.h>
14#include <linux/ip.h>
15#include <net/ip.h>
16#include <net/ip_fib.h>
17#include <net/route.h>
18
19#include <linux/netfilter/xt_rpfilter.h>
20#include <linux/netfilter/x_tables.h>
21
22MODULE_LICENSE("GPL");
23MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
24MODULE_DESCRIPTION("iptables: ipv4 reverse path filter match");
25
26/* don't try to find route from mcast/bcast/zeronet */
27static __be32 rpfilter_get_saddr(__be32 addr)
28{
29 if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) ||
30 ipv4_is_zeronet(addr))
31 return 0;
32 return addr;
33}
34
35static bool rpfilter_lookup_reverse(struct flowi4 *fl4,
36 const struct net_device *dev, u8 flags)
37{
38 struct fib_result res;
39 bool dev_match;
40 struct net *net = dev_net(dev);
41 int ret __maybe_unused;
42
43 if (fib_lookup(net, fl4, &res))
44 return false;
45
46 if (res.type != RTN_UNICAST) {
47 if (res.type != RTN_LOCAL || !(flags & XT_RPFILTER_ACCEPT_LOCAL))
48 return false;
49 }
50 dev_match = false;
51#ifdef CONFIG_IP_ROUTE_MULTIPATH
52 for (ret = 0; ret < res.fi->fib_nhs; ret++) {
53 struct fib_nh *nh = &res.fi->fib_nh[ret];
54
55 if (nh->nh_dev == dev) {
56 dev_match = true;
57 break;
58 }
59 }
60#else
61 if (FIB_RES_DEV(res) == dev)
62 dev_match = true;
63#endif
64 if (dev_match || flags & XT_RPFILTER_LOOSE)
65 return FIB_RES_NH(res).nh_scope <= RT_SCOPE_HOST;
66 return dev_match;
67}
68
69static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
70{
71 const struct xt_rpfilter_info *info;
72 const struct iphdr *iph;
73 struct flowi4 flow;
74 bool invert;
75
76 info = par->matchinfo;
77 invert = info->flags & XT_RPFILTER_INVERT;
78
79 if (par->in->flags & IFF_LOOPBACK)
80 return true ^ invert;
81
82 iph = ip_hdr(skb);
83 if (ipv4_is_multicast(iph->daddr)) {
84 if (ipv4_is_zeronet(iph->saddr))
85 return ipv4_is_local_multicast(iph->daddr) ^ invert;
86 flow.flowi4_iif = 0;
87 } else {
88 flow.flowi4_iif = dev_net(par->in)->loopback_dev->ifindex;
89 }
90
91 flow.daddr = iph->saddr;
92 flow.saddr = rpfilter_get_saddr(iph->daddr);
93 flow.flowi4_oif = 0;
94 flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
95 flow.flowi4_tos = RT_TOS(iph->tos);
96 flow.flowi4_scope = RT_SCOPE_UNIVERSE;
97
98 return rpfilter_lookup_reverse(&flow, par->in, info->flags) ^ invert;
99}
100
101static int rpfilter_check(const struct xt_mtchk_param *par)
102{
103 const struct xt_rpfilter_info *info = par->matchinfo;
104 unsigned int options = ~XT_RPFILTER_OPTION_MASK;
105 if (info->flags & options) {
106 pr_info("unknown options encountered");
107 return -EINVAL;
108 }
109
110 if (strcmp(par->table, "mangle") != 0 &&
111 strcmp(par->table, "raw") != 0) {
112 pr_info("match only valid in the \'raw\' "
113 "or \'mangle\' tables, not \'%s\'.\n", par->table);
114 return -EINVAL;
115 }
116
117 return 0;
118}
119
120static struct xt_match rpfilter_mt_reg __read_mostly = {
121 .name = "rpfilter",
122 .family = NFPROTO_IPV4,
123 .checkentry = rpfilter_check,
124 .match = rpfilter_mt,
125 .matchsize = sizeof(struct xt_rpfilter_info),
126 .hooks = (1 << NF_INET_PRE_ROUTING),
127 .me = THIS_MODULE
128};
129
130static int __init rpfilter_mt_init(void)
131{
132 return xt_register_match(&rpfilter_mt_reg);
133}
134
135static void __exit rpfilter_mt_exit(void)
136{
137 xt_unregister_match(&rpfilter_mt_reg);
138}
139
140module_init(rpfilter_mt_init);
141module_exit(rpfilter_mt_exit);
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index c37641e819f2..0e58f09e59fb 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -52,7 +52,7 @@ iptable_filter_hook(unsigned int hook, struct sk_buff *skb,
52static struct nf_hook_ops *filter_ops __read_mostly; 52static struct nf_hook_ops *filter_ops __read_mostly;
53 53
54/* Default to forward because I got too much mail already. */ 54/* Default to forward because I got too much mail already. */
55static int forward = NF_ACCEPT; 55static bool forward = NF_ACCEPT;
56module_param(forward, bool, 0000); 56module_param(forward, bool, 0000);
57 57
58static int __net_init iptable_filter_net_init(struct net *net) 58static int __net_init iptable_filter_net_init(struct net *net)
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 447bc5cfdc6c..acdd002bb540 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -30,7 +30,6 @@
30#include <net/netfilter/nf_nat_helper.h> 30#include <net/netfilter/nf_nat_helper.h>
31#include <net/netfilter/nf_conntrack_helper.h> 31#include <net/netfilter/nf_conntrack_helper.h>
32#include <net/netfilter/nf_conntrack_l3proto.h> 32#include <net/netfilter/nf_conntrack_l3proto.h>
33#include <net/netfilter/nf_conntrack_l4proto.h>
34#include <net/netfilter/nf_conntrack_zones.h> 33#include <net/netfilter/nf_conntrack_zones.h>
35 34
36static DEFINE_SPINLOCK(nf_nat_lock); 35static DEFINE_SPINLOCK(nf_nat_lock);
@@ -57,7 +56,7 @@ hash_by_src(const struct net *net, u16 zone,
57 /* Original src, to ensure we map it consistently if poss. */ 56 /* Original src, to ensure we map it consistently if poss. */
58 hash = jhash_3words((__force u32)tuple->src.u3.ip, 57 hash = jhash_3words((__force u32)tuple->src.u3.ip,
59 (__force u32)tuple->src.u.all ^ zone, 58 (__force u32)tuple->src.u.all ^ zone,
60 tuple->dst.protonum, 0); 59 tuple->dst.protonum, nf_conntrack_hash_rnd);
61 return ((u64)hash * net->ipv4.nat_htable_size) >> 32; 60 return ((u64)hash * net->ipv4.nat_htable_size) >> 32;
62} 61}
63 62
@@ -82,14 +81,14 @@ EXPORT_SYMBOL(nf_nat_used_tuple);
82 * that meet the constraints of range. */ 81 * that meet the constraints of range. */
83static int 82static int
84in_range(const struct nf_conntrack_tuple *tuple, 83in_range(const struct nf_conntrack_tuple *tuple,
85 const struct nf_nat_range *range) 84 const struct nf_nat_ipv4_range *range)
86{ 85{
87 const struct nf_nat_protocol *proto; 86 const struct nf_nat_protocol *proto;
88 int ret = 0; 87 int ret = 0;
89 88
90 /* If we are supposed to map IPs, then we must be in the 89 /* If we are supposed to map IPs, then we must be in the
91 range specified, otherwise let this drag us onto a new src IP. */ 90 range specified, otherwise let this drag us onto a new src IP. */
92 if (range->flags & IP_NAT_RANGE_MAP_IPS) { 91 if (range->flags & NF_NAT_RANGE_MAP_IPS) {
93 if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) || 92 if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) ||
94 ntohl(tuple->src.u3.ip) > ntohl(range->max_ip)) 93 ntohl(tuple->src.u3.ip) > ntohl(range->max_ip))
95 return 0; 94 return 0;
@@ -97,8 +96,8 @@ in_range(const struct nf_conntrack_tuple *tuple,
97 96
98 rcu_read_lock(); 97 rcu_read_lock();
99 proto = __nf_nat_proto_find(tuple->dst.protonum); 98 proto = __nf_nat_proto_find(tuple->dst.protonum);
100 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || 99 if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) ||
101 proto->in_range(tuple, IP_NAT_MANIP_SRC, 100 proto->in_range(tuple, NF_NAT_MANIP_SRC,
102 &range->min, &range->max)) 101 &range->min, &range->max))
103 ret = 1; 102 ret = 1;
104 rcu_read_unlock(); 103 rcu_read_unlock();
@@ -123,7 +122,7 @@ static int
123find_appropriate_src(struct net *net, u16 zone, 122find_appropriate_src(struct net *net, u16 zone,
124 const struct nf_conntrack_tuple *tuple, 123 const struct nf_conntrack_tuple *tuple,
125 struct nf_conntrack_tuple *result, 124 struct nf_conntrack_tuple *result,
126 const struct nf_nat_range *range) 125 const struct nf_nat_ipv4_range *range)
127{ 126{
128 unsigned int h = hash_by_src(net, zone, tuple); 127 unsigned int h = hash_by_src(net, zone, tuple);
129 const struct nf_conn_nat *nat; 128 const struct nf_conn_nat *nat;
@@ -157,7 +156,7 @@ find_appropriate_src(struct net *net, u16 zone,
157*/ 156*/
158static void 157static void
159find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, 158find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
160 const struct nf_nat_range *range, 159 const struct nf_nat_ipv4_range *range,
161 const struct nf_conn *ct, 160 const struct nf_conn *ct,
162 enum nf_nat_manip_type maniptype) 161 enum nf_nat_manip_type maniptype)
163{ 162{
@@ -166,10 +165,10 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
166 u_int32_t minip, maxip, j; 165 u_int32_t minip, maxip, j;
167 166
168 /* No IP mapping? Do nothing. */ 167 /* No IP mapping? Do nothing. */
169 if (!(range->flags & IP_NAT_RANGE_MAP_IPS)) 168 if (!(range->flags & NF_NAT_RANGE_MAP_IPS))
170 return; 169 return;
171 170
172 if (maniptype == IP_NAT_MANIP_SRC) 171 if (maniptype == NF_NAT_MANIP_SRC)
173 var_ipp = &tuple->src.u3.ip; 172 var_ipp = &tuple->src.u3.ip;
174 else 173 else
175 var_ipp = &tuple->dst.u3.ip; 174 var_ipp = &tuple->dst.u3.ip;
@@ -189,7 +188,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
189 minip = ntohl(range->min_ip); 188 minip = ntohl(range->min_ip);
190 maxip = ntohl(range->max_ip); 189 maxip = ntohl(range->max_ip);
191 j = jhash_2words((__force u32)tuple->src.u3.ip, 190 j = jhash_2words((__force u32)tuple->src.u3.ip,
192 range->flags & IP_NAT_RANGE_PERSISTENT ? 191 range->flags & NF_NAT_RANGE_PERSISTENT ?
193 0 : (__force u32)tuple->dst.u3.ip ^ zone, 0); 192 0 : (__force u32)tuple->dst.u3.ip ^ zone, 0);
194 j = ((u64)j * (maxip - minip + 1)) >> 32; 193 j = ((u64)j * (maxip - minip + 1)) >> 32;
195 *var_ipp = htonl(minip + j); 194 *var_ipp = htonl(minip + j);
@@ -204,7 +203,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
204static void 203static void
205get_unique_tuple(struct nf_conntrack_tuple *tuple, 204get_unique_tuple(struct nf_conntrack_tuple *tuple,
206 const struct nf_conntrack_tuple *orig_tuple, 205 const struct nf_conntrack_tuple *orig_tuple,
207 const struct nf_nat_range *range, 206 const struct nf_nat_ipv4_range *range,
208 struct nf_conn *ct, 207 struct nf_conn *ct,
209 enum nf_nat_manip_type maniptype) 208 enum nf_nat_manip_type maniptype)
210{ 209{
@@ -219,8 +218,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
219 This is only required for source (ie. NAT/masq) mappings. 218 This is only required for source (ie. NAT/masq) mappings.
220 So far, we don't do local source mappings, so multiple 219 So far, we don't do local source mappings, so multiple
221 manips not an issue. */ 220 manips not an issue. */
222 if (maniptype == IP_NAT_MANIP_SRC && 221 if (maniptype == NF_NAT_MANIP_SRC &&
223 !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { 222 !(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) {
224 /* try the original tuple first */ 223 /* try the original tuple first */
225 if (in_range(orig_tuple, range)) { 224 if (in_range(orig_tuple, range)) {
226 if (!nf_nat_used_tuple(orig_tuple, ct)) { 225 if (!nf_nat_used_tuple(orig_tuple, ct)) {
@@ -247,8 +246,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
247 proto = __nf_nat_proto_find(orig_tuple->dst.protonum); 246 proto = __nf_nat_proto_find(orig_tuple->dst.protonum);
248 247
249 /* Only bother mapping if it's not already in range and unique */ 248 /* Only bother mapping if it's not already in range and unique */
250 if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { 249 if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) {
251 if (range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) { 250 if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
252 if (proto->in_range(tuple, maniptype, &range->min, 251 if (proto->in_range(tuple, maniptype, &range->min,
253 &range->max) && 252 &range->max) &&
254 (range->min.all == range->max.all || 253 (range->min.all == range->max.all ||
@@ -267,7 +266,7 @@ out:
267 266
268unsigned int 267unsigned int
269nf_nat_setup_info(struct nf_conn *ct, 268nf_nat_setup_info(struct nf_conn *ct,
270 const struct nf_nat_range *range, 269 const struct nf_nat_ipv4_range *range,
271 enum nf_nat_manip_type maniptype) 270 enum nf_nat_manip_type maniptype)
272{ 271{
273 struct net *net = nf_ct_net(ct); 272 struct net *net = nf_ct_net(ct);
@@ -284,8 +283,8 @@ nf_nat_setup_info(struct nf_conn *ct,
284 } 283 }
285 } 284 }
286 285
287 NF_CT_ASSERT(maniptype == IP_NAT_MANIP_SRC || 286 NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
288 maniptype == IP_NAT_MANIP_DST); 287 maniptype == NF_NAT_MANIP_DST);
289 BUG_ON(nf_nat_initialized(ct, maniptype)); 288 BUG_ON(nf_nat_initialized(ct, maniptype));
290 289
291 /* What we've got will look like inverse of reply. Normally 290 /* What we've got will look like inverse of reply. Normally
@@ -306,13 +305,13 @@ nf_nat_setup_info(struct nf_conn *ct,
306 nf_conntrack_alter_reply(ct, &reply); 305 nf_conntrack_alter_reply(ct, &reply);
307 306
308 /* Non-atomic: we own this at the moment. */ 307 /* Non-atomic: we own this at the moment. */
309 if (maniptype == IP_NAT_MANIP_SRC) 308 if (maniptype == NF_NAT_MANIP_SRC)
310 ct->status |= IPS_SRC_NAT; 309 ct->status |= IPS_SRC_NAT;
311 else 310 else
312 ct->status |= IPS_DST_NAT; 311 ct->status |= IPS_DST_NAT;
313 } 312 }
314 313
315 if (maniptype == IP_NAT_MANIP_SRC) { 314 if (maniptype == NF_NAT_MANIP_SRC) {
316 unsigned int srchash; 315 unsigned int srchash;
317 316
318 srchash = hash_by_src(net, nf_ct_zone(ct), 317 srchash = hash_by_src(net, nf_ct_zone(ct),
@@ -327,7 +326,7 @@ nf_nat_setup_info(struct nf_conn *ct,
327 } 326 }
328 327
329 /* It's done. */ 328 /* It's done. */
330 if (maniptype == IP_NAT_MANIP_DST) 329 if (maniptype == NF_NAT_MANIP_DST)
331 ct->status |= IPS_DST_NAT_DONE; 330 ct->status |= IPS_DST_NAT_DONE;
332 else 331 else
333 ct->status |= IPS_SRC_NAT_DONE; 332 ct->status |= IPS_SRC_NAT_DONE;
@@ -361,7 +360,7 @@ manip_pkt(u_int16_t proto,
361 360
362 iph = (void *)skb->data + iphdroff; 361 iph = (void *)skb->data + iphdroff;
363 362
364 if (maniptype == IP_NAT_MANIP_SRC) { 363 if (maniptype == NF_NAT_MANIP_SRC) {
365 csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); 364 csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
366 iph->saddr = target->src.u3.ip; 365 iph->saddr = target->src.u3.ip;
367 } else { 366 } else {
@@ -381,7 +380,7 @@ unsigned int nf_nat_packet(struct nf_conn *ct,
381 unsigned long statusbit; 380 unsigned long statusbit;
382 enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum); 381 enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);
383 382
384 if (mtype == IP_NAT_MANIP_SRC) 383 if (mtype == NF_NAT_MANIP_SRC)
385 statusbit = IPS_SRC_NAT; 384 statusbit = IPS_SRC_NAT;
386 else 385 else
387 statusbit = IPS_DST_NAT; 386 statusbit = IPS_DST_NAT;
@@ -414,8 +413,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
414 struct icmphdr icmp; 413 struct icmphdr icmp;
415 struct iphdr ip; 414 struct iphdr ip;
416 } *inside; 415 } *inside;
417 const struct nf_conntrack_l4proto *l4proto; 416 struct nf_conntrack_tuple target;
418 struct nf_conntrack_tuple inner, target;
419 int hdrlen = ip_hdrlen(skb); 417 int hdrlen = ip_hdrlen(skb);
420 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 418 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
421 unsigned long statusbit; 419 unsigned long statusbit;
@@ -447,7 +445,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
447 return 0; 445 return 0;
448 } 446 }
449 447
450 if (manip == IP_NAT_MANIP_SRC) 448 if (manip == NF_NAT_MANIP_SRC)
451 statusbit = IPS_SRC_NAT; 449 statusbit = IPS_SRC_NAT;
452 else 450 else
453 statusbit = IPS_DST_NAT; 451 statusbit = IPS_DST_NAT;
@@ -463,16 +461,6 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
463 "dir %s\n", skb, manip, 461 "dir %s\n", skb, manip,
464 dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); 462 dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
465 463
466 /* rcu_read_lock()ed by nf_hook_slow */
467 l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
468
469 if (!nf_ct_get_tuple(skb, hdrlen + sizeof(struct icmphdr),
470 (hdrlen +
471 sizeof(struct icmphdr) + inside->ip.ihl * 4),
472 (u_int16_t)AF_INET, inside->ip.protocol,
473 &inner, l3proto, l4proto))
474 return 0;
475
476 /* Change inner back to look like incoming packet. We do the 464 /* Change inner back to look like incoming packet. We do the
477 opposite manip on this hook to normal, because it might not 465 opposite manip on this hook to normal, because it might not
478 pass all hooks (locally-generated ICMP). Consider incoming 466 pass all hooks (locally-generated ICMP). Consider incoming
@@ -575,26 +563,6 @@ static struct nf_ct_ext_type nat_extend __read_mostly = {
575#include <linux/netfilter/nfnetlink.h> 563#include <linux/netfilter/nfnetlink.h>
576#include <linux/netfilter/nfnetlink_conntrack.h> 564#include <linux/netfilter/nfnetlink_conntrack.h>
577 565
578static const struct nf_nat_protocol *
579nf_nat_proto_find_get(u_int8_t protonum)
580{
581 const struct nf_nat_protocol *p;
582
583 rcu_read_lock();
584 p = __nf_nat_proto_find(protonum);
585 if (!try_module_get(p->me))
586 p = &nf_nat_unknown_protocol;
587 rcu_read_unlock();
588
589 return p;
590}
591
592static void
593nf_nat_proto_put(const struct nf_nat_protocol *p)
594{
595 module_put(p->me);
596}
597
598static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { 566static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
599 [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 }, 567 [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 },
600 [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 }, 568 [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 },
@@ -602,7 +570,7 @@ static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
602 570
603static int nfnetlink_parse_nat_proto(struct nlattr *attr, 571static int nfnetlink_parse_nat_proto(struct nlattr *attr,
604 const struct nf_conn *ct, 572 const struct nf_conn *ct,
605 struct nf_nat_range *range) 573 struct nf_nat_ipv4_range *range)
606{ 574{
607 struct nlattr *tb[CTA_PROTONAT_MAX+1]; 575 struct nlattr *tb[CTA_PROTONAT_MAX+1];
608 const struct nf_nat_protocol *npt; 576 const struct nf_nat_protocol *npt;
@@ -612,21 +580,23 @@ static int nfnetlink_parse_nat_proto(struct nlattr *attr,
612 if (err < 0) 580 if (err < 0)
613 return err; 581 return err;
614 582
615 npt = nf_nat_proto_find_get(nf_ct_protonum(ct)); 583 rcu_read_lock();
584 npt = __nf_nat_proto_find(nf_ct_protonum(ct));
616 if (npt->nlattr_to_range) 585 if (npt->nlattr_to_range)
617 err = npt->nlattr_to_range(tb, range); 586 err = npt->nlattr_to_range(tb, range);
618 nf_nat_proto_put(npt); 587 rcu_read_unlock();
619 return err; 588 return err;
620} 589}
621 590
622static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { 591static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
623 [CTA_NAT_MINIP] = { .type = NLA_U32 }, 592 [CTA_NAT_MINIP] = { .type = NLA_U32 },
624 [CTA_NAT_MAXIP] = { .type = NLA_U32 }, 593 [CTA_NAT_MAXIP] = { .type = NLA_U32 },
594 [CTA_NAT_PROTO] = { .type = NLA_NESTED },
625}; 595};
626 596
627static int 597static int
628nfnetlink_parse_nat(const struct nlattr *nat, 598nfnetlink_parse_nat(const struct nlattr *nat,
629 const struct nf_conn *ct, struct nf_nat_range *range) 599 const struct nf_conn *ct, struct nf_nat_ipv4_range *range)
630{ 600{
631 struct nlattr *tb[CTA_NAT_MAX+1]; 601 struct nlattr *tb[CTA_NAT_MAX+1];
632 int err; 602 int err;
@@ -646,7 +616,7 @@ nfnetlink_parse_nat(const struct nlattr *nat,
646 range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]); 616 range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]);
647 617
648 if (range->min_ip) 618 if (range->min_ip)
649 range->flags |= IP_NAT_RANGE_MAP_IPS; 619 range->flags |= NF_NAT_RANGE_MAP_IPS;
650 620
651 if (!tb[CTA_NAT_PROTO]) 621 if (!tb[CTA_NAT_PROTO])
652 return 0; 622 return 0;
@@ -663,7 +633,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
663 enum nf_nat_manip_type manip, 633 enum nf_nat_manip_type manip,
664 const struct nlattr *attr) 634 const struct nlattr *attr)
665{ 635{
666 struct nf_nat_range range; 636 struct nf_nat_ipv4_range range;
667 637
668 if (nfnetlink_parse_nat(attr, ct, &range) < 0) 638 if (nfnetlink_parse_nat(attr, ct, &range) < 0)
669 return -EINVAL; 639 return -EINVAL;
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index b9a1136addbd..dc1dd912baf4 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -398,7 +398,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
398static void ip_nat_q931_expect(struct nf_conn *new, 398static void ip_nat_q931_expect(struct nf_conn *new,
399 struct nf_conntrack_expect *this) 399 struct nf_conntrack_expect *this)
400{ 400{
401 struct nf_nat_range range; 401 struct nf_nat_ipv4_range range;
402 402
403 if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */ 403 if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */
404 nf_nat_follow_master(new, this); 404 nf_nat_follow_master(new, this);
@@ -409,16 +409,16 @@ static void ip_nat_q931_expect(struct nf_conn *new,
409 BUG_ON(new->status & IPS_NAT_DONE_MASK); 409 BUG_ON(new->status & IPS_NAT_DONE_MASK);
410 410
411 /* Change src to where master sends to */ 411 /* Change src to where master sends to */
412 range.flags = IP_NAT_RANGE_MAP_IPS; 412 range.flags = NF_NAT_RANGE_MAP_IPS;
413 range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip; 413 range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
414 nf_nat_setup_info(new, &range, IP_NAT_MANIP_SRC); 414 nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC);
415 415
416 /* For DST manip, map port here to where it's expected. */ 416 /* For DST manip, map port here to where it's expected. */
417 range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); 417 range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
418 range.min = range.max = this->saved_proto; 418 range.min = range.max = this->saved_proto;
419 range.min_ip = range.max_ip = 419 range.min_ip = range.max_ip =
420 new->master->tuplehash[!this->dir].tuple.src.u3.ip; 420 new->master->tuplehash[!this->dir].tuple.src.u3.ip;
421 nf_nat_setup_info(new, &range, IP_NAT_MANIP_DST); 421 nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST);
422} 422}
423 423
424/****************************************************************************/ 424/****************************************************************************/
@@ -496,21 +496,21 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
496static void ip_nat_callforwarding_expect(struct nf_conn *new, 496static void ip_nat_callforwarding_expect(struct nf_conn *new,
497 struct nf_conntrack_expect *this) 497 struct nf_conntrack_expect *this)
498{ 498{
499 struct nf_nat_range range; 499 struct nf_nat_ipv4_range range;
500 500
501 /* This must be a fresh one. */ 501 /* This must be a fresh one. */
502 BUG_ON(new->status & IPS_NAT_DONE_MASK); 502 BUG_ON(new->status & IPS_NAT_DONE_MASK);
503 503
504 /* Change src to where master sends to */ 504 /* Change src to where master sends to */
505 range.flags = IP_NAT_RANGE_MAP_IPS; 505 range.flags = NF_NAT_RANGE_MAP_IPS;
506 range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip; 506 range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
507 nf_nat_setup_info(new, &range, IP_NAT_MANIP_SRC); 507 nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC);
508 508
509 /* For DST manip, map port here to where it's expected. */ 509 /* For DST manip, map port here to where it's expected. */
510 range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); 510 range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
511 range.min = range.max = this->saved_proto; 511 range.min = range.max = this->saved_proto;
512 range.min_ip = range.max_ip = this->saved_ip; 512 range.min_ip = range.max_ip = this->saved_ip;
513 nf_nat_setup_info(new, &range, IP_NAT_MANIP_DST); 513 nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST);
514} 514}
515 515
516/****************************************************************************/ 516/****************************************************************************/
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index ebc5f8894f99..af65958f6308 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -253,12 +253,6 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
253 struct udphdr *udph; 253 struct udphdr *udph;
254 int datalen, oldlen; 254 int datalen, oldlen;
255 255
256 /* UDP helpers might accidentally mangle the wrong packet */
257 iph = ip_hdr(skb);
258 if (skb->len < iph->ihl*4 + sizeof(*udph) +
259 match_offset + match_len)
260 return 0;
261
262 if (!skb_make_writable(skb, skb->len)) 256 if (!skb_make_writable(skb, skb->len))
263 return 0; 257 return 0;
264 258
@@ -430,22 +424,22 @@ nf_nat_seq_adjust(struct sk_buff *skb,
430void nf_nat_follow_master(struct nf_conn *ct, 424void nf_nat_follow_master(struct nf_conn *ct,
431 struct nf_conntrack_expect *exp) 425 struct nf_conntrack_expect *exp)
432{ 426{
433 struct nf_nat_range range; 427 struct nf_nat_ipv4_range range;
434 428
435 /* This must be a fresh one. */ 429 /* This must be a fresh one. */
436 BUG_ON(ct->status & IPS_NAT_DONE_MASK); 430 BUG_ON(ct->status & IPS_NAT_DONE_MASK);
437 431
438 /* Change src to where master sends to */ 432 /* Change src to where master sends to */
439 range.flags = IP_NAT_RANGE_MAP_IPS; 433 range.flags = NF_NAT_RANGE_MAP_IPS;
440 range.min_ip = range.max_ip 434 range.min_ip = range.max_ip
441 = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; 435 = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
442 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC); 436 nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
443 437
444 /* For DST manip, map port here to where it's expected. */ 438 /* For DST manip, map port here to where it's expected. */
445 range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); 439 range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
446 range.min = range.max = exp->saved_proto; 440 range.min = range.max = exp->saved_proto;
447 range.min_ip = range.max_ip 441 range.min_ip = range.max_ip
448 = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip; 442 = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
449 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST); 443 nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
450} 444}
451EXPORT_SYMBOL(nf_nat_follow_master); 445EXPORT_SYMBOL(nf_nat_follow_master);
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 3e8284ba46b8..c273d58980ae 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -47,7 +47,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
47 struct nf_conntrack_tuple t; 47 struct nf_conntrack_tuple t;
48 const struct nf_ct_pptp_master *ct_pptp_info; 48 const struct nf_ct_pptp_master *ct_pptp_info;
49 const struct nf_nat_pptp *nat_pptp_info; 49 const struct nf_nat_pptp *nat_pptp_info;
50 struct nf_nat_range range; 50 struct nf_nat_ipv4_range range;
51 51
52 ct_pptp_info = &nfct_help(master)->help.ct_pptp_info; 52 ct_pptp_info = &nfct_help(master)->help.ct_pptp_info;
53 nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info; 53 nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info;
@@ -88,24 +88,24 @@ static void pptp_nat_expected(struct nf_conn *ct,
88 BUG_ON(ct->status & IPS_NAT_DONE_MASK); 88 BUG_ON(ct->status & IPS_NAT_DONE_MASK);
89 89
90 /* Change src to where master sends to */ 90 /* Change src to where master sends to */
91 range.flags = IP_NAT_RANGE_MAP_IPS; 91 range.flags = NF_NAT_RANGE_MAP_IPS;
92 range.min_ip = range.max_ip 92 range.min_ip = range.max_ip
93 = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; 93 = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
94 if (exp->dir == IP_CT_DIR_ORIGINAL) { 94 if (exp->dir == IP_CT_DIR_ORIGINAL) {
95 range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED; 95 range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
96 range.min = range.max = exp->saved_proto; 96 range.min = range.max = exp->saved_proto;
97 } 97 }
98 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC); 98 nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
99 99
100 /* For DST manip, map port here to where it's expected. */ 100 /* For DST manip, map port here to where it's expected. */
101 range.flags = IP_NAT_RANGE_MAP_IPS; 101 range.flags = NF_NAT_RANGE_MAP_IPS;
102 range.min_ip = range.max_ip 102 range.min_ip = range.max_ip
103 = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip; 103 = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
104 if (exp->dir == IP_CT_DIR_REPLY) { 104 if (exp->dir == IP_CT_DIR_REPLY) {
105 range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED; 105 range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
106 range.min = range.max = exp->saved_proto; 106 range.min = range.max = exp->saved_proto;
107 } 107 }
108 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST); 108 nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
109} 109}
110 110
111/* outbound packets == from PNS to PAC */ 111/* outbound packets == from PNS to PAC */
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c
index a3d997618602..9993bc93e102 100644
--- a/net/ipv4/netfilter/nf_nat_proto_common.c
+++ b/net/ipv4/netfilter/nf_nat_proto_common.c
@@ -26,7 +26,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
26{ 26{
27 __be16 port; 27 __be16 port;
28 28
29 if (maniptype == IP_NAT_MANIP_SRC) 29 if (maniptype == NF_NAT_MANIP_SRC)
30 port = tuple->src.u.all; 30 port = tuple->src.u.all;
31 else 31 else
32 port = tuple->dst.u.all; 32 port = tuple->dst.u.all;
@@ -37,7 +37,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
37EXPORT_SYMBOL_GPL(nf_nat_proto_in_range); 37EXPORT_SYMBOL_GPL(nf_nat_proto_in_range);
38 38
39void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, 39void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
40 const struct nf_nat_range *range, 40 const struct nf_nat_ipv4_range *range,
41 enum nf_nat_manip_type maniptype, 41 enum nf_nat_manip_type maniptype,
42 const struct nf_conn *ct, 42 const struct nf_conn *ct,
43 u_int16_t *rover) 43 u_int16_t *rover)
@@ -46,15 +46,15 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
46 __be16 *portptr; 46 __be16 *portptr;
47 u_int16_t off; 47 u_int16_t off;
48 48
49 if (maniptype == IP_NAT_MANIP_SRC) 49 if (maniptype == NF_NAT_MANIP_SRC)
50 portptr = &tuple->src.u.all; 50 portptr = &tuple->src.u.all;
51 else 51 else
52 portptr = &tuple->dst.u.all; 52 portptr = &tuple->dst.u.all;
53 53
54 /* If no range specified... */ 54 /* If no range specified... */
55 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { 55 if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
56 /* If it's dst rewrite, can't change port */ 56 /* If it's dst rewrite, can't change port */
57 if (maniptype == IP_NAT_MANIP_DST) 57 if (maniptype == NF_NAT_MANIP_DST)
58 return; 58 return;
59 59
60 if (ntohs(*portptr) < 1024) { 60 if (ntohs(*portptr) < 1024) {
@@ -75,9 +75,9 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
75 range_size = ntohs(range->max.all) - min + 1; 75 range_size = ntohs(range->max.all) - min + 1;
76 } 76 }
77 77
78 if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) 78 if (range->flags & NF_NAT_RANGE_PROTO_RANDOM)
79 off = secure_ipv4_port_ephemeral(tuple->src.u3.ip, tuple->dst.u3.ip, 79 off = secure_ipv4_port_ephemeral(tuple->src.u3.ip, tuple->dst.u3.ip,
80 maniptype == IP_NAT_MANIP_SRC 80 maniptype == NF_NAT_MANIP_SRC
81 ? tuple->dst.u.all 81 ? tuple->dst.u.all
82 : tuple->src.u.all); 82 : tuple->src.u.all);
83 else 83 else
@@ -87,7 +87,7 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
87 *portptr = htons(min + off % range_size); 87 *portptr = htons(min + off % range_size);
88 if (++i != range_size && nf_nat_used_tuple(tuple, ct)) 88 if (++i != range_size && nf_nat_used_tuple(tuple, ct))
89 continue; 89 continue;
90 if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) 90 if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM))
91 *rover = off; 91 *rover = off;
92 return; 92 return;
93 } 93 }
@@ -96,31 +96,19 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
96EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple); 96EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple);
97 97
98#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 98#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
99int nf_nat_proto_range_to_nlattr(struct sk_buff *skb,
100 const struct nf_nat_range *range)
101{
102 NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MIN, range->min.all);
103 NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MAX, range->max.all);
104 return 0;
105
106nla_put_failure:
107 return -1;
108}
109EXPORT_SYMBOL_GPL(nf_nat_proto_nlattr_to_range);
110
111int nf_nat_proto_nlattr_to_range(struct nlattr *tb[], 99int nf_nat_proto_nlattr_to_range(struct nlattr *tb[],
112 struct nf_nat_range *range) 100 struct nf_nat_ipv4_range *range)
113{ 101{
114 if (tb[CTA_PROTONAT_PORT_MIN]) { 102 if (tb[CTA_PROTONAT_PORT_MIN]) {
115 range->min.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]); 103 range->min.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
116 range->max.all = range->min.tcp.port; 104 range->max.all = range->min.tcp.port;
117 range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED; 105 range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
118 } 106 }
119 if (tb[CTA_PROTONAT_PORT_MAX]) { 107 if (tb[CTA_PROTONAT_PORT_MAX]) {
120 range->max.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]); 108 range->max.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
121 range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED; 109 range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
122 } 110 }
123 return 0; 111 return 0;
124} 112}
125EXPORT_SYMBOL_GPL(nf_nat_proto_range_to_nlattr); 113EXPORT_SYMBOL_GPL(nf_nat_proto_nlattr_to_range);
126#endif 114#endif
diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c
index 570faf2667b2..3f67138d187c 100644
--- a/net/ipv4/netfilter/nf_nat_proto_dccp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_dccp.c
@@ -24,7 +24,7 @@ static u_int16_t dccp_port_rover;
24 24
25static void 25static void
26dccp_unique_tuple(struct nf_conntrack_tuple *tuple, 26dccp_unique_tuple(struct nf_conntrack_tuple *tuple,
27 const struct nf_nat_range *range, 27 const struct nf_nat_ipv4_range *range,
28 enum nf_nat_manip_type maniptype, 28 enum nf_nat_manip_type maniptype,
29 const struct nf_conn *ct) 29 const struct nf_conn *ct)
30{ 30{
@@ -54,7 +54,7 @@ dccp_manip_pkt(struct sk_buff *skb,
54 iph = (struct iphdr *)(skb->data + iphdroff); 54 iph = (struct iphdr *)(skb->data + iphdroff);
55 hdr = (struct dccp_hdr *)(skb->data + hdroff); 55 hdr = (struct dccp_hdr *)(skb->data + hdroff);
56 56
57 if (maniptype == IP_NAT_MANIP_SRC) { 57 if (maniptype == NF_NAT_MANIP_SRC) {
58 oldip = iph->saddr; 58 oldip = iph->saddr;
59 newip = tuple->src.u3.ip; 59 newip = tuple->src.u3.ip;
60 newport = tuple->src.u.dccp.port; 60 newport = tuple->src.u.dccp.port;
@@ -80,12 +80,10 @@ dccp_manip_pkt(struct sk_buff *skb,
80 80
81static const struct nf_nat_protocol nf_nat_protocol_dccp = { 81static const struct nf_nat_protocol nf_nat_protocol_dccp = {
82 .protonum = IPPROTO_DCCP, 82 .protonum = IPPROTO_DCCP,
83 .me = THIS_MODULE,
84 .manip_pkt = dccp_manip_pkt, 83 .manip_pkt = dccp_manip_pkt,
85 .in_range = nf_nat_proto_in_range, 84 .in_range = nf_nat_proto_in_range,
86 .unique_tuple = dccp_unique_tuple, 85 .unique_tuple = dccp_unique_tuple,
87#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 86#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
88 .range_to_nlattr = nf_nat_proto_range_to_nlattr,
89 .nlattr_to_range = nf_nat_proto_nlattr_to_range, 87 .nlattr_to_range = nf_nat_proto_nlattr_to_range,
90#endif 88#endif
91}; 89};
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index bc8d83a31c73..46ba0b9ab985 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -39,7 +39,7 @@ MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
39/* generate unique tuple ... */ 39/* generate unique tuple ... */
40static void 40static void
41gre_unique_tuple(struct nf_conntrack_tuple *tuple, 41gre_unique_tuple(struct nf_conntrack_tuple *tuple,
42 const struct nf_nat_range *range, 42 const struct nf_nat_ipv4_range *range,
43 enum nf_nat_manip_type maniptype, 43 enum nf_nat_manip_type maniptype,
44 const struct nf_conn *ct) 44 const struct nf_conn *ct)
45{ 45{
@@ -52,12 +52,12 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
52 if (!ct->master) 52 if (!ct->master)
53 return; 53 return;
54 54
55 if (maniptype == IP_NAT_MANIP_SRC) 55 if (maniptype == NF_NAT_MANIP_SRC)
56 keyptr = &tuple->src.u.gre.key; 56 keyptr = &tuple->src.u.gre.key;
57 else 57 else
58 keyptr = &tuple->dst.u.gre.key; 58 keyptr = &tuple->dst.u.gre.key;
59 59
60 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { 60 if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
61 pr_debug("%p: NATing GRE PPTP\n", ct); 61 pr_debug("%p: NATing GRE PPTP\n", ct);
62 min = 1; 62 min = 1;
63 range_size = 0xffff; 63 range_size = 0xffff;
@@ -99,7 +99,7 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
99 99
100 /* we only have destination manip of a packet, since 'source key' 100 /* we only have destination manip of a packet, since 'source key'
101 * is not present in the packet itself */ 101 * is not present in the packet itself */
102 if (maniptype != IP_NAT_MANIP_DST) 102 if (maniptype != NF_NAT_MANIP_DST)
103 return true; 103 return true;
104 switch (greh->version) { 104 switch (greh->version) {
105 case GRE_VERSION_1701: 105 case GRE_VERSION_1701:
@@ -119,12 +119,10 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
119 119
120static const struct nf_nat_protocol gre = { 120static const struct nf_nat_protocol gre = {
121 .protonum = IPPROTO_GRE, 121 .protonum = IPPROTO_GRE,
122 .me = THIS_MODULE,
123 .manip_pkt = gre_manip_pkt, 122 .manip_pkt = gre_manip_pkt,
124 .in_range = nf_nat_proto_in_range, 123 .in_range = nf_nat_proto_in_range,
125 .unique_tuple = gre_unique_tuple, 124 .unique_tuple = gre_unique_tuple,
126#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 125#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
127 .range_to_nlattr = nf_nat_proto_range_to_nlattr,
128 .nlattr_to_range = nf_nat_proto_nlattr_to_range, 126 .nlattr_to_range = nf_nat_proto_nlattr_to_range,
129#endif 127#endif
130}; 128};
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index 9f4dc1235dc7..b35172851bae 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -30,7 +30,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple,
30 30
31static void 31static void
32icmp_unique_tuple(struct nf_conntrack_tuple *tuple, 32icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
33 const struct nf_nat_range *range, 33 const struct nf_nat_ipv4_range *range,
34 enum nf_nat_manip_type maniptype, 34 enum nf_nat_manip_type maniptype,
35 const struct nf_conn *ct) 35 const struct nf_conn *ct)
36{ 36{
@@ -40,7 +40,7 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
40 40
41 range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1; 41 range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1;
42 /* If no range specified... */ 42 /* If no range specified... */
43 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) 43 if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
44 range_size = 0xFFFF; 44 range_size = 0xFFFF;
45 45
46 for (i = 0; ; ++id) { 46 for (i = 0; ; ++id) {
@@ -74,12 +74,10 @@ icmp_manip_pkt(struct sk_buff *skb,
74 74
75const struct nf_nat_protocol nf_nat_protocol_icmp = { 75const struct nf_nat_protocol nf_nat_protocol_icmp = {
76 .protonum = IPPROTO_ICMP, 76 .protonum = IPPROTO_ICMP,
77 .me = THIS_MODULE,
78 .manip_pkt = icmp_manip_pkt, 77 .manip_pkt = icmp_manip_pkt,
79 .in_range = icmp_in_range, 78 .in_range = icmp_in_range,
80 .unique_tuple = icmp_unique_tuple, 79 .unique_tuple = icmp_unique_tuple,
81#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 80#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
82 .range_to_nlattr = nf_nat_proto_range_to_nlattr,
83 .nlattr_to_range = nf_nat_proto_nlattr_to_range, 81 .nlattr_to_range = nf_nat_proto_nlattr_to_range,
84#endif 82#endif
85}; 83};
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c
index bd5a80a62a5b..3cce9b6c1c29 100644
--- a/net/ipv4/netfilter/nf_nat_proto_sctp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c
@@ -19,7 +19,7 @@ static u_int16_t nf_sctp_port_rover;
19 19
20static void 20static void
21sctp_unique_tuple(struct nf_conntrack_tuple *tuple, 21sctp_unique_tuple(struct nf_conntrack_tuple *tuple,
22 const struct nf_nat_range *range, 22 const struct nf_nat_ipv4_range *range,
23 enum nf_nat_manip_type maniptype, 23 enum nf_nat_manip_type maniptype,
24 const struct nf_conn *ct) 24 const struct nf_conn *ct)
25{ 25{
@@ -46,7 +46,7 @@ sctp_manip_pkt(struct sk_buff *skb,
46 iph = (struct iphdr *)(skb->data + iphdroff); 46 iph = (struct iphdr *)(skb->data + iphdroff);
47 hdr = (struct sctphdr *)(skb->data + hdroff); 47 hdr = (struct sctphdr *)(skb->data + hdroff);
48 48
49 if (maniptype == IP_NAT_MANIP_SRC) { 49 if (maniptype == NF_NAT_MANIP_SRC) {
50 /* Get rid of src ip and src pt */ 50 /* Get rid of src ip and src pt */
51 oldip = iph->saddr; 51 oldip = iph->saddr;
52 newip = tuple->src.u3.ip; 52 newip = tuple->src.u3.ip;
@@ -70,12 +70,10 @@ sctp_manip_pkt(struct sk_buff *skb,
70 70
71static const struct nf_nat_protocol nf_nat_protocol_sctp = { 71static const struct nf_nat_protocol nf_nat_protocol_sctp = {
72 .protonum = IPPROTO_SCTP, 72 .protonum = IPPROTO_SCTP,
73 .me = THIS_MODULE,
74 .manip_pkt = sctp_manip_pkt, 73 .manip_pkt = sctp_manip_pkt,
75 .in_range = nf_nat_proto_in_range, 74 .in_range = nf_nat_proto_in_range,
76 .unique_tuple = sctp_unique_tuple, 75 .unique_tuple = sctp_unique_tuple,
77#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 76#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
78 .range_to_nlattr = nf_nat_proto_range_to_nlattr,
79 .nlattr_to_range = nf_nat_proto_nlattr_to_range, 77 .nlattr_to_range = nf_nat_proto_nlattr_to_range,
80#endif 78#endif
81}; 79};
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
index 0d67bb80130f..9fb4b4e72bbf 100644
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -23,7 +23,7 @@ static u_int16_t tcp_port_rover;
23 23
24static void 24static void
25tcp_unique_tuple(struct nf_conntrack_tuple *tuple, 25tcp_unique_tuple(struct nf_conntrack_tuple *tuple,
26 const struct nf_nat_range *range, 26 const struct nf_nat_ipv4_range *range,
27 enum nf_nat_manip_type maniptype, 27 enum nf_nat_manip_type maniptype,
28 const struct nf_conn *ct) 28 const struct nf_conn *ct)
29{ 29{
@@ -55,7 +55,7 @@ tcp_manip_pkt(struct sk_buff *skb,
55 iph = (struct iphdr *)(skb->data + iphdroff); 55 iph = (struct iphdr *)(skb->data + iphdroff);
56 hdr = (struct tcphdr *)(skb->data + hdroff); 56 hdr = (struct tcphdr *)(skb->data + hdroff);
57 57
58 if (maniptype == IP_NAT_MANIP_SRC) { 58 if (maniptype == NF_NAT_MANIP_SRC) {
59 /* Get rid of src ip and src pt */ 59 /* Get rid of src ip and src pt */
60 oldip = iph->saddr; 60 oldip = iph->saddr;
61 newip = tuple->src.u3.ip; 61 newip = tuple->src.u3.ip;
@@ -82,12 +82,10 @@ tcp_manip_pkt(struct sk_buff *skb,
82 82
83const struct nf_nat_protocol nf_nat_protocol_tcp = { 83const struct nf_nat_protocol nf_nat_protocol_tcp = {
84 .protonum = IPPROTO_TCP, 84 .protonum = IPPROTO_TCP,
85 .me = THIS_MODULE,
86 .manip_pkt = tcp_manip_pkt, 85 .manip_pkt = tcp_manip_pkt,
87 .in_range = nf_nat_proto_in_range, 86 .in_range = nf_nat_proto_in_range,
88 .unique_tuple = tcp_unique_tuple, 87 .unique_tuple = tcp_unique_tuple,
89#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 88#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
90 .range_to_nlattr = nf_nat_proto_range_to_nlattr,
91 .nlattr_to_range = nf_nat_proto_nlattr_to_range, 89 .nlattr_to_range = nf_nat_proto_nlattr_to_range,
92#endif 90#endif
93}; 91};
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
index 0b1b8601cba7..9883336e628f 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -22,7 +22,7 @@ static u_int16_t udp_port_rover;
22 22
23static void 23static void
24udp_unique_tuple(struct nf_conntrack_tuple *tuple, 24udp_unique_tuple(struct nf_conntrack_tuple *tuple,
25 const struct nf_nat_range *range, 25 const struct nf_nat_ipv4_range *range,
26 enum nf_nat_manip_type maniptype, 26 enum nf_nat_manip_type maniptype,
27 const struct nf_conn *ct) 27 const struct nf_conn *ct)
28{ 28{
@@ -47,7 +47,7 @@ udp_manip_pkt(struct sk_buff *skb,
47 iph = (struct iphdr *)(skb->data + iphdroff); 47 iph = (struct iphdr *)(skb->data + iphdroff);
48 hdr = (struct udphdr *)(skb->data + hdroff); 48 hdr = (struct udphdr *)(skb->data + hdroff);
49 49
50 if (maniptype == IP_NAT_MANIP_SRC) { 50 if (maniptype == NF_NAT_MANIP_SRC) {
51 /* Get rid of src ip and src pt */ 51 /* Get rid of src ip and src pt */
52 oldip = iph->saddr; 52 oldip = iph->saddr;
53 newip = tuple->src.u3.ip; 53 newip = tuple->src.u3.ip;
@@ -73,12 +73,10 @@ udp_manip_pkt(struct sk_buff *skb,
73 73
74const struct nf_nat_protocol nf_nat_protocol_udp = { 74const struct nf_nat_protocol nf_nat_protocol_udp = {
75 .protonum = IPPROTO_UDP, 75 .protonum = IPPROTO_UDP,
76 .me = THIS_MODULE,
77 .manip_pkt = udp_manip_pkt, 76 .manip_pkt = udp_manip_pkt,
78 .in_range = nf_nat_proto_in_range, 77 .in_range = nf_nat_proto_in_range,
79 .unique_tuple = udp_unique_tuple, 78 .unique_tuple = udp_unique_tuple,
80#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 79#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
81 .range_to_nlattr = nf_nat_proto_range_to_nlattr,
82 .nlattr_to_range = nf_nat_proto_nlattr_to_range, 80 .nlattr_to_range = nf_nat_proto_nlattr_to_range,
83#endif 81#endif
84}; 82};
diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c
index f83ef23e2ab7..d24d10a7beb2 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udplite.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udplite.c
@@ -21,7 +21,7 @@ static u_int16_t udplite_port_rover;
21 21
22static void 22static void
23udplite_unique_tuple(struct nf_conntrack_tuple *tuple, 23udplite_unique_tuple(struct nf_conntrack_tuple *tuple,
24 const struct nf_nat_range *range, 24 const struct nf_nat_ipv4_range *range,
25 enum nf_nat_manip_type maniptype, 25 enum nf_nat_manip_type maniptype,
26 const struct nf_conn *ct) 26 const struct nf_conn *ct)
27{ 27{
@@ -47,7 +47,7 @@ udplite_manip_pkt(struct sk_buff *skb,
47 iph = (struct iphdr *)(skb->data + iphdroff); 47 iph = (struct iphdr *)(skb->data + iphdroff);
48 hdr = (struct udphdr *)(skb->data + hdroff); 48 hdr = (struct udphdr *)(skb->data + hdroff);
49 49
50 if (maniptype == IP_NAT_MANIP_SRC) { 50 if (maniptype == NF_NAT_MANIP_SRC) {
51 /* Get rid of src ip and src pt */ 51 /* Get rid of src ip and src pt */
52 oldip = iph->saddr; 52 oldip = iph->saddr;
53 newip = tuple->src.u3.ip; 53 newip = tuple->src.u3.ip;
@@ -72,12 +72,10 @@ udplite_manip_pkt(struct sk_buff *skb,
72 72
73static const struct nf_nat_protocol nf_nat_protocol_udplite = { 73static const struct nf_nat_protocol nf_nat_protocol_udplite = {
74 .protonum = IPPROTO_UDPLITE, 74 .protonum = IPPROTO_UDPLITE,
75 .me = THIS_MODULE,
76 .manip_pkt = udplite_manip_pkt, 75 .manip_pkt = udplite_manip_pkt,
77 .in_range = nf_nat_proto_in_range, 76 .in_range = nf_nat_proto_in_range,
78 .unique_tuple = udplite_unique_tuple, 77 .unique_tuple = udplite_unique_tuple,
79#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 78#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
80 .range_to_nlattr = nf_nat_proto_range_to_nlattr,
81 .nlattr_to_range = nf_nat_proto_nlattr_to_range, 79 .nlattr_to_range = nf_nat_proto_nlattr_to_range,
82#endif 80#endif
83}; 81};
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c
index a50f2bc1c732..e0afe8112b1c 100644
--- a/net/ipv4/netfilter/nf_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c
@@ -27,7 +27,7 @@ static bool unknown_in_range(const struct nf_conntrack_tuple *tuple,
27} 27}
28 28
29static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple, 29static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple,
30 const struct nf_nat_range *range, 30 const struct nf_nat_ipv4_range *range,
31 enum nf_nat_manip_type maniptype, 31 enum nf_nat_manip_type maniptype,
32 const struct nf_conn *ct) 32 const struct nf_conn *ct)
33{ 33{
@@ -46,7 +46,6 @@ unknown_manip_pkt(struct sk_buff *skb,
46} 46}
47 47
48const struct nf_nat_protocol nf_nat_unknown_protocol = { 48const struct nf_nat_protocol nf_nat_unknown_protocol = {
49 /* .me isn't set: getting a ref to this cannot fail. */
50 .manip_pkt = unknown_manip_pkt, 49 .manip_pkt = unknown_manip_pkt,
51 .in_range = unknown_in_range, 50 .in_range = unknown_in_range,
52 .unique_tuple = unknown_unique_tuple, 51 .unique_tuple = unknown_unique_tuple,
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 733c9abc1cbd..d2a9dc314e0e 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -44,7 +44,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par)
44{ 44{
45 struct nf_conn *ct; 45 struct nf_conn *ct;
46 enum ip_conntrack_info ctinfo; 46 enum ip_conntrack_info ctinfo;
47 const struct nf_nat_multi_range_compat *mr = par->targinfo; 47 const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
48 48
49 NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING || 49 NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING ||
50 par->hooknum == NF_INET_LOCAL_IN); 50 par->hooknum == NF_INET_LOCAL_IN);
@@ -56,7 +56,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par)
56 ctinfo == IP_CT_RELATED_REPLY)); 56 ctinfo == IP_CT_RELATED_REPLY));
57 NF_CT_ASSERT(par->out != NULL); 57 NF_CT_ASSERT(par->out != NULL);
58 58
59 return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC); 59 return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_SRC);
60} 60}
61 61
62static unsigned int 62static unsigned int
@@ -64,7 +64,7 @@ ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par)
64{ 64{
65 struct nf_conn *ct; 65 struct nf_conn *ct;
66 enum ip_conntrack_info ctinfo; 66 enum ip_conntrack_info ctinfo;
67 const struct nf_nat_multi_range_compat *mr = par->targinfo; 67 const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
68 68
69 NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || 69 NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
70 par->hooknum == NF_INET_LOCAL_OUT); 70 par->hooknum == NF_INET_LOCAL_OUT);
@@ -74,12 +74,12 @@ ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par)
74 /* Connection must be valid and new. */ 74 /* Connection must be valid and new. */
75 NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); 75 NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
76 76
77 return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST); 77 return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_DST);
78} 78}
79 79
80static int ipt_snat_checkentry(const struct xt_tgchk_param *par) 80static int ipt_snat_checkentry(const struct xt_tgchk_param *par)
81{ 81{
82 const struct nf_nat_multi_range_compat *mr = par->targinfo; 82 const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
83 83
84 /* Must be a valid range */ 84 /* Must be a valid range */
85 if (mr->rangesize != 1) { 85 if (mr->rangesize != 1) {
@@ -91,7 +91,7 @@ static int ipt_snat_checkentry(const struct xt_tgchk_param *par)
91 91
92static int ipt_dnat_checkentry(const struct xt_tgchk_param *par) 92static int ipt_dnat_checkentry(const struct xt_tgchk_param *par)
93{ 93{
94 const struct nf_nat_multi_range_compat *mr = par->targinfo; 94 const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
95 95
96 /* Must be a valid range */ 96 /* Must be a valid range */
97 if (mr->rangesize != 1) { 97 if (mr->rangesize != 1) {
@@ -105,13 +105,13 @@ static unsigned int
105alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) 105alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
106{ 106{
107 /* Force range to this IP; let proto decide mapping for 107 /* Force range to this IP; let proto decide mapping for
108 per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). 108 per-proto parts (hence not NF_NAT_RANGE_PROTO_SPECIFIED).
109 */ 109 */
110 struct nf_nat_range range; 110 struct nf_nat_ipv4_range range;
111 111
112 range.flags = 0; 112 range.flags = 0;
113 pr_debug("Allocating NULL binding for %p (%pI4)\n", ct, 113 pr_debug("Allocating NULL binding for %p (%pI4)\n", ct,
114 HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC ? 114 HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
115 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip : 115 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip :
116 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip); 116 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
117 117
@@ -140,7 +140,7 @@ int nf_nat_rule_find(struct sk_buff *skb,
140static struct xt_target ipt_snat_reg __read_mostly = { 140static struct xt_target ipt_snat_reg __read_mostly = {
141 .name = "SNAT", 141 .name = "SNAT",
142 .target = ipt_snat_target, 142 .target = ipt_snat_target,
143 .targetsize = sizeof(struct nf_nat_multi_range_compat), 143 .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
144 .table = "nat", 144 .table = "nat",
145 .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN), 145 .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN),
146 .checkentry = ipt_snat_checkentry, 146 .checkentry = ipt_snat_checkentry,
@@ -150,7 +150,7 @@ static struct xt_target ipt_snat_reg __read_mostly = {
150static struct xt_target ipt_dnat_reg __read_mostly = { 150static struct xt_target ipt_dnat_reg __read_mostly = {
151 .name = "DNAT", 151 .name = "DNAT",
152 .target = ipt_dnat_target, 152 .target = ipt_dnat_target,
153 .targetsize = sizeof(struct nf_nat_multi_range_compat), 153 .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
154 .table = "nat", 154 .table = "nat",
155 .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), 155 .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
156 .checkentry = ipt_dnat_checkentry, 156 .checkentry = ipt_dnat_checkentry,
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index 78844d9208f1..d0319f96269f 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -249,25 +249,25 @@ static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off)
249static void ip_nat_sip_expected(struct nf_conn *ct, 249static void ip_nat_sip_expected(struct nf_conn *ct,
250 struct nf_conntrack_expect *exp) 250 struct nf_conntrack_expect *exp)
251{ 251{
252 struct nf_nat_range range; 252 struct nf_nat_ipv4_range range;
253 253
254 /* This must be a fresh one. */ 254 /* This must be a fresh one. */
255 BUG_ON(ct->status & IPS_NAT_DONE_MASK); 255 BUG_ON(ct->status & IPS_NAT_DONE_MASK);
256 256
257 /* For DST manip, map port here to where it's expected. */ 257 /* For DST manip, map port here to where it's expected. */
258 range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); 258 range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
259 range.min = range.max = exp->saved_proto; 259 range.min = range.max = exp->saved_proto;
260 range.min_ip = range.max_ip = exp->saved_ip; 260 range.min_ip = range.max_ip = exp->saved_ip;
261 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST); 261 nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
262 262
263 /* Change src to where master sends to, but only if the connection 263 /* Change src to where master sends to, but only if the connection
264 * actually came from the same source. */ 264 * actually came from the same source. */
265 if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 265 if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip ==
266 ct->master->tuplehash[exp->dir].tuple.src.u3.ip) { 266 ct->master->tuplehash[exp->dir].tuple.src.u3.ip) {
267 range.flags = IP_NAT_RANGE_MAP_IPS; 267 range.flags = NF_NAT_RANGE_MAP_IPS;
268 range.min_ip = range.max_ip 268 range.min_ip = range.max_ip
269 = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; 269 = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
270 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC); 270 nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
271 } 271 }
272} 272}
273 273
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 92900482edea..3828a4229822 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -137,7 +137,7 @@ nf_nat_fn(unsigned int hooknum,
137 return ret; 137 return ret;
138 } else 138 } else
139 pr_debug("Already setup manip %s for ct %p\n", 139 pr_debug("Already setup manip %s for ct %p\n",
140 maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST", 140 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
141 ct); 141 ct);
142 break; 142 break;
143 143
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 466ea8bb7a4d..3569d8ecaeac 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -56,17 +56,17 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
56 56
57 local_bh_disable(); 57 local_bh_disable();
58 orphans = percpu_counter_sum_positive(&tcp_orphan_count); 58 orphans = percpu_counter_sum_positive(&tcp_orphan_count);
59 sockets = percpu_counter_sum_positive(&tcp_sockets_allocated); 59 sockets = proto_sockets_allocated_sum_positive(&tcp_prot);
60 local_bh_enable(); 60 local_bh_enable();
61 61
62 socket_seq_show(seq); 62 socket_seq_show(seq);
63 seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n", 63 seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
64 sock_prot_inuse_get(net, &tcp_prot), orphans, 64 sock_prot_inuse_get(net, &tcp_prot), orphans,
65 tcp_death_row.tw_count, sockets, 65 tcp_death_row.tw_count, sockets,
66 atomic_long_read(&tcp_memory_allocated)); 66 proto_memory_allocated(&tcp_prot));
67 seq_printf(seq, "UDP: inuse %d mem %ld\n", 67 seq_printf(seq, "UDP: inuse %d mem %ld\n",
68 sock_prot_inuse_get(net, &udp_prot), 68 sock_prot_inuse_get(net, &udp_prot),
69 atomic_long_read(&udp_memory_allocated)); 69 proto_memory_allocated(&udp_prot));
70 seq_printf(seq, "UDPLITE: inuse %d\n", 70 seq_printf(seq, "UDPLITE: inuse %d\n",
71 sock_prot_inuse_get(net, &udplite_prot)); 71 sock_prot_inuse_get(net, &udplite_prot));
72 seq_printf(seq, "RAW: inuse %d\n", 72 seq_printf(seq, "RAW: inuse %d\n",
@@ -288,7 +288,7 @@ static void icmpmsg_put(struct seq_file *seq)
288 288
289 count = 0; 289 count = 0;
290 for (i = 0; i < ICMPMSG_MIB_MAX; i++) { 290 for (i = 0; i < ICMPMSG_MIB_MAX; i++) {
291 val = snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, i); 291 val = atomic_long_read(&net->mib.icmpmsg_statistics->mibs[i]);
292 if (val) { 292 if (val) {
293 type[count] = i; 293 type[count] = i;
294 vals[count++] = val; 294 vals[count++] = val;
@@ -307,6 +307,7 @@ static void icmp_put(struct seq_file *seq)
307{ 307{
308 int i; 308 int i;
309 struct net *net = seq->private; 309 struct net *net = seq->private;
310 atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs;
310 311
311 seq_puts(seq, "\nIcmp: InMsgs InErrors"); 312 seq_puts(seq, "\nIcmp: InMsgs InErrors");
312 for (i=0; icmpmibmap[i].name != NULL; i++) 313 for (i=0; icmpmibmap[i].name != NULL; i++)
@@ -319,15 +320,13 @@ static void icmp_put(struct seq_file *seq)
319 snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS)); 320 snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS));
320 for (i=0; icmpmibmap[i].name != NULL; i++) 321 for (i=0; icmpmibmap[i].name != NULL; i++)
321 seq_printf(seq, " %lu", 322 seq_printf(seq, " %lu",
322 snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, 323 atomic_long_read(ptr + icmpmibmap[i].index));
323 icmpmibmap[i].index));
324 seq_printf(seq, " %lu %lu", 324 seq_printf(seq, " %lu %lu",
325 snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), 325 snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
326 snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); 326 snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
327 for (i=0; icmpmibmap[i].name != NULL; i++) 327 for (i=0; icmpmibmap[i].name != NULL; i++)
328 seq_printf(seq, " %lu", 328 seq_printf(seq, " %lu",
329 snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, 329 atomic_long_read(ptr + (icmpmibmap[i].index | 0x100)));
330 icmpmibmap[i].index | 0x100));
331} 330}
332 331
333/* 332/*
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 007e2eb769d3..3ccda5ae8a27 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -292,7 +292,8 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
292{ 292{
293 /* Charge it to the socket. */ 293 /* Charge it to the socket. */
294 294
295 if (ip_queue_rcv_skb(sk, skb) < 0) { 295 ipv4_pktinfo_prepare(skb);
296 if (sock_queue_rcv_skb(sk, skb) < 0) {
296 kfree_skb(skb); 297 kfree_skb(skb);
297 return NET_RX_DROP; 298 return NET_RX_DROP;
298 } 299 }
@@ -327,6 +328,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
327 unsigned int iphlen; 328 unsigned int iphlen;
328 int err; 329 int err;
329 struct rtable *rt = *rtp; 330 struct rtable *rt = *rtp;
331 int hlen, tlen;
330 332
331 if (length > rt->dst.dev->mtu) { 333 if (length > rt->dst.dev->mtu) {
332 ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, 334 ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
@@ -336,12 +338,14 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
336 if (flags&MSG_PROBE) 338 if (flags&MSG_PROBE)
337 goto out; 339 goto out;
338 340
341 hlen = LL_RESERVED_SPACE(rt->dst.dev);
342 tlen = rt->dst.dev->needed_tailroom;
339 skb = sock_alloc_send_skb(sk, 343 skb = sock_alloc_send_skb(sk,
340 length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15, 344 length + hlen + tlen + 15,
341 flags & MSG_DONTWAIT, &err); 345 flags & MSG_DONTWAIT, &err);
342 if (skb == NULL) 346 if (skb == NULL)
343 goto error; 347 goto error;
344 skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev)); 348 skb_reserve(skb, hlen);
345 349
346 skb->priority = sk->sk_priority; 350 skb->priority = sk->sk_priority;
347 skb->mark = sk->sk_mark; 351 skb->mark = sk->sk_mark;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 94cdbc55ca7e..bcacf54e5418 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -109,7 +109,6 @@
109#ifdef CONFIG_SYSCTL 109#ifdef CONFIG_SYSCTL
110#include <linux/sysctl.h> 110#include <linux/sysctl.h>
111#endif 111#endif
112#include <net/atmclip.h>
113#include <net/secure_seq.h> 112#include <net/secure_seq.h>
114 113
115#define RT_FL_TOS(oldflp4) \ 114#define RT_FL_TOS(oldflp4) \
@@ -425,7 +424,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
425 int len, HHUptod; 424 int len, HHUptod;
426 425
427 rcu_read_lock(); 426 rcu_read_lock();
428 n = dst_get_neighbour(&r->dst); 427 n = dst_get_neighbour_noref(&r->dst);
429 HHUptod = (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0; 428 HHUptod = (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0;
430 rcu_read_unlock(); 429 rcu_read_unlock();
431 430
@@ -1115,23 +1114,18 @@ static int slow_chain_length(const struct rtable *head)
1115 1114
1116static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr) 1115static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr)
1117{ 1116{
1118 struct neigh_table *tbl = &arp_tbl;
1119 static const __be32 inaddr_any = 0; 1117 static const __be32 inaddr_any = 0;
1120 struct net_device *dev = dst->dev; 1118 struct net_device *dev = dst->dev;
1121 const __be32 *pkey = daddr; 1119 const __be32 *pkey = daddr;
1122 struct neighbour *n; 1120 struct neighbour *n;
1123 1121
1124#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
1125 if (dev->type == ARPHRD_ATM)
1126 tbl = clip_tbl_hook;
1127#endif
1128 if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) 1122 if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
1129 pkey = &inaddr_any; 1123 pkey = &inaddr_any;
1130 1124
1131 n = __ipv4_neigh_lookup(tbl, dev, *(__force u32 *)pkey); 1125 n = __ipv4_neigh_lookup(&arp_tbl, dev, *(__force u32 *)pkey);
1132 if (n) 1126 if (n)
1133 return n; 1127 return n;
1134 return neigh_create(tbl, pkey, dev); 1128 return neigh_create(&arp_tbl, pkey, dev);
1135} 1129}
1136 1130
1137static int rt_bind_neighbour(struct rtable *rt) 1131static int rt_bind_neighbour(struct rtable *rt)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 90f6544c13e2..51fdbb490437 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -245,7 +245,7 @@ bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok)
245 if (!sysctl_tcp_timestamps) 245 if (!sysctl_tcp_timestamps)
246 return false; 246 return false;
247 247
248 tcp_opt->sack_ok = (options >> 4) & 0x1; 248 tcp_opt->sack_ok = (options & (1 << 4)) ? TCP_SACK_SEEN : 0;
249 *ecn_ok = (options >> 5) & 1; 249 *ecn_ok = (options >> 5) & 1;
250 if (*ecn_ok && !sysctl_tcp_ecn) 250 if (*ecn_ok && !sysctl_tcp_ecn)
251 return false; 251 return false;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 69fd7201129a..4aa7e9dc0cbb 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -14,6 +14,7 @@
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/nsproxy.h> 16#include <linux/nsproxy.h>
17#include <linux/swap.h>
17#include <net/snmp.h> 18#include <net/snmp.h>
18#include <net/icmp.h> 19#include <net/icmp.h>
19#include <net/ip.h> 20#include <net/ip.h>
@@ -23,6 +24,7 @@
23#include <net/cipso_ipv4.h> 24#include <net/cipso_ipv4.h>
24#include <net/inet_frag.h> 25#include <net/inet_frag.h>
25#include <net/ping.h> 26#include <net/ping.h>
27#include <net/tcp_memcontrol.h>
26 28
27static int zero; 29static int zero;
28static int tcp_retr1_max = 255; 30static int tcp_retr1_max = 255;
@@ -73,7 +75,7 @@ static int ipv4_local_port_range(ctl_table *table, int write,
73} 75}
74 76
75 77
76void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high) 78static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high)
77{ 79{
78 gid_t *data = table->data; 80 gid_t *data = table->data;
79 unsigned seq; 81 unsigned seq;
@@ -86,7 +88,7 @@ void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t
86} 88}
87 89
88/* Update system visible IP port range */ 90/* Update system visible IP port range */
89static void set_ping_group_range(struct ctl_table *table, int range[2]) 91static void set_ping_group_range(struct ctl_table *table, gid_t range[2])
90{ 92{
91 gid_t *data = table->data; 93 gid_t *data = table->data;
92 write_seqlock(&sysctl_local_ports.lock); 94 write_seqlock(&sysctl_local_ports.lock);
@@ -174,6 +176,49 @@ static int proc_allowed_congestion_control(ctl_table *ctl,
174 return ret; 176 return ret;
175} 177}
176 178
179static int ipv4_tcp_mem(ctl_table *ctl, int write,
180 void __user *buffer, size_t *lenp,
181 loff_t *ppos)
182{
183 int ret;
184 unsigned long vec[3];
185 struct net *net = current->nsproxy->net_ns;
186#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
187 struct mem_cgroup *memcg;
188#endif
189
190 ctl_table tmp = {
191 .data = &vec,
192 .maxlen = sizeof(vec),
193 .mode = ctl->mode,
194 };
195
196 if (!write) {
197 ctl->data = &net->ipv4.sysctl_tcp_mem;
198 return proc_doulongvec_minmax(ctl, write, buffer, lenp, ppos);
199 }
200
201 ret = proc_doulongvec_minmax(&tmp, write, buffer, lenp, ppos);
202 if (ret)
203 return ret;
204
205#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
206 rcu_read_lock();
207 memcg = mem_cgroup_from_task(current);
208
209 tcp_prot_mem(memcg, vec[0], 0);
210 tcp_prot_mem(memcg, vec[1], 1);
211 tcp_prot_mem(memcg, vec[2], 2);
212 rcu_read_unlock();
213#endif
214
215 net->ipv4.sysctl_tcp_mem[0] = vec[0];
216 net->ipv4.sysctl_tcp_mem[1] = vec[1];
217 net->ipv4.sysctl_tcp_mem[2] = vec[2];
218
219 return 0;
220}
221
177static struct ctl_table ipv4_table[] = { 222static struct ctl_table ipv4_table[] = {
178 { 223 {
179 .procname = "tcp_timestamps", 224 .procname = "tcp_timestamps",
@@ -433,13 +478,6 @@ static struct ctl_table ipv4_table[] = {
433 .proc_handler = proc_dointvec 478 .proc_handler = proc_dointvec
434 }, 479 },
435 { 480 {
436 .procname = "tcp_mem",
437 .data = &sysctl_tcp_mem,
438 .maxlen = sizeof(sysctl_tcp_mem),
439 .mode = 0644,
440 .proc_handler = proc_doulongvec_minmax
441 },
442 {
443 .procname = "tcp_wmem", 481 .procname = "tcp_wmem",
444 .data = &sysctl_tcp_wmem, 482 .data = &sysctl_tcp_wmem,
445 .maxlen = sizeof(sysctl_tcp_wmem), 483 .maxlen = sizeof(sysctl_tcp_wmem),
@@ -721,6 +759,12 @@ static struct ctl_table ipv4_net_table[] = {
721 .mode = 0644, 759 .mode = 0644,
722 .proc_handler = ipv4_ping_group_range, 760 .proc_handler = ipv4_ping_group_range,
723 }, 761 },
762 {
763 .procname = "tcp_mem",
764 .maxlen = sizeof(init_net.ipv4.sysctl_tcp_mem),
765 .mode = 0644,
766 .proc_handler = ipv4_tcp_mem,
767 },
724 { } 768 { }
725}; 769};
726 770
@@ -734,6 +778,7 @@ EXPORT_SYMBOL_GPL(net_ipv4_ctl_path);
734static __net_init int ipv4_sysctl_init_net(struct net *net) 778static __net_init int ipv4_sysctl_init_net(struct net *net)
735{ 779{
736 struct ctl_table *table; 780 struct ctl_table *table;
781 unsigned long limit;
737 782
738 table = ipv4_net_table; 783 table = ipv4_net_table;
739 if (!net_eq(net, &init_net)) { 784 if (!net_eq(net, &init_net)) {
@@ -769,6 +814,12 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
769 814
770 net->ipv4.sysctl_rt_cache_rebuild_count = 4; 815 net->ipv4.sysctl_rt_cache_rebuild_count = 4;
771 816
817 limit = nr_free_buffer_pages() / 8;
818 limit = max(limit, 128UL);
819 net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3;
820 net->ipv4.sysctl_tcp_mem[1] = limit;
821 net->ipv4.sysctl_tcp_mem[2] = net->ipv4.sysctl_tcp_mem[0] * 2;
822
772 net->ipv4.ipv4_hdr = register_net_sysctl_table(net, 823 net->ipv4.ipv4_hdr = register_net_sysctl_table(net,
773 net_ipv4_ctl_path, table); 824 net_ipv4_ctl_path, table);
774 if (net->ipv4.ipv4_hdr == NULL) 825 if (net->ipv4.ipv4_hdr == NULL)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 34f5db1e1c8b..9bcdec3ad772 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -282,11 +282,9 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
282struct percpu_counter tcp_orphan_count; 282struct percpu_counter tcp_orphan_count;
283EXPORT_SYMBOL_GPL(tcp_orphan_count); 283EXPORT_SYMBOL_GPL(tcp_orphan_count);
284 284
285long sysctl_tcp_mem[3] __read_mostly;
286int sysctl_tcp_wmem[3] __read_mostly; 285int sysctl_tcp_wmem[3] __read_mostly;
287int sysctl_tcp_rmem[3] __read_mostly; 286int sysctl_tcp_rmem[3] __read_mostly;
288 287
289EXPORT_SYMBOL(sysctl_tcp_mem);
290EXPORT_SYMBOL(sysctl_tcp_rmem); 288EXPORT_SYMBOL(sysctl_tcp_rmem);
291EXPORT_SYMBOL(sysctl_tcp_wmem); 289EXPORT_SYMBOL(sysctl_tcp_wmem);
292 290
@@ -888,18 +886,18 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset,
888} 886}
889EXPORT_SYMBOL(tcp_sendpage); 887EXPORT_SYMBOL(tcp_sendpage);
890 888
891#define TCP_PAGE(sk) (sk->sk_sndmsg_page) 889static inline int select_size(const struct sock *sk, bool sg)
892#define TCP_OFF(sk) (sk->sk_sndmsg_off)
893
894static inline int select_size(const struct sock *sk, int sg)
895{ 890{
896 const struct tcp_sock *tp = tcp_sk(sk); 891 const struct tcp_sock *tp = tcp_sk(sk);
897 int tmp = tp->mss_cache; 892 int tmp = tp->mss_cache;
898 893
899 if (sg) { 894 if (sg) {
900 if (sk_can_gso(sk)) 895 if (sk_can_gso(sk)) {
901 tmp = 0; 896 /* Small frames wont use a full page:
902 else { 897 * Payload will immediately follow tcp header.
898 */
899 tmp = SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER);
900 } else {
903 int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER); 901 int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
904 902
905 if (tmp >= pgbreak && 903 if (tmp >= pgbreak &&
@@ -917,9 +915,9 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
917 struct iovec *iov; 915 struct iovec *iov;
918 struct tcp_sock *tp = tcp_sk(sk); 916 struct tcp_sock *tp = tcp_sk(sk);
919 struct sk_buff *skb; 917 struct sk_buff *skb;
920 int iovlen, flags; 918 int iovlen, flags, err, copied;
921 int mss_now, size_goal; 919 int mss_now, size_goal;
922 int sg, err, copied; 920 bool sg;
923 long timeo; 921 long timeo;
924 922
925 lock_sock(sk); 923 lock_sock(sk);
@@ -946,7 +944,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
946 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) 944 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
947 goto out_err; 945 goto out_err;
948 946
949 sg = sk->sk_route_caps & NETIF_F_SG; 947 sg = !!(sk->sk_route_caps & NETIF_F_SG);
950 948
951 while (--iovlen >= 0) { 949 while (--iovlen >= 0) {
952 size_t seglen = iov->iov_len; 950 size_t seglen = iov->iov_len;
@@ -1005,8 +1003,13 @@ new_segment:
1005 } else { 1003 } else {
1006 int merge = 0; 1004 int merge = 0;
1007 int i = skb_shinfo(skb)->nr_frags; 1005 int i = skb_shinfo(skb)->nr_frags;
1008 struct page *page = TCP_PAGE(sk); 1006 struct page *page = sk->sk_sndmsg_page;
1009 int off = TCP_OFF(sk); 1007 int off;
1008
1009 if (page && page_count(page) == 1)
1010 sk->sk_sndmsg_off = 0;
1011
1012 off = sk->sk_sndmsg_off;
1010 1013
1011 if (skb_can_coalesce(skb, i, page, off) && 1014 if (skb_can_coalesce(skb, i, page, off) &&
1012 off != PAGE_SIZE) { 1015 off != PAGE_SIZE) {
@@ -1023,7 +1026,7 @@ new_segment:
1023 } else if (page) { 1026 } else if (page) {
1024 if (off == PAGE_SIZE) { 1027 if (off == PAGE_SIZE) {
1025 put_page(page); 1028 put_page(page);
1026 TCP_PAGE(sk) = page = NULL; 1029 sk->sk_sndmsg_page = page = NULL;
1027 off = 0; 1030 off = 0;
1028 } 1031 }
1029 } else 1032 } else
@@ -1049,9 +1052,9 @@ new_segment:
1049 /* If this page was new, give it to the 1052 /* If this page was new, give it to the
1050 * socket so it does not get leaked. 1053 * socket so it does not get leaked.
1051 */ 1054 */
1052 if (!TCP_PAGE(sk)) { 1055 if (!sk->sk_sndmsg_page) {
1053 TCP_PAGE(sk) = page; 1056 sk->sk_sndmsg_page = page;
1054 TCP_OFF(sk) = 0; 1057 sk->sk_sndmsg_off = 0;
1055 } 1058 }
1056 goto do_error; 1059 goto do_error;
1057 } 1060 }
@@ -1061,15 +1064,15 @@ new_segment:
1061 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); 1064 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1062 } else { 1065 } else {
1063 skb_fill_page_desc(skb, i, page, off, copy); 1066 skb_fill_page_desc(skb, i, page, off, copy);
1064 if (TCP_PAGE(sk)) { 1067 if (sk->sk_sndmsg_page) {
1065 get_page(page); 1068 get_page(page);
1066 } else if (off + copy < PAGE_SIZE) { 1069 } else if (off + copy < PAGE_SIZE) {
1067 get_page(page); 1070 get_page(page);
1068 TCP_PAGE(sk) = page; 1071 sk->sk_sndmsg_page = page;
1069 } 1072 }
1070 } 1073 }
1071 1074
1072 TCP_OFF(sk) = off + copy; 1075 sk->sk_sndmsg_off = off + copy;
1073 } 1076 }
1074 1077
1075 if (!copied) 1078 if (!copied)
@@ -2653,7 +2656,8 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
2653EXPORT_SYMBOL(compat_tcp_getsockopt); 2656EXPORT_SYMBOL(compat_tcp_getsockopt);
2654#endif 2657#endif
2655 2658
2656struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features) 2659struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
2660 netdev_features_t features)
2657{ 2661{
2658 struct sk_buff *segs = ERR_PTR(-EINVAL); 2662 struct sk_buff *segs = ERR_PTR(-EINVAL);
2659 struct tcphdr *th; 2663 struct tcphdr *th;
@@ -3272,14 +3276,9 @@ void __init tcp_init(void)
3272 sysctl_tcp_max_orphans = cnt / 2; 3276 sysctl_tcp_max_orphans = cnt / 2;
3273 sysctl_max_syn_backlog = max(128, cnt / 256); 3277 sysctl_max_syn_backlog = max(128, cnt / 256);
3274 3278
3275 limit = nr_free_buffer_pages() / 8;
3276 limit = max(limit, 128UL);
3277 sysctl_tcp_mem[0] = limit / 4 * 3;
3278 sysctl_tcp_mem[1] = limit;
3279 sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;
3280
3281 /* Set per-socket limits to no more than 1/128 the pressure threshold */ 3279 /* Set per-socket limits to no more than 1/128 the pressure threshold */
3282 limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7); 3280 limit = ((unsigned long)init_net.ipv4.sysctl_tcp_mem[1])
3281 << (PAGE_SHIFT - 7);
3283 max_share = min(4UL*1024*1024, limit); 3282 max_share = min(4UL*1024*1024, limit);
3284 3283
3285 sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; 3284 sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 850c737e08e2..fc6d475f488f 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -292,7 +292,7 @@ int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
292 left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && 292 left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
293 left * tp->mss_cache < sk->sk_gso_max_size) 293 left * tp->mss_cache < sk->sk_gso_max_size)
294 return 1; 294 return 1;
295 return left <= tcp_max_burst(tp); 295 return left <= tcp_max_tso_deferred_mss(tp);
296} 296}
297EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited); 297EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited);
298 298
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 939edb3b8e4d..8cd357a8be79 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -34,11 +34,23 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
34 tcp_get_info(sk, info); 34 tcp_get_info(sk, info);
35} 35}
36 36
37static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
38 struct inet_diag_req *r, struct nlattr *bc)
39{
40 inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r, bc);
41}
42
43static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
44 struct inet_diag_req *req)
45{
46 return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req);
47}
48
37static const struct inet_diag_handler tcp_diag_handler = { 49static const struct inet_diag_handler tcp_diag_handler = {
38 .idiag_hashinfo = &tcp_hashinfo, 50 .dump = tcp_diag_dump,
51 .dump_one = tcp_diag_dump_one,
39 .idiag_get_info = tcp_diag_get_info, 52 .idiag_get_info = tcp_diag_get_info,
40 .idiag_type = TCPDIAG_GETSOCK, 53 .idiag_type = IPPROTO_TCP,
41 .idiag_info_size = sizeof(struct tcp_info),
42}; 54};
43 55
44static int __init tcp_diag_init(void) 56static int __init tcp_diag_init(void)
@@ -54,4 +66,4 @@ static void __exit tcp_diag_exit(void)
54module_init(tcp_diag_init); 66module_init(tcp_diag_init);
55module_exit(tcp_diag_exit); 67module_exit(tcp_diag_exit);
56MODULE_LICENSE("GPL"); 68MODULE_LICENSE("GPL");
57MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_INET_DIAG, TCPDIAG_GETSOCK); 69MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-6 /* AF_INET - IPPROTO_TCP */);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 52b5c2d0ecd0..2877c3e09587 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -322,7 +322,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
322 /* Check #1 */ 322 /* Check #1 */
323 if (tp->rcv_ssthresh < tp->window_clamp && 323 if (tp->rcv_ssthresh < tp->window_clamp &&
324 (int)tp->rcv_ssthresh < tcp_space(sk) && 324 (int)tp->rcv_ssthresh < tcp_space(sk) &&
325 !tcp_memory_pressure) { 325 !sk_under_memory_pressure(sk)) {
326 int incr; 326 int incr;
327 327
328 /* Check #2. Increase window, if skb with such overhead 328 /* Check #2. Increase window, if skb with such overhead
@@ -411,8 +411,8 @@ static void tcp_clamp_window(struct sock *sk)
411 411
412 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && 412 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
413 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && 413 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
414 !tcp_memory_pressure && 414 !sk_under_memory_pressure(sk) &&
415 atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { 415 sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
416 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), 416 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
417 sysctl_tcp_rmem[2]); 417 sysctl_tcp_rmem[2]);
418 } 418 }
@@ -865,13 +865,13 @@ static void tcp_disable_fack(struct tcp_sock *tp)
865 /* RFC3517 uses different metric in lost marker => reset on change */ 865 /* RFC3517 uses different metric in lost marker => reset on change */
866 if (tcp_is_fack(tp)) 866 if (tcp_is_fack(tp))
867 tp->lost_skb_hint = NULL; 867 tp->lost_skb_hint = NULL;
868 tp->rx_opt.sack_ok &= ~2; 868 tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED;
869} 869}
870 870
871/* Take a notice that peer is sending D-SACKs */ 871/* Take a notice that peer is sending D-SACKs */
872static void tcp_dsack_seen(struct tcp_sock *tp) 872static void tcp_dsack_seen(struct tcp_sock *tp)
873{ 873{
874 tp->rx_opt.sack_ok |= 4; 874 tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
875} 875}
876 876
877/* Initialize metrics on socket. */ 877/* Initialize metrics on socket. */
@@ -2663,7 +2663,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
2663 tp->snd_ssthresh, tp->prior_ssthresh, 2663 tp->snd_ssthresh, tp->prior_ssthresh,
2664 tp->packets_out); 2664 tp->packets_out);
2665 } 2665 }
2666#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 2666#if IS_ENABLED(CONFIG_IPV6)
2667 else if (sk->sk_family == AF_INET6) { 2667 else if (sk->sk_family == AF_INET6) {
2668 struct ipv6_pinfo *np = inet6_sk(sk); 2668 struct ipv6_pinfo *np = inet6_sk(sk);
2669 printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", 2669 printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
@@ -2858,7 +2858,7 @@ static void tcp_try_keep_open(struct sock *sk)
2858 struct tcp_sock *tp = tcp_sk(sk); 2858 struct tcp_sock *tp = tcp_sk(sk);
2859 int state = TCP_CA_Open; 2859 int state = TCP_CA_Open;
2860 2860
2861 if (tcp_left_out(tp) || tcp_any_retrans_done(sk) || tp->undo_marker) 2861 if (tcp_left_out(tp) || tcp_any_retrans_done(sk))
2862 state = TCP_CA_Disorder; 2862 state = TCP_CA_Disorder;
2863 2863
2864 if (inet_csk(sk)->icsk_ca_state != state) { 2864 if (inet_csk(sk)->icsk_ca_state != state) {
@@ -2881,7 +2881,8 @@ static void tcp_try_to_open(struct sock *sk, int flag)
2881 2881
2882 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { 2882 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2883 tcp_try_keep_open(sk); 2883 tcp_try_keep_open(sk);
2884 tcp_moderate_cwnd(tp); 2884 if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
2885 tcp_moderate_cwnd(tp);
2885 } else { 2886 } else {
2886 tcp_cwnd_down(sk, flag); 2887 tcp_cwnd_down(sk, flag);
2887 } 2888 }
@@ -3009,11 +3010,11 @@ static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
3009 * tcp_xmit_retransmit_queue(). 3010 * tcp_xmit_retransmit_queue().
3010 */ 3011 */
3011static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, 3012static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
3012 int newly_acked_sacked, int flag) 3013 int newly_acked_sacked, bool is_dupack,
3014 int flag)
3013{ 3015{
3014 struct inet_connection_sock *icsk = inet_csk(sk); 3016 struct inet_connection_sock *icsk = inet_csk(sk);
3015 struct tcp_sock *tp = tcp_sk(sk); 3017 struct tcp_sock *tp = tcp_sk(sk);
3016 int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
3017 int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && 3018 int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
3018 (tcp_fackets_out(tp) > tp->reordering)); 3019 (tcp_fackets_out(tp) > tp->reordering));
3019 int fast_rexmit = 0, mib_idx; 3020 int fast_rexmit = 0, mib_idx;
@@ -3066,17 +3067,6 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
3066 } 3067 }
3067 break; 3068 break;
3068 3069
3069 case TCP_CA_Disorder:
3070 tcp_try_undo_dsack(sk);
3071 if (!tp->undo_marker ||
3072 /* For SACK case do not Open to allow to undo
3073 * catching for all duplicate ACKs. */
3074 tcp_is_reno(tp) || tp->snd_una != tp->high_seq) {
3075 tp->undo_marker = 0;
3076 tcp_set_ca_state(sk, TCP_CA_Open);
3077 }
3078 break;
3079
3080 case TCP_CA_Recovery: 3070 case TCP_CA_Recovery:
3081 if (tcp_is_reno(tp)) 3071 if (tcp_is_reno(tp))
3082 tcp_reset_reno_sack(tp); 3072 tcp_reset_reno_sack(tp);
@@ -3117,7 +3107,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
3117 tcp_add_reno_sack(sk); 3107 tcp_add_reno_sack(sk);
3118 } 3108 }
3119 3109
3120 if (icsk->icsk_ca_state == TCP_CA_Disorder) 3110 if (icsk->icsk_ca_state <= TCP_CA_Disorder)
3121 tcp_try_undo_dsack(sk); 3111 tcp_try_undo_dsack(sk);
3122 3112
3123 if (!tcp_time_to_recover(sk)) { 3113 if (!tcp_time_to_recover(sk)) {
@@ -3681,10 +3671,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3681 u32 prior_snd_una = tp->snd_una; 3671 u32 prior_snd_una = tp->snd_una;
3682 u32 ack_seq = TCP_SKB_CB(skb)->seq; 3672 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3683 u32 ack = TCP_SKB_CB(skb)->ack_seq; 3673 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3674 bool is_dupack = false;
3684 u32 prior_in_flight; 3675 u32 prior_in_flight;
3685 u32 prior_fackets; 3676 u32 prior_fackets;
3686 int prior_packets; 3677 int prior_packets;
3687 int prior_sacked = tp->sacked_out; 3678 int prior_sacked = tp->sacked_out;
3679 int pkts_acked = 0;
3688 int newly_acked_sacked = 0; 3680 int newly_acked_sacked = 0;
3689 int frto_cwnd = 0; 3681 int frto_cwnd = 0;
3690 3682
@@ -3757,6 +3749,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3757 /* See if we can take anything off of the retransmit queue. */ 3749 /* See if we can take anything off of the retransmit queue. */
3758 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); 3750 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
3759 3751
3752 pkts_acked = prior_packets - tp->packets_out;
3760 newly_acked_sacked = (prior_packets - prior_sacked) - 3753 newly_acked_sacked = (prior_packets - prior_sacked) -
3761 (tp->packets_out - tp->sacked_out); 3754 (tp->packets_out - tp->sacked_out);
3762 3755
@@ -3771,8 +3764,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3771 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && 3764 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
3772 tcp_may_raise_cwnd(sk, flag)) 3765 tcp_may_raise_cwnd(sk, flag))
3773 tcp_cong_avoid(sk, ack, prior_in_flight); 3766 tcp_cong_avoid(sk, ack, prior_in_flight);
3774 tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, 3767 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
3775 newly_acked_sacked, flag); 3768 tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
3769 is_dupack, flag);
3776 } else { 3770 } else {
3777 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) 3771 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
3778 tcp_cong_avoid(sk, ack, prior_in_flight); 3772 tcp_cong_avoid(sk, ack, prior_in_flight);
@@ -3784,6 +3778,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3784 return 1; 3778 return 1;
3785 3779
3786no_queue: 3780no_queue:
3781 /* If data was DSACKed, see if we can undo a cwnd reduction. */
3782 if (flag & FLAG_DSACKING_ACK)
3783 tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
3784 is_dupack, flag);
3787 /* If this ack opens up a zero window, clear backoff. It was 3785 /* If this ack opens up a zero window, clear backoff. It was
3788 * being used to time the probes, and is probably far higher than 3786 * being used to time the probes, and is probably far higher than
3789 * it needs to be for normal retransmission. 3787 * it needs to be for normal retransmission.
@@ -3797,10 +3795,14 @@ invalid_ack:
3797 return -1; 3795 return -1;
3798 3796
3799old_ack: 3797old_ack:
3798 /* If data was SACKed, tag it and see if we should send more data.
3799 * If data was DSACKed, see if we can undo a cwnd reduction.
3800 */
3800 if (TCP_SKB_CB(skb)->sacked) { 3801 if (TCP_SKB_CB(skb)->sacked) {
3801 tcp_sacktag_write_queue(sk, skb, prior_snd_una); 3802 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3802 if (icsk->icsk_ca_state == TCP_CA_Open) 3803 newly_acked_sacked = tp->sacked_out - prior_sacked;
3803 tcp_try_keep_open(sk); 3804 tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
3805 is_dupack, flag);
3804 } 3806 }
3805 3807
3806 SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt); 3808 SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
@@ -3876,7 +3878,7 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o
3876 case TCPOPT_SACK_PERM: 3878 case TCPOPT_SACK_PERM:
3877 if (opsize == TCPOLEN_SACK_PERM && th->syn && 3879 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
3878 !estab && sysctl_tcp_sack) { 3880 !estab && sysctl_tcp_sack) {
3879 opt_rx->sack_ok = 1; 3881 opt_rx->sack_ok = TCP_SACK_SEEN;
3880 tcp_sack_reset(opt_rx); 3882 tcp_sack_reset(opt_rx);
3881 } 3883 }
3882 break; 3884 break;
@@ -4864,7 +4866,7 @@ static int tcp_prune_queue(struct sock *sk)
4864 4866
4865 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) 4867 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
4866 tcp_clamp_window(sk); 4868 tcp_clamp_window(sk);
4867 else if (tcp_memory_pressure) 4869 else if (sk_under_memory_pressure(sk))
4868 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); 4870 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
4869 4871
4870 tcp_collapse_ofo_queue(sk); 4872 tcp_collapse_ofo_queue(sk);
@@ -4930,11 +4932,11 @@ static int tcp_should_expand_sndbuf(const struct sock *sk)
4930 return 0; 4932 return 0;
4931 4933
4932 /* If we are under global TCP memory pressure, do not expand. */ 4934 /* If we are under global TCP memory pressure, do not expand. */
4933 if (tcp_memory_pressure) 4935 if (sk_under_memory_pressure(sk))
4934 return 0; 4936 return 0;
4935 4937
4936 /* If we are under soft global TCP memory pressure, do not expand. */ 4938 /* If we are under soft global TCP memory pressure, do not expand. */
4937 if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0]) 4939 if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))
4938 return 0; 4940 return 0;
4939 4941
4940 /* If we filled the congestion window, do not expand. */ 4942 /* If we filled the congestion window, do not expand. */
@@ -5809,6 +5811,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5809 goto discard; 5811 goto discard;
5810 5812
5811 if (th->syn) { 5813 if (th->syn) {
5814 if (th->fin)
5815 goto discard;
5812 if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) 5816 if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
5813 return 1; 5817 return 1;
5814 5818
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a9db4b1a2215..1eb4ad57670e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -73,6 +73,7 @@
73#include <net/xfrm.h> 73#include <net/xfrm.h>
74#include <net/netdma.h> 74#include <net/netdma.h>
75#include <net/secure_seq.h> 75#include <net/secure_seq.h>
76#include <net/tcp_memcontrol.h>
76 77
77#include <linux/inet.h> 78#include <linux/inet.h>
78#include <linux/ipv6.h> 79#include <linux/ipv6.h>
@@ -1511,6 +1512,7 @@ exit:
1511 return NULL; 1512 return NULL;
1512put_and_exit: 1513put_and_exit:
1513 tcp_clear_xmit_timers(newsk); 1514 tcp_clear_xmit_timers(newsk);
1515 tcp_cleanup_congestion_control(newsk);
1514 bh_unlock_sock(newsk); 1516 bh_unlock_sock(newsk);
1515 sock_put(newsk); 1517 sock_put(newsk);
1516 goto exit; 1518 goto exit;
@@ -1916,7 +1918,8 @@ static int tcp_v4_init_sock(struct sock *sk)
1916 sk->sk_rcvbuf = sysctl_tcp_rmem[1]; 1918 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1917 1919
1918 local_bh_disable(); 1920 local_bh_disable();
1919 percpu_counter_inc(&tcp_sockets_allocated); 1921 sock_update_memcg(sk);
1922 sk_sockets_allocated_inc(sk);
1920 local_bh_enable(); 1923 local_bh_enable();
1921 1924
1922 return 0; 1925 return 0;
@@ -1972,7 +1975,8 @@ void tcp_v4_destroy_sock(struct sock *sk)
1972 tp->cookie_values = NULL; 1975 tp->cookie_values = NULL;
1973 } 1976 }
1974 1977
1975 percpu_counter_dec(&tcp_sockets_allocated); 1978 sk_sockets_allocated_dec(sk);
1979 sock_release_memcg(sk);
1976} 1980}
1977EXPORT_SYMBOL(tcp_v4_destroy_sock); 1981EXPORT_SYMBOL(tcp_v4_destroy_sock);
1978 1982
@@ -2619,7 +2623,6 @@ struct proto tcp_prot = {
2619 .orphan_count = &tcp_orphan_count, 2623 .orphan_count = &tcp_orphan_count,
2620 .memory_allocated = &tcp_memory_allocated, 2624 .memory_allocated = &tcp_memory_allocated,
2621 .memory_pressure = &tcp_memory_pressure, 2625 .memory_pressure = &tcp_memory_pressure,
2622 .sysctl_mem = sysctl_tcp_mem,
2623 .sysctl_wmem = sysctl_tcp_wmem, 2626 .sysctl_wmem = sysctl_tcp_wmem,
2624 .sysctl_rmem = sysctl_tcp_rmem, 2627 .sysctl_rmem = sysctl_tcp_rmem,
2625 .max_header = MAX_TCP_HEADER, 2628 .max_header = MAX_TCP_HEADER,
@@ -2633,10 +2636,14 @@ struct proto tcp_prot = {
2633 .compat_setsockopt = compat_tcp_setsockopt, 2636 .compat_setsockopt = compat_tcp_setsockopt,
2634 .compat_getsockopt = compat_tcp_getsockopt, 2637 .compat_getsockopt = compat_tcp_getsockopt,
2635#endif 2638#endif
2639#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
2640 .init_cgroup = tcp_init_cgroup,
2641 .destroy_cgroup = tcp_destroy_cgroup,
2642 .proto_cgroup = tcp_proto_cgroup,
2643#endif
2636}; 2644};
2637EXPORT_SYMBOL(tcp_prot); 2645EXPORT_SYMBOL(tcp_prot);
2638 2646
2639
2640static int __net_init tcp_sk_init(struct net *net) 2647static int __net_init tcp_sk_init(struct net *net)
2641{ 2648{
2642 return inet_ctl_sock_create(&net->ipv4.tcp_sock, 2649 return inet_ctl_sock_create(&net->ipv4.tcp_sock,
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
new file mode 100644
index 000000000000..7fed04f875c1
--- /dev/null
+++ b/net/ipv4/tcp_memcontrol.c
@@ -0,0 +1,272 @@
1#include <net/tcp.h>
2#include <net/tcp_memcontrol.h>
3#include <net/sock.h>
4#include <net/ip.h>
5#include <linux/nsproxy.h>
6#include <linux/memcontrol.h>
7#include <linux/module.h>
8
9static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft);
10static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft,
11 const char *buffer);
12static int tcp_cgroup_reset(struct cgroup *cont, unsigned int event);
13
14static struct cftype tcp_files[] = {
15 {
16 .name = "kmem.tcp.limit_in_bytes",
17 .write_string = tcp_cgroup_write,
18 .read_u64 = tcp_cgroup_read,
19 .private = RES_LIMIT,
20 },
21 {
22 .name = "kmem.tcp.usage_in_bytes",
23 .read_u64 = tcp_cgroup_read,
24 .private = RES_USAGE,
25 },
26 {
27 .name = "kmem.tcp.failcnt",
28 .private = RES_FAILCNT,
29 .trigger = tcp_cgroup_reset,
30 .read_u64 = tcp_cgroup_read,
31 },
32 {
33 .name = "kmem.tcp.max_usage_in_bytes",
34 .private = RES_MAX_USAGE,
35 .trigger = tcp_cgroup_reset,
36 .read_u64 = tcp_cgroup_read,
37 },
38};
39
40static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto)
41{
42 return container_of(cg_proto, struct tcp_memcontrol, cg_proto);
43}
44
45static void memcg_tcp_enter_memory_pressure(struct sock *sk)
46{
47 if (sk->sk_cgrp->memory_pressure)
48 *sk->sk_cgrp->memory_pressure = 1;
49}
50EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure);
51
52int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss)
53{
54 /*
55 * The root cgroup does not use res_counters, but rather,
56 * rely on the data already collected by the network
57 * subsystem
58 */
59 struct res_counter *res_parent = NULL;
60 struct cg_proto *cg_proto, *parent_cg;
61 struct tcp_memcontrol *tcp;
62 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
63 struct mem_cgroup *parent = parent_mem_cgroup(memcg);
64 struct net *net = current->nsproxy->net_ns;
65
66 cg_proto = tcp_prot.proto_cgroup(memcg);
67 if (!cg_proto)
68 goto create_files;
69
70 tcp = tcp_from_cgproto(cg_proto);
71
72 tcp->tcp_prot_mem[0] = net->ipv4.sysctl_tcp_mem[0];
73 tcp->tcp_prot_mem[1] = net->ipv4.sysctl_tcp_mem[1];
74 tcp->tcp_prot_mem[2] = net->ipv4.sysctl_tcp_mem[2];
75 tcp->tcp_memory_pressure = 0;
76
77 parent_cg = tcp_prot.proto_cgroup(parent);
78 if (parent_cg)
79 res_parent = parent_cg->memory_allocated;
80
81 res_counter_init(&tcp->tcp_memory_allocated, res_parent);
82 percpu_counter_init(&tcp->tcp_sockets_allocated, 0);
83
84 cg_proto->enter_memory_pressure = memcg_tcp_enter_memory_pressure;
85 cg_proto->memory_pressure = &tcp->tcp_memory_pressure;
86 cg_proto->sysctl_mem = tcp->tcp_prot_mem;
87 cg_proto->memory_allocated = &tcp->tcp_memory_allocated;
88 cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated;
89 cg_proto->memcg = memcg;
90
91create_files:
92 return cgroup_add_files(cgrp, ss, tcp_files,
93 ARRAY_SIZE(tcp_files));
94}
95EXPORT_SYMBOL(tcp_init_cgroup);
96
97void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss)
98{
99 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
100 struct cg_proto *cg_proto;
101 struct tcp_memcontrol *tcp;
102 u64 val;
103
104 cg_proto = tcp_prot.proto_cgroup(memcg);
105 if (!cg_proto)
106 return;
107
108 tcp = tcp_from_cgproto(cg_proto);
109 percpu_counter_destroy(&tcp->tcp_sockets_allocated);
110
111 val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE);
112
113 if (val != RESOURCE_MAX)
114 jump_label_dec(&memcg_socket_limit_enabled);
115}
116EXPORT_SYMBOL(tcp_destroy_cgroup);
117
118static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
119{
120 struct net *net = current->nsproxy->net_ns;
121 struct tcp_memcontrol *tcp;
122 struct cg_proto *cg_proto;
123 u64 old_lim;
124 int i;
125 int ret;
126
127 cg_proto = tcp_prot.proto_cgroup(memcg);
128 if (!cg_proto)
129 return -EINVAL;
130
131 if (val > RESOURCE_MAX)
132 val = RESOURCE_MAX;
133
134 tcp = tcp_from_cgproto(cg_proto);
135
136 old_lim = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);
137 ret = res_counter_set_limit(&tcp->tcp_memory_allocated, val);
138 if (ret)
139 return ret;
140
141 for (i = 0; i < 3; i++)
142 tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT,
143 net->ipv4.sysctl_tcp_mem[i]);
144
145 if (val == RESOURCE_MAX && old_lim != RESOURCE_MAX)
146 jump_label_dec(&memcg_socket_limit_enabled);
147 else if (old_lim == RESOURCE_MAX && val != RESOURCE_MAX)
148 jump_label_inc(&memcg_socket_limit_enabled);
149
150 return 0;
151}
152
153static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft,
154 const char *buffer)
155{
156 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
157 unsigned long long val;
158 int ret = 0;
159
160 switch (cft->private) {
161 case RES_LIMIT:
162 /* see memcontrol.c */
163 ret = res_counter_memparse_write_strategy(buffer, &val);
164 if (ret)
165 break;
166 ret = tcp_update_limit(memcg, val);
167 break;
168 default:
169 ret = -EINVAL;
170 break;
171 }
172 return ret;
173}
174
175static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val)
176{
177 struct tcp_memcontrol *tcp;
178 struct cg_proto *cg_proto;
179
180 cg_proto = tcp_prot.proto_cgroup(memcg);
181 if (!cg_proto)
182 return default_val;
183
184 tcp = tcp_from_cgproto(cg_proto);
185 return res_counter_read_u64(&tcp->tcp_memory_allocated, type);
186}
187
188static u64 tcp_read_usage(struct mem_cgroup *memcg)
189{
190 struct tcp_memcontrol *tcp;
191 struct cg_proto *cg_proto;
192
193 cg_proto = tcp_prot.proto_cgroup(memcg);
194 if (!cg_proto)
195 return atomic_long_read(&tcp_memory_allocated) << PAGE_SHIFT;
196
197 tcp = tcp_from_cgproto(cg_proto);
198 return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE);
199}
200
201static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft)
202{
203 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
204 u64 val;
205
206 switch (cft->private) {
207 case RES_LIMIT:
208 val = tcp_read_stat(memcg, RES_LIMIT, RESOURCE_MAX);
209 break;
210 case RES_USAGE:
211 val = tcp_read_usage(memcg);
212 break;
213 case RES_FAILCNT:
214 case RES_MAX_USAGE:
215 val = tcp_read_stat(memcg, cft->private, 0);
216 break;
217 default:
218 BUG();
219 }
220 return val;
221}
222
223static int tcp_cgroup_reset(struct cgroup *cont, unsigned int event)
224{
225 struct mem_cgroup *memcg;
226 struct tcp_memcontrol *tcp;
227 struct cg_proto *cg_proto;
228
229 memcg = mem_cgroup_from_cont(cont);
230 cg_proto = tcp_prot.proto_cgroup(memcg);
231 if (!cg_proto)
232 return 0;
233 tcp = tcp_from_cgproto(cg_proto);
234
235 switch (event) {
236 case RES_MAX_USAGE:
237 res_counter_reset_max(&tcp->tcp_memory_allocated);
238 break;
239 case RES_FAILCNT:
240 res_counter_reset_failcnt(&tcp->tcp_memory_allocated);
241 break;
242 }
243
244 return 0;
245}
246
247unsigned long long tcp_max_memory(const struct mem_cgroup *memcg)
248{
249 struct tcp_memcontrol *tcp;
250 struct cg_proto *cg_proto;
251
252 cg_proto = tcp_prot.proto_cgroup((struct mem_cgroup *)memcg);
253 if (!cg_proto)
254 return 0;
255
256 tcp = tcp_from_cgproto(cg_proto);
257 return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);
258}
259
260void tcp_prot_mem(struct mem_cgroup *memcg, long val, int idx)
261{
262 struct tcp_memcontrol *tcp;
263 struct cg_proto *cg_proto;
264
265 cg_proto = tcp_prot.proto_cgroup(memcg);
266 if (!cg_proto)
267 return;
268
269 tcp = tcp_from_cgproto(cg_proto);
270
271 tcp->tcp_prot_mem[idx] = val;
272}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 66363b689ad6..550e755747e0 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -336,15 +336,15 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
336 tcptw->tw_ts_recent = tp->rx_opt.ts_recent; 336 tcptw->tw_ts_recent = tp->rx_opt.ts_recent;
337 tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; 337 tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
338 338
339#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 339#if IS_ENABLED(CONFIG_IPV6)
340 if (tw->tw_family == PF_INET6) { 340 if (tw->tw_family == PF_INET6) {
341 struct ipv6_pinfo *np = inet6_sk(sk); 341 struct ipv6_pinfo *np = inet6_sk(sk);
342 struct inet6_timewait_sock *tw6; 342 struct inet6_timewait_sock *tw6;
343 343
344 tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot); 344 tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
345 tw6 = inet6_twsk((struct sock *)tw); 345 tw6 = inet6_twsk((struct sock *)tw);
346 ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr); 346 tw6->tw_v6_daddr = np->daddr;
347 ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr); 347 tw6->tw_v6_rcv_saddr = np->rcv_saddr;
348 tw->tw_tclass = np->tclass; 348 tw->tw_tclass = np->tclass;
349 tw->tw_ipv6only = np->ipv6only; 349 tw->tw_ipv6only = np->ipv6only;
350 } 350 }
@@ -425,7 +425,7 @@ static inline void TCP_ECN_openreq_child(struct tcp_sock *tp,
425 */ 425 */
426struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb) 426struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb)
427{ 427{
428 struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC); 428 struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC);
429 429
430 if (newsk != NULL) { 430 if (newsk != NULL) {
431 const struct inet_request_sock *ireq = inet_rsk(req); 431 const struct inet_request_sock *ireq = inet_rsk(req);
@@ -495,7 +495,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
495 newtp->frto_counter = 0; 495 newtp->frto_counter = 0;
496 newtp->frto_highmark = 0; 496 newtp->frto_highmark = 0;
497 497
498 newicsk->icsk_ca_ops = &tcp_init_congestion_ops; 498 if (newicsk->icsk_ca_ops != &tcp_init_congestion_ops &&
499 !try_module_get(newicsk->icsk_ca_ops->owner))
500 newicsk->icsk_ca_ops = &tcp_init_congestion_ops;
499 501
500 tcp_set_ca_state(newsk, TCP_CA_Open); 502 tcp_set_ca_state(newsk, TCP_CA_Open);
501 tcp_init_xmit_timers(newsk); 503 tcp_init_xmit_timers(newsk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 63170e297540..8c8de2780c7a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1093,6 +1093,13 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
1093{ 1093{
1094 int i, k, eat; 1094 int i, k, eat;
1095 1095
1096 eat = min_t(int, len, skb_headlen(skb));
1097 if (eat) {
1098 __skb_pull(skb, eat);
1099 len -= eat;
1100 if (!len)
1101 return;
1102 }
1096 eat = len; 1103 eat = len;
1097 k = 0; 1104 k = 0;
1098 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1105 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
@@ -1124,11 +1131,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
1124 if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) 1131 if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
1125 return -ENOMEM; 1132 return -ENOMEM;
1126 1133
1127 /* If len == headlen, we avoid __skb_pull to preserve alignment. */ 1134 __pskb_trim_head(skb, len);
1128 if (unlikely(len < skb_headlen(skb)))
1129 __skb_pull(skb, len);
1130 else
1131 __pskb_trim_head(skb, len - skb_headlen(skb));
1132 1135
1133 TCP_SKB_CB(skb)->seq += len; 1136 TCP_SKB_CB(skb)->seq += len;
1134 skb->ip_summed = CHECKSUM_PARTIAL; 1137 skb->ip_summed = CHECKSUM_PARTIAL;
@@ -1581,7 +1584,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1581 * frame, so if we have space for more than 3 frames 1584 * frame, so if we have space for more than 3 frames
1582 * then send now. 1585 * then send now.
1583 */ 1586 */
1584 if (limit > tcp_max_burst(tp) * tp->mss_cache) 1587 if (limit > tcp_max_tso_deferred_mss(tp) * tp->mss_cache)
1585 goto send_now; 1588 goto send_now;
1586 } 1589 }
1587 1590
@@ -1919,7 +1922,7 @@ u32 __tcp_select_window(struct sock *sk)
1919 if (free_space < (full_space >> 1)) { 1922 if (free_space < (full_space >> 1)) {
1920 icsk->icsk_ack.quick = 0; 1923 icsk->icsk_ack.quick = 0;
1921 1924
1922 if (tcp_memory_pressure) 1925 if (sk_under_memory_pressure(sk))
1923 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 1926 tp->rcv_ssthresh = min(tp->rcv_ssthresh,
1924 4U * tp->advmss); 1927 4U * tp->advmss);
1925 1928
@@ -2147,7 +2150,15 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2147 */ 2150 */
2148 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2151 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2149 2152
2150 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); 2153 /* make sure skb->data is aligned on arches that require it */
2154 if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) {
2155 struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
2156 GFP_ATOMIC);
2157 err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
2158 -ENOBUFS;
2159 } else {
2160 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2161 }
2151 2162
2152 if (err == 0) { 2163 if (err == 0) {
2153 /* Update global TCP statistics. */ 2164 /* Update global TCP statistics. */
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 2e0f0af76c19..a516d1e399df 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -171,13 +171,13 @@ static int tcp_write_timeout(struct sock *sk)
171{ 171{
172 struct inet_connection_sock *icsk = inet_csk(sk); 172 struct inet_connection_sock *icsk = inet_csk(sk);
173 int retry_until; 173 int retry_until;
174 bool do_reset, syn_set = 0; 174 bool do_reset, syn_set = false;
175 175
176 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { 176 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
177 if (icsk->icsk_retransmits) 177 if (icsk->icsk_retransmits)
178 dst_negative_advice(sk); 178 dst_negative_advice(sk);
179 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; 179 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
180 syn_set = 1; 180 syn_set = true;
181 } else { 181 } else {
182 if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) { 182 if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) {
183 /* Black hole detection */ 183 /* Black hole detection */
@@ -261,7 +261,7 @@ static void tcp_delack_timer(unsigned long data)
261 } 261 }
262 262
263out: 263out:
264 if (tcp_memory_pressure) 264 if (sk_under_memory_pressure(sk))
265 sk_mem_reclaim(sk); 265 sk_mem_reclaim(sk);
266out_unlock: 266out_unlock:
267 bh_unlock_sock(sk); 267 bh_unlock_sock(sk);
@@ -340,7 +340,7 @@ void tcp_retransmit_timer(struct sock *sk)
340 &inet->inet_daddr, ntohs(inet->inet_dport), 340 &inet->inet_daddr, ntohs(inet->inet_dport),
341 inet->inet_num, tp->snd_una, tp->snd_nxt); 341 inet->inet_num, tp->snd_una, tp->snd_nxt);
342 } 342 }
343#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 343#if IS_ENABLED(CONFIG_IPV6)
344 else if (sk->sk_family == AF_INET6) { 344 else if (sk->sk_family == AF_INET6) {
345 struct ipv6_pinfo *np = inet6_sk(sk); 345 struct ipv6_pinfo *np = inet6_sk(sk);
346 LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", 346 LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index ac3b3ee4b07c..01775983b997 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -105,7 +105,7 @@ drop:
105 return 0; 105 return 0;
106} 106}
107 107
108#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 108#if IS_ENABLED(CONFIG_IPV6)
109static int tunnel64_rcv(struct sk_buff *skb) 109static int tunnel64_rcv(struct sk_buff *skb)
110{ 110{
111 struct xfrm_tunnel *handler; 111 struct xfrm_tunnel *handler;
@@ -134,7 +134,7 @@ static void tunnel4_err(struct sk_buff *skb, u32 info)
134 break; 134 break;
135} 135}
136 136
137#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 137#if IS_ENABLED(CONFIG_IPV6)
138static void tunnel64_err(struct sk_buff *skb, u32 info) 138static void tunnel64_err(struct sk_buff *skb, u32 info)
139{ 139{
140 struct xfrm_tunnel *handler; 140 struct xfrm_tunnel *handler;
@@ -152,7 +152,7 @@ static const struct net_protocol tunnel4_protocol = {
152 .netns_ok = 1, 152 .netns_ok = 1,
153}; 153};
154 154
155#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 155#if IS_ENABLED(CONFIG_IPV6)
156static const struct net_protocol tunnel64_protocol = { 156static const struct net_protocol tunnel64_protocol = {
157 .handler = tunnel64_rcv, 157 .handler = tunnel64_rcv,
158 .err_handler = tunnel64_err, 158 .err_handler = tunnel64_err,
@@ -167,7 +167,7 @@ static int __init tunnel4_init(void)
167 printk(KERN_ERR "tunnel4 init: can't add protocol\n"); 167 printk(KERN_ERR "tunnel4 init: can't add protocol\n");
168 return -EAGAIN; 168 return -EAGAIN;
169 } 169 }
170#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 170#if IS_ENABLED(CONFIG_IPV6)
171 if (inet_add_protocol(&tunnel64_protocol, IPPROTO_IPV6)) { 171 if (inet_add_protocol(&tunnel64_protocol, IPPROTO_IPV6)) {
172 printk(KERN_ERR "tunnel64 init: can't add protocol\n"); 172 printk(KERN_ERR "tunnel64 init: can't add protocol\n");
173 inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP); 173 inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP);
@@ -179,7 +179,7 @@ static int __init tunnel4_init(void)
179 179
180static void __exit tunnel4_fini(void) 180static void __exit tunnel4_fini(void)
181{ 181{
182#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 182#if IS_ENABLED(CONFIG_IPV6)
183 if (inet_del_protocol(&tunnel64_protocol, IPPROTO_IPV6)) 183 if (inet_del_protocol(&tunnel64_protocol, IPPROTO_IPV6))
184 printk(KERN_ERR "tunnel64 close: can't remove protocol\n"); 184 printk(KERN_ERR "tunnel64 close: can't remove protocol\n");
185#endif 185#endif
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5a65eeac1d29..5d075b5f70fc 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -445,7 +445,7 @@ exact_match:
445/* UDP is nearly always wildcards out the wazoo, it makes no sense to try 445/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
446 * harder than this. -DaveM 446 * harder than this. -DaveM
447 */ 447 */
448static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, 448struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
449 __be16 sport, __be32 daddr, __be16 dport, 449 __be16 sport, __be32 daddr, __be16 dport,
450 int dif, struct udp_table *udptable) 450 int dif, struct udp_table *udptable)
451{ 451{
@@ -512,6 +512,7 @@ begin:
512 rcu_read_unlock(); 512 rcu_read_unlock();
513 return result; 513 return result;
514} 514}
515EXPORT_SYMBOL_GPL(__udp4_lib_lookup);
515 516
516static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, 517static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
517 __be16 sport, __be16 dport, 518 __be16 sport, __be16 dport,
@@ -1358,7 +1359,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1358 if (inet_sk(sk)->inet_daddr) 1359 if (inet_sk(sk)->inet_daddr)
1359 sock_rps_save_rxhash(sk, skb); 1360 sock_rps_save_rxhash(sk, skb);
1360 1361
1361 rc = ip_queue_rcv_skb(sk, skb); 1362 rc = sock_queue_rcv_skb(sk, skb);
1362 if (rc < 0) { 1363 if (rc < 0) {
1363 int is_udplite = IS_UDPLITE(sk); 1364 int is_udplite = IS_UDPLITE(sk);
1364 1365
@@ -1474,6 +1475,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1474 1475
1475 rc = 0; 1476 rc = 0;
1476 1477
1478 ipv4_pktinfo_prepare(skb);
1477 bh_lock_sock(sk); 1479 bh_lock_sock(sk);
1478 if (!sock_owned_by_user(sk)) 1480 if (!sock_owned_by_user(sk))
1479 rc = __udp_queue_rcv_skb(sk, skb); 1481 rc = __udp_queue_rcv_skb(sk, skb);
@@ -2247,7 +2249,8 @@ int udp4_ufo_send_check(struct sk_buff *skb)
2247 return 0; 2249 return 0;
2248} 2250}
2249 2251
2250struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features) 2252struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
2253 netdev_features_t features)
2251{ 2254{
2252 struct sk_buff *segs = ERR_PTR(-EINVAL); 2255 struct sk_buff *segs = ERR_PTR(-EINVAL);
2253 unsigned int mss; 2256 unsigned int mss;
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
new file mode 100644
index 000000000000..69f8a7ca63dd
--- /dev/null
+++ b/net/ipv4/udp_diag.c
@@ -0,0 +1,201 @@
1/*
2 * udp_diag.c Module for monitoring UDP transport protocols sockets.
3 *
4 * Authors: Pavel Emelyanov, <xemul@parallels.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12
13#include <linux/module.h>
14#include <linux/inet_diag.h>
15#include <linux/udp.h>
16#include <net/udp.h>
17#include <net/udplite.h>
18#include <linux/inet_diag.h>
19#include <linux/sock_diag.h>
20
21static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
22 struct netlink_callback *cb, struct inet_diag_req *req,
23 struct nlattr *bc)
24{
25 if (!inet_diag_bc_sk(bc, sk))
26 return 0;
27
28 return inet_sk_diag_fill(sk, NULL, skb, req, NETLINK_CB(cb->skb).pid,
29 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
30}
31
32static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
33 const struct nlmsghdr *nlh, struct inet_diag_req *req)
34{
35 int err = -EINVAL;
36 struct sock *sk;
37 struct sk_buff *rep;
38
39 if (req->sdiag_family == AF_INET)
40 sk = __udp4_lib_lookup(&init_net,
41 req->id.idiag_src[0], req->id.idiag_sport,
42 req->id.idiag_dst[0], req->id.idiag_dport,
43 req->id.idiag_if, tbl);
44#if IS_ENABLED(CONFIG_IPV6)
45 else if (req->sdiag_family == AF_INET6)
46 sk = __udp6_lib_lookup(&init_net,
47 (struct in6_addr *)req->id.idiag_src,
48 req->id.idiag_sport,
49 (struct in6_addr *)req->id.idiag_dst,
50 req->id.idiag_dport,
51 req->id.idiag_if, tbl);
52#endif
53 else
54 goto out_nosk;
55
56 err = -ENOENT;
57 if (sk == NULL)
58 goto out_nosk;
59
60 err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
61 if (err)
62 goto out;
63
64 err = -ENOMEM;
65 rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) +
66 sizeof(struct inet_diag_meminfo) +
67 64)), GFP_KERNEL);
68 if (!rep)
69 goto out;
70
71 err = inet_sk_diag_fill(sk, NULL, rep, req,
72 NETLINK_CB(in_skb).pid,
73 nlh->nlmsg_seq, 0, nlh);
74 if (err < 0) {
75 WARN_ON(err == -EMSGSIZE);
76 kfree_skb(rep);
77 goto out;
78 }
79 err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid,
80 MSG_DONTWAIT);
81 if (err > 0)
82 err = 0;
83out:
84 if (sk)
85 sock_put(sk);
86out_nosk:
87 return err;
88}
89
90static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlink_callback *cb,
91 struct inet_diag_req *r, struct nlattr *bc)
92{
93 int num, s_num, slot, s_slot;
94
95 s_slot = cb->args[0];
96 num = s_num = cb->args[1];
97
98 for (slot = s_slot; slot <= table->mask; num = s_num = 0, slot++) {
99 struct sock *sk;
100 struct hlist_nulls_node *node;
101 struct udp_hslot *hslot = &table->hash[slot];
102
103 if (hlist_nulls_empty(&hslot->head))
104 continue;
105
106 spin_lock_bh(&hslot->lock);
107 sk_nulls_for_each(sk, node, &hslot->head) {
108 struct inet_sock *inet = inet_sk(sk);
109
110 if (num < s_num)
111 goto next;
112 if (!(r->idiag_states & (1 << sk->sk_state)))
113 goto next;
114 if (r->sdiag_family != AF_UNSPEC &&
115 sk->sk_family != r->sdiag_family)
116 goto next;
117 if (r->id.idiag_sport != inet->inet_sport &&
118 r->id.idiag_sport)
119 goto next;
120 if (r->id.idiag_dport != inet->inet_dport &&
121 r->id.idiag_dport)
122 goto next;
123
124 if (sk_diag_dump(sk, skb, cb, r, bc) < 0) {
125 spin_unlock_bh(&hslot->lock);
126 goto done;
127 }
128next:
129 num++;
130 }
131 spin_unlock_bh(&hslot->lock);
132 }
133done:
134 cb->args[0] = slot;
135 cb->args[1] = num;
136}
137
138static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
139 struct inet_diag_req *r, struct nlattr *bc)
140{
141 udp_dump(&udp_table, skb, cb, r, bc);
142}
143
144static int udp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
145 struct inet_diag_req *req)
146{
147 return udp_dump_one(&udp_table, in_skb, nlh, req);
148}
149
150static const struct inet_diag_handler udp_diag_handler = {
151 .dump = udp_diag_dump,
152 .dump_one = udp_diag_dump_one,
153 .idiag_type = IPPROTO_UDP,
154};
155
156static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
157 struct inet_diag_req *r, struct nlattr *bc)
158{
159 udp_dump(&udplite_table, skb, cb, r, bc);
160}
161
162static int udplite_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
163 struct inet_diag_req *req)
164{
165 return udp_dump_one(&udplite_table, in_skb, nlh, req);
166}
167
168static const struct inet_diag_handler udplite_diag_handler = {
169 .dump = udplite_diag_dump,
170 .dump_one = udplite_diag_dump_one,
171 .idiag_type = IPPROTO_UDPLITE,
172};
173
174static int __init udp_diag_init(void)
175{
176 int err;
177
178 err = inet_diag_register(&udp_diag_handler);
179 if (err)
180 goto out;
181 err = inet_diag_register(&udplite_diag_handler);
182 if (err)
183 goto out_lite;
184out:
185 return err;
186out_lite:
187 inet_diag_unregister(&udp_diag_handler);
188 goto out;
189}
190
191static void __exit udp_diag_exit(void)
192{
193 inet_diag_unregister(&udplite_diag_handler);
194 inet_diag_unregister(&udp_diag_handler);
195}
196
197module_init(udp_diag_init);
198module_exit(udp_diag_exit);
199MODULE_LICENSE("GPL");
200MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-17 /* AF_INET - IPPROTO_UDP */);
201MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-136 /* AF_INET - IPPROTO_UDPLITE */);
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 82806455e859..9247d9d70e9d 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -64,7 +64,7 @@ static struct xfrm_tunnel xfrm_tunnel_handler __read_mostly = {
64 .priority = 2, 64 .priority = 2,
65}; 65};
66 66
67#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 67#if IS_ENABLED(CONFIG_IPV6)
68static struct xfrm_tunnel xfrm64_tunnel_handler __read_mostly = { 68static struct xfrm_tunnel xfrm64_tunnel_handler __read_mostly = {
69 .handler = xfrm_tunnel_rcv, 69 .handler = xfrm_tunnel_rcv,
70 .err_handler = xfrm_tunnel_err, 70 .err_handler = xfrm_tunnel_err,
@@ -84,7 +84,7 @@ static int __init ipip_init(void)
84 xfrm_unregister_type(&ipip_type, AF_INET); 84 xfrm_unregister_type(&ipip_type, AF_INET);
85 return -EAGAIN; 85 return -EAGAIN;
86 } 86 }
87#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 87#if IS_ENABLED(CONFIG_IPV6)
88 if (xfrm4_tunnel_register(&xfrm64_tunnel_handler, AF_INET6)) { 88 if (xfrm4_tunnel_register(&xfrm64_tunnel_handler, AF_INET6)) {
89 printk(KERN_INFO "ipip init: can't add xfrm handler for AF_INET6\n"); 89 printk(KERN_INFO "ipip init: can't add xfrm handler for AF_INET6\n");
90 xfrm4_tunnel_deregister(&xfrm_tunnel_handler, AF_INET); 90 xfrm4_tunnel_deregister(&xfrm_tunnel_handler, AF_INET);
@@ -97,7 +97,7 @@ static int __init ipip_init(void)
97 97
98static void __exit ipip_fini(void) 98static void __exit ipip_fini(void)
99{ 99{
100#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 100#if IS_ENABLED(CONFIG_IPV6)
101 if (xfrm4_tunnel_deregister(&xfrm64_tunnel_handler, AF_INET6)) 101 if (xfrm4_tunnel_deregister(&xfrm64_tunnel_handler, AF_INET6))
102 printk(KERN_INFO "ipip close: can't remove xfrm handler for AF_INET6\n"); 102 printk(KERN_INFO "ipip close: can't remove xfrm handler for AF_INET6\n");
103#endif 103#endif