about summary refs log tree commit diff stats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/af_inet.c 46
-rw-r--r-- net/ipv4/ah4.c 2
-rw-r--r-- net/ipv4/arp.c 58
-rw-r--r-- net/ipv4/devinet.c 30
-rw-r--r-- net/ipv4/esp4.c 2
-rw-r--r-- net/ipv4/fib_frontend.c 4
-rw-r--r-- net/ipv4/fib_semantics.c 80
-rw-r--r-- net/ipv4/icmp.c 2
-rw-r--r-- net/ipv4/igmp.c 87
-rw-r--r-- net/ipv4/inet_connection_sock.c 2
-rw-r--r-- net/ipv4/ip_fragment.c 39
-rw-r--r-- net/ipv4/ip_gre.c 25
-rw-r--r-- net/ipv4/ip_sockglue.c 14
-rw-r--r-- net/ipv4/ipcomp.c 11
-rw-r--r-- net/ipv4/ipip.c 20
-rw-r--r-- net/ipv4/ipmr.c 3
-rw-r--r-- net/ipv4/netfilter/arp_tables.c 381
-rw-r--r-- net/ipv4/netfilter/arptable_filter.c 95
-rw-r--r-- net/ipv4/netfilter/ip_tables.c 552
-rw-r--r-- net/ipv4/netfilter/ipt_CLUSTERIP.c 14
-rw-r--r-- net/ipv4/netfilter/ipt_ULOG.c 4
-rw-r--r-- net/ipv4/netfilter/iptable_filter.c 124
-rw-r--r-- net/ipv4/netfilter/iptable_mangle.c 166
-rw-r--r-- net/ipv4/netfilter/iptable_raw.c 96
-rw-r--r-- net/ipv4/netfilter/iptable_security.c 117
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 3
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_proto_icmp.c 11
-rw-r--r-- net/ipv4/netfilter/nf_defrag_ipv4.c 19
-rw-r--r-- net/ipv4/netfilter/nf_nat_core.c 24
-rw-r--r-- net/ipv4/netfilter/nf_nat_ftp.c 105
-rw-r--r-- net/ipv4/netfilter/nf_nat_helper.c 39
-rw-r--r-- net/ipv4/netfilter/nf_nat_pptp.c 3
-rw-r--r-- net/ipv4/netfilter/nf_nat_rule.c 41
-rw-r--r-- net/ipv4/netfilter/nf_nat_sip.c 154
-rw-r--r-- net/ipv4/netfilter/nf_nat_snmp_basic.c 31
-rw-r--r-- net/ipv4/proc.c 32
-rw-r--r-- net/ipv4/route.c 9
-rw-r--r-- net/ipv4/syncookies.c 3
-rw-r--r-- net/ipv4/sysctl_net_ipv4.c 14
-rw-r--r-- net/ipv4/tcp.c 65
-rw-r--r-- net/ipv4/tcp_input.c 12
-rw-r--r-- net/ipv4/tcp_ipv4.c 25
-rw-r--r-- net/ipv4/tcp_output.c 22
-rw-r--r-- net/ipv4/tcp_timer.c 27
-rw-r--r-- net/ipv4/udp.c 19
-rw-r--r-- net/ipv4/udplite.c 4
46 files changed, 1260 insertions, 1376 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 7d12c6a9b19b..33b7dffa7732 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1385,7 +1385,7 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
1385} 1385}
1386EXPORT_SYMBOL_GPL(inet_ctl_sock_create); 1386EXPORT_SYMBOL_GPL(inet_ctl_sock_create);
1387 1387
1388unsigned long snmp_fold_field(void *mib[], int offt) 1388unsigned long snmp_fold_field(void __percpu *mib[], int offt)
1389{ 1389{
1390 unsigned long res = 0; 1390 unsigned long res = 0;
1391 int i; 1391 int i;
@@ -1398,7 +1398,7 @@ unsigned long snmp_fold_field(void *mib[], int offt)
1398} 1398}
1399EXPORT_SYMBOL_GPL(snmp_fold_field); 1399EXPORT_SYMBOL_GPL(snmp_fold_field);
1400 1400
1401int snmp_mib_init(void *ptr[2], size_t mibsize) 1401int snmp_mib_init(void __percpu *ptr[2], size_t mibsize)
1402{ 1402{
1403 BUG_ON(ptr == NULL); 1403 BUG_ON(ptr == NULL);
1404 ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long long)); 1404 ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long long));
@@ -1416,7 +1416,7 @@ err0:
1416} 1416}
1417EXPORT_SYMBOL_GPL(snmp_mib_init); 1417EXPORT_SYMBOL_GPL(snmp_mib_init);
1418 1418
1419void snmp_mib_free(void *ptr[2]) 1419void snmp_mib_free(void __percpu *ptr[2])
1420{ 1420{
1421 BUG_ON(ptr == NULL); 1421 BUG_ON(ptr == NULL);
1422 free_percpu(ptr[0]); 1422 free_percpu(ptr[0]);
@@ -1460,25 +1460,25 @@ static const struct net_protocol icmp_protocol = {
1460 1460
1461static __net_init int ipv4_mib_init_net(struct net *net) 1461static __net_init int ipv4_mib_init_net(struct net *net)
1462{ 1462{
1463 if (snmp_mib_init((void **)net->mib.tcp_statistics, 1463 if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics,
1464 sizeof(struct tcp_mib)) < 0) 1464 sizeof(struct tcp_mib)) < 0)
1465 goto err_tcp_mib; 1465 goto err_tcp_mib;
1466 if (snmp_mib_init((void **)net->mib.ip_statistics, 1466 if (snmp_mib_init((void __percpu **)net->mib.ip_statistics,
1467 sizeof(struct ipstats_mib)) < 0) 1467 sizeof(struct ipstats_mib)) < 0)
1468 goto err_ip_mib; 1468 goto err_ip_mib;
1469 if (snmp_mib_init((void **)net->mib.net_statistics, 1469 if (snmp_mib_init((void __percpu **)net->mib.net_statistics,
1470 sizeof(struct linux_mib)) < 0) 1470 sizeof(struct linux_mib)) < 0)
1471 goto err_net_mib; 1471 goto err_net_mib;
1472 if (snmp_mib_init((void **)net->mib.udp_statistics, 1472 if (snmp_mib_init((void __percpu **)net->mib.udp_statistics,
1473 sizeof(struct udp_mib)) < 0) 1473 sizeof(struct udp_mib)) < 0)
1474 goto err_udp_mib; 1474 goto err_udp_mib;
1475 if (snmp_mib_init((void **)net->mib.udplite_statistics, 1475 if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics,
1476 sizeof(struct udp_mib)) < 0) 1476 sizeof(struct udp_mib)) < 0)
1477 goto err_udplite_mib; 1477 goto err_udplite_mib;
1478 if (snmp_mib_init((void **)net->mib.icmp_statistics, 1478 if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics,
1479 sizeof(struct icmp_mib)) < 0) 1479 sizeof(struct icmp_mib)) < 0)
1480 goto err_icmp_mib; 1480 goto err_icmp_mib;
1481 if (snmp_mib_init((void **)net->mib.icmpmsg_statistics, 1481 if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics,
1482 sizeof(struct icmpmsg_mib)) < 0) 1482 sizeof(struct icmpmsg_mib)) < 0)
1483 goto err_icmpmsg_mib; 1483 goto err_icmpmsg_mib;
1484 1484
@@ -1486,30 +1486,30 @@ static __net_init int ipv4_mib_init_net(struct net *net)
1486 return 0; 1486 return 0;
1487 1487
1488err_icmpmsg_mib: 1488err_icmpmsg_mib:
1489 snmp_mib_free((void **)net->mib.icmp_statistics); 1489 snmp_mib_free((void __percpu **)net->mib.icmp_statistics);
1490err_icmp_mib: 1490err_icmp_mib:
1491 snmp_mib_free((void **)net->mib.udplite_statistics); 1491 snmp_mib_free((void __percpu **)net->mib.udplite_statistics);
1492err_udplite_mib: 1492err_udplite_mib:
1493 snmp_mib_free((void **)net->mib.udp_statistics); 1493 snmp_mib_free((void __percpu **)net->mib.udp_statistics);
1494err_udp_mib: 1494err_udp_mib:
1495 snmp_mib_free((void **)net->mib.net_statistics); 1495 snmp_mib_free((void __percpu **)net->mib.net_statistics);
1496err_net_mib: 1496err_net_mib:
1497 snmp_mib_free((void **)net->mib.ip_statistics); 1497 snmp_mib_free((void __percpu **)net->mib.ip_statistics);
1498err_ip_mib: 1498err_ip_mib:
1499 snmp_mib_free((void **)net->mib.tcp_statistics); 1499 snmp_mib_free((void __percpu **)net->mib.tcp_statistics);
1500err_tcp_mib: 1500err_tcp_mib:
1501 return -ENOMEM; 1501 return -ENOMEM;
1502} 1502}
1503 1503
/*
 * ipv4_mib_exit_net - release the IPv4 SNMP MIB arrays held in net->mib.
 *
 * Each line below shows the old text (left) followed by the new text (right).
 * snmp_mib_free() is called once per statistics array, in the reverse of the
 * order in which ipv4_mib_init_net() initialises them: icmpmsg, icmp, udplite,
 * udp, net, ip, tcp.
 *
 * The only change in this hunk is the cast applied to each array, which goes
 * from (void **) to (void __percpu **).
 */
1504static __net_exit void ipv4_mib_exit_net(struct net *net) 1504static __net_exit void ipv4_mib_exit_net(struct net *net)
1505{ 1505{
1506 snmp_mib_free((void **)net->mib.icmpmsg_statistics); 1506 snmp_mib_free((void __percpu **)net->mib.icmpmsg_statistics);
1507 snmp_mib_free((void **)net->mib.icmp_statistics); 1507 snmp_mib_free((void __percpu **)net->mib.icmp_statistics);
1508 snmp_mib_free((void **)net->mib.udplite_statistics); 1508 snmp_mib_free((void __percpu **)net->mib.udplite_statistics);
1509 snmp_mib_free((void **)net->mib.udp_statistics); 1509 snmp_mib_free((void __percpu **)net->mib.udp_statistics);
1510 snmp_mib_free((void **)net->mib.net_statistics); 1510 snmp_mib_free((void __percpu **)net->mib.net_statistics);
1511 snmp_mib_free((void **)net->mib.ip_statistics); 1511 snmp_mib_free((void __percpu **)net->mib.ip_statistics);
1512 snmp_mib_free((void **)net->mib.tcp_statistics); 1512 snmp_mib_free((void __percpu **)net->mib.tcp_statistics);
1513} 1513}
1514 1514
1515static __net_initdata struct pernet_operations ipv4_mib_ops = { 1515static __net_initdata struct pernet_operations ipv4_mib_ops = {
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 7ed3e4ae93ae..987b47dc69ad 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -393,7 +393,7 @@ static void ah4_err(struct sk_buff *skb, u32 info)
393 icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) 393 icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
394 return; 394 return;
395 395
396 x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); 396 x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET);
397 if (!x) 397 if (!x)
398 return; 398 return;
399 printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n", 399 printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n",
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index c95cd93acf29..c4dd13542802 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -70,6 +70,7 @@
70 * bonding can change the skb before 70 * bonding can change the skb before
71 * sending (e.g. insert 8021q tag). 71 * sending (e.g. insert 8021q tag).
72 * Harald Welte : convert to make use of jenkins hash 72 * Harald Welte : convert to make use of jenkins hash
73 * Jesper D. Brouer: Proxy ARP PVLAN RFC 3069 support.
73 */ 74 */
74 75
75#include <linux/module.h> 76#include <linux/module.h>
@@ -524,12 +525,15 @@ int arp_bind_neighbour(struct dst_entry *dst)
524/* 525/*
525 * Check if we can use proxy ARP for this path 526 * Check if we can use proxy ARP for this path
526 */ 527 */
527 528static inline int arp_fwd_proxy(struct in_device *in_dev,
528static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt) 529 struct net_device *dev, struct rtable *rt)
529{ 530{
530 struct in_device *out_dev; 531 struct in_device *out_dev;
531 int imi, omi = -1; 532 int imi, omi = -1;
532 533
534 if (rt->u.dst.dev == dev)
535 return 0;
536
533 if (!IN_DEV_PROXY_ARP(in_dev)) 537 if (!IN_DEV_PROXY_ARP(in_dev))
534 return 0; 538 return 0;
535 539
@@ -548,6 +552,43 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt)
548} 552}
549 553
550/* 554/*
555 * Check for RFC3069 proxy arp private VLAN (allow to send back to same dev)
556 *
557 * RFC3069 supports proxy arp replies back to the same interface. This
558 * is done to support (ethernet) switch features, like RFC 3069, where
559 * the individual ports are not allowed to communicate with each
560 * other, BUT they are allowed to talk to the upstream router. As
561 * described in RFC 3069, it is possible to allow these hosts to
562 * communicate through the upstream router, by proxy_arp'ing.
563 *
564 * RFC 3069: "VLAN Aggregation for Efficient IP Address Allocation"
565 *
566 * This technology is known by different names:
567 * In RFC 3069 it is called VLAN Aggregation.
568 * Cisco and Allied Telesyn call it Private VLAN.
569 * Hewlett-Packard call it Source-Port filtering or port-isolation.
570 * Ericsson call it MAC-Forced Forwarding (RFC Draft).
571 *
572 */
/*
 * arp_fwd_pvlan - decide whether a private-VLAN proxy ARP reply is allowed.
 *
 * Added by this patch (only new-file line numbers appear on these lines).
 *
 * Returns 1 only when all three conditions hold:
 *   - the route's output device (rt->u.dst.dev) is the same as dev,
 *   - sip differs from tip,
 *   - IN_DEV_PROXY_ARP_PVLAN(in_dev) is set.
 * Returns 0 in every other case.
 */
573static inline int arp_fwd_pvlan(struct in_device *in_dev,
574 struct net_device *dev, struct rtable *rt,
575 __be32 sip, __be32 tip)
576{
577 /* Private VLAN is only concerned with the same ethernet segment */
578 if (rt->u.dst.dev != dev)
579 return 0;
580
581 /* Don't reply to self probes (often sent by Windows boxes) */
582 if (sip == tip)
583 return 0;
584
585 if (IN_DEV_PROXY_ARP_PVLAN(in_dev))
586 return 1;
587 else
588 return 0;
589}
590
591/*
551 * Interface to link layer: send routine and receive handler. 592 * Interface to link layer: send routine and receive handler.
552 */ 593 */
553 594
@@ -833,8 +874,11 @@ static int arp_process(struct sk_buff *skb)
833 } 874 }
834 goto out; 875 goto out;
835 } else if (IN_DEV_FORWARD(in_dev)) { 876 } else if (IN_DEV_FORWARD(in_dev)) {
836 if (addr_type == RTN_UNICAST && rt->u.dst.dev != dev && 877 if (addr_type == RTN_UNICAST &&
837 (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) { 878 (arp_fwd_proxy(in_dev, dev, rt) ||
879 arp_fwd_pvlan(in_dev, dev, rt, sip, tip) ||
880 pneigh_lookup(&arp_tbl, net, &tip, dev, 0)))
881 {
838 n = neigh_event_ns(&arp_tbl, sha, &sip, dev); 882 n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
839 if (n) 883 if (n)
840 neigh_release(n); 884 neigh_release(n);
@@ -863,7 +907,8 @@ static int arp_process(struct sk_buff *skb)
863 devices (strip is candidate) 907 devices (strip is candidate)
864 */ 908 */
865 if (n == NULL && 909 if (n == NULL &&
866 arp->ar_op == htons(ARPOP_REPLY) && 910 (arp->ar_op == htons(ARPOP_REPLY) ||
911 (arp->ar_op == htons(ARPOP_REQUEST) && tip == sip)) &&
867 inet_addr_type(net, sip) == RTN_UNICAST) 912 inet_addr_type(net, sip) == RTN_UNICAST)
868 n = __neigh_lookup(&arp_tbl, &sip, dev, 1); 913 n = __neigh_lookup(&arp_tbl, &sip, dev, 1);
869 } 914 }
@@ -1239,8 +1284,7 @@ void __init arp_init(void)
1239 dev_add_pack(&arp_packet_type); 1284 dev_add_pack(&arp_packet_type);
1240 arp_proc_init(); 1285 arp_proc_init();
1241#ifdef CONFIG_SYSCTL 1286#ifdef CONFIG_SYSCTL
1242 neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4, 1287 neigh_sysctl_register(NULL, &arp_tbl.parms, "ipv4", NULL);
1243 NET_IPV4_NEIGH, "ipv4", NULL);
1244#endif 1288#endif
1245 register_netdevice_notifier(&arp_netdev_notifier); 1289 register_netdevice_notifier(&arp_netdev_notifier);
1246} 1290}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 26dec2be9615..51ca946e3392 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -64,20 +64,20 @@
64 64
/*
 * ipv4_devconf - static ipv4_devconf instance with four .data slots preset
 * to 1: ACCEPT_REDIRECTS, SEND_REDIRECTS, SECURE_REDIRECTS and SHARED_MEDIA.
 * Slots are addressed as (constant - 1).
 *
 * Old text is on the left of each line, new text on the right.  The hunk only
 * renames the index constants from NET_IPV4_CONF_<name> to
 * IPV4_DEVCONF_<name>; the preset values are unchanged.
 */
65static struct ipv4_devconf ipv4_devconf = { 65static struct ipv4_devconf ipv4_devconf = {
66 .data = { 66 .data = {
67 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1, 67 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
68 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1, 68 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
69 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1, 69 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
70 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1, 70 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
71 }, 71 },
72}; 72};
73 73
/*
 * ipv4_devconf_dflt - static ipv4_devconf instance with five .data slots
 * preset to 1: ACCEPT_REDIRECTS, SEND_REDIRECTS, SECURE_REDIRECTS,
 * SHARED_MEDIA and ACCEPT_SOURCE_ROUTE.  Slots are addressed as
 * (constant - 1).
 *
 * Old text is on the left of each line, new text on the right.  The hunk only
 * renames the index constants from NET_IPV4_CONF_<name> to
 * IPV4_DEVCONF_<name>; the preset values are unchanged.
 */
74static struct ipv4_devconf ipv4_devconf_dflt = { 74static struct ipv4_devconf ipv4_devconf_dflt = {
75 .data = { 75 .data = {
76 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1, 76 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
77 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1, 77 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
78 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1, 78 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
79 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1, 79 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
80 [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1, 80 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
81 }, 81 },
82}; 82};
83 83
@@ -1365,7 +1365,7 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write,
1365 { \ 1365 { \
1366 .procname = name, \ 1366 .procname = name, \
1367 .data = ipv4_devconf.data + \ 1367 .data = ipv4_devconf.data + \
1368 NET_IPV4_CONF_ ## attr - 1, \ 1368 IPV4_DEVCONF_ ## attr - 1, \
1369 .maxlen = sizeof(int), \ 1369 .maxlen = sizeof(int), \
1370 .mode = mval, \ 1370 .mode = mval, \
1371 .proc_handler = proc, \ 1371 .proc_handler = proc, \
@@ -1386,7 +1386,7 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write,
1386 1386
1387static struct devinet_sysctl_table { 1387static struct devinet_sysctl_table {
1388 struct ctl_table_header *sysctl_header; 1388 struct ctl_table_header *sysctl_header;
1389 struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX]; 1389 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1390 char *dev_name; 1390 char *dev_name;
1391} devinet_sysctl = { 1391} devinet_sysctl = {
1392 .devinet_vars = { 1392 .devinet_vars = {
@@ -1413,6 +1413,7 @@ static struct devinet_sysctl_table {
1413 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), 1413 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1414 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), 1414 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1415 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), 1415 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1416 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1416 1417
1417 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), 1418 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1418 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), 1419 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
@@ -1491,8 +1492,7 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1491 1492
/*
 * devinet_sysctl_register - make the two sysctl registration calls for idev.
 *
 * First calls neigh_sysctl_register() with idev->dev and idev->arp_parms,
 * then __devinet_sysctl_register() with the device's net, its name and
 * &idev->cnf.
 *
 * Old text is on the left, new text on the right.  The change drops the
 * NET_IPV4 and NET_IPV4_NEIGH arguments from the neigh_sysctl_register()
 * call, so the two-line old call becomes a single line.
 */
1492static void devinet_sysctl_register(struct in_device *idev) 1493static void devinet_sysctl_register(struct in_device *idev)
1493{ 1494{
1494 neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4, 1495 neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1495 NET_IPV4_NEIGH, "ipv4", NULL);
1496 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, 1496 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1497 &idev->cnf); 1497 &idev->cnf);
1498} 1498}
@@ -1507,7 +1507,7 @@ static struct ctl_table ctl_forward_entry[] = {
1507 { 1507 {
1508 .procname = "ip_forward", 1508 .procname = "ip_forward",
1509 .data = &ipv4_devconf.data[ 1509 .data = &ipv4_devconf.data[
1510 NET_IPV4_CONF_FORWARDING - 1], 1510 IPV4_DEVCONF_FORWARDING - 1],
1511 .maxlen = sizeof(int), 1511 .maxlen = sizeof(int),
1512 .mode = 0644, 1512 .mode = 0644,
1513 .proc_handler = devinet_sysctl_forward, 1513 .proc_handler = devinet_sysctl_forward,
@@ -1551,7 +1551,7 @@ static __net_init int devinet_init_net(struct net *net)
1551 if (tbl == NULL) 1551 if (tbl == NULL)
1552 goto err_alloc_ctl; 1552 goto err_alloc_ctl;
1553 1553
1554 tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1]; 1554 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1555 tbl[0].extra1 = all; 1555 tbl[0].extra1 = all;
1556 tbl[0].extra2 = net; 1556 tbl[0].extra2 = net;
1557#endif 1557#endif
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 1948895beb6d..14ca1f1c3fb0 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -422,7 +422,7 @@ static void esp4_err(struct sk_buff *skb, u32 info)
422 icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) 422 icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
423 return; 423 return;
424 424
425 x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); 425 x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET);
426 if (!x) 426 if (!x)
427 return; 427 return;
428 NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", 428 NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n",
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 82dbf711d6d0..9b3e28ed5240 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -883,7 +883,7 @@ static void nl_fib_input(struct sk_buff *skb)
883 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT); 883 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
884} 884}
885 885
886static int nl_fib_lookup_init(struct net *net) 886static int __net_init nl_fib_lookup_init(struct net *net)
887{ 887{
888 struct sock *sk; 888 struct sock *sk;
889 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0, 889 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
@@ -1004,7 +1004,7 @@ fail:
1004 return err; 1004 return err;
1005} 1005}
1006 1006
1007static void __net_exit ip_fib_net_exit(struct net *net) 1007static void ip_fib_net_exit(struct net *net)
1008{ 1008{
1009 unsigned int i; 1009 unsigned int i;
1010 1010
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index ed19aa6919c2..1af0ea0fb6a2 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -62,8 +62,8 @@ static DEFINE_SPINLOCK(fib_multipath_lock);
62#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \ 62#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
63for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) 63for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
64 64
65#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \ 65#define change_nexthops(fi) { int nhsel; struct fib_nh *nexthop_nh; \
66for (nhsel=0, nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++) 66for (nhsel=0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nexthop_nh++, nhsel++)
67 67
68#else /* CONFIG_IP_ROUTE_MULTIPATH */ 68#else /* CONFIG_IP_ROUTE_MULTIPATH */
69 69
@@ -72,7 +72,7 @@ for (nhsel=0, nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++,
72#define for_nexthops(fi) { int nhsel = 0; const struct fib_nh * nh = (fi)->fib_nh; \ 72#define for_nexthops(fi) { int nhsel = 0; const struct fib_nh * nh = (fi)->fib_nh; \
73for (nhsel=0; nhsel < 1; nhsel++) 73for (nhsel=0; nhsel < 1; nhsel++)
74 74
75#define change_nexthops(fi) { int nhsel = 0; struct fib_nh * nh = (struct fib_nh *)((fi)->fib_nh); \ 75#define change_nexthops(fi) { int nhsel = 0; struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \
76for (nhsel=0; nhsel < 1; nhsel++) 76for (nhsel=0; nhsel < 1; nhsel++)
77 77
78#endif /* CONFIG_IP_ROUTE_MULTIPATH */ 78#endif /* CONFIG_IP_ROUTE_MULTIPATH */
@@ -145,9 +145,9 @@ void free_fib_info(struct fib_info *fi)
145 return; 145 return;
146 } 146 }
147 change_nexthops(fi) { 147 change_nexthops(fi) {
148 if (nh->nh_dev) 148 if (nexthop_nh->nh_dev)
149 dev_put(nh->nh_dev); 149 dev_put(nexthop_nh->nh_dev);
150 nh->nh_dev = NULL; 150 nexthop_nh->nh_dev = NULL;
151 } endfor_nexthops(fi); 151 } endfor_nexthops(fi);
152 fib_info_cnt--; 152 fib_info_cnt--;
153 release_net(fi->fib_net); 153 release_net(fi->fib_net);
@@ -162,9 +162,9 @@ void fib_release_info(struct fib_info *fi)
162 if (fi->fib_prefsrc) 162 if (fi->fib_prefsrc)
163 hlist_del(&fi->fib_lhash); 163 hlist_del(&fi->fib_lhash);
164 change_nexthops(fi) { 164 change_nexthops(fi) {
165 if (!nh->nh_dev) 165 if (!nexthop_nh->nh_dev)
166 continue; 166 continue;
167 hlist_del(&nh->nh_hash); 167 hlist_del(&nexthop_nh->nh_hash);
168 } endfor_nexthops(fi) 168 } endfor_nexthops(fi)
169 fi->fib_dead = 1; 169 fi->fib_dead = 1;
170 fib_info_put(fi); 170 fib_info_put(fi);
@@ -395,19 +395,20 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
395 if (!rtnh_ok(rtnh, remaining)) 395 if (!rtnh_ok(rtnh, remaining))
396 return -EINVAL; 396 return -EINVAL;
397 397
398 nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; 398 nexthop_nh->nh_flags =
399 nh->nh_oif = rtnh->rtnh_ifindex; 399 (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
400 nh->nh_weight = rtnh->rtnh_hops + 1; 400 nexthop_nh->nh_oif = rtnh->rtnh_ifindex;
401 nexthop_nh->nh_weight = rtnh->rtnh_hops + 1;
401 402
402 attrlen = rtnh_attrlen(rtnh); 403 attrlen = rtnh_attrlen(rtnh);
403 if (attrlen > 0) { 404 if (attrlen > 0) {
404 struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 405 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
405 406
406 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 407 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
407 nh->nh_gw = nla ? nla_get_be32(nla) : 0; 408 nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0;
408#ifdef CONFIG_NET_CLS_ROUTE 409#ifdef CONFIG_NET_CLS_ROUTE
409 nla = nla_find(attrs, attrlen, RTA_FLOW); 410 nla = nla_find(attrs, attrlen, RTA_FLOW);
410 nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; 411 nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
411#endif 412#endif
412 } 413 }
413 414
@@ -527,10 +528,6 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
527 if (nh->nh_gw) { 528 if (nh->nh_gw) {
528 struct fib_result res; 529 struct fib_result res;
529 530
530#ifdef CONFIG_IP_ROUTE_PERVASIVE
531 if (nh->nh_flags&RTNH_F_PERVASIVE)
532 return 0;
533#endif
534 if (nh->nh_flags&RTNH_F_ONLINK) { 531 if (nh->nh_flags&RTNH_F_ONLINK) {
535 struct net_device *dev; 532 struct net_device *dev;
536 533
@@ -738,7 +735,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
738 735
739 fi->fib_nhs = nhs; 736 fi->fib_nhs = nhs;
740 change_nexthops(fi) { 737 change_nexthops(fi) {
741 nh->nh_parent = fi; 738 nexthop_nh->nh_parent = fi;
742 } endfor_nexthops(fi) 739 } endfor_nexthops(fi)
743 740
744 if (cfg->fc_mx) { 741 if (cfg->fc_mx) {
@@ -808,7 +805,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
808 goto failure; 805 goto failure;
809 } else { 806 } else {
810 change_nexthops(fi) { 807 change_nexthops(fi) {
811 if ((err = fib_check_nh(cfg, fi, nh)) != 0) 808 if ((err = fib_check_nh(cfg, fi, nexthop_nh)) != 0)
812 goto failure; 809 goto failure;
813 } endfor_nexthops(fi) 810 } endfor_nexthops(fi)
814 } 811 }
@@ -843,11 +840,11 @@ link_it:
843 struct hlist_head *head; 840 struct hlist_head *head;
844 unsigned int hash; 841 unsigned int hash;
845 842
846 if (!nh->nh_dev) 843 if (!nexthop_nh->nh_dev)
847 continue; 844 continue;
848 hash = fib_devindex_hashfn(nh->nh_dev->ifindex); 845 hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex);
849 head = &fib_info_devhash[hash]; 846 head = &fib_info_devhash[hash];
850 hlist_add_head(&nh->nh_hash, head); 847 hlist_add_head(&nexthop_nh->nh_hash, head);
851 } endfor_nexthops(fi) 848 } endfor_nexthops(fi)
852 spin_unlock_bh(&fib_info_lock); 849 spin_unlock_bh(&fib_info_lock);
853 return fi; 850 return fi;
@@ -1080,21 +1077,21 @@ int fib_sync_down_dev(struct net_device *dev, int force)
1080 prev_fi = fi; 1077 prev_fi = fi;
1081 dead = 0; 1078 dead = 0;
1082 change_nexthops(fi) { 1079 change_nexthops(fi) {
1083 if (nh->nh_flags&RTNH_F_DEAD) 1080 if (nexthop_nh->nh_flags&RTNH_F_DEAD)
1084 dead++; 1081 dead++;
1085 else if (nh->nh_dev == dev && 1082 else if (nexthop_nh->nh_dev == dev &&
1086 nh->nh_scope != scope) { 1083 nexthop_nh->nh_scope != scope) {
1087 nh->nh_flags |= RTNH_F_DEAD; 1084 nexthop_nh->nh_flags |= RTNH_F_DEAD;
1088#ifdef CONFIG_IP_ROUTE_MULTIPATH 1085#ifdef CONFIG_IP_ROUTE_MULTIPATH
1089 spin_lock_bh(&fib_multipath_lock); 1086 spin_lock_bh(&fib_multipath_lock);
1090 fi->fib_power -= nh->nh_power; 1087 fi->fib_power -= nexthop_nh->nh_power;
1091 nh->nh_power = 0; 1088 nexthop_nh->nh_power = 0;
1092 spin_unlock_bh(&fib_multipath_lock); 1089 spin_unlock_bh(&fib_multipath_lock);
1093#endif 1090#endif
1094 dead++; 1091 dead++;
1095 } 1092 }
1096#ifdef CONFIG_IP_ROUTE_MULTIPATH 1093#ifdef CONFIG_IP_ROUTE_MULTIPATH
1097 if (force > 1 && nh->nh_dev == dev) { 1094 if (force > 1 && nexthop_nh->nh_dev == dev) {
1098 dead = fi->fib_nhs; 1095 dead = fi->fib_nhs;
1099 break; 1096 break;
1100 } 1097 }
@@ -1144,18 +1141,20 @@ int fib_sync_up(struct net_device *dev)
1144 prev_fi = fi; 1141 prev_fi = fi;
1145 alive = 0; 1142 alive = 0;
1146 change_nexthops(fi) { 1143 change_nexthops(fi) {
1147 if (!(nh->nh_flags&RTNH_F_DEAD)) { 1144 if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) {
1148 alive++; 1145 alive++;
1149 continue; 1146 continue;
1150 } 1147 }
1151 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) 1148 if (nexthop_nh->nh_dev == NULL ||
1149 !(nexthop_nh->nh_dev->flags&IFF_UP))
1152 continue; 1150 continue;
1153 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev)) 1151 if (nexthop_nh->nh_dev != dev ||
1152 !__in_dev_get_rtnl(dev))
1154 continue; 1153 continue;
1155 alive++; 1154 alive++;
1156 spin_lock_bh(&fib_multipath_lock); 1155 spin_lock_bh(&fib_multipath_lock);
1157 nh->nh_power = 0; 1156 nexthop_nh->nh_power = 0;
1158 nh->nh_flags &= ~RTNH_F_DEAD; 1157 nexthop_nh->nh_flags &= ~RTNH_F_DEAD;
1159 spin_unlock_bh(&fib_multipath_lock); 1158 spin_unlock_bh(&fib_multipath_lock);
1160 } endfor_nexthops(fi) 1159 } endfor_nexthops(fi)
1161 1160
@@ -1182,9 +1181,9 @@ void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1182 if (fi->fib_power <= 0) { 1181 if (fi->fib_power <= 0) {
1183 int power = 0; 1182 int power = 0;
1184 change_nexthops(fi) { 1183 change_nexthops(fi) {
1185 if (!(nh->nh_flags&RTNH_F_DEAD)) { 1184 if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) {
1186 power += nh->nh_weight; 1185 power += nexthop_nh->nh_weight;
1187 nh->nh_power = nh->nh_weight; 1186 nexthop_nh->nh_power = nexthop_nh->nh_weight;
1188 } 1187 }
1189 } endfor_nexthops(fi); 1188 } endfor_nexthops(fi);
1190 fi->fib_power = power; 1189 fi->fib_power = power;
@@ -1204,9 +1203,10 @@ void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1204 w = jiffies % fi->fib_power; 1203 w = jiffies % fi->fib_power;
1205 1204
1206 change_nexthops(fi) { 1205 change_nexthops(fi) {
1207 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) { 1206 if (!(nexthop_nh->nh_flags&RTNH_F_DEAD) &&
1208 if ((w -= nh->nh_power) <= 0) { 1207 nexthop_nh->nh_power) {
1209 nh->nh_power--; 1208 if ((w -= nexthop_nh->nh_power) <= 0) {
1209 nexthop_nh->nh_power--;
1210 fi->fib_power--; 1210 fi->fib_power--;
1211 res->nh_sel = nhsel; 1211 res->nh_sel = nhsel;
1212 spin_unlock_bh(&fib_multipath_lock); 1212 spin_unlock_bh(&fib_multipath_lock);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index fe11f60ce41b..4b4c2bcd15db 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -114,7 +114,7 @@ struct icmp_bxm {
114/* An array of errno for error messages from dest unreach. */ 114/* An array of errno for error messages from dest unreach. */
115/* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */ 115/* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */
116 116
117struct icmp_err icmp_err_convert[] = { 117const struct icmp_err icmp_err_convert[] = {
118 { 118 {
119 .errno = ENETUNREACH, /* ICMP_NET_UNREACH */ 119 .errno = ENETUNREACH, /* ICMP_NET_UNREACH */
120 .fatal = 0, 120 .fatal = 0,
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index a42f658e756a..63bf298ca109 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1799,7 +1799,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
1799 iml->next = inet->mc_list; 1799 iml->next = inet->mc_list;
1800 iml->sflist = NULL; 1800 iml->sflist = NULL;
1801 iml->sfmode = MCAST_EXCLUDE; 1801 iml->sfmode = MCAST_EXCLUDE;
1802 inet->mc_list = iml; 1802 rcu_assign_pointer(inet->mc_list, iml);
1803 ip_mc_inc_group(in_dev, addr); 1803 ip_mc_inc_group(in_dev, addr);
1804 err = 0; 1804 err = 0;
1805done: 1805done:
@@ -1807,24 +1807,46 @@ done:
1807 return err; 1807 return err;
1808} 1808}
1809 1809
/*
 * ip_sf_socklist_reclaim - callback handed to call_rcu() for source-filter
 * lists.
 *
 * Added by this patch (only new-file line numbers appear on these lines).
 * Recovers the enclosing struct ip_sf_socklist from its embedded rcu member
 * and frees it with kfree().  It does no socket memory accounting itself; the
 * call sites in this diff subtract the size from sk->sk_omem_alloc before
 * queueing the callback.
 */
1810static void ip_sf_socklist_reclaim(struct rcu_head *rp)
1811{
1812 struct ip_sf_socklist *psf;
1813
1814 psf = container_of(rp, struct ip_sf_socklist, rcu);
1815 /* sk_omem_alloc should have been decreased by the caller*/
1816 kfree(psf);
1817}
1818
/*
 * ip_mc_leave_src - drop a socket membership's source filter state.
 *
 * Old text is on the left of each line, new text on the right; lines with a
 * single number exist on one side only.
 *
 * Common to both versions: when the membership has no source list,
 * ip_mc_del_src() is called with a zero count and a NULL list and its result
 * is returned directly.  Otherwise ip_mc_del_src() is called with the list's
 * sl_count and sl_addr, and its result is returned after the list has been
 * detached.
 *
 * Old version: frees the list immediately with sock_kfree_s() and then sets
 * iml->sflist to NULL.
 *
 * New version: reads iml->sflist once into psf, clears the pointer with
 * rcu_assign_pointer(), subtracts IP_SFLSIZE(psf->sl_max) from
 * sk->sk_omem_alloc, and queues the free through call_rcu() with
 * ip_sf_socklist_reclaim as the callback.
 */
1810static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, 1819static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
1811 struct in_device *in_dev) 1820 struct in_device *in_dev)
1812{ 1821{
1822 struct ip_sf_socklist *psf = iml->sflist;
1813 int err; 1823 int err;
1814 1824
1815 if (iml->sflist == NULL) { 1825 if (psf == NULL) {
1816 /* any-source empty exclude case */ 1826 /* any-source empty exclude case */
1817 return ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, 1827 return ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr,
1818 iml->sfmode, 0, NULL, 0); 1828 iml->sfmode, 0, NULL, 0);
1819 } 1829 }
1820 err = ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, 1830 err = ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr,
1821 iml->sfmode, iml->sflist->sl_count, 1831 iml->sfmode, psf->sl_count, psf->sl_addr, 0);
1822 iml->sflist->sl_addr, 0); 1832 rcu_assign_pointer(iml->sflist, NULL);
1823 sock_kfree_s(sk, iml->sflist, IP_SFLSIZE(iml->sflist->sl_max)); 1833 /* decrease mem now to avoid the memleak warning */
1824 iml->sflist = NULL; 1834 atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc);
1835 call_rcu(&psf->rcu, ip_sf_socklist_reclaim);
1825 return err; 1836 return err;
1826} 1837}
1827 1838
1839
/*
 * ip_mc_socklist_reclaim - callback handed to call_rcu() for socket
 * multicast membership entries.
 *
 * Added by this patch (only new-file line numbers appear on these lines).
 * Recovers the enclosing struct ip_mc_socklist from its embedded rcu member
 * and frees it with kfree().  It does no socket memory accounting itself;
 * ip_mc_leave_group() in this diff subtracts sizeof(*iml) from
 * sk->sk_omem_alloc before queueing the callback.
 */
1840static void ip_mc_socklist_reclaim(struct rcu_head *rp)
1841{
1842 struct ip_mc_socklist *iml;
1843
1844 iml = container_of(rp, struct ip_mc_socklist, rcu);
1845 /* sk_omem_alloc should have been decreased by the caller*/
1846 kfree(iml);
1847}
1848
1849
1828/* 1850/*
1829 * Ask a socket to leave a group. 1851 * Ask a socket to leave a group.
1830 */ 1852 */
@@ -1854,12 +1876,14 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
1854 1876
1855 (void) ip_mc_leave_src(sk, iml, in_dev); 1877 (void) ip_mc_leave_src(sk, iml, in_dev);
1856 1878
1857 *imlp = iml->next; 1879 rcu_assign_pointer(*imlp, iml->next);
1858 1880
1859 if (in_dev) 1881 if (in_dev)
1860 ip_mc_dec_group(in_dev, group); 1882 ip_mc_dec_group(in_dev, group);
1861 rtnl_unlock(); 1883 rtnl_unlock();
1862 sock_kfree_s(sk, iml, sizeof(*iml)); 1884 /* decrease mem now to avoid the memleak warning */
1885 atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
1886 call_rcu(&iml->rcu, ip_mc_socklist_reclaim);
1863 return 0; 1887 return 0;
1864 } 1888 }
1865 if (!in_dev) 1889 if (!in_dev)
@@ -1974,9 +1998,12 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
1974 if (psl) { 1998 if (psl) {
1975 for (i=0; i<psl->sl_count; i++) 1999 for (i=0; i<psl->sl_count; i++)
1976 newpsl->sl_addr[i] = psl->sl_addr[i]; 2000 newpsl->sl_addr[i] = psl->sl_addr[i];
1977 sock_kfree_s(sk, psl, IP_SFLSIZE(psl->sl_max)); 2001 /* decrease mem now to avoid the memleak warning */
2002 atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
2003 call_rcu(&psl->rcu, ip_sf_socklist_reclaim);
1978 } 2004 }
1979 pmc->sflist = psl = newpsl; 2005 rcu_assign_pointer(pmc->sflist, newpsl);
2006 psl = newpsl;
1980 } 2007 }
1981 rv = 1; /* > 0 for insert logic below if sl_count is 0 */ 2008 rv = 1; /* > 0 for insert logic below if sl_count is 0 */
1982 for (i=0; i<psl->sl_count; i++) { 2009 for (i=0; i<psl->sl_count; i++) {
@@ -2072,11 +2099,13 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
2072 if (psl) { 2099 if (psl) {
2073 (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, 2100 (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
2074 psl->sl_count, psl->sl_addr, 0); 2101 psl->sl_count, psl->sl_addr, 0);
2075 sock_kfree_s(sk, psl, IP_SFLSIZE(psl->sl_max)); 2102 /* decrease mem now to avoid the memleak warning */
2103 atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
2104 call_rcu(&psl->rcu, ip_sf_socklist_reclaim);
2076 } else 2105 } else
2077 (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, 2106 (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
2078 0, NULL, 0); 2107 0, NULL, 0);
2079 pmc->sflist = newpsl; 2108 rcu_assign_pointer(pmc->sflist, newpsl);
2080 pmc->sfmode = msf->imsf_fmode; 2109 pmc->sfmode = msf->imsf_fmode;
2081 err = 0; 2110 err = 0;
2082done: 2111done:
@@ -2209,30 +2238,40 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
2209 struct ip_mc_socklist *pmc; 2238 struct ip_mc_socklist *pmc;
2210 struct ip_sf_socklist *psl; 2239 struct ip_sf_socklist *psl;
2211 int i; 2240 int i;
2241 int ret;
2212 2242
2243 ret = 1;
2213 if (!ipv4_is_multicast(loc_addr)) 2244 if (!ipv4_is_multicast(loc_addr))
2214 return 1; 2245 goto out;
2215 2246
2216 for (pmc=inet->mc_list; pmc; pmc=pmc->next) { 2247 rcu_read_lock();
2248 for (pmc=rcu_dereference(inet->mc_list); pmc; pmc=rcu_dereference(pmc->next)) {
2217 if (pmc->multi.imr_multiaddr.s_addr == loc_addr && 2249 if (pmc->multi.imr_multiaddr.s_addr == loc_addr &&
2218 pmc->multi.imr_ifindex == dif) 2250 pmc->multi.imr_ifindex == dif)
2219 break; 2251 break;
2220 } 2252 }
2253 ret = inet->mc_all;
2221 if (!pmc) 2254 if (!pmc)
2222 return inet->mc_all; 2255 goto unlock;
2223 psl = pmc->sflist; 2256 psl = pmc->sflist;
2257 ret = (pmc->sfmode == MCAST_EXCLUDE);
2224 if (!psl) 2258 if (!psl)
2225 return pmc->sfmode == MCAST_EXCLUDE; 2259 goto unlock;
2226 2260
2227 for (i=0; i<psl->sl_count; i++) { 2261 for (i=0; i<psl->sl_count; i++) {
2228 if (psl->sl_addr[i] == rmt_addr) 2262 if (psl->sl_addr[i] == rmt_addr)
2229 break; 2263 break;
2230 } 2264 }
2265 ret = 0;
2231 if (pmc->sfmode == MCAST_INCLUDE && i >= psl->sl_count) 2266 if (pmc->sfmode == MCAST_INCLUDE && i >= psl->sl_count)
2232 return 0; 2267 goto unlock;
2233 if (pmc->sfmode == MCAST_EXCLUDE && i < psl->sl_count) 2268 if (pmc->sfmode == MCAST_EXCLUDE && i < psl->sl_count)
2234 return 0; 2269 goto unlock;
2235 return 1; 2270 ret = 1;
2271unlock:
2272 rcu_read_unlock();
2273out:
2274 return ret;
2236} 2275}
2237 2276
2238/* 2277/*
@@ -2251,7 +2290,7 @@ void ip_mc_drop_socket(struct sock *sk)
2251 rtnl_lock(); 2290 rtnl_lock();
2252 while ((iml = inet->mc_list) != NULL) { 2291 while ((iml = inet->mc_list) != NULL) {
2253 struct in_device *in_dev; 2292 struct in_device *in_dev;
2254 inet->mc_list = iml->next; 2293 rcu_assign_pointer(inet->mc_list, iml->next);
2255 2294
2256 in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); 2295 in_dev = inetdev_by_index(net, iml->multi.imr_ifindex);
2257 (void) ip_mc_leave_src(sk, iml, in_dev); 2296 (void) ip_mc_leave_src(sk, iml, in_dev);
@@ -2259,7 +2298,9 @@ void ip_mc_drop_socket(struct sock *sk)
2259 ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); 2298 ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
2260 in_dev_put(in_dev); 2299 in_dev_put(in_dev);
2261 } 2300 }
2262 sock_kfree_s(sk, iml, sizeof(*iml)); 2301 /* decrease mem now to avoid the memleak warning */
2302 atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
2303 call_rcu(&iml->rcu, ip_mc_socklist_reclaim);
2263 } 2304 }
2264 rtnl_unlock(); 2305 rtnl_unlock();
2265} 2306}
@@ -2603,7 +2644,7 @@ static const struct file_operations igmp_mcf_seq_fops = {
2603 .release = seq_release_net, 2644 .release = seq_release_net,
2604}; 2645};
2605 2646
2606static int igmp_net_init(struct net *net) 2647static int __net_init igmp_net_init(struct net *net)
2607{ 2648{
2608 struct proc_dir_entry *pde; 2649 struct proc_dir_entry *pde;
2609 2650
@@ -2621,7 +2662,7 @@ out_igmp:
2621 return -ENOMEM; 2662 return -ENOMEM;
2622} 2663}
2623 2664
2624static void igmp_net_exit(struct net *net) 2665static void __net_exit igmp_net_exit(struct net *net)
2625{ 2666{
2626 proc_net_remove(net, "mcfilter"); 2667 proc_net_remove(net, "mcfilter");
2627 proc_net_remove(net, "igmp"); 2668 proc_net_remove(net, "igmp");
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index ee16475f8fc3..8da6429269dd 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -529,6 +529,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
529 syn_ack_recalc(req, thresh, max_retries, 529 syn_ack_recalc(req, thresh, max_retries,
530 queue->rskq_defer_accept, 530 queue->rskq_defer_accept,
531 &expire, &resend); 531 &expire, &resend);
532 if (req->rsk_ops->syn_ack_timeout)
533 req->rsk_ops->syn_ack_timeout(parent, req);
532 if (!expire && 534 if (!expire &&
533 (!resend || 535 (!resend ||
534 !req->rsk_ops->rtx_syn_ack(parent, req, NULL) || 536 !req->rsk_ops->rtx_syn_ack(parent, req, NULL) ||
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 86964b353c31..b59430bc041c 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -32,6 +32,8 @@
32#include <linux/netdevice.h> 32#include <linux/netdevice.h>
33#include <linux/jhash.h> 33#include <linux/jhash.h>
34#include <linux/random.h> 34#include <linux/random.h>
35#include <net/route.h>
36#include <net/dst.h>
35#include <net/sock.h> 37#include <net/sock.h>
36#include <net/ip.h> 38#include <net/ip.h>
37#include <net/icmp.h> 39#include <net/icmp.h>
@@ -205,11 +207,34 @@ static void ip_expire(unsigned long arg)
205 if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { 207 if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) {
206 struct sk_buff *head = qp->q.fragments; 208 struct sk_buff *head = qp->q.fragments;
207 209
208 /* Send an ICMP "Fragment Reassembly Timeout" message. */
209 rcu_read_lock(); 210 rcu_read_lock();
210 head->dev = dev_get_by_index_rcu(net, qp->iif); 211 head->dev = dev_get_by_index_rcu(net, qp->iif);
211 if (head->dev) 212 if (!head->dev)
212 icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); 213 goto out_rcu_unlock;
214
215 /*
216 * Only search router table for the head fragment,
217 * when defraging timeout at PRE_ROUTING HOOK.
218 */
219 if (qp->user == IP_DEFRAG_CONNTRACK_IN && !skb_dst(head)) {
220 const struct iphdr *iph = ip_hdr(head);
221 int err = ip_route_input(head, iph->daddr, iph->saddr,
222 iph->tos, head->dev);
223 if (unlikely(err))
224 goto out_rcu_unlock;
225
226 /*
227 * Only an end host needs to send an ICMP
228 * "Fragment Reassembly Timeout" message, per RFC792.
229 */
230 if (skb_rtable(head)->rt_type != RTN_LOCAL)
231 goto out_rcu_unlock;
232
233 }
234
235 /* Send an ICMP "Fragment Reassembly Timeout" message. */
236 icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
237out_rcu_unlock:
213 rcu_read_unlock(); 238 rcu_read_unlock();
214 } 239 }
215out: 240out:
@@ -646,7 +671,7 @@ static struct ctl_table ip4_frags_ctl_table[] = {
646 { } 671 { }
647}; 672};
648 673
649static int ip4_frags_ns_ctl_register(struct net *net) 674static int __net_init ip4_frags_ns_ctl_register(struct net *net)
650{ 675{
651 struct ctl_table *table; 676 struct ctl_table *table;
652 struct ctl_table_header *hdr; 677 struct ctl_table_header *hdr;
@@ -676,7 +701,7 @@ err_alloc:
676 return -ENOMEM; 701 return -ENOMEM;
677} 702}
678 703
679static void ip4_frags_ns_ctl_unregister(struct net *net) 704static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net)
680{ 705{
681 struct ctl_table *table; 706 struct ctl_table *table;
682 707
@@ -704,7 +729,7 @@ static inline void ip4_frags_ctl_register(void)
704} 729}
705#endif 730#endif
706 731
707static int ipv4_frags_init_net(struct net *net) 732static int __net_init ipv4_frags_init_net(struct net *net)
708{ 733{
709 /* 734 /*
710 * Fragment cache limits. We will commit 256K at one time. Should we 735 * Fragment cache limits. We will commit 256K at one time. Should we
@@ -726,7 +751,7 @@ static int ipv4_frags_init_net(struct net *net)
726 return ip4_frags_ns_ctl_register(net); 751 return ip4_frags_ns_ctl_register(net);
727} 752}
728 753
729static void ipv4_frags_exit_net(struct net *net) 754static void __net_exit ipv4_frags_exit_net(struct net *net)
730{ 755{
731 ip4_frags_ns_ctl_unregister(net); 756 ip4_frags_ns_ctl_unregister(net);
732 inet_frags_exit_net(&net->ipv4.frags, &ip4_frags); 757 inet_frags_exit_net(&net->ipv4.frags, &ip4_frags);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index f36ce156cac6..c0c5274d0271 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -793,7 +793,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
793 } 793 }
794 794
795 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { 795 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
796 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); 796 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
797 ip_rt_put(rt); 797 ip_rt_put(rt);
798 goto tx_error; 798 goto tx_error;
799 } 799 }
@@ -1307,7 +1307,7 @@ static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
1307 } 1307 }
1308} 1308}
1309 1309
1310static int ipgre_init_net(struct net *net) 1310static int __net_init ipgre_init_net(struct net *net)
1311{ 1311{
1312 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 1312 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1313 int err; 1313 int err;
@@ -1334,7 +1334,7 @@ err_alloc_dev:
1334 return err; 1334 return err;
1335} 1335}
1336 1336
1337static void ipgre_exit_net(struct net *net) 1337static void __net_exit ipgre_exit_net(struct net *net)
1338{ 1338{
1339 struct ipgre_net *ign; 1339 struct ipgre_net *ign;
1340 LIST_HEAD(list); 1340 LIST_HEAD(list);
@@ -1665,14 +1665,15 @@ static int __init ipgre_init(void)
1665 1665
1666 printk(KERN_INFO "GRE over IPv4 tunneling driver\n"); 1666 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1667 1667
1668 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1669 printk(KERN_INFO "ipgre init: can't add protocol\n");
1670 return -EAGAIN;
1671 }
1672
1673 err = register_pernet_device(&ipgre_net_ops); 1668 err = register_pernet_device(&ipgre_net_ops);
1674 if (err < 0) 1669 if (err < 0)
1675 goto gen_device_failed; 1670 return err;
1671
1672 err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE);
1673 if (err < 0) {
1674 printk(KERN_INFO "ipgre init: can't add protocol\n");
1675 goto add_proto_failed;
1676 }
1676 1677
1677 err = rtnl_link_register(&ipgre_link_ops); 1678 err = rtnl_link_register(&ipgre_link_ops);
1678 if (err < 0) 1679 if (err < 0)
@@ -1688,9 +1689,9 @@ out:
1688tap_ops_failed: 1689tap_ops_failed:
1689 rtnl_link_unregister(&ipgre_link_ops); 1690 rtnl_link_unregister(&ipgre_link_ops);
1690rtnl_link_failed: 1691rtnl_link_failed:
1691 unregister_pernet_device(&ipgre_net_ops);
1692gen_device_failed:
1693 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); 1692 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1693add_proto_failed:
1694 unregister_pernet_device(&ipgre_net_ops);
1694 goto out; 1695 goto out;
1695} 1696}
1696 1697
@@ -1698,9 +1699,9 @@ static void __exit ipgre_fini(void)
1698{ 1699{
1699 rtnl_link_unregister(&ipgre_tap_ops); 1700 rtnl_link_unregister(&ipgre_tap_ops);
1700 rtnl_link_unregister(&ipgre_link_ops); 1701 rtnl_link_unregister(&ipgre_link_ops);
1701 unregister_pernet_device(&ipgre_net_ops);
1702 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) 1702 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1703 printk(KERN_INFO "ipgre close: can't remove protocol\n"); 1703 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1704 unregister_pernet_device(&ipgre_net_ops);
1704} 1705}
1705 1706
1706module_init(ipgre_init); 1707module_init(ipgre_init);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index cafad9baff03..644dc43a55de 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -451,7 +451,8 @@ static int do_ip_setsockopt(struct sock *sk, int level,
451 (1<<IP_TTL) | (1<<IP_HDRINCL) | 451 (1<<IP_TTL) | (1<<IP_HDRINCL) |
452 (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) | 452 (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
453 (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | 453 (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
454 (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT))) || 454 (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) |
455 (1<<IP_MINTTL))) ||
455 optname == IP_MULTICAST_TTL || 456 optname == IP_MULTICAST_TTL ||
456 optname == IP_MULTICAST_ALL || 457 optname == IP_MULTICAST_ALL ||
457 optname == IP_MULTICAST_LOOP || 458 optname == IP_MULTICAST_LOOP ||
@@ -936,6 +937,14 @@ mc_msf_out:
936 inet->transparent = !!val; 937 inet->transparent = !!val;
937 break; 938 break;
938 939
940 case IP_MINTTL:
941 if (optlen < 1)
942 goto e_inval;
943 if (val < 0 || val > 255)
944 goto e_inval;
945 inet->min_ttl = val;
946 break;
947
939 default: 948 default:
940 err = -ENOPROTOOPT; 949 err = -ENOPROTOOPT;
941 break; 950 break;
@@ -1198,6 +1207,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1198 case IP_TRANSPARENT: 1207 case IP_TRANSPARENT:
1199 val = inet->transparent; 1208 val = inet->transparent;
1200 break; 1209 break;
1210 case IP_MINTTL:
1211 val = inet->min_ttl;
1212 break;
1201 default: 1213 default:
1202 release_sock(sk); 1214 release_sock(sk);
1203 return -ENOPROTOOPT; 1215 return -ENOPROTOOPT;
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 544ce0876f12..629067571f02 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -25,6 +25,7 @@
25 25
26static void ipcomp4_err(struct sk_buff *skb, u32 info) 26static void ipcomp4_err(struct sk_buff *skb, u32 info)
27{ 27{
28 struct net *net = dev_net(skb->dev);
28 __be32 spi; 29 __be32 spi;
29 struct iphdr *iph = (struct iphdr *)skb->data; 30 struct iphdr *iph = (struct iphdr *)skb->data;
30 struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); 31 struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
@@ -35,7 +36,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
35 return; 36 return;
36 37
37 spi = htonl(ntohs(ipch->cpi)); 38 spi = htonl(ntohs(ipch->cpi));
38 x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, 39 x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr,
39 spi, IPPROTO_COMP, AF_INET); 40 spi, IPPROTO_COMP, AF_INET);
40 if (!x) 41 if (!x)
41 return; 42 return;
@@ -47,9 +48,10 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
47/* We always hold one tunnel user reference to indicate a tunnel */ 48/* We always hold one tunnel user reference to indicate a tunnel */
48static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) 49static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
49{ 50{
51 struct net *net = xs_net(x);
50 struct xfrm_state *t; 52 struct xfrm_state *t;
51 53
52 t = xfrm_state_alloc(&init_net); 54 t = xfrm_state_alloc(net);
53 if (t == NULL) 55 if (t == NULL)
54 goto out; 56 goto out;
55 57
@@ -61,6 +63,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
61 t->props.mode = x->props.mode; 63 t->props.mode = x->props.mode;
62 t->props.saddr.a4 = x->props.saddr.a4; 64 t->props.saddr.a4 = x->props.saddr.a4;
63 t->props.flags = x->props.flags; 65 t->props.flags = x->props.flags;
66 memcpy(&t->mark, &x->mark, sizeof(t->mark));
64 67
65 if (xfrm_init_state(t)) 68 if (xfrm_init_state(t))
66 goto error; 69 goto error;
@@ -82,10 +85,12 @@ error:
82 */ 85 */
83static int ipcomp_tunnel_attach(struct xfrm_state *x) 86static int ipcomp_tunnel_attach(struct xfrm_state *x)
84{ 87{
88 struct net *net = xs_net(x);
85 int err = 0; 89 int err = 0;
86 struct xfrm_state *t; 90 struct xfrm_state *t;
91 u32 mark = x->mark.v & x->mark.m;
87 92
88 t = xfrm_state_lookup(&init_net, (xfrm_address_t *)&x->id.daddr.a4, 93 t = xfrm_state_lookup(net, mark, (xfrm_address_t *)&x->id.daddr.a4,
89 x->props.saddr.a4, IPPROTO_IPIP, AF_INET); 94 x->props.saddr.a4, IPPROTO_IPIP, AF_INET);
90 if (!t) { 95 if (!t) {
91 t = ipcomp_tunnel_create(x); 96 t = ipcomp_tunnel_create(x);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index eda04fed3379..2f302d3ac9a3 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -130,7 +130,6 @@ struct ipip_net {
130 struct net_device *fb_tunnel_dev; 130 struct net_device *fb_tunnel_dev;
131}; 131};
132 132
133static void ipip_fb_tunnel_init(struct net_device *dev);
134static void ipip_tunnel_init(struct net_device *dev); 133static void ipip_tunnel_init(struct net_device *dev);
135static void ipip_tunnel_setup(struct net_device *dev); 134static void ipip_tunnel_setup(struct net_device *dev);
136 135
@@ -730,7 +729,7 @@ static void ipip_tunnel_init(struct net_device *dev)
730 ipip_tunnel_bind_dev(dev); 729 ipip_tunnel_bind_dev(dev);
731} 730}
732 731
733static void ipip_fb_tunnel_init(struct net_device *dev) 732static void __net_init ipip_fb_tunnel_init(struct net_device *dev)
734{ 733{
735 struct ip_tunnel *tunnel = netdev_priv(dev); 734 struct ip_tunnel *tunnel = netdev_priv(dev);
736 struct iphdr *iph = &tunnel->parms.iph; 735 struct iphdr *iph = &tunnel->parms.iph;
@@ -773,7 +772,7 @@ static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
773 } 772 }
774} 773}
775 774
776static int ipip_init_net(struct net *net) 775static int __net_init ipip_init_net(struct net *net)
777{ 776{
778 struct ipip_net *ipn = net_generic(net, ipip_net_id); 777 struct ipip_net *ipn = net_generic(net, ipip_net_id);
779 int err; 778 int err;
@@ -806,7 +805,7 @@ err_alloc_dev:
806 return err; 805 return err;
807} 806}
808 807
809static void ipip_exit_net(struct net *net) 808static void __net_exit ipip_exit_net(struct net *net)
810{ 809{
811 struct ipip_net *ipn = net_generic(net, ipip_net_id); 810 struct ipip_net *ipn = net_generic(net, ipip_net_id);
812 LIST_HEAD(list); 811 LIST_HEAD(list);
@@ -831,15 +830,14 @@ static int __init ipip_init(void)
831 830
832 printk(banner); 831 printk(banner);
833 832
834 if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) { 833 err = register_pernet_device(&ipip_net_ops);
834 if (err < 0)
835 return err;
836 err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
837 if (err < 0) {
838 unregister_pernet_device(&ipip_net_ops);
835 printk(KERN_INFO "ipip init: can't register tunnel\n"); 839 printk(KERN_INFO "ipip init: can't register tunnel\n");
836 return -EAGAIN;
837 } 840 }
838
839 err = register_pernet_device(&ipip_net_ops);
840 if (err)
841 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
842
843 return err; 841 return err;
844} 842}
845 843
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 54596f73eff5..8582e12e4a62 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1163,9 +1163,6 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
1163 int ct; 1163 int ct;
1164 LIST_HEAD(list); 1164 LIST_HEAD(list);
1165 1165
1166 if (!net_eq(dev_net(dev), net))
1167 return NOTIFY_DONE;
1168
1169 if (event != NETDEV_UNREGISTER) 1166 if (event != NETDEV_UNREGISTER)
1170 return NOTIFY_DONE; 1167 return NOTIFY_DONE;
1171 v = &net->ipv4.vif_table[0]; 1168 v = &net->ipv4.vif_table[0];
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 90203e1b9187..57098dcda294 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -27,6 +27,7 @@
27 27
28#include <linux/netfilter/x_tables.h> 28#include <linux/netfilter/x_tables.h>
29#include <linux/netfilter_arp/arp_tables.h> 29#include <linux/netfilter_arp/arp_tables.h>
30#include "../../netfilter/xt_repldata.h"
30 31
31MODULE_LICENSE("GPL"); 32MODULE_LICENSE("GPL");
32MODULE_AUTHOR("David S. Miller <davem@redhat.com>"); 33MODULE_AUTHOR("David S. Miller <davem@redhat.com>");
@@ -58,6 +59,12 @@ do { \
58#define ARP_NF_ASSERT(x) 59#define ARP_NF_ASSERT(x)
59#endif 60#endif
60 61
62void *arpt_alloc_initial_table(const struct xt_table *info)
63{
64 return xt_alloc_initial_table(arpt, ARPT);
65}
66EXPORT_SYMBOL_GPL(arpt_alloc_initial_table);
67
61static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, 68static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
62 const char *hdr_addr, int len) 69 const char *hdr_addr, int len)
63{ 70{
@@ -226,7 +233,14 @@ arpt_error(struct sk_buff *skb, const struct xt_target_param *par)
226 return NF_DROP; 233 return NF_DROP;
227} 234}
228 235
229static inline struct arpt_entry *get_entry(void *base, unsigned int offset) 236static inline const struct arpt_entry_target *
237arpt_get_target_c(const struct arpt_entry *e)
238{
239 return arpt_get_target((struct arpt_entry *)e);
240}
241
242static inline struct arpt_entry *
243get_entry(const void *base, unsigned int offset)
230{ 244{
231 return (struct arpt_entry *)(base + offset); 245 return (struct arpt_entry *)(base + offset);
232} 246}
@@ -273,7 +287,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
273 287
274 arp = arp_hdr(skb); 288 arp = arp_hdr(skb);
275 do { 289 do {
276 struct arpt_entry_target *t; 290 const struct arpt_entry_target *t;
277 int hdr_len; 291 int hdr_len;
278 292
279 if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { 293 if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
@@ -285,7 +299,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
285 (2 * skb->dev->addr_len); 299 (2 * skb->dev->addr_len);
286 ADD_COUNTER(e->counters, hdr_len, 1); 300 ADD_COUNTER(e->counters, hdr_len, 1);
287 301
288 t = arpt_get_target(e); 302 t = arpt_get_target_c(e);
289 303
290 /* Standard target? */ 304 /* Standard target? */
291 if (!t->u.kernel.target->target) { 305 if (!t->u.kernel.target->target) {
@@ -351,7 +365,7 @@ static inline bool unconditional(const struct arpt_arp *arp)
351/* Figures out from what hook each rule can be called: returns 0 if 365/* Figures out from what hook each rule can be called: returns 0 if
352 * there are loops. Puts hook bitmask in comefrom. 366 * there are loops. Puts hook bitmask in comefrom.
353 */ 367 */
354static int mark_source_chains(struct xt_table_info *newinfo, 368static int mark_source_chains(const struct xt_table_info *newinfo,
355 unsigned int valid_hooks, void *entry0) 369 unsigned int valid_hooks, void *entry0)
356{ 370{
357 unsigned int hook; 371 unsigned int hook;
@@ -372,7 +386,7 @@ static int mark_source_chains(struct xt_table_info *newinfo,
372 386
373 for (;;) { 387 for (;;) {
374 const struct arpt_standard_target *t 388 const struct arpt_standard_target *t
375 = (void *)arpt_get_target(e); 389 = (void *)arpt_get_target_c(e);
376 int visited = e->comefrom & (1 << hook); 390 int visited = e->comefrom & (1 << hook);
377 391
378 if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) { 392 if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) {
@@ -456,7 +470,7 @@ static int mark_source_chains(struct xt_table_info *newinfo,
456 return 1; 470 return 1;
457} 471}
458 472
459static inline int check_entry(struct arpt_entry *e, const char *name) 473static inline int check_entry(const struct arpt_entry *e, const char *name)
460{ 474{
461 const struct arpt_entry_target *t; 475 const struct arpt_entry_target *t;
462 476
@@ -468,7 +482,7 @@ static inline int check_entry(struct arpt_entry *e, const char *name)
468 if (e->target_offset + sizeof(struct arpt_entry_target) > e->next_offset) 482 if (e->target_offset + sizeof(struct arpt_entry_target) > e->next_offset)
469 return -EINVAL; 483 return -EINVAL;
470 484
471 t = arpt_get_target(e); 485 t = arpt_get_target_c(e);
472 if (e->target_offset + t->u.target_size > e->next_offset) 486 if (e->target_offset + t->u.target_size > e->next_offset)
473 return -EINVAL; 487 return -EINVAL;
474 488
@@ -498,8 +512,7 @@ static inline int check_target(struct arpt_entry *e, const char *name)
498} 512}
499 513
500static inline int 514static inline int
501find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, 515find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
502 unsigned int *i)
503{ 516{
504 struct arpt_entry_target *t; 517 struct arpt_entry_target *t;
505 struct xt_target *target; 518 struct xt_target *target;
@@ -524,8 +537,6 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size,
524 ret = check_target(e, name); 537 ret = check_target(e, name);
525 if (ret) 538 if (ret)
526 goto err; 539 goto err;
527
528 (*i)++;
529 return 0; 540 return 0;
530err: 541err:
531 module_put(t->u.kernel.target->me); 542 module_put(t->u.kernel.target->me);
@@ -533,14 +544,14 @@ out:
533 return ret; 544 return ret;
534} 545}
535 546
536static bool check_underflow(struct arpt_entry *e) 547static bool check_underflow(const struct arpt_entry *e)
537{ 548{
538 const struct arpt_entry_target *t; 549 const struct arpt_entry_target *t;
539 unsigned int verdict; 550 unsigned int verdict;
540 551
541 if (!unconditional(&e->arp)) 552 if (!unconditional(&e->arp))
542 return false; 553 return false;
543 t = arpt_get_target(e); 554 t = arpt_get_target_c(e);
544 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) 555 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
545 return false; 556 return false;
546 verdict = ((struct arpt_standard_target *)t)->verdict; 557 verdict = ((struct arpt_standard_target *)t)->verdict;
@@ -550,12 +561,11 @@ static bool check_underflow(struct arpt_entry *e)
550 561
551static inline int check_entry_size_and_hooks(struct arpt_entry *e, 562static inline int check_entry_size_and_hooks(struct arpt_entry *e,
552 struct xt_table_info *newinfo, 563 struct xt_table_info *newinfo,
553 unsigned char *base, 564 const unsigned char *base,
554 unsigned char *limit, 565 const unsigned char *limit,
555 const unsigned int *hook_entries, 566 const unsigned int *hook_entries,
556 const unsigned int *underflows, 567 const unsigned int *underflows,
557 unsigned int valid_hooks, 568 unsigned int valid_hooks)
558 unsigned int *i)
559{ 569{
560 unsigned int h; 570 unsigned int h;
561 571
@@ -592,19 +602,14 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
592 /* Clear counters and comefrom */ 602 /* Clear counters and comefrom */
593 e->counters = ((struct xt_counters) { 0, 0 }); 603 e->counters = ((struct xt_counters) { 0, 0 });
594 e->comefrom = 0; 604 e->comefrom = 0;
595
596 (*i)++;
597 return 0; 605 return 0;
598} 606}
599 607
600static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i) 608static inline void cleanup_entry(struct arpt_entry *e)
601{ 609{
602 struct xt_tgdtor_param par; 610 struct xt_tgdtor_param par;
603 struct arpt_entry_target *t; 611 struct arpt_entry_target *t;
604 612
605 if (i && (*i)-- == 0)
606 return 1;
607
608 t = arpt_get_target(e); 613 t = arpt_get_target(e);
609 par.target = t->u.kernel.target; 614 par.target = t->u.kernel.target;
610 par.targinfo = t->data; 615 par.targinfo = t->data;
@@ -612,26 +617,20 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i)
612 if (par.target->destroy != NULL) 617 if (par.target->destroy != NULL)
613 par.target->destroy(&par); 618 par.target->destroy(&par);
614 module_put(par.target->me); 619 module_put(par.target->me);
615 return 0;
616} 620}
617 621
618/* Checks and translates the user-supplied table segment (held in 622/* Checks and translates the user-supplied table segment (held in
619 * newinfo). 623 * newinfo).
620 */ 624 */
621static int translate_table(const char *name, 625static int translate_table(struct xt_table_info *newinfo, void *entry0,
622 unsigned int valid_hooks, 626 const struct arpt_replace *repl)
623 struct xt_table_info *newinfo,
624 void *entry0,
625 unsigned int size,
626 unsigned int number,
627 const unsigned int *hook_entries,
628 const unsigned int *underflows)
629{ 627{
628 struct arpt_entry *iter;
630 unsigned int i; 629 unsigned int i;
631 int ret; 630 int ret = 0;
632 631
633 newinfo->size = size; 632 newinfo->size = repl->size;
634 newinfo->number = number; 633 newinfo->number = repl->num_entries;
635 634
636 /* Init all hooks to impossible value. */ 635 /* Init all hooks to impossible value. */
637 for (i = 0; i < NF_ARP_NUMHOOKS; i++) { 636 for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
@@ -643,52 +642,61 @@ static int translate_table(const char *name,
643 i = 0; 642 i = 0;
644 643
645 /* Walk through entries, checking offsets. */ 644 /* Walk through entries, checking offsets. */
646 ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size, 645 xt_entry_foreach(iter, entry0, newinfo->size) {
647 check_entry_size_and_hooks, 646 ret = check_entry_size_and_hooks(iter, newinfo, entry0,
648 newinfo, 647 entry0 + repl->size, repl->hook_entry, repl->underflow,
649 entry0, 648 repl->valid_hooks);
650 entry0 + size, 649 if (ret != 0)
651 hook_entries, underflows, valid_hooks, &i); 650 break;
651 ++i;
652 }
652 duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); 653 duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
653 if (ret != 0) 654 if (ret != 0)
654 return ret; 655 return ret;
655 656
656 if (i != number) { 657 if (i != repl->num_entries) {
657 duprintf("translate_table: %u not %u entries\n", 658 duprintf("translate_table: %u not %u entries\n",
658 i, number); 659 i, repl->num_entries);
659 return -EINVAL; 660 return -EINVAL;
660 } 661 }
661 662
662 /* Check hooks all assigned */ 663 /* Check hooks all assigned */
663 for (i = 0; i < NF_ARP_NUMHOOKS; i++) { 664 for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
664 /* Only hooks which are valid */ 665 /* Only hooks which are valid */
665 if (!(valid_hooks & (1 << i))) 666 if (!(repl->valid_hooks & (1 << i)))
666 continue; 667 continue;
667 if (newinfo->hook_entry[i] == 0xFFFFFFFF) { 668 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
668 duprintf("Invalid hook entry %u %u\n", 669 duprintf("Invalid hook entry %u %u\n",
669 i, hook_entries[i]); 670 i, repl->hook_entry[i]);
670 return -EINVAL; 671 return -EINVAL;
671 } 672 }
672 if (newinfo->underflow[i] == 0xFFFFFFFF) { 673 if (newinfo->underflow[i] == 0xFFFFFFFF) {
673 duprintf("Invalid underflow %u %u\n", 674 duprintf("Invalid underflow %u %u\n",
674 i, underflows[i]); 675 i, repl->underflow[i]);
675 return -EINVAL; 676 return -EINVAL;
676 } 677 }
677 } 678 }
678 679
679 if (!mark_source_chains(newinfo, valid_hooks, entry0)) { 680 if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) {
680 duprintf("Looping hook\n"); 681 duprintf("Looping hook\n");
681 return -ELOOP; 682 return -ELOOP;
682 } 683 }
683 684
684 /* Finally, each sanity check must pass */ 685 /* Finally, each sanity check must pass */
685 i = 0; 686 i = 0;
686 ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size, 687 xt_entry_foreach(iter, entry0, newinfo->size) {
687 find_check_entry, name, size, &i); 688 ret = find_check_entry(iter, repl->name, repl->size);
689 if (ret != 0)
690 break;
691 ++i;
692 }
688 693
689 if (ret != 0) { 694 if (ret != 0) {
690 ARPT_ENTRY_ITERATE(entry0, newinfo->size, 695 xt_entry_foreach(iter, entry0, newinfo->size) {
691 cleanup_entry, &i); 696 if (i-- == 0)
697 break;
698 cleanup_entry(iter);
699 }
692 return ret; 700 return ret;
693 } 701 }
694 702
@@ -701,30 +709,10 @@ static int translate_table(const char *name,
701 return ret; 709 return ret;
702} 710}
703 711
704/* Gets counters. */
705static inline int add_entry_to_counter(const struct arpt_entry *e,
706 struct xt_counters total[],
707 unsigned int *i)
708{
709 ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
710
711 (*i)++;
712 return 0;
713}
714
715static inline int set_entry_to_counter(const struct arpt_entry *e,
716 struct xt_counters total[],
717 unsigned int *i)
718{
719 SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
720
721 (*i)++;
722 return 0;
723}
724
725static void get_counters(const struct xt_table_info *t, 712static void get_counters(const struct xt_table_info *t,
726 struct xt_counters counters[]) 713 struct xt_counters counters[])
727{ 714{
715 struct arpt_entry *iter;
728 unsigned int cpu; 716 unsigned int cpu;
729 unsigned int i; 717 unsigned int i;
730 unsigned int curcpu; 718 unsigned int curcpu;
@@ -740,32 +728,32 @@ static void get_counters(const struct xt_table_info *t,
740 curcpu = smp_processor_id(); 728 curcpu = smp_processor_id();
741 729
742 i = 0; 730 i = 0;
743 ARPT_ENTRY_ITERATE(t->entries[curcpu], 731 xt_entry_foreach(iter, t->entries[curcpu], t->size) {
744 t->size, 732 SET_COUNTER(counters[i], iter->counters.bcnt,
745 set_entry_to_counter, 733 iter->counters.pcnt);
746 counters, 734 ++i;
747 &i); 735 }
748 736
749 for_each_possible_cpu(cpu) { 737 for_each_possible_cpu(cpu) {
750 if (cpu == curcpu) 738 if (cpu == curcpu)
751 continue; 739 continue;
752 i = 0; 740 i = 0;
753 xt_info_wrlock(cpu); 741 xt_info_wrlock(cpu);
754 ARPT_ENTRY_ITERATE(t->entries[cpu], 742 xt_entry_foreach(iter, t->entries[cpu], t->size) {
755 t->size, 743 ADD_COUNTER(counters[i], iter->counters.bcnt,
756 add_entry_to_counter, 744 iter->counters.pcnt);
757 counters, 745 ++i;
758 &i); 746 }
759 xt_info_wrunlock(cpu); 747 xt_info_wrunlock(cpu);
760 } 748 }
761 local_bh_enable(); 749 local_bh_enable();
762} 750}
763 751
764static struct xt_counters *alloc_counters(struct xt_table *table) 752static struct xt_counters *alloc_counters(const struct xt_table *table)
765{ 753{
766 unsigned int countersize; 754 unsigned int countersize;
767 struct xt_counters *counters; 755 struct xt_counters *counters;
768 struct xt_table_info *private = table->private; 756 const struct xt_table_info *private = table->private;
769 757
770 /* We need atomic snapshot of counters: rest doesn't change 758 /* We need atomic snapshot of counters: rest doesn't change
771 * (other than comefrom, which userspace doesn't care 759 * (other than comefrom, which userspace doesn't care
@@ -783,11 +771,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table)
783} 771}
784 772
785static int copy_entries_to_user(unsigned int total_size, 773static int copy_entries_to_user(unsigned int total_size,
786 struct xt_table *table, 774 const struct xt_table *table,
787 void __user *userptr) 775 void __user *userptr)
788{ 776{
789 unsigned int off, num; 777 unsigned int off, num;
790 struct arpt_entry *e; 778 const struct arpt_entry *e;
791 struct xt_counters *counters; 779 struct xt_counters *counters;
792 struct xt_table_info *private = table->private; 780 struct xt_table_info *private = table->private;
793 int ret = 0; 781 int ret = 0;
@@ -807,7 +795,7 @@ static int copy_entries_to_user(unsigned int total_size,
807 /* FIXME: use iterator macros --RR */ 795 /* FIXME: use iterator macros --RR */
808 /* ... then go back and fix counters and names */ 796 /* ... then go back and fix counters and names */
809 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ 797 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
810 struct arpt_entry_target *t; 798 const struct arpt_entry_target *t;
811 799
812 e = (struct arpt_entry *)(loc_cpu_entry + off); 800 e = (struct arpt_entry *)(loc_cpu_entry + off);
813 if (copy_to_user(userptr + off 801 if (copy_to_user(userptr + off
@@ -818,7 +806,7 @@ static int copy_entries_to_user(unsigned int total_size,
818 goto free_counters; 806 goto free_counters;
819 } 807 }
820 808
821 t = arpt_get_target(e); 809 t = arpt_get_target_c(e);
822 if (copy_to_user(userptr + off + e->target_offset 810 if (copy_to_user(userptr + off + e->target_offset
823 + offsetof(struct arpt_entry_target, 811 + offsetof(struct arpt_entry_target,
824 u.user.name), 812 u.user.name),
@@ -835,7 +823,7 @@ static int copy_entries_to_user(unsigned int total_size,
835} 823}
836 824
837#ifdef CONFIG_COMPAT 825#ifdef CONFIG_COMPAT
838static void compat_standard_from_user(void *dst, void *src) 826static void compat_standard_from_user(void *dst, const void *src)
839{ 827{
840 int v = *(compat_int_t *)src; 828 int v = *(compat_int_t *)src;
841 829
@@ -844,7 +832,7 @@ static void compat_standard_from_user(void *dst, void *src)
844 memcpy(dst, &v, sizeof(v)); 832 memcpy(dst, &v, sizeof(v));
845} 833}
846 834
847static int compat_standard_to_user(void __user *dst, void *src) 835static int compat_standard_to_user(void __user *dst, const void *src)
848{ 836{
849 compat_int_t cv = *(int *)src; 837 compat_int_t cv = *(int *)src;
850 838
@@ -853,18 +841,18 @@ static int compat_standard_to_user(void __user *dst, void *src)
853 return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; 841 return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
854} 842}
855 843
856static int compat_calc_entry(struct arpt_entry *e, 844static int compat_calc_entry(const struct arpt_entry *e,
857 const struct xt_table_info *info, 845 const struct xt_table_info *info,
858 void *base, struct xt_table_info *newinfo) 846 const void *base, struct xt_table_info *newinfo)
859{ 847{
860 struct arpt_entry_target *t; 848 const struct arpt_entry_target *t;
861 unsigned int entry_offset; 849 unsigned int entry_offset;
862 int off, i, ret; 850 int off, i, ret;
863 851
864 off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); 852 off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
865 entry_offset = (void *)e - base; 853 entry_offset = (void *)e - base;
866 854
867 t = arpt_get_target(e); 855 t = arpt_get_target_c(e);
868 off += xt_compat_target_offset(t->u.kernel.target); 856 off += xt_compat_target_offset(t->u.kernel.target);
869 newinfo->size -= off; 857 newinfo->size -= off;
870 ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off); 858 ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off);
@@ -885,7 +873,9 @@ static int compat_calc_entry(struct arpt_entry *e,
885static int compat_table_info(const struct xt_table_info *info, 873static int compat_table_info(const struct xt_table_info *info,
886 struct xt_table_info *newinfo) 874 struct xt_table_info *newinfo)
887{ 875{
876 struct arpt_entry *iter;
888 void *loc_cpu_entry; 877 void *loc_cpu_entry;
878 int ret;
889 879
890 if (!newinfo || !info) 880 if (!newinfo || !info)
891 return -EINVAL; 881 return -EINVAL;
@@ -894,13 +884,17 @@ static int compat_table_info(const struct xt_table_info *info,
894 memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); 884 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
895 newinfo->initial_entries = 0; 885 newinfo->initial_entries = 0;
896 loc_cpu_entry = info->entries[raw_smp_processor_id()]; 886 loc_cpu_entry = info->entries[raw_smp_processor_id()];
897 return ARPT_ENTRY_ITERATE(loc_cpu_entry, info->size, 887 xt_entry_foreach(iter, loc_cpu_entry, info->size) {
898 compat_calc_entry, info, loc_cpu_entry, 888 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
899 newinfo); 889 if (ret != 0)
890 return ret;
891 }
892 return 0;
900} 893}
901#endif 894#endif
902 895
903static int get_info(struct net *net, void __user *user, int *len, int compat) 896static int get_info(struct net *net, void __user *user,
897 const int *len, int compat)
904{ 898{
905 char name[ARPT_TABLE_MAXNAMELEN]; 899 char name[ARPT_TABLE_MAXNAMELEN];
906 struct xt_table *t; 900 struct xt_table *t;
@@ -959,7 +953,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat)
959} 953}
960 954
961static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, 955static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
962 int *len) 956 const int *len)
963{ 957{
964 int ret; 958 int ret;
965 struct arpt_get_entries get; 959 struct arpt_get_entries get;
@@ -1010,6 +1004,7 @@ static int __do_replace(struct net *net, const char *name,
1010 struct xt_table_info *oldinfo; 1004 struct xt_table_info *oldinfo;
1011 struct xt_counters *counters; 1005 struct xt_counters *counters;
1012 void *loc_cpu_old_entry; 1006 void *loc_cpu_old_entry;
1007 struct arpt_entry *iter;
1013 1008
1014 ret = 0; 1009 ret = 0;
1015 counters = vmalloc_node(num_counters * sizeof(struct xt_counters), 1010 counters = vmalloc_node(num_counters * sizeof(struct xt_counters),
@@ -1053,8 +1048,8 @@ static int __do_replace(struct net *net, const char *name,
1053 1048
1054 /* Decrease module usage counts and free resource */ 1049 /* Decrease module usage counts and free resource */
1055 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; 1050 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
1056 ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, 1051 xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
1057 NULL); 1052 cleanup_entry(iter);
1058 1053
1059 xt_free_table_info(oldinfo); 1054 xt_free_table_info(oldinfo);
1060 if (copy_to_user(counters_ptr, counters, 1055 if (copy_to_user(counters_ptr, counters,
@@ -1073,12 +1068,14 @@ static int __do_replace(struct net *net, const char *name,
1073 return ret; 1068 return ret;
1074} 1069}
1075 1070
1076static int do_replace(struct net *net, void __user *user, unsigned int len) 1071static int do_replace(struct net *net, const void __user *user,
1072 unsigned int len)
1077{ 1073{
1078 int ret; 1074 int ret;
1079 struct arpt_replace tmp; 1075 struct arpt_replace tmp;
1080 struct xt_table_info *newinfo; 1076 struct xt_table_info *newinfo;
1081 void *loc_cpu_entry; 1077 void *loc_cpu_entry;
1078 struct arpt_entry *iter;
1082 1079
1083 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 1080 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1084 return -EFAULT; 1081 return -EFAULT;
@@ -1099,9 +1096,7 @@ static int do_replace(struct net *net, void __user *user, unsigned int len)
1099 goto free_newinfo; 1096 goto free_newinfo;
1100 } 1097 }
1101 1098
1102 ret = translate_table(tmp.name, tmp.valid_hooks, 1099 ret = translate_table(newinfo, loc_cpu_entry, &tmp);
1103 newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
1104 tmp.hook_entry, tmp.underflow);
1105 if (ret != 0) 1100 if (ret != 0)
1106 goto free_newinfo; 1101 goto free_newinfo;
1107 1102
@@ -1114,27 +1109,15 @@ static int do_replace(struct net *net, void __user *user, unsigned int len)
1114 return 0; 1109 return 0;
1115 1110
1116 free_newinfo_untrans: 1111 free_newinfo_untrans:
1117 ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); 1112 xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
1113 cleanup_entry(iter);
1118 free_newinfo: 1114 free_newinfo:
1119 xt_free_table_info(newinfo); 1115 xt_free_table_info(newinfo);
1120 return ret; 1116 return ret;
1121} 1117}
1122 1118
1123/* We're lazy, and add to the first CPU; overflow works its fey magic 1119static int do_add_counters(struct net *net, const void __user *user,
1124 * and everything is OK. */ 1120 unsigned int len, int compat)
1125static int
1126add_counter_to_entry(struct arpt_entry *e,
1127 const struct xt_counters addme[],
1128 unsigned int *i)
1129{
1130 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1131
1132 (*i)++;
1133 return 0;
1134}
1135
1136static int do_add_counters(struct net *net, void __user *user, unsigned int len,
1137 int compat)
1138{ 1121{
1139 unsigned int i, curcpu; 1122 unsigned int i, curcpu;
1140 struct xt_counters_info tmp; 1123 struct xt_counters_info tmp;
@@ -1147,6 +1130,7 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
1147 const struct xt_table_info *private; 1130 const struct xt_table_info *private;
1148 int ret = 0; 1131 int ret = 0;
1149 void *loc_cpu_entry; 1132 void *loc_cpu_entry;
1133 struct arpt_entry *iter;
1150#ifdef CONFIG_COMPAT 1134#ifdef CONFIG_COMPAT
1151 struct compat_xt_counters_info compat_tmp; 1135 struct compat_xt_counters_info compat_tmp;
1152 1136
@@ -1204,11 +1188,10 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
1204 curcpu = smp_processor_id(); 1188 curcpu = smp_processor_id();
1205 loc_cpu_entry = private->entries[curcpu]; 1189 loc_cpu_entry = private->entries[curcpu];
1206 xt_info_wrlock(curcpu); 1190 xt_info_wrlock(curcpu);
1207 ARPT_ENTRY_ITERATE(loc_cpu_entry, 1191 xt_entry_foreach(iter, loc_cpu_entry, private->size) {
1208 private->size, 1192 ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
1209 add_counter_to_entry, 1193 ++i;
1210 paddc, 1194 }
1211 &i);
1212 xt_info_wrunlock(curcpu); 1195 xt_info_wrunlock(curcpu);
1213 unlock_up_free: 1196 unlock_up_free:
1214 local_bh_enable(); 1197 local_bh_enable();
@@ -1221,28 +1204,22 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
1221} 1204}
1222 1205
1223#ifdef CONFIG_COMPAT 1206#ifdef CONFIG_COMPAT
1224static inline int 1207static inline void compat_release_entry(struct compat_arpt_entry *e)
1225compat_release_entry(struct compat_arpt_entry *e, unsigned int *i)
1226{ 1208{
1227 struct arpt_entry_target *t; 1209 struct arpt_entry_target *t;
1228 1210
1229 if (i && (*i)-- == 0)
1230 return 1;
1231
1232 t = compat_arpt_get_target(e); 1211 t = compat_arpt_get_target(e);
1233 module_put(t->u.kernel.target->me); 1212 module_put(t->u.kernel.target->me);
1234 return 0;
1235} 1213}
1236 1214
1237static inline int 1215static inline int
1238check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, 1216check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
1239 struct xt_table_info *newinfo, 1217 struct xt_table_info *newinfo,
1240 unsigned int *size, 1218 unsigned int *size,
1241 unsigned char *base, 1219 const unsigned char *base,
1242 unsigned char *limit, 1220 const unsigned char *limit,
1243 unsigned int *hook_entries, 1221 const unsigned int *hook_entries,
1244 unsigned int *underflows, 1222 const unsigned int *underflows,
1245 unsigned int *i,
1246 const char *name) 1223 const char *name)
1247{ 1224{
1248 struct arpt_entry_target *t; 1225 struct arpt_entry_target *t;
@@ -1302,8 +1279,6 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
1302 /* Clear counters and comefrom */ 1279 /* Clear counters and comefrom */
1303 memset(&e->counters, 0, sizeof(e->counters)); 1280 memset(&e->counters, 0, sizeof(e->counters));
1304 e->comefrom = 0; 1281 e->comefrom = 0;
1305
1306 (*i)++;
1307 return 0; 1282 return 0;
1308 1283
1309release_target: 1284release_target:
@@ -1347,19 +1322,6 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
1347 return ret; 1322 return ret;
1348} 1323}
1349 1324
1350static inline int compat_check_entry(struct arpt_entry *e, const char *name,
1351 unsigned int *i)
1352{
1353 int ret;
1354
1355 ret = check_target(e, name);
1356 if (ret)
1357 return ret;
1358
1359 (*i)++;
1360 return 0;
1361}
1362
1363static int translate_compat_table(const char *name, 1325static int translate_compat_table(const char *name,
1364 unsigned int valid_hooks, 1326 unsigned int valid_hooks,
1365 struct xt_table_info **pinfo, 1327 struct xt_table_info **pinfo,
@@ -1372,8 +1334,10 @@ static int translate_compat_table(const char *name,
1372 unsigned int i, j; 1334 unsigned int i, j;
1373 struct xt_table_info *newinfo, *info; 1335 struct xt_table_info *newinfo, *info;
1374 void *pos, *entry0, *entry1; 1336 void *pos, *entry0, *entry1;
1337 struct compat_arpt_entry *iter0;
1338 struct arpt_entry *iter1;
1375 unsigned int size; 1339 unsigned int size;
1376 int ret; 1340 int ret = 0;
1377 1341
1378 info = *pinfo; 1342 info = *pinfo;
1379 entry0 = *pentry0; 1343 entry0 = *pentry0;
@@ -1390,13 +1354,14 @@ static int translate_compat_table(const char *name,
1390 j = 0; 1354 j = 0;
1391 xt_compat_lock(NFPROTO_ARP); 1355 xt_compat_lock(NFPROTO_ARP);
1392 /* Walk through entries, checking offsets. */ 1356 /* Walk through entries, checking offsets. */
1393 ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, 1357 xt_entry_foreach(iter0, entry0, total_size) {
1394 check_compat_entry_size_and_hooks, 1358 ret = check_compat_entry_size_and_hooks(iter0, info, &size,
1395 info, &size, entry0, 1359 entry0, entry0 + total_size, hook_entries, underflows,
1396 entry0 + total_size, 1360 name);
1397 hook_entries, underflows, &j, name); 1361 if (ret != 0)
1398 if (ret != 0) 1362 goto out_unlock;
1399 goto out_unlock; 1363 ++j;
1364 }
1400 1365
1401 ret = -EINVAL; 1366 ret = -EINVAL;
1402 if (j != number) { 1367 if (j != number) {
@@ -1435,9 +1400,12 @@ static int translate_compat_table(const char *name,
1435 entry1 = newinfo->entries[raw_smp_processor_id()]; 1400 entry1 = newinfo->entries[raw_smp_processor_id()];
1436 pos = entry1; 1401 pos = entry1;
1437 size = total_size; 1402 size = total_size;
1438 ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, 1403 xt_entry_foreach(iter0, entry0, total_size) {
1439 compat_copy_entry_from_user, 1404 ret = compat_copy_entry_from_user(iter0, &pos,
1440 &pos, &size, name, newinfo, entry1); 1405 &size, name, newinfo, entry1);
1406 if (ret != 0)
1407 break;
1408 }
1441 xt_compat_flush_offsets(NFPROTO_ARP); 1409 xt_compat_flush_offsets(NFPROTO_ARP);
1442 xt_compat_unlock(NFPROTO_ARP); 1410 xt_compat_unlock(NFPROTO_ARP);
1443 if (ret) 1411 if (ret)
@@ -1448,13 +1416,32 @@ static int translate_compat_table(const char *name,
1448 goto free_newinfo; 1416 goto free_newinfo;
1449 1417
1450 i = 0; 1418 i = 0;
1451 ret = ARPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, 1419 xt_entry_foreach(iter1, entry1, newinfo->size) {
1452 name, &i); 1420 ret = check_target(iter1, name);
1421 if (ret != 0)
1422 break;
1423 ++i;
1424 }
1453 if (ret) { 1425 if (ret) {
1426 /*
1427 * The first i matches need cleanup_entry (calls ->destroy)
1428 * because they had called ->check already. The other j-i
1429 * entries need only release.
1430 */
1431 int skip = i;
1454 j -= i; 1432 j -= i;
1455 COMPAT_ARPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i, 1433 xt_entry_foreach(iter0, entry0, newinfo->size) {
1456 compat_release_entry, &j); 1434 if (skip-- > 0)
1457 ARPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i); 1435 continue;
1436 if (j-- == 0)
1437 break;
1438 compat_release_entry(iter0);
1439 }
1440 xt_entry_foreach(iter1, entry1, newinfo->size) {
1441 if (i-- == 0)
1442 break;
1443 cleanup_entry(iter1);
1444 }
1458 xt_free_table_info(newinfo); 1445 xt_free_table_info(newinfo);
1459 return ret; 1446 return ret;
1460 } 1447 }
@@ -1472,7 +1459,11 @@ static int translate_compat_table(const char *name,
1472free_newinfo: 1459free_newinfo:
1473 xt_free_table_info(newinfo); 1460 xt_free_table_info(newinfo);
1474out: 1461out:
1475 COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j); 1462 xt_entry_foreach(iter0, entry0, total_size) {
1463 if (j-- == 0)
1464 break;
1465 compat_release_entry(iter0);
1466 }
1476 return ret; 1467 return ret;
1477out_unlock: 1468out_unlock:
1478 xt_compat_flush_offsets(NFPROTO_ARP); 1469 xt_compat_flush_offsets(NFPROTO_ARP);
@@ -1499,6 +1490,7 @@ static int compat_do_replace(struct net *net, void __user *user,
1499 struct compat_arpt_replace tmp; 1490 struct compat_arpt_replace tmp;
1500 struct xt_table_info *newinfo; 1491 struct xt_table_info *newinfo;
1501 void *loc_cpu_entry; 1492 void *loc_cpu_entry;
1493 struct arpt_entry *iter;
1502 1494
1503 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 1495 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1504 return -EFAULT; 1496 return -EFAULT;
@@ -1536,7 +1528,8 @@ static int compat_do_replace(struct net *net, void __user *user,
1536 return 0; 1528 return 0;
1537 1529
1538 free_newinfo_untrans: 1530 free_newinfo_untrans:
1539 ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); 1531 xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
1532 cleanup_entry(iter);
1540 free_newinfo: 1533 free_newinfo:
1541 xt_free_table_info(newinfo); 1534 xt_free_table_info(newinfo);
1542 return ret; 1535 return ret;
@@ -1570,7 +1563,7 @@ static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user,
1570static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr, 1563static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr,
1571 compat_uint_t *size, 1564 compat_uint_t *size,
1572 struct xt_counters *counters, 1565 struct xt_counters *counters,
1573 unsigned int *i) 1566 unsigned int i)
1574{ 1567{
1575 struct arpt_entry_target *t; 1568 struct arpt_entry_target *t;
1576 struct compat_arpt_entry __user *ce; 1569 struct compat_arpt_entry __user *ce;
@@ -1578,14 +1571,12 @@ static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr,
1578 compat_uint_t origsize; 1571 compat_uint_t origsize;
1579 int ret; 1572 int ret;
1580 1573
1581 ret = -EFAULT;
1582 origsize = *size; 1574 origsize = *size;
1583 ce = (struct compat_arpt_entry __user *)*dstptr; 1575 ce = (struct compat_arpt_entry __user *)*dstptr;
1584 if (copy_to_user(ce, e, sizeof(struct arpt_entry))) 1576 if (copy_to_user(ce, e, sizeof(struct arpt_entry)) != 0 ||
1585 goto out; 1577 copy_to_user(&ce->counters, &counters[i],
1586 1578 sizeof(counters[i])) != 0)
1587 if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i]))) 1579 return -EFAULT;
1588 goto out;
1589 1580
1590 *dstptr += sizeof(struct compat_arpt_entry); 1581 *dstptr += sizeof(struct compat_arpt_entry);
1591 *size -= sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); 1582 *size -= sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
@@ -1595,18 +1586,12 @@ static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr,
1595 t = arpt_get_target(e); 1586 t = arpt_get_target(e);
1596 ret = xt_compat_target_to_user(t, dstptr, size); 1587 ret = xt_compat_target_to_user(t, dstptr, size);
1597 if (ret) 1588 if (ret)
1598 goto out; 1589 return ret;
1599 ret = -EFAULT;
1600 next_offset = e->next_offset - (origsize - *size); 1590 next_offset = e->next_offset - (origsize - *size);
1601 if (put_user(target_offset, &ce->target_offset)) 1591 if (put_user(target_offset, &ce->target_offset) != 0 ||
1602 goto out; 1592 put_user(next_offset, &ce->next_offset) != 0)
1603 if (put_user(next_offset, &ce->next_offset)) 1593 return -EFAULT;
1604 goto out;
1605
1606 (*i)++;
1607 return 0; 1594 return 0;
1608out:
1609 return ret;
1610} 1595}
1611 1596
1612static int compat_copy_entries_to_user(unsigned int total_size, 1597static int compat_copy_entries_to_user(unsigned int total_size,
@@ -1620,6 +1605,7 @@ static int compat_copy_entries_to_user(unsigned int total_size,
1620 int ret = 0; 1605 int ret = 0;
1621 void *loc_cpu_entry; 1606 void *loc_cpu_entry;
1622 unsigned int i = 0; 1607 unsigned int i = 0;
1608 struct arpt_entry *iter;
1623 1609
1624 counters = alloc_counters(table); 1610 counters = alloc_counters(table);
1625 if (IS_ERR(counters)) 1611 if (IS_ERR(counters))
@@ -1629,9 +1615,12 @@ static int compat_copy_entries_to_user(unsigned int total_size,
1629 loc_cpu_entry = private->entries[raw_smp_processor_id()]; 1615 loc_cpu_entry = private->entries[raw_smp_processor_id()];
1630 pos = userptr; 1616 pos = userptr;
1631 size = total_size; 1617 size = total_size;
1632 ret = ARPT_ENTRY_ITERATE(loc_cpu_entry, total_size, 1618 xt_entry_foreach(iter, loc_cpu_entry, total_size) {
1633 compat_copy_entry_to_user, 1619 ret = compat_copy_entry_to_user(iter, &pos,
1634 &pos, &size, counters, &i); 1620 &size, counters, i++);
1621 if (ret != 0)
1622 break;
1623 }
1635 vfree(counters); 1624 vfree(counters);
1636 return ret; 1625 return ret;
1637} 1626}
@@ -1799,12 +1788,7 @@ struct xt_table *arpt_register_table(struct net *net,
1799 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; 1788 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1800 memcpy(loc_cpu_entry, repl->entries, repl->size); 1789 memcpy(loc_cpu_entry, repl->entries, repl->size);
1801 1790
1802 ret = translate_table(table->name, table->valid_hooks, 1791 ret = translate_table(newinfo, loc_cpu_entry, repl);
1803 newinfo, loc_cpu_entry, repl->size,
1804 repl->num_entries,
1805 repl->hook_entry,
1806 repl->underflow);
1807
1808 duprintf("arpt_register_table: translate table gives %d\n", ret); 1792 duprintf("arpt_register_table: translate table gives %d\n", ret);
1809 if (ret != 0) 1793 if (ret != 0)
1810 goto out_free; 1794 goto out_free;
@@ -1827,13 +1811,14 @@ void arpt_unregister_table(struct xt_table *table)
1827 struct xt_table_info *private; 1811 struct xt_table_info *private;
1828 void *loc_cpu_entry; 1812 void *loc_cpu_entry;
1829 struct module *table_owner = table->me; 1813 struct module *table_owner = table->me;
1814 struct arpt_entry *iter;
1830 1815
1831 private = xt_unregister_table(table); 1816 private = xt_unregister_table(table);
1832 1817
1833 /* Decrease module usage counts and free resources */ 1818 /* Decrease module usage counts and free resources */
1834 loc_cpu_entry = private->entries[raw_smp_processor_id()]; 1819 loc_cpu_entry = private->entries[raw_smp_processor_id()];
1835 ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size, 1820 xt_entry_foreach(iter, loc_cpu_entry, private->size)
1836 cleanup_entry, NULL); 1821 cleanup_entry(iter);
1837 if (private->number > private->initial_entries) 1822 if (private->number > private->initial_entries)
1838 module_put(table_owner); 1823 module_put(table_owner);
1839 xt_free_table_info(private); 1824 xt_free_table_info(private);
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 97337601827a..bfe26f32b930 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -6,6 +6,7 @@
6 */ 6 */
7 7
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/netfilter/x_tables.h>
9#include <linux/netfilter_arp/arp_tables.h> 10#include <linux/netfilter_arp/arp_tables.h>
10 11
11MODULE_LICENSE("GPL"); 12MODULE_LICENSE("GPL");
@@ -15,93 +16,37 @@ MODULE_DESCRIPTION("arptables filter table");
15#define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \ 16#define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \
16 (1 << NF_ARP_FORWARD)) 17 (1 << NF_ARP_FORWARD))
17 18
18static const struct
19{
20 struct arpt_replace repl;
21 struct arpt_standard entries[3];
22 struct arpt_error term;
23} initial_table __net_initdata = {
24 .repl = {
25 .name = "filter",
26 .valid_hooks = FILTER_VALID_HOOKS,
27 .num_entries = 4,
28 .size = sizeof(struct arpt_standard) * 3 + sizeof(struct arpt_error),
29 .hook_entry = {
30 [NF_ARP_IN] = 0,
31 [NF_ARP_OUT] = sizeof(struct arpt_standard),
32 [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard),
33 },
34 .underflow = {
35 [NF_ARP_IN] = 0,
36 [NF_ARP_OUT] = sizeof(struct arpt_standard),
37 [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard),
38 },
39 },
40 .entries = {
41 ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_IN */
42 ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_OUT */
43 ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_FORWARD */
44 },
45 .term = ARPT_ERROR_INIT,
46};
47
48static const struct xt_table packet_filter = { 19static const struct xt_table packet_filter = {
49 .name = "filter", 20 .name = "filter",
50 .valid_hooks = FILTER_VALID_HOOKS, 21 .valid_hooks = FILTER_VALID_HOOKS,
51 .me = THIS_MODULE, 22 .me = THIS_MODULE,
52 .af = NFPROTO_ARP, 23 .af = NFPROTO_ARP,
24 .priority = NF_IP_PRI_FILTER,
53}; 25};
54 26
55/* The work comes in here from netfilter.c */ 27/* The work comes in here from netfilter.c */
56static unsigned int arpt_in_hook(unsigned int hook, 28static unsigned int
57 struct sk_buff *skb, 29arptable_filter_hook(unsigned int hook, struct sk_buff *skb,
58 const struct net_device *in, 30 const struct net_device *in, const struct net_device *out,
59 const struct net_device *out, 31 int (*okfn)(struct sk_buff *))
60 int (*okfn)(struct sk_buff *))
61{ 32{
62 return arpt_do_table(skb, hook, in, out, 33 const struct net *net = dev_net((in != NULL) ? in : out);
63 dev_net(in)->ipv4.arptable_filter);
64}
65 34
66static unsigned int arpt_out_hook(unsigned int hook, 35 return arpt_do_table(skb, hook, in, out, net->ipv4.arptable_filter);
67 struct sk_buff *skb,
68 const struct net_device *in,
69 const struct net_device *out,
70 int (*okfn)(struct sk_buff *))
71{
72 return arpt_do_table(skb, hook, in, out,
73 dev_net(out)->ipv4.arptable_filter);
74} 36}
75 37
76static struct nf_hook_ops arpt_ops[] __read_mostly = { 38static struct nf_hook_ops *arpfilter_ops __read_mostly;
77 {
78 .hook = arpt_in_hook,
79 .owner = THIS_MODULE,
80 .pf = NFPROTO_ARP,
81 .hooknum = NF_ARP_IN,
82 .priority = NF_IP_PRI_FILTER,
83 },
84 {
85 .hook = arpt_out_hook,
86 .owner = THIS_MODULE,
87 .pf = NFPROTO_ARP,
88 .hooknum = NF_ARP_OUT,
89 .priority = NF_IP_PRI_FILTER,
90 },
91 {
92 .hook = arpt_in_hook,
93 .owner = THIS_MODULE,
94 .pf = NFPROTO_ARP,
95 .hooknum = NF_ARP_FORWARD,
96 .priority = NF_IP_PRI_FILTER,
97 },
98};
99 39
100static int __net_init arptable_filter_net_init(struct net *net) 40static int __net_init arptable_filter_net_init(struct net *net)
101{ 41{
102 /* Register table */ 42 struct arpt_replace *repl;
43
44 repl = arpt_alloc_initial_table(&packet_filter);
45 if (repl == NULL)
46 return -ENOMEM;
103 net->ipv4.arptable_filter = 47 net->ipv4.arptable_filter =
104 arpt_register_table(net, &packet_filter, &initial_table.repl); 48 arpt_register_table(net, &packet_filter, repl);
49 kfree(repl);
105 if (IS_ERR(net->ipv4.arptable_filter)) 50 if (IS_ERR(net->ipv4.arptable_filter))
106 return PTR_ERR(net->ipv4.arptable_filter); 51 return PTR_ERR(net->ipv4.arptable_filter);
107 return 0; 52 return 0;
@@ -125,9 +70,11 @@ static int __init arptable_filter_init(void)
125 if (ret < 0) 70 if (ret < 0)
126 return ret; 71 return ret;
127 72
128 ret = nf_register_hooks(arpt_ops, ARRAY_SIZE(arpt_ops)); 73 arpfilter_ops = xt_hook_link(&packet_filter, arptable_filter_hook);
129 if (ret < 0) 74 if (IS_ERR(arpfilter_ops)) {
75 ret = PTR_ERR(arpfilter_ops);
130 goto cleanup_table; 76 goto cleanup_table;
77 }
131 return ret; 78 return ret;
132 79
133cleanup_table: 80cleanup_table:
@@ -137,7 +84,7 @@ cleanup_table:
137 84
138static void __exit arptable_filter_fini(void) 85static void __exit arptable_filter_fini(void)
139{ 86{
140 nf_unregister_hooks(arpt_ops, ARRAY_SIZE(arpt_ops)); 87 xt_hook_unlink(&packet_filter, arpfilter_ops);
141 unregister_pernet_subsys(&arptable_filter_net_ops); 88 unregister_pernet_subsys(&arptable_filter_net_ops);
142} 89}
143 90
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 3ce53cf13d5a..c92f4e541cf6 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -28,6 +28,7 @@
28#include <linux/netfilter/x_tables.h> 28#include <linux/netfilter/x_tables.h>
29#include <linux/netfilter_ipv4/ip_tables.h> 29#include <linux/netfilter_ipv4/ip_tables.h>
30#include <net/netfilter/nf_log.h> 30#include <net/netfilter/nf_log.h>
31#include "../../netfilter/xt_repldata.h"
31 32
32MODULE_LICENSE("GPL"); 33MODULE_LICENSE("GPL");
33MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 34MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -66,6 +67,12 @@ do { \
66#define inline 67#define inline
67#endif 68#endif
68 69
70void *ipt_alloc_initial_table(const struct xt_table *info)
71{
72 return xt_alloc_initial_table(ipt, IPT);
73}
74EXPORT_SYMBOL_GPL(ipt_alloc_initial_table);
75
69/* 76/*
70 We keep a set of rules for each CPU, so we can avoid write-locking 77 We keep a set of rules for each CPU, so we can avoid write-locking
71 them in the softirq when updating the counters and therefore 78 them in the softirq when updating the counters and therefore
@@ -169,7 +176,7 @@ ipt_error(struct sk_buff *skb, const struct xt_target_param *par)
169 176
170/* Performance critical - called for every packet */ 177/* Performance critical - called for every packet */
171static inline bool 178static inline bool
172do_match(struct ipt_entry_match *m, const struct sk_buff *skb, 179do_match(const struct ipt_entry_match *m, const struct sk_buff *skb,
173 struct xt_match_param *par) 180 struct xt_match_param *par)
174{ 181{
175 par->match = m->u.kernel.match; 182 par->match = m->u.kernel.match;
@@ -184,7 +191,7 @@ do_match(struct ipt_entry_match *m, const struct sk_buff *skb,
184 191
185/* Performance critical */ 192/* Performance critical */
186static inline struct ipt_entry * 193static inline struct ipt_entry *
187get_entry(void *base, unsigned int offset) 194get_entry(const void *base, unsigned int offset)
188{ 195{
189 return (struct ipt_entry *)(base + offset); 196 return (struct ipt_entry *)(base + offset);
190} 197}
@@ -199,6 +206,13 @@ static inline bool unconditional(const struct ipt_ip *ip)
199#undef FWINV 206#undef FWINV
200} 207}
201 208
209/* for const-correctness */
210static inline const struct ipt_entry_target *
211ipt_get_target_c(const struct ipt_entry *e)
212{
213 return ipt_get_target((struct ipt_entry *)e);
214}
215
202#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 216#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
203 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) 217 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
204static const char *const hooknames[] = { 218static const char *const hooknames[] = {
@@ -233,11 +247,11 @@ static struct nf_loginfo trace_loginfo = {
233 247
234/* Mildly perf critical (only if packet tracing is on) */ 248/* Mildly perf critical (only if packet tracing is on) */
235static inline int 249static inline int
236get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e, 250get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e,
237 const char *hookname, const char **chainname, 251 const char *hookname, const char **chainname,
238 const char **comment, unsigned int *rulenum) 252 const char **comment, unsigned int *rulenum)
239{ 253{
240 struct ipt_standard_target *t = (void *)ipt_get_target(s); 254 const struct ipt_standard_target *t = (void *)ipt_get_target_c(s);
241 255
242 if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) { 256 if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) {
243 /* Head of user chain: ERROR target with chainname */ 257 /* Head of user chain: ERROR target with chainname */
@@ -263,17 +277,18 @@ get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e,
263 return 0; 277 return 0;
264} 278}
265 279
266static void trace_packet(struct sk_buff *skb, 280static void trace_packet(const struct sk_buff *skb,
267 unsigned int hook, 281 unsigned int hook,
268 const struct net_device *in, 282 const struct net_device *in,
269 const struct net_device *out, 283 const struct net_device *out,
270 const char *tablename, 284 const char *tablename,
271 struct xt_table_info *private, 285 const struct xt_table_info *private,
272 struct ipt_entry *e) 286 const struct ipt_entry *e)
273{ 287{
274 void *table_base; 288 const void *table_base;
275 const struct ipt_entry *root; 289 const struct ipt_entry *root;
276 const char *hookname, *chainname, *comment; 290 const char *hookname, *chainname, *comment;
291 const struct ipt_entry *iter;
277 unsigned int rulenum = 0; 292 unsigned int rulenum = 0;
278 293
279 table_base = private->entries[smp_processor_id()]; 294 table_base = private->entries[smp_processor_id()];
@@ -282,10 +297,10 @@ static void trace_packet(struct sk_buff *skb,
282 hookname = chainname = hooknames[hook]; 297 hookname = chainname = hooknames[hook];
283 comment = comments[NF_IP_TRACE_COMMENT_RULE]; 298 comment = comments[NF_IP_TRACE_COMMENT_RULE];
284 299
285 IPT_ENTRY_ITERATE(root, 300 xt_entry_foreach(iter, root, private->size - private->hook_entry[hook])
286 private->size - private->hook_entry[hook], 301 if (get_chainname_rulenum(iter, e, hookname,
287 get_chainname_rulenum, 302 &chainname, &comment, &rulenum) != 0)
288 e, hookname, &chainname, &comment, &rulenum); 303 break;
289 304
290 nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo, 305 nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo,
291 "TRACE: %s:%s:%s:%u ", 306 "TRACE: %s:%s:%s:%u ",
@@ -315,9 +330,9 @@ ipt_do_table(struct sk_buff *skb,
315 /* Initializing verdict to NF_DROP keeps gcc happy. */ 330 /* Initializing verdict to NF_DROP keeps gcc happy. */
316 unsigned int verdict = NF_DROP; 331 unsigned int verdict = NF_DROP;
317 const char *indev, *outdev; 332 const char *indev, *outdev;
318 void *table_base; 333 const void *table_base;
319 struct ipt_entry *e, *back; 334 struct ipt_entry *e, *back;
320 struct xt_table_info *private; 335 const struct xt_table_info *private;
321 struct xt_match_param mtpar; 336 struct xt_match_param mtpar;
322 struct xt_target_param tgpar; 337 struct xt_target_param tgpar;
323 338
@@ -350,17 +365,22 @@ ipt_do_table(struct sk_buff *skb,
350 back = get_entry(table_base, private->underflow[hook]); 365 back = get_entry(table_base, private->underflow[hook]);
351 366
352 do { 367 do {
353 struct ipt_entry_target *t; 368 const struct ipt_entry_target *t;
369 const struct xt_entry_match *ematch;
354 370
355 IP_NF_ASSERT(e); 371 IP_NF_ASSERT(e);
356 IP_NF_ASSERT(back); 372 IP_NF_ASSERT(back);
357 if (!ip_packet_match(ip, indev, outdev, 373 if (!ip_packet_match(ip, indev, outdev,
358 &e->ip, mtpar.fragoff) || 374 &e->ip, mtpar.fragoff)) {
359 IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) { 375 no_match:
360 e = ipt_next_entry(e); 376 e = ipt_next_entry(e);
361 continue; 377 continue;
362 } 378 }
363 379
380 xt_ematch_foreach(ematch, e)
381 if (do_match(ematch, skb, &mtpar) != 0)
382 goto no_match;
383
364 ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); 384 ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
365 385
366 t = ipt_get_target(e); 386 t = ipt_get_target(e);
@@ -443,7 +463,7 @@ ipt_do_table(struct sk_buff *skb,
443/* Figures out from what hook each rule can be called: returns 0 if 463/* Figures out from what hook each rule can be called: returns 0 if
444 there are loops. Puts hook bitmask in comefrom. */ 464 there are loops. Puts hook bitmask in comefrom. */
445static int 465static int
446mark_source_chains(struct xt_table_info *newinfo, 466mark_source_chains(const struct xt_table_info *newinfo,
447 unsigned int valid_hooks, void *entry0) 467 unsigned int valid_hooks, void *entry0)
448{ 468{
449 unsigned int hook; 469 unsigned int hook;
@@ -461,8 +481,8 @@ mark_source_chains(struct xt_table_info *newinfo,
461 e->counters.pcnt = pos; 481 e->counters.pcnt = pos;
462 482
463 for (;;) { 483 for (;;) {
464 struct ipt_standard_target *t 484 const struct ipt_standard_target *t
465 = (void *)ipt_get_target(e); 485 = (void *)ipt_get_target_c(e);
466 int visited = e->comefrom & (1 << hook); 486 int visited = e->comefrom & (1 << hook);
467 487
468 if (e->comefrom & (1 << NF_INET_NUMHOOKS)) { 488 if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
@@ -552,27 +572,23 @@ mark_source_chains(struct xt_table_info *newinfo,
552 return 1; 572 return 1;
553} 573}
554 574
555static int 575static void cleanup_match(struct ipt_entry_match *m, struct net *net)
556cleanup_match(struct ipt_entry_match *m, unsigned int *i)
557{ 576{
558 struct xt_mtdtor_param par; 577 struct xt_mtdtor_param par;
559 578
560 if (i && (*i)-- == 0) 579 par.net = net;
561 return 1;
562
563 par.match = m->u.kernel.match; 580 par.match = m->u.kernel.match;
564 par.matchinfo = m->data; 581 par.matchinfo = m->data;
565 par.family = NFPROTO_IPV4; 582 par.family = NFPROTO_IPV4;
566 if (par.match->destroy != NULL) 583 if (par.match->destroy != NULL)
567 par.match->destroy(&par); 584 par.match->destroy(&par);
568 module_put(par.match->me); 585 module_put(par.match->me);
569 return 0;
570} 586}
571 587
572static int 588static int
573check_entry(struct ipt_entry *e, const char *name) 589check_entry(const struct ipt_entry *e, const char *name)
574{ 590{
575 struct ipt_entry_target *t; 591 const struct ipt_entry_target *t;
576 592
577 if (!ip_checkentry(&e->ip)) { 593 if (!ip_checkentry(&e->ip)) {
578 duprintf("ip_tables: ip check failed %p %s.\n", e, name); 594 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
@@ -583,7 +599,7 @@ check_entry(struct ipt_entry *e, const char *name)
583 e->next_offset) 599 e->next_offset)
584 return -EINVAL; 600 return -EINVAL;
585 601
586 t = ipt_get_target(e); 602 t = ipt_get_target_c(e);
587 if (e->target_offset + t->u.target_size > e->next_offset) 603 if (e->target_offset + t->u.target_size > e->next_offset)
588 return -EINVAL; 604 return -EINVAL;
589 605
@@ -591,8 +607,7 @@ check_entry(struct ipt_entry *e, const char *name)
591} 607}
592 608
593static int 609static int
594check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, 610check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par)
595 unsigned int *i)
596{ 611{
597 const struct ipt_ip *ip = par->entryinfo; 612 const struct ipt_ip *ip = par->entryinfo;
598 int ret; 613 int ret;
@@ -607,13 +622,11 @@ check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par,
607 par.match->name); 622 par.match->name);
608 return ret; 623 return ret;
609 } 624 }
610 ++*i;
611 return 0; 625 return 0;
612} 626}
613 627
614static int 628static int
615find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, 629find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par)
616 unsigned int *i)
617{ 630{
618 struct xt_match *match; 631 struct xt_match *match;
619 int ret; 632 int ret;
@@ -627,7 +640,7 @@ find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par,
627 } 640 }
628 m->u.kernel.match = match; 641 m->u.kernel.match = match;
629 642
630 ret = check_match(m, par, i); 643 ret = check_match(m, par);
631 if (ret) 644 if (ret)
632 goto err; 645 goto err;
633 646
@@ -637,10 +650,11 @@ err:
637 return ret; 650 return ret;
638} 651}
639 652
640static int check_target(struct ipt_entry *e, const char *name) 653static int check_target(struct ipt_entry *e, struct net *net, const char *name)
641{ 654{
642 struct ipt_entry_target *t = ipt_get_target(e); 655 struct ipt_entry_target *t = ipt_get_target(e);
643 struct xt_tgchk_param par = { 656 struct xt_tgchk_param par = {
657 .net = net,
644 .table = name, 658 .table = name,
645 .entryinfo = e, 659 .entryinfo = e,
646 .target = t->u.kernel.target, 660 .target = t->u.kernel.target,
@@ -661,27 +675,32 @@ static int check_target(struct ipt_entry *e, const char *name)
661} 675}
662 676
663static int 677static int
664find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, 678find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
665 unsigned int *i) 679 unsigned int size)
666{ 680{
667 struct ipt_entry_target *t; 681 struct ipt_entry_target *t;
668 struct xt_target *target; 682 struct xt_target *target;
669 int ret; 683 int ret;
670 unsigned int j; 684 unsigned int j;
671 struct xt_mtchk_param mtpar; 685 struct xt_mtchk_param mtpar;
686 struct xt_entry_match *ematch;
672 687
673 ret = check_entry(e, name); 688 ret = check_entry(e, name);
674 if (ret) 689 if (ret)
675 return ret; 690 return ret;
676 691
677 j = 0; 692 j = 0;
693 mtpar.net = net;
678 mtpar.table = name; 694 mtpar.table = name;
679 mtpar.entryinfo = &e->ip; 695 mtpar.entryinfo = &e->ip;
680 mtpar.hook_mask = e->comefrom; 696 mtpar.hook_mask = e->comefrom;
681 mtpar.family = NFPROTO_IPV4; 697 mtpar.family = NFPROTO_IPV4;
682 ret = IPT_MATCH_ITERATE(e, find_check_match, &mtpar, &j); 698 xt_ematch_foreach(ematch, e) {
683 if (ret != 0) 699 ret = find_check_match(ematch, &mtpar);
684 goto cleanup_matches; 700 if (ret != 0)
701 goto cleanup_matches;
702 ++j;
703 }
685 704
686 t = ipt_get_target(e); 705 t = ipt_get_target(e);
687 target = try_then_request_module(xt_find_target(AF_INET, 706 target = try_then_request_module(xt_find_target(AF_INET,
@@ -695,27 +714,29 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
695 } 714 }
696 t->u.kernel.target = target; 715 t->u.kernel.target = target;
697 716
698 ret = check_target(e, name); 717 ret = check_target(e, net, name);
699 if (ret) 718 if (ret)
700 goto err; 719 goto err;
701
702 (*i)++;
703 return 0; 720 return 0;
704 err: 721 err:
705 module_put(t->u.kernel.target->me); 722 module_put(t->u.kernel.target->me);
706 cleanup_matches: 723 cleanup_matches:
707 IPT_MATCH_ITERATE(e, cleanup_match, &j); 724 xt_ematch_foreach(ematch, e) {
725 if (j-- == 0)
726 break;
727 cleanup_match(ematch, net);
728 }
708 return ret; 729 return ret;
709} 730}
710 731
711static bool check_underflow(struct ipt_entry *e) 732static bool check_underflow(const struct ipt_entry *e)
712{ 733{
713 const struct ipt_entry_target *t; 734 const struct ipt_entry_target *t;
714 unsigned int verdict; 735 unsigned int verdict;
715 736
716 if (!unconditional(&e->ip)) 737 if (!unconditional(&e->ip))
717 return false; 738 return false;
718 t = ipt_get_target(e); 739 t = ipt_get_target_c(e);
719 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) 740 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
720 return false; 741 return false;
721 verdict = ((struct ipt_standard_target *)t)->verdict; 742 verdict = ((struct ipt_standard_target *)t)->verdict;
@@ -726,12 +747,11 @@ static bool check_underflow(struct ipt_entry *e)
726static int 747static int
727check_entry_size_and_hooks(struct ipt_entry *e, 748check_entry_size_and_hooks(struct ipt_entry *e,
728 struct xt_table_info *newinfo, 749 struct xt_table_info *newinfo,
729 unsigned char *base, 750 const unsigned char *base,
730 unsigned char *limit, 751 const unsigned char *limit,
731 const unsigned int *hook_entries, 752 const unsigned int *hook_entries,
732 const unsigned int *underflows, 753 const unsigned int *underflows,
733 unsigned int valid_hooks, 754 unsigned int valid_hooks)
734 unsigned int *i)
735{ 755{
736 unsigned int h; 756 unsigned int h;
737 757
@@ -768,50 +788,42 @@ check_entry_size_and_hooks(struct ipt_entry *e,
768 /* Clear counters and comefrom */ 788 /* Clear counters and comefrom */
769 e->counters = ((struct xt_counters) { 0, 0 }); 789 e->counters = ((struct xt_counters) { 0, 0 });
770 e->comefrom = 0; 790 e->comefrom = 0;
771
772 (*i)++;
773 return 0; 791 return 0;
774} 792}
775 793
776static int 794static void
777cleanup_entry(struct ipt_entry *e, unsigned int *i) 795cleanup_entry(struct ipt_entry *e, struct net *net)
778{ 796{
779 struct xt_tgdtor_param par; 797 struct xt_tgdtor_param par;
780 struct ipt_entry_target *t; 798 struct ipt_entry_target *t;
781 799 struct xt_entry_match *ematch;
782 if (i && (*i)-- == 0)
783 return 1;
784 800
785 /* Cleanup all matches */ 801 /* Cleanup all matches */
786 IPT_MATCH_ITERATE(e, cleanup_match, NULL); 802 xt_ematch_foreach(ematch, e)
803 cleanup_match(ematch, net);
787 t = ipt_get_target(e); 804 t = ipt_get_target(e);
788 805
806 par.net = net;
789 par.target = t->u.kernel.target; 807 par.target = t->u.kernel.target;
790 par.targinfo = t->data; 808 par.targinfo = t->data;
791 par.family = NFPROTO_IPV4; 809 par.family = NFPROTO_IPV4;
792 if (par.target->destroy != NULL) 810 if (par.target->destroy != NULL)
793 par.target->destroy(&par); 811 par.target->destroy(&par);
794 module_put(par.target->me); 812 module_put(par.target->me);
795 return 0;
796} 813}
797 814
798/* Checks and translates the user-supplied table segment (held in 815/* Checks and translates the user-supplied table segment (held in
799 newinfo) */ 816 newinfo) */
800static int 817static int
801translate_table(const char *name, 818translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
802 unsigned int valid_hooks, 819 const struct ipt_replace *repl)
803 struct xt_table_info *newinfo,
804 void *entry0,
805 unsigned int size,
806 unsigned int number,
807 const unsigned int *hook_entries,
808 const unsigned int *underflows)
809{ 820{
821 struct ipt_entry *iter;
810 unsigned int i; 822 unsigned int i;
811 int ret; 823 int ret = 0;
812 824
813 newinfo->size = size; 825 newinfo->size = repl->size;
814 newinfo->number = number; 826 newinfo->number = repl->num_entries;
815 827
816 /* Init all hooks to impossible value. */ 828 /* Init all hooks to impossible value. */
817 for (i = 0; i < NF_INET_NUMHOOKS; i++) { 829 for (i = 0; i < NF_INET_NUMHOOKS; i++) {
@@ -822,49 +834,56 @@ translate_table(const char *name,
822 duprintf("translate_table: size %u\n", newinfo->size); 834 duprintf("translate_table: size %u\n", newinfo->size);
823 i = 0; 835 i = 0;
824 /* Walk through entries, checking offsets. */ 836 /* Walk through entries, checking offsets. */
825 ret = IPT_ENTRY_ITERATE(entry0, newinfo->size, 837 xt_entry_foreach(iter, entry0, newinfo->size) {
826 check_entry_size_and_hooks, 838 ret = check_entry_size_and_hooks(iter, newinfo, entry0,
827 newinfo, 839 entry0 + repl->size, repl->hook_entry, repl->underflow,
828 entry0, 840 repl->valid_hooks);
829 entry0 + size, 841 if (ret != 0)
830 hook_entries, underflows, valid_hooks, &i); 842 return ret;
831 if (ret != 0) 843 ++i;
832 return ret; 844 }
833 845
834 if (i != number) { 846 if (i != repl->num_entries) {
835 duprintf("translate_table: %u not %u entries\n", 847 duprintf("translate_table: %u not %u entries\n",
836 i, number); 848 i, repl->num_entries);
837 return -EINVAL; 849 return -EINVAL;
838 } 850 }
839 851
840 /* Check hooks all assigned */ 852 /* Check hooks all assigned */
841 for (i = 0; i < NF_INET_NUMHOOKS; i++) { 853 for (i = 0; i < NF_INET_NUMHOOKS; i++) {
842 /* Only hooks which are valid */ 854 /* Only hooks which are valid */
843 if (!(valid_hooks & (1 << i))) 855 if (!(repl->valid_hooks & (1 << i)))
844 continue; 856 continue;
845 if (newinfo->hook_entry[i] == 0xFFFFFFFF) { 857 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
846 duprintf("Invalid hook entry %u %u\n", 858 duprintf("Invalid hook entry %u %u\n",
847 i, hook_entries[i]); 859 i, repl->hook_entry[i]);
848 return -EINVAL; 860 return -EINVAL;
849 } 861 }
850 if (newinfo->underflow[i] == 0xFFFFFFFF) { 862 if (newinfo->underflow[i] == 0xFFFFFFFF) {
851 duprintf("Invalid underflow %u %u\n", 863 duprintf("Invalid underflow %u %u\n",
852 i, underflows[i]); 864 i, repl->underflow[i]);
853 return -EINVAL; 865 return -EINVAL;
854 } 866 }
855 } 867 }
856 868
857 if (!mark_source_chains(newinfo, valid_hooks, entry0)) 869 if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
858 return -ELOOP; 870 return -ELOOP;
859 871
860 /* Finally, each sanity check must pass */ 872 /* Finally, each sanity check must pass */
861 i = 0; 873 i = 0;
862 ret = IPT_ENTRY_ITERATE(entry0, newinfo->size, 874 xt_entry_foreach(iter, entry0, newinfo->size) {
863 find_check_entry, name, size, &i); 875 ret = find_check_entry(iter, net, repl->name, repl->size);
876 if (ret != 0)
877 break;
878 ++i;
879 }
864 880
865 if (ret != 0) { 881 if (ret != 0) {
866 IPT_ENTRY_ITERATE(entry0, newinfo->size, 882 xt_entry_foreach(iter, entry0, newinfo->size) {
867 cleanup_entry, &i); 883 if (i-- == 0)
884 break;
885 cleanup_entry(iter, net);
886 }
868 return ret; 887 return ret;
869 } 888 }
870 889
@@ -877,33 +896,11 @@ translate_table(const char *name,
877 return ret; 896 return ret;
878} 897}
879 898
880/* Gets counters. */
881static inline int
882add_entry_to_counter(const struct ipt_entry *e,
883 struct xt_counters total[],
884 unsigned int *i)
885{
886 ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
887
888 (*i)++;
889 return 0;
890}
891
892static inline int
893set_entry_to_counter(const struct ipt_entry *e,
894 struct ipt_counters total[],
895 unsigned int *i)
896{
897 SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
898
899 (*i)++;
900 return 0;
901}
902
903static void 899static void
904get_counters(const struct xt_table_info *t, 900get_counters(const struct xt_table_info *t,
905 struct xt_counters counters[]) 901 struct xt_counters counters[])
906{ 902{
903 struct ipt_entry *iter;
907 unsigned int cpu; 904 unsigned int cpu;
908 unsigned int i; 905 unsigned int i;
909 unsigned int curcpu; 906 unsigned int curcpu;
@@ -919,32 +916,32 @@ get_counters(const struct xt_table_info *t,
919 curcpu = smp_processor_id(); 916 curcpu = smp_processor_id();
920 917
921 i = 0; 918 i = 0;
922 IPT_ENTRY_ITERATE(t->entries[curcpu], 919 xt_entry_foreach(iter, t->entries[curcpu], t->size) {
923 t->size, 920 SET_COUNTER(counters[i], iter->counters.bcnt,
924 set_entry_to_counter, 921 iter->counters.pcnt);
925 counters, 922 ++i;
926 &i); 923 }
927 924
928 for_each_possible_cpu(cpu) { 925 for_each_possible_cpu(cpu) {
929 if (cpu == curcpu) 926 if (cpu == curcpu)
930 continue; 927 continue;
931 i = 0; 928 i = 0;
932 xt_info_wrlock(cpu); 929 xt_info_wrlock(cpu);
933 IPT_ENTRY_ITERATE(t->entries[cpu], 930 xt_entry_foreach(iter, t->entries[cpu], t->size) {
934 t->size, 931 ADD_COUNTER(counters[i], iter->counters.bcnt,
935 add_entry_to_counter, 932 iter->counters.pcnt);
936 counters, 933 ++i; /* macro does multi eval of i */
937 &i); 934 }
938 xt_info_wrunlock(cpu); 935 xt_info_wrunlock(cpu);
939 } 936 }
940 local_bh_enable(); 937 local_bh_enable();
941} 938}
942 939
943static struct xt_counters * alloc_counters(struct xt_table *table) 940static struct xt_counters *alloc_counters(const struct xt_table *table)
944{ 941{
945 unsigned int countersize; 942 unsigned int countersize;
946 struct xt_counters *counters; 943 struct xt_counters *counters;
947 struct xt_table_info *private = table->private; 944 const struct xt_table_info *private = table->private;
948 945
949 /* We need atomic snapshot of counters: rest doesn't change 946 /* We need atomic snapshot of counters: rest doesn't change
950 (other than comefrom, which userspace doesn't care 947 (other than comefrom, which userspace doesn't care
@@ -962,11 +959,11 @@ static struct xt_counters * alloc_counters(struct xt_table *table)
962 959
963static int 960static int
964copy_entries_to_user(unsigned int total_size, 961copy_entries_to_user(unsigned int total_size,
965 struct xt_table *table, 962 const struct xt_table *table,
966 void __user *userptr) 963 void __user *userptr)
967{ 964{
968 unsigned int off, num; 965 unsigned int off, num;
969 struct ipt_entry *e; 966 const struct ipt_entry *e;
970 struct xt_counters *counters; 967 struct xt_counters *counters;
971 const struct xt_table_info *private = table->private; 968 const struct xt_table_info *private = table->private;
972 int ret = 0; 969 int ret = 0;
@@ -1018,7 +1015,7 @@ copy_entries_to_user(unsigned int total_size,
1018 } 1015 }
1019 } 1016 }
1020 1017
1021 t = ipt_get_target(e); 1018 t = ipt_get_target_c(e);
1022 if (copy_to_user(userptr + off + e->target_offset 1019 if (copy_to_user(userptr + off + e->target_offset
1023 + offsetof(struct ipt_entry_target, 1020 + offsetof(struct ipt_entry_target,
1024 u.user.name), 1021 u.user.name),
@@ -1035,7 +1032,7 @@ copy_entries_to_user(unsigned int total_size,
1035} 1032}
1036 1033
1037#ifdef CONFIG_COMPAT 1034#ifdef CONFIG_COMPAT
1038static void compat_standard_from_user(void *dst, void *src) 1035static void compat_standard_from_user(void *dst, const void *src)
1039{ 1036{
1040 int v = *(compat_int_t *)src; 1037 int v = *(compat_int_t *)src;
1041 1038
@@ -1044,7 +1041,7 @@ static void compat_standard_from_user(void *dst, void *src)
1044 memcpy(dst, &v, sizeof(v)); 1041 memcpy(dst, &v, sizeof(v));
1045} 1042}
1046 1043
1047static int compat_standard_to_user(void __user *dst, void *src) 1044static int compat_standard_to_user(void __user *dst, const void *src)
1048{ 1045{
1049 compat_int_t cv = *(int *)src; 1046 compat_int_t cv = *(int *)src;
1050 1047
@@ -1053,25 +1050,20 @@ static int compat_standard_to_user(void __user *dst, void *src)
1053 return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; 1050 return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
1054} 1051}
1055 1052
1056static inline int 1053static int compat_calc_entry(const struct ipt_entry *e,
1057compat_calc_match(struct ipt_entry_match *m, int *size)
1058{
1059 *size += xt_compat_match_offset(m->u.kernel.match);
1060 return 0;
1061}
1062
1063static int compat_calc_entry(struct ipt_entry *e,
1064 const struct xt_table_info *info, 1054 const struct xt_table_info *info,
1065 void *base, struct xt_table_info *newinfo) 1055 const void *base, struct xt_table_info *newinfo)
1066{ 1056{
1067 struct ipt_entry_target *t; 1057 const struct xt_entry_match *ematch;
1058 const struct ipt_entry_target *t;
1068 unsigned int entry_offset; 1059 unsigned int entry_offset;
1069 int off, i, ret; 1060 int off, i, ret;
1070 1061
1071 off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); 1062 off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1072 entry_offset = (void *)e - base; 1063 entry_offset = (void *)e - base;
1073 IPT_MATCH_ITERATE(e, compat_calc_match, &off); 1064 xt_ematch_foreach(ematch, e)
1074 t = ipt_get_target(e); 1065 off += xt_compat_match_offset(ematch->u.kernel.match);
1066 t = ipt_get_target_c(e);
1075 off += xt_compat_target_offset(t->u.kernel.target); 1067 off += xt_compat_target_offset(t->u.kernel.target);
1076 newinfo->size -= off; 1068 newinfo->size -= off;
1077 ret = xt_compat_add_offset(AF_INET, entry_offset, off); 1069 ret = xt_compat_add_offset(AF_INET, entry_offset, off);
@@ -1092,7 +1084,9 @@ static int compat_calc_entry(struct ipt_entry *e,
1092static int compat_table_info(const struct xt_table_info *info, 1084static int compat_table_info(const struct xt_table_info *info,
1093 struct xt_table_info *newinfo) 1085 struct xt_table_info *newinfo)
1094{ 1086{
1087 struct ipt_entry *iter;
1095 void *loc_cpu_entry; 1088 void *loc_cpu_entry;
1089 int ret;
1096 1090
1097 if (!newinfo || !info) 1091 if (!newinfo || !info)
1098 return -EINVAL; 1092 return -EINVAL;
@@ -1101,13 +1095,17 @@ static int compat_table_info(const struct xt_table_info *info,
1101 memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); 1095 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
1102 newinfo->initial_entries = 0; 1096 newinfo->initial_entries = 0;
1103 loc_cpu_entry = info->entries[raw_smp_processor_id()]; 1097 loc_cpu_entry = info->entries[raw_smp_processor_id()];
1104 return IPT_ENTRY_ITERATE(loc_cpu_entry, info->size, 1098 xt_entry_foreach(iter, loc_cpu_entry, info->size) {
1105 compat_calc_entry, info, loc_cpu_entry, 1099 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
1106 newinfo); 1100 if (ret != 0)
1101 return ret;
1102 }
1103 return 0;
1107} 1104}
1108#endif 1105#endif
1109 1106
1110static int get_info(struct net *net, void __user *user, int *len, int compat) 1107static int get_info(struct net *net, void __user *user,
1108 const int *len, int compat)
1111{ 1109{
1112 char name[IPT_TABLE_MAXNAMELEN]; 1110 char name[IPT_TABLE_MAXNAMELEN];
1113 struct xt_table *t; 1111 struct xt_table *t;
@@ -1167,7 +1165,8 @@ static int get_info(struct net *net, void __user *user, int *len, int compat)
1167} 1165}
1168 1166
1169static int 1167static int
1170get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len) 1168get_entries(struct net *net, struct ipt_get_entries __user *uptr,
1169 const int *len)
1171{ 1170{
1172 int ret; 1171 int ret;
1173 struct ipt_get_entries get; 1172 struct ipt_get_entries get;
@@ -1215,6 +1214,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1215 struct xt_table_info *oldinfo; 1214 struct xt_table_info *oldinfo;
1216 struct xt_counters *counters; 1215 struct xt_counters *counters;
1217 void *loc_cpu_old_entry; 1216 void *loc_cpu_old_entry;
1217 struct ipt_entry *iter;
1218 1218
1219 ret = 0; 1219 ret = 0;
1220 counters = vmalloc(num_counters * sizeof(struct xt_counters)); 1220 counters = vmalloc(num_counters * sizeof(struct xt_counters));
@@ -1257,8 +1257,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1257 1257
1258 /* Decrease module usage counts and free resource */ 1258 /* Decrease module usage counts and free resource */
1259 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; 1259 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
1260 IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, 1260 xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
1261 NULL); 1261 cleanup_entry(iter, net);
1262
1262 xt_free_table_info(oldinfo); 1263 xt_free_table_info(oldinfo);
1263 if (copy_to_user(counters_ptr, counters, 1264 if (copy_to_user(counters_ptr, counters,
1264 sizeof(struct xt_counters) * num_counters) != 0) 1265 sizeof(struct xt_counters) * num_counters) != 0)
@@ -1277,12 +1278,13 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1277} 1278}
1278 1279
1279static int 1280static int
1280do_replace(struct net *net, void __user *user, unsigned int len) 1281do_replace(struct net *net, const void __user *user, unsigned int len)
1281{ 1282{
1282 int ret; 1283 int ret;
1283 struct ipt_replace tmp; 1284 struct ipt_replace tmp;
1284 struct xt_table_info *newinfo; 1285 struct xt_table_info *newinfo;
1285 void *loc_cpu_entry; 1286 void *loc_cpu_entry;
1287 struct ipt_entry *iter;
1286 1288
1287 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 1289 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1288 return -EFAULT; 1290 return -EFAULT;
@@ -1303,9 +1305,7 @@ do_replace(struct net *net, void __user *user, unsigned int len)
1303 goto free_newinfo; 1305 goto free_newinfo;
1304 } 1306 }
1305 1307
1306 ret = translate_table(tmp.name, tmp.valid_hooks, 1308 ret = translate_table(net, newinfo, loc_cpu_entry, &tmp);
1307 newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
1308 tmp.hook_entry, tmp.underflow);
1309 if (ret != 0) 1309 if (ret != 0)
1310 goto free_newinfo; 1310 goto free_newinfo;
1311 1311
@@ -1318,27 +1318,16 @@ do_replace(struct net *net, void __user *user, unsigned int len)
1318 return 0; 1318 return 0;
1319 1319
1320 free_newinfo_untrans: 1320 free_newinfo_untrans:
1321 IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); 1321 xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
1322 cleanup_entry(iter, net);
1322 free_newinfo: 1323 free_newinfo:
1323 xt_free_table_info(newinfo); 1324 xt_free_table_info(newinfo);
1324 return ret; 1325 return ret;
1325} 1326}
1326 1327
1327/* We're lazy, and add to the first CPU; overflow works its fey magic
1328 * and everything is OK. */
1329static int 1328static int
1330add_counter_to_entry(struct ipt_entry *e, 1329do_add_counters(struct net *net, const void __user *user,
1331 const struct xt_counters addme[], 1330 unsigned int len, int compat)
1332 unsigned int *i)
1333{
1334 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1335
1336 (*i)++;
1337 return 0;
1338}
1339
1340static int
1341do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
1342{ 1331{
1343 unsigned int i, curcpu; 1332 unsigned int i, curcpu;
1344 struct xt_counters_info tmp; 1333 struct xt_counters_info tmp;
@@ -1351,6 +1340,7 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat
1351 const struct xt_table_info *private; 1340 const struct xt_table_info *private;
1352 int ret = 0; 1341 int ret = 0;
1353 void *loc_cpu_entry; 1342 void *loc_cpu_entry;
1343 struct ipt_entry *iter;
1354#ifdef CONFIG_COMPAT 1344#ifdef CONFIG_COMPAT
1355 struct compat_xt_counters_info compat_tmp; 1345 struct compat_xt_counters_info compat_tmp;
1356 1346
@@ -1408,11 +1398,10 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat
1408 curcpu = smp_processor_id(); 1398 curcpu = smp_processor_id();
1409 loc_cpu_entry = private->entries[curcpu]; 1399 loc_cpu_entry = private->entries[curcpu];
1410 xt_info_wrlock(curcpu); 1400 xt_info_wrlock(curcpu);
1411 IPT_ENTRY_ITERATE(loc_cpu_entry, 1401 xt_entry_foreach(iter, loc_cpu_entry, private->size) {
1412 private->size, 1402 ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
1413 add_counter_to_entry, 1403 ++i;
1414 paddc, 1404 }
1415 &i);
1416 xt_info_wrunlock(curcpu); 1405 xt_info_wrunlock(curcpu);
1417 unlock_up_free: 1406 unlock_up_free:
1418 local_bh_enable(); 1407 local_bh_enable();
@@ -1440,45 +1429,40 @@ struct compat_ipt_replace {
1440static int 1429static int
1441compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, 1430compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
1442 unsigned int *size, struct xt_counters *counters, 1431 unsigned int *size, struct xt_counters *counters,
1443 unsigned int *i) 1432 unsigned int i)
1444{ 1433{
1445 struct ipt_entry_target *t; 1434 struct ipt_entry_target *t;
1446 struct compat_ipt_entry __user *ce; 1435 struct compat_ipt_entry __user *ce;
1447 u_int16_t target_offset, next_offset; 1436 u_int16_t target_offset, next_offset;
1448 compat_uint_t origsize; 1437 compat_uint_t origsize;
1449 int ret; 1438 const struct xt_entry_match *ematch;
1439 int ret = 0;
1450 1440
1451 ret = -EFAULT;
1452 origsize = *size; 1441 origsize = *size;
1453 ce = (struct compat_ipt_entry __user *)*dstptr; 1442 ce = (struct compat_ipt_entry __user *)*dstptr;
1454 if (copy_to_user(ce, e, sizeof(struct ipt_entry))) 1443 if (copy_to_user(ce, e, sizeof(struct ipt_entry)) != 0 ||
1455 goto out; 1444 copy_to_user(&ce->counters, &counters[i],
1456 1445 sizeof(counters[i])) != 0)
1457 if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i]))) 1446 return -EFAULT;
1458 goto out;
1459 1447
1460 *dstptr += sizeof(struct compat_ipt_entry); 1448 *dstptr += sizeof(struct compat_ipt_entry);
1461 *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); 1449 *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1462 1450
1463 ret = IPT_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size); 1451 xt_ematch_foreach(ematch, e) {
1452 ret = xt_compat_match_to_user(ematch, dstptr, size);
1453 if (ret != 0)
1454 return ret;
1455 }
1464 target_offset = e->target_offset - (origsize - *size); 1456 target_offset = e->target_offset - (origsize - *size);
1465 if (ret)
1466 goto out;
1467 t = ipt_get_target(e); 1457 t = ipt_get_target(e);
1468 ret = xt_compat_target_to_user(t, dstptr, size); 1458 ret = xt_compat_target_to_user(t, dstptr, size);
1469 if (ret) 1459 if (ret)
1470 goto out; 1460 return ret;
1471 ret = -EFAULT;
1472 next_offset = e->next_offset - (origsize - *size); 1461 next_offset = e->next_offset - (origsize - *size);
1473 if (put_user(target_offset, &ce->target_offset)) 1462 if (put_user(target_offset, &ce->target_offset) != 0 ||
1474 goto out; 1463 put_user(next_offset, &ce->next_offset) != 0)
1475 if (put_user(next_offset, &ce->next_offset)) 1464 return -EFAULT;
1476 goto out;
1477
1478 (*i)++;
1479 return 0; 1465 return 0;
1480out:
1481 return ret;
1482} 1466}
1483 1467
1484static int 1468static int
@@ -1486,7 +1470,7 @@ compat_find_calc_match(struct ipt_entry_match *m,
1486 const char *name, 1470 const char *name,
1487 const struct ipt_ip *ip, 1471 const struct ipt_ip *ip,
1488 unsigned int hookmask, 1472 unsigned int hookmask,
1489 int *size, unsigned int *i) 1473 int *size)
1490{ 1474{
1491 struct xt_match *match; 1475 struct xt_match *match;
1492 1476
@@ -1500,47 +1484,32 @@ compat_find_calc_match(struct ipt_entry_match *m,
1500 } 1484 }
1501 m->u.kernel.match = match; 1485 m->u.kernel.match = match;
1502 *size += xt_compat_match_offset(match); 1486 *size += xt_compat_match_offset(match);
1503
1504 (*i)++;
1505 return 0;
1506}
1507
1508static int
1509compat_release_match(struct ipt_entry_match *m, unsigned int *i)
1510{
1511 if (i && (*i)-- == 0)
1512 return 1;
1513
1514 module_put(m->u.kernel.match->me);
1515 return 0; 1487 return 0;
1516} 1488}
1517 1489
1518static int 1490static void compat_release_entry(struct compat_ipt_entry *e)
1519compat_release_entry(struct compat_ipt_entry *e, unsigned int *i)
1520{ 1491{
1521 struct ipt_entry_target *t; 1492 struct ipt_entry_target *t;
1522 1493 struct xt_entry_match *ematch;
1523 if (i && (*i)-- == 0)
1524 return 1;
1525 1494
1526 /* Cleanup all matches */ 1495 /* Cleanup all matches */
1527 COMPAT_IPT_MATCH_ITERATE(e, compat_release_match, NULL); 1496 xt_ematch_foreach(ematch, e)
1497 module_put(ematch->u.kernel.match->me);
1528 t = compat_ipt_get_target(e); 1498 t = compat_ipt_get_target(e);
1529 module_put(t->u.kernel.target->me); 1499 module_put(t->u.kernel.target->me);
1530 return 0;
1531} 1500}
1532 1501
1533static int 1502static int
1534check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, 1503check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
1535 struct xt_table_info *newinfo, 1504 struct xt_table_info *newinfo,
1536 unsigned int *size, 1505 unsigned int *size,
1537 unsigned char *base, 1506 const unsigned char *base,
1538 unsigned char *limit, 1507 const unsigned char *limit,
1539 unsigned int *hook_entries, 1508 const unsigned int *hook_entries,
1540 unsigned int *underflows, 1509 const unsigned int *underflows,
1541 unsigned int *i,
1542 const char *name) 1510 const char *name)
1543{ 1511{
1512 struct xt_entry_match *ematch;
1544 struct ipt_entry_target *t; 1513 struct ipt_entry_target *t;
1545 struct xt_target *target; 1514 struct xt_target *target;
1546 unsigned int entry_offset; 1515 unsigned int entry_offset;
@@ -1569,10 +1538,13 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
1569 off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); 1538 off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1570 entry_offset = (void *)e - (void *)base; 1539 entry_offset = (void *)e - (void *)base;
1571 j = 0; 1540 j = 0;
1572 ret = COMPAT_IPT_MATCH_ITERATE(e, compat_find_calc_match, name, 1541 xt_ematch_foreach(ematch, e) {
1573 &e->ip, e->comefrom, &off, &j); 1542 ret = compat_find_calc_match(ematch, name,
1574 if (ret != 0) 1543 &e->ip, e->comefrom, &off);
1575 goto release_matches; 1544 if (ret != 0)
1545 goto release_matches;
1546 ++j;
1547 }
1576 1548
1577 t = compat_ipt_get_target(e); 1549 t = compat_ipt_get_target(e);
1578 target = try_then_request_module(xt_find_target(AF_INET, 1550 target = try_then_request_module(xt_find_target(AF_INET,
@@ -1604,14 +1576,16 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
1604 /* Clear counters and comefrom */ 1576 /* Clear counters and comefrom */
1605 memset(&e->counters, 0, sizeof(e->counters)); 1577 memset(&e->counters, 0, sizeof(e->counters));
1606 e->comefrom = 0; 1578 e->comefrom = 0;
1607
1608 (*i)++;
1609 return 0; 1579 return 0;
1610 1580
1611out: 1581out:
1612 module_put(t->u.kernel.target->me); 1582 module_put(t->u.kernel.target->me);
1613release_matches: 1583release_matches:
1614 IPT_MATCH_ITERATE(e, compat_release_match, &j); 1584 xt_ematch_foreach(ematch, e) {
1585 if (j-- == 0)
1586 break;
1587 module_put(ematch->u.kernel.match->me);
1588 }
1615 return ret; 1589 return ret;
1616} 1590}
1617 1591
@@ -1625,6 +1599,7 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
1625 struct ipt_entry *de; 1599 struct ipt_entry *de;
1626 unsigned int origsize; 1600 unsigned int origsize;
1627 int ret, h; 1601 int ret, h;
1602 struct xt_entry_match *ematch;
1628 1603
1629 ret = 0; 1604 ret = 0;
1630 origsize = *size; 1605 origsize = *size;
@@ -1635,10 +1610,11 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
1635 *dstptr += sizeof(struct ipt_entry); 1610 *dstptr += sizeof(struct ipt_entry);
1636 *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); 1611 *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1637 1612
1638 ret = COMPAT_IPT_MATCH_ITERATE(e, xt_compat_match_from_user, 1613 xt_ematch_foreach(ematch, e) {
1639 dstptr, size); 1614 ret = xt_compat_match_from_user(ematch, dstptr, size);
1640 if (ret) 1615 if (ret != 0)
1641 return ret; 1616 return ret;
1617 }
1642 de->target_offset = e->target_offset - (origsize - *size); 1618 de->target_offset = e->target_offset - (origsize - *size);
1643 t = compat_ipt_get_target(e); 1619 t = compat_ipt_get_target(e);
1644 target = t->u.kernel.target; 1620 target = t->u.kernel.target;
@@ -1655,36 +1631,43 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
1655} 1631}
1656 1632
1657static int 1633static int
1658compat_check_entry(struct ipt_entry *e, const char *name, 1634compat_check_entry(struct ipt_entry *e, struct net *net, const char *name)
1659 unsigned int *i)
1660{ 1635{
1636 struct xt_entry_match *ematch;
1661 struct xt_mtchk_param mtpar; 1637 struct xt_mtchk_param mtpar;
1662 unsigned int j; 1638 unsigned int j;
1663 int ret; 1639 int ret = 0;
1664 1640
1665 j = 0; 1641 j = 0;
1642 mtpar.net = net;
1666 mtpar.table = name; 1643 mtpar.table = name;
1667 mtpar.entryinfo = &e->ip; 1644 mtpar.entryinfo = &e->ip;
1668 mtpar.hook_mask = e->comefrom; 1645 mtpar.hook_mask = e->comefrom;
1669 mtpar.family = NFPROTO_IPV4; 1646 mtpar.family = NFPROTO_IPV4;
1670 ret = IPT_MATCH_ITERATE(e, check_match, &mtpar, &j); 1647 xt_ematch_foreach(ematch, e) {
1671 if (ret) 1648 ret = check_match(ematch, &mtpar);
1672 goto cleanup_matches; 1649 if (ret != 0)
1650 goto cleanup_matches;
1651 ++j;
1652 }
1673 1653
1674 ret = check_target(e, name); 1654 ret = check_target(e, net, name);
1675 if (ret) 1655 if (ret)
1676 goto cleanup_matches; 1656 goto cleanup_matches;
1677
1678 (*i)++;
1679 return 0; 1657 return 0;
1680 1658
1681 cleanup_matches: 1659 cleanup_matches:
1682 IPT_MATCH_ITERATE(e, cleanup_match, &j); 1660 xt_ematch_foreach(ematch, e) {
1661 if (j-- == 0)
1662 break;
1663 cleanup_match(ematch, net);
1664 }
1683 return ret; 1665 return ret;
1684} 1666}
1685 1667
1686static int 1668static int
1687translate_compat_table(const char *name, 1669translate_compat_table(struct net *net,
1670 const char *name,
1688 unsigned int valid_hooks, 1671 unsigned int valid_hooks,
1689 struct xt_table_info **pinfo, 1672 struct xt_table_info **pinfo,
1690 void **pentry0, 1673 void **pentry0,
@@ -1696,6 +1679,8 @@ translate_compat_table(const char *name,
1696 unsigned int i, j; 1679 unsigned int i, j;
1697 struct xt_table_info *newinfo, *info; 1680 struct xt_table_info *newinfo, *info;
1698 void *pos, *entry0, *entry1; 1681 void *pos, *entry0, *entry1;
1682 struct compat_ipt_entry *iter0;
1683 struct ipt_entry *iter1;
1699 unsigned int size; 1684 unsigned int size;
1700 int ret; 1685 int ret;
1701 1686
@@ -1714,13 +1699,14 @@ translate_compat_table(const char *name,
1714 j = 0; 1699 j = 0;
1715 xt_compat_lock(AF_INET); 1700 xt_compat_lock(AF_INET);
1716 /* Walk through entries, checking offsets. */ 1701 /* Walk through entries, checking offsets. */
1717 ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, 1702 xt_entry_foreach(iter0, entry0, total_size) {
1718 check_compat_entry_size_and_hooks, 1703 ret = check_compat_entry_size_and_hooks(iter0, info, &size,
1719 info, &size, entry0, 1704 entry0, entry0 + total_size, hook_entries, underflows,
1720 entry0 + total_size, 1705 name);
1721 hook_entries, underflows, &j, name); 1706 if (ret != 0)
1722 if (ret != 0) 1707 goto out_unlock;
1723 goto out_unlock; 1708 ++j;
1709 }
1724 1710
1725 ret = -EINVAL; 1711 ret = -EINVAL;
1726 if (j != number) { 1712 if (j != number) {
@@ -1759,9 +1745,12 @@ translate_compat_table(const char *name,
1759 entry1 = newinfo->entries[raw_smp_processor_id()]; 1745 entry1 = newinfo->entries[raw_smp_processor_id()];
1760 pos = entry1; 1746 pos = entry1;
1761 size = total_size; 1747 size = total_size;
1762 ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, 1748 xt_entry_foreach(iter0, entry0, total_size) {
1763 compat_copy_entry_from_user, 1749 ret = compat_copy_entry_from_user(iter0, &pos,
1764 &pos, &size, name, newinfo, entry1); 1750 &size, name, newinfo, entry1);
1751 if (ret != 0)
1752 break;
1753 }
1765 xt_compat_flush_offsets(AF_INET); 1754 xt_compat_flush_offsets(AF_INET);
1766 xt_compat_unlock(AF_INET); 1755 xt_compat_unlock(AF_INET);
1767 if (ret) 1756 if (ret)
@@ -1772,13 +1761,32 @@ translate_compat_table(const char *name,
1772 goto free_newinfo; 1761 goto free_newinfo;
1773 1762
1774 i = 0; 1763 i = 0;
1775 ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, 1764 xt_entry_foreach(iter1, entry1, newinfo->size) {
1776 name, &i); 1765 ret = compat_check_entry(iter1, net, name);
1766 if (ret != 0)
1767 break;
1768 ++i;
1769 }
1777 if (ret) { 1770 if (ret) {
1771 /*
1772 * The first i matches need cleanup_entry (calls ->destroy)
1773 * because they had called ->check already. The other j-i
1774 * entries need only release.
1775 */
1776 int skip = i;
1778 j -= i; 1777 j -= i;
1779 COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i, 1778 xt_entry_foreach(iter0, entry0, newinfo->size) {
1780 compat_release_entry, &j); 1779 if (skip-- > 0)
1781 IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i); 1780 continue;
1781 if (j-- == 0)
1782 break;
1783 compat_release_entry(iter0);
1784 }
1785 xt_entry_foreach(iter1, entry1, newinfo->size) {
1786 if (i-- == 0)
1787 break;
1788 cleanup_entry(iter1, net);
1789 }
1782 xt_free_table_info(newinfo); 1790 xt_free_table_info(newinfo);
1783 return ret; 1791 return ret;
1784 } 1792 }
@@ -1796,7 +1804,11 @@ translate_compat_table(const char *name,
1796free_newinfo: 1804free_newinfo:
1797 xt_free_table_info(newinfo); 1805 xt_free_table_info(newinfo);
1798out: 1806out:
1799 COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j); 1807 xt_entry_foreach(iter0, entry0, total_size) {
1808 if (j-- == 0)
1809 break;
1810 compat_release_entry(iter0);
1811 }
1800 return ret; 1812 return ret;
1801out_unlock: 1813out_unlock:
1802 xt_compat_flush_offsets(AF_INET); 1814 xt_compat_flush_offsets(AF_INET);
@@ -1811,6 +1823,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
1811 struct compat_ipt_replace tmp; 1823 struct compat_ipt_replace tmp;
1812 struct xt_table_info *newinfo; 1824 struct xt_table_info *newinfo;
1813 void *loc_cpu_entry; 1825 void *loc_cpu_entry;
1826 struct ipt_entry *iter;
1814 1827
1815 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 1828 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1816 return -EFAULT; 1829 return -EFAULT;
@@ -1833,7 +1846,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
1833 goto free_newinfo; 1846 goto free_newinfo;
1834 } 1847 }
1835 1848
1836 ret = translate_compat_table(tmp.name, tmp.valid_hooks, 1849 ret = translate_compat_table(net, tmp.name, tmp.valid_hooks,
1837 &newinfo, &loc_cpu_entry, tmp.size, 1850 &newinfo, &loc_cpu_entry, tmp.size,
1838 tmp.num_entries, tmp.hook_entry, 1851 tmp.num_entries, tmp.hook_entry,
1839 tmp.underflow); 1852 tmp.underflow);
@@ -1849,7 +1862,8 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
1849 return 0; 1862 return 0;
1850 1863
1851 free_newinfo_untrans: 1864 free_newinfo_untrans:
1852 IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); 1865 xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
1866 cleanup_entry(iter, net);
1853 free_newinfo: 1867 free_newinfo:
1854 xt_free_table_info(newinfo); 1868 xt_free_table_info(newinfo);
1855 return ret; 1869 return ret;
@@ -1898,6 +1912,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
1898 int ret = 0; 1912 int ret = 0;
1899 const void *loc_cpu_entry; 1913 const void *loc_cpu_entry;
1900 unsigned int i = 0; 1914 unsigned int i = 0;
1915 struct ipt_entry *iter;
1901 1916
1902 counters = alloc_counters(table); 1917 counters = alloc_counters(table);
1903 if (IS_ERR(counters)) 1918 if (IS_ERR(counters))
@@ -1910,9 +1925,12 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
1910 loc_cpu_entry = private->entries[raw_smp_processor_id()]; 1925 loc_cpu_entry = private->entries[raw_smp_processor_id()];
1911 pos = userptr; 1926 pos = userptr;
1912 size = total_size; 1927 size = total_size;
1913 ret = IPT_ENTRY_ITERATE(loc_cpu_entry, total_size, 1928 xt_entry_foreach(iter, loc_cpu_entry, total_size) {
1914 compat_copy_entry_to_user, 1929 ret = compat_copy_entry_to_user(iter, &pos,
1915 &pos, &size, counters, &i); 1930 &size, counters, i++);
1931 if (ret != 0)
1932 break;
1933 }
1916 1934
1917 vfree(counters); 1935 vfree(counters);
1918 return ret; 1936 return ret;
@@ -2086,11 +2104,7 @@ struct xt_table *ipt_register_table(struct net *net,
2086 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; 2104 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
2087 memcpy(loc_cpu_entry, repl->entries, repl->size); 2105 memcpy(loc_cpu_entry, repl->entries, repl->size);
2088 2106
2089 ret = translate_table(table->name, table->valid_hooks, 2107 ret = translate_table(net, newinfo, loc_cpu_entry, repl);
2090 newinfo, loc_cpu_entry, repl->size,
2091 repl->num_entries,
2092 repl->hook_entry,
2093 repl->underflow);
2094 if (ret != 0) 2108 if (ret != 0)
2095 goto out_free; 2109 goto out_free;
2096 2110
@@ -2108,17 +2122,19 @@ out:
2108 return ERR_PTR(ret); 2122 return ERR_PTR(ret);
2109} 2123}
2110 2124
2111void ipt_unregister_table(struct xt_table *table) 2125void ipt_unregister_table(struct net *net, struct xt_table *table)
2112{ 2126{
2113 struct xt_table_info *private; 2127 struct xt_table_info *private;
2114 void *loc_cpu_entry; 2128 void *loc_cpu_entry;
2115 struct module *table_owner = table->me; 2129 struct module *table_owner = table->me;
2130 struct ipt_entry *iter;
2116 2131
2117 private = xt_unregister_table(table); 2132 private = xt_unregister_table(table);
2118 2133
2119 /* Decrease module usage counts and free resources */ 2134 /* Decrease module usage counts and free resources */
2120 loc_cpu_entry = private->entries[raw_smp_processor_id()]; 2135 loc_cpu_entry = private->entries[raw_smp_processor_id()];
2121 IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL); 2136 xt_entry_foreach(iter, loc_cpu_entry, private->size)
2137 cleanup_entry(iter, net);
2122 if (private->number > private->initial_entries) 2138 if (private->number > private->initial_entries)
2123 module_put(table_owner); 2139 module_put(table_owner);
2124 xt_free_table_info(private); 2140 xt_free_table_info(private);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 40ca2d240abb..0886f96c736b 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -560,8 +560,7 @@ struct clusterip_seq_position {
560 560
561static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) 561static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
562{ 562{
563 const struct proc_dir_entry *pde = s->private; 563 struct clusterip_config *c = s->private;
564 struct clusterip_config *c = pde->data;
565 unsigned int weight; 564 unsigned int weight;
566 u_int32_t local_nodes; 565 u_int32_t local_nodes;
567 struct clusterip_seq_position *idx; 566 struct clusterip_seq_position *idx;
@@ -632,10 +631,9 @@ static int clusterip_proc_open(struct inode *inode, struct file *file)
632 631
633 if (!ret) { 632 if (!ret) {
634 struct seq_file *sf = file->private_data; 633 struct seq_file *sf = file->private_data;
635 struct proc_dir_entry *pde = PDE(inode); 634 struct clusterip_config *c = PDE(inode)->data;
636 struct clusterip_config *c = pde->data;
637 635
638 sf->private = pde; 636 sf->private = c;
639 637
640 clusterip_config_get(c); 638 clusterip_config_get(c);
641 } 639 }
@@ -645,8 +643,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file)
645 643
646static int clusterip_proc_release(struct inode *inode, struct file *file) 644static int clusterip_proc_release(struct inode *inode, struct file *file)
647{ 645{
648 struct proc_dir_entry *pde = PDE(inode); 646 struct clusterip_config *c = PDE(inode)->data;
649 struct clusterip_config *c = pde->data;
650 int ret; 647 int ret;
651 648
652 ret = seq_release(inode, file); 649 ret = seq_release(inode, file);
@@ -660,10 +657,9 @@ static int clusterip_proc_release(struct inode *inode, struct file *file)
660static ssize_t clusterip_proc_write(struct file *file, const char __user *input, 657static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
661 size_t size, loff_t *ofs) 658 size_t size, loff_t *ofs)
662{ 659{
660 struct clusterip_config *c = PDE(file->f_path.dentry->d_inode)->data;
663#define PROC_WRITELEN 10 661#define PROC_WRITELEN 10
664 char buffer[PROC_WRITELEN+1]; 662 char buffer[PROC_WRITELEN+1];
665 const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
666 struct clusterip_config *c = pde->data;
667 unsigned long nodenum; 663 unsigned long nodenum;
668 664
669 if (copy_from_user(buffer, input, PROC_WRITELEN)) 665 if (copy_from_user(buffer, input, PROC_WRITELEN))
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 399061c3fd7d..09a5d3f7cc41 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -338,7 +338,7 @@ struct compat_ipt_ulog_info {
338 char prefix[ULOG_PREFIX_LEN]; 338 char prefix[ULOG_PREFIX_LEN];
339}; 339};
340 340
341static void ulog_tg_compat_from_user(void *dst, void *src) 341static void ulog_tg_compat_from_user(void *dst, const void *src)
342{ 342{
343 const struct compat_ipt_ulog_info *cl = src; 343 const struct compat_ipt_ulog_info *cl = src;
344 struct ipt_ulog_info l = { 344 struct ipt_ulog_info l = {
@@ -351,7 +351,7 @@ static void ulog_tg_compat_from_user(void *dst, void *src)
351 memcpy(dst, &l, sizeof(l)); 351 memcpy(dst, &l, sizeof(l));
352} 352}
353 353
354static int ulog_tg_compat_to_user(void __user *dst, void *src) 354static int ulog_tg_compat_to_user(void __user *dst, const void *src)
355{ 355{
356 const struct ipt_ulog_info *l = src; 356 const struct ipt_ulog_info *l = src;
357 struct compat_ipt_ulog_info cl = { 357 struct compat_ipt_ulog_info cl = {
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index df566cbd68e5..c8dc9800d620 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -23,104 +23,32 @@ MODULE_DESCRIPTION("iptables filter table");
23 (1 << NF_INET_FORWARD) | \ 23 (1 << NF_INET_FORWARD) | \
24 (1 << NF_INET_LOCAL_OUT)) 24 (1 << NF_INET_LOCAL_OUT))
25 25
26static struct
27{
28 struct ipt_replace repl;
29 struct ipt_standard entries[3];
30 struct ipt_error term;
31} initial_table __net_initdata = {
32 .repl = {
33 .name = "filter",
34 .valid_hooks = FILTER_VALID_HOOKS,
35 .num_entries = 4,
36 .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
37 .hook_entry = {
38 [NF_INET_LOCAL_IN] = 0,
39 [NF_INET_FORWARD] = sizeof(struct ipt_standard),
40 [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
41 },
42 .underflow = {
43 [NF_INET_LOCAL_IN] = 0,
44 [NF_INET_FORWARD] = sizeof(struct ipt_standard),
45 [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
46 },
47 },
48 .entries = {
49 IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */
50 IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */
51 IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */
52 },
53 .term = IPT_ERROR_INIT, /* ERROR */
54};
55
56static const struct xt_table packet_filter = { 26static const struct xt_table packet_filter = {
57 .name = "filter", 27 .name = "filter",
58 .valid_hooks = FILTER_VALID_HOOKS, 28 .valid_hooks = FILTER_VALID_HOOKS,
59 .me = THIS_MODULE, 29 .me = THIS_MODULE,
60 .af = NFPROTO_IPV4, 30 .af = NFPROTO_IPV4,
31 .priority = NF_IP_PRI_FILTER,
61}; 32};
62 33
63/* The work comes in here from netfilter.c. */
64static unsigned int
65ipt_local_in_hook(unsigned int hook,
66 struct sk_buff *skb,
67 const struct net_device *in,
68 const struct net_device *out,
69 int (*okfn)(struct sk_buff *))
70{
71 return ipt_do_table(skb, hook, in, out,
72 dev_net(in)->ipv4.iptable_filter);
73}
74
75static unsigned int 34static unsigned int
76ipt_hook(unsigned int hook, 35iptable_filter_hook(unsigned int hook, struct sk_buff *skb,
77 struct sk_buff *skb, 36 const struct net_device *in, const struct net_device *out,
78 const struct net_device *in, 37 int (*okfn)(struct sk_buff *))
79 const struct net_device *out,
80 int (*okfn)(struct sk_buff *))
81{ 38{
82 return ipt_do_table(skb, hook, in, out, 39 const struct net *net;
83 dev_net(in)->ipv4.iptable_filter);
84}
85 40
86static unsigned int 41 if (hook == NF_INET_LOCAL_OUT &&
87ipt_local_out_hook(unsigned int hook, 42 (skb->len < sizeof(struct iphdr) ||
88 struct sk_buff *skb, 43 ip_hdrlen(skb) < sizeof(struct iphdr)))
89 const struct net_device *in, 44 /* root is playing with raw sockets. */
90 const struct net_device *out,
91 int (*okfn)(struct sk_buff *))
92{
93 /* root is playing with raw sockets. */
94 if (skb->len < sizeof(struct iphdr) ||
95 ip_hdrlen(skb) < sizeof(struct iphdr))
96 return NF_ACCEPT; 45 return NF_ACCEPT;
97 return ipt_do_table(skb, hook, in, out, 46
98 dev_net(out)->ipv4.iptable_filter); 47 net = dev_net((in != NULL) ? in : out);
48 return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_filter);
99} 49}
100 50
101static struct nf_hook_ops ipt_ops[] __read_mostly = { 51static struct nf_hook_ops *filter_ops __read_mostly;
102 {
103 .hook = ipt_local_in_hook,
104 .owner = THIS_MODULE,
105 .pf = NFPROTO_IPV4,
106 .hooknum = NF_INET_LOCAL_IN,
107 .priority = NF_IP_PRI_FILTER,
108 },
109 {
110 .hook = ipt_hook,
111 .owner = THIS_MODULE,
112 .pf = NFPROTO_IPV4,
113 .hooknum = NF_INET_FORWARD,
114 .priority = NF_IP_PRI_FILTER,
115 },
116 {
117 .hook = ipt_local_out_hook,
118 .owner = THIS_MODULE,
119 .pf = NFPROTO_IPV4,
120 .hooknum = NF_INET_LOCAL_OUT,
121 .priority = NF_IP_PRI_FILTER,
122 },
123};
124 52
125/* Default to forward because I got too much mail already. */ 53/* Default to forward because I got too much mail already. */
126static int forward = NF_ACCEPT; 54static int forward = NF_ACCEPT;
@@ -128,9 +56,18 @@ module_param(forward, bool, 0000);
128 56
129static int __net_init iptable_filter_net_init(struct net *net) 57static int __net_init iptable_filter_net_init(struct net *net)
130{ 58{
131 /* Register table */ 59 struct ipt_replace *repl;
60
61 repl = ipt_alloc_initial_table(&packet_filter);
62 if (repl == NULL)
63 return -ENOMEM;
64 /* Entry 1 is the FORWARD hook */
65 ((struct ipt_standard *)repl->entries)[1].target.verdict =
66 -forward - 1;
67
132 net->ipv4.iptable_filter = 68 net->ipv4.iptable_filter =
133 ipt_register_table(net, &packet_filter, &initial_table.repl); 69 ipt_register_table(net, &packet_filter, repl);
70 kfree(repl);
134 if (IS_ERR(net->ipv4.iptable_filter)) 71 if (IS_ERR(net->ipv4.iptable_filter))
135 return PTR_ERR(net->ipv4.iptable_filter); 72 return PTR_ERR(net->ipv4.iptable_filter);
136 return 0; 73 return 0;
@@ -138,7 +75,7 @@ static int __net_init iptable_filter_net_init(struct net *net)
138 75
139static void __net_exit iptable_filter_net_exit(struct net *net) 76static void __net_exit iptable_filter_net_exit(struct net *net)
140{ 77{
141 ipt_unregister_table(net->ipv4.iptable_filter); 78 ipt_unregister_table(net, net->ipv4.iptable_filter);
142} 79}
143 80
144static struct pernet_operations iptable_filter_net_ops = { 81static struct pernet_operations iptable_filter_net_ops = {
@@ -155,17 +92,16 @@ static int __init iptable_filter_init(void)
155 return -EINVAL; 92 return -EINVAL;
156 } 93 }
157 94
158 /* Entry 1 is the FORWARD hook */
159 initial_table.entries[1].target.verdict = -forward - 1;
160
161 ret = register_pernet_subsys(&iptable_filter_net_ops); 95 ret = register_pernet_subsys(&iptable_filter_net_ops);
162 if (ret < 0) 96 if (ret < 0)
163 return ret; 97 return ret;
164 98
165 /* Register hooks */ 99 /* Register hooks */
166 ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); 100 filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook);
167 if (ret < 0) 101 if (IS_ERR(filter_ops)) {
102 ret = PTR_ERR(filter_ops);
168 goto cleanup_table; 103 goto cleanup_table;
104 }
169 105
170 return ret; 106 return ret;
171 107
@@ -176,7 +112,7 @@ static int __init iptable_filter_init(void)
176 112
177static void __exit iptable_filter_fini(void) 113static void __exit iptable_filter_fini(void)
178{ 114{
179 nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); 115 xt_hook_unlink(&packet_filter, filter_ops);
180 unregister_pernet_subsys(&iptable_filter_net_ops); 116 unregister_pernet_subsys(&iptable_filter_net_ops);
181} 117}
182 118
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index fae78c3076c4..b9b83464cbf4 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -27,101 +27,16 @@ MODULE_DESCRIPTION("iptables mangle table");
27 (1 << NF_INET_LOCAL_OUT) | \ 27 (1 << NF_INET_LOCAL_OUT) | \
28 (1 << NF_INET_POST_ROUTING)) 28 (1 << NF_INET_POST_ROUTING))
29 29
30/* Ouch - five different hooks? Maybe this should be a config option..... -- BC */
31static const struct
32{
33 struct ipt_replace repl;
34 struct ipt_standard entries[5];
35 struct ipt_error term;
36} initial_table __net_initdata = {
37 .repl = {
38 .name = "mangle",
39 .valid_hooks = MANGLE_VALID_HOOKS,
40 .num_entries = 6,
41 .size = sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error),
42 .hook_entry = {
43 [NF_INET_PRE_ROUTING] = 0,
44 [NF_INET_LOCAL_IN] = sizeof(struct ipt_standard),
45 [NF_INET_FORWARD] = sizeof(struct ipt_standard) * 2,
46 [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 3,
47 [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard) * 4,
48 },
49 .underflow = {
50 [NF_INET_PRE_ROUTING] = 0,
51 [NF_INET_LOCAL_IN] = sizeof(struct ipt_standard),
52 [NF_INET_FORWARD] = sizeof(struct ipt_standard) * 2,
53 [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 3,
54 [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard) * 4,
55 },
56 },
57 .entries = {
58 IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */
59 IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */
60 IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */
61 IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */
62 IPT_STANDARD_INIT(NF_ACCEPT), /* POST_ROUTING */
63 },
64 .term = IPT_ERROR_INIT, /* ERROR */
65};
66
67static const struct xt_table packet_mangler = { 30static const struct xt_table packet_mangler = {
68 .name = "mangle", 31 .name = "mangle",
69 .valid_hooks = MANGLE_VALID_HOOKS, 32 .valid_hooks = MANGLE_VALID_HOOKS,
70 .me = THIS_MODULE, 33 .me = THIS_MODULE,
71 .af = NFPROTO_IPV4, 34 .af = NFPROTO_IPV4,
35 .priority = NF_IP_PRI_MANGLE,
72}; 36};
73 37
74/* The work comes in here from netfilter.c. */
75static unsigned int
76ipt_pre_routing_hook(unsigned int hook,
77 struct sk_buff *skb,
78 const struct net_device *in,
79 const struct net_device *out,
80 int (*okfn)(struct sk_buff *))
81{
82 return ipt_do_table(skb, hook, in, out,
83 dev_net(in)->ipv4.iptable_mangle);
84}
85
86static unsigned int
87ipt_post_routing_hook(unsigned int hook,
88 struct sk_buff *skb,
89 const struct net_device *in,
90 const struct net_device *out,
91 int (*okfn)(struct sk_buff *))
92{
93 return ipt_do_table(skb, hook, in, out,
94 dev_net(out)->ipv4.iptable_mangle);
95}
96
97static unsigned int
98ipt_local_in_hook(unsigned int hook,
99 struct sk_buff *skb,
100 const struct net_device *in,
101 const struct net_device *out,
102 int (*okfn)(struct sk_buff *))
103{
104 return ipt_do_table(skb, hook, in, out,
105 dev_net(in)->ipv4.iptable_mangle);
106}
107
108static unsigned int
109ipt_forward_hook(unsigned int hook,
110 struct sk_buff *skb,
111 const struct net_device *in,
112 const struct net_device *out,
113 int (*okfn)(struct sk_buff *))
114{
115 return ipt_do_table(skb, hook, in, out,
116 dev_net(in)->ipv4.iptable_mangle);
117}
118
119static unsigned int 38static unsigned int
120ipt_local_hook(unsigned int hook, 39ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)
121 struct sk_buff *skb,
122 const struct net_device *in,
123 const struct net_device *out,
124 int (*okfn)(struct sk_buff *))
125{ 40{
126 unsigned int ret; 41 unsigned int ret;
127 const struct iphdr *iph; 42 const struct iphdr *iph;
@@ -141,7 +56,7 @@ ipt_local_hook(unsigned int hook,
141 daddr = iph->daddr; 56 daddr = iph->daddr;
142 tos = iph->tos; 57 tos = iph->tos;
143 58
144 ret = ipt_do_table(skb, hook, in, out, 59 ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out,
145 dev_net(out)->ipv4.iptable_mangle); 60 dev_net(out)->ipv4.iptable_mangle);
146 /* Reroute for ANY change. */ 61 /* Reroute for ANY change. */
147 if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { 62 if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
@@ -158,49 +73,36 @@ ipt_local_hook(unsigned int hook,
158 return ret; 73 return ret;
159} 74}
160 75
161static struct nf_hook_ops ipt_ops[] __read_mostly = { 76/* The work comes in here from netfilter.c. */
162 { 77static unsigned int
163 .hook = ipt_pre_routing_hook, 78iptable_mangle_hook(unsigned int hook,
164 .owner = THIS_MODULE, 79 struct sk_buff *skb,
165 .pf = NFPROTO_IPV4, 80 const struct net_device *in,
166 .hooknum = NF_INET_PRE_ROUTING, 81 const struct net_device *out,
167 .priority = NF_IP_PRI_MANGLE, 82 int (*okfn)(struct sk_buff *))
168 }, 83{
169 { 84 if (hook == NF_INET_LOCAL_OUT)
170 .hook = ipt_local_in_hook, 85 return ipt_mangle_out(skb, out);
171 .owner = THIS_MODULE, 86 if (hook == NF_INET_POST_ROUTING)
172 .pf = NFPROTO_IPV4, 87 return ipt_do_table(skb, hook, in, out,
173 .hooknum = NF_INET_LOCAL_IN, 88 dev_net(out)->ipv4.iptable_mangle);
174 .priority = NF_IP_PRI_MANGLE, 89 /* PREROUTING/INPUT/FORWARD: */
175 }, 90 return ipt_do_table(skb, hook, in, out,
176 { 91 dev_net(in)->ipv4.iptable_mangle);
177 .hook = ipt_forward_hook, 92}
178 .owner = THIS_MODULE, 93
179 .pf = NFPROTO_IPV4, 94static struct nf_hook_ops *mangle_ops __read_mostly;
180 .hooknum = NF_INET_FORWARD,
181 .priority = NF_IP_PRI_MANGLE,
182 },
183 {
184 .hook = ipt_local_hook,
185 .owner = THIS_MODULE,
186 .pf = NFPROTO_IPV4,
187 .hooknum = NF_INET_LOCAL_OUT,
188 .priority = NF_IP_PRI_MANGLE,
189 },
190 {
191 .hook = ipt_post_routing_hook,
192 .owner = THIS_MODULE,
193 .pf = NFPROTO_IPV4,
194 .hooknum = NF_INET_POST_ROUTING,
195 .priority = NF_IP_PRI_MANGLE,
196 },
197};
198 95
199static int __net_init iptable_mangle_net_init(struct net *net) 96static int __net_init iptable_mangle_net_init(struct net *net)
200{ 97{
201 /* Register table */ 98 struct ipt_replace *repl;
99
100 repl = ipt_alloc_initial_table(&packet_mangler);
101 if (repl == NULL)
102 return -ENOMEM;
202 net->ipv4.iptable_mangle = 103 net->ipv4.iptable_mangle =
203 ipt_register_table(net, &packet_mangler, &initial_table.repl); 104 ipt_register_table(net, &packet_mangler, repl);
105 kfree(repl);
204 if (IS_ERR(net->ipv4.iptable_mangle)) 106 if (IS_ERR(net->ipv4.iptable_mangle))
205 return PTR_ERR(net->ipv4.iptable_mangle); 107 return PTR_ERR(net->ipv4.iptable_mangle);
206 return 0; 108 return 0;
@@ -208,7 +110,7 @@ static int __net_init iptable_mangle_net_init(struct net *net)
208 110
209static void __net_exit iptable_mangle_net_exit(struct net *net) 111static void __net_exit iptable_mangle_net_exit(struct net *net)
210{ 112{
211 ipt_unregister_table(net->ipv4.iptable_mangle); 113 ipt_unregister_table(net, net->ipv4.iptable_mangle);
212} 114}
213 115
214static struct pernet_operations iptable_mangle_net_ops = { 116static struct pernet_operations iptable_mangle_net_ops = {
@@ -225,9 +127,11 @@ static int __init iptable_mangle_init(void)
225 return ret; 127 return ret;
226 128
227 /* Register hooks */ 129 /* Register hooks */
228 ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); 130 mangle_ops = xt_hook_link(&packet_mangler, iptable_mangle_hook);
229 if (ret < 0) 131 if (IS_ERR(mangle_ops)) {
132 ret = PTR_ERR(mangle_ops);
230 goto cleanup_table; 133 goto cleanup_table;
134 }
231 135
232 return ret; 136 return ret;
233 137
@@ -238,7 +142,7 @@ static int __init iptable_mangle_init(void)
238 142
239static void __exit iptable_mangle_fini(void) 143static void __exit iptable_mangle_fini(void)
240{ 144{
241 nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); 145 xt_hook_unlink(&packet_mangler, mangle_ops);
242 unregister_pernet_subsys(&iptable_mangle_net_ops); 146 unregister_pernet_subsys(&iptable_mangle_net_ops);
243} 147}
244 148
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 993edc23be09..06fb9d11953c 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -9,90 +9,44 @@
9 9
10#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) 10#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
11 11
12static const struct
13{
14 struct ipt_replace repl;
15 struct ipt_standard entries[2];
16 struct ipt_error term;
17} initial_table __net_initdata = {
18 .repl = {
19 .name = "raw",
20 .valid_hooks = RAW_VALID_HOOKS,
21 .num_entries = 3,
22 .size = sizeof(struct ipt_standard) * 2 + sizeof(struct ipt_error),
23 .hook_entry = {
24 [NF_INET_PRE_ROUTING] = 0,
25 [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard)
26 },
27 .underflow = {
28 [NF_INET_PRE_ROUTING] = 0,
29 [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard)
30 },
31 },
32 .entries = {
33 IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */
34 IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */
35 },
36 .term = IPT_ERROR_INIT, /* ERROR */
37};
38
39static const struct xt_table packet_raw = { 12static const struct xt_table packet_raw = {
40 .name = "raw", 13 .name = "raw",
41 .valid_hooks = RAW_VALID_HOOKS, 14 .valid_hooks = RAW_VALID_HOOKS,
42 .me = THIS_MODULE, 15 .me = THIS_MODULE,
43 .af = NFPROTO_IPV4, 16 .af = NFPROTO_IPV4,
17 .priority = NF_IP_PRI_RAW,
44}; 18};
45 19
46/* The work comes in here from netfilter.c. */ 20/* The work comes in here from netfilter.c. */
47static unsigned int 21static unsigned int
48ipt_hook(unsigned int hook, 22iptable_raw_hook(unsigned int hook, struct sk_buff *skb,
49 struct sk_buff *skb, 23 const struct net_device *in, const struct net_device *out,
50 const struct net_device *in, 24 int (*okfn)(struct sk_buff *))
51 const struct net_device *out,
52 int (*okfn)(struct sk_buff *))
53{ 25{
54 return ipt_do_table(skb, hook, in, out, 26 const struct net *net;
55 dev_net(in)->ipv4.iptable_raw);
56}
57 27
58static unsigned int 28 if (hook == NF_INET_LOCAL_OUT &&
59ipt_local_hook(unsigned int hook, 29 (skb->len < sizeof(struct iphdr) ||
60 struct sk_buff *skb, 30 ip_hdrlen(skb) < sizeof(struct iphdr)))
61 const struct net_device *in, 31 /* root is playing with raw sockets. */
62 const struct net_device *out,
63 int (*okfn)(struct sk_buff *))
64{
65 /* root is playing with raw sockets. */
66 if (skb->len < sizeof(struct iphdr) ||
67 ip_hdrlen(skb) < sizeof(struct iphdr))
68 return NF_ACCEPT; 32 return NF_ACCEPT;
69 return ipt_do_table(skb, hook, in, out, 33
70 dev_net(out)->ipv4.iptable_raw); 34 net = dev_net((in != NULL) ? in : out);
35 return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_raw);
71} 36}
72 37
73/* 'raw' is the very first table. */ 38static struct nf_hook_ops *rawtable_ops __read_mostly;
74static struct nf_hook_ops ipt_ops[] __read_mostly = {
75 {
76 .hook = ipt_hook,
77 .pf = NFPROTO_IPV4,
78 .hooknum = NF_INET_PRE_ROUTING,
79 .priority = NF_IP_PRI_RAW,
80 .owner = THIS_MODULE,
81 },
82 {
83 .hook = ipt_local_hook,
84 .pf = NFPROTO_IPV4,
85 .hooknum = NF_INET_LOCAL_OUT,
86 .priority = NF_IP_PRI_RAW,
87 .owner = THIS_MODULE,
88 },
89};
90 39
91static int __net_init iptable_raw_net_init(struct net *net) 40static int __net_init iptable_raw_net_init(struct net *net)
92{ 41{
93 /* Register table */ 42 struct ipt_replace *repl;
43
44 repl = ipt_alloc_initial_table(&packet_raw);
45 if (repl == NULL)
46 return -ENOMEM;
94 net->ipv4.iptable_raw = 47 net->ipv4.iptable_raw =
95 ipt_register_table(net, &packet_raw, &initial_table.repl); 48 ipt_register_table(net, &packet_raw, repl);
49 kfree(repl);
96 if (IS_ERR(net->ipv4.iptable_raw)) 50 if (IS_ERR(net->ipv4.iptable_raw))
97 return PTR_ERR(net->ipv4.iptable_raw); 51 return PTR_ERR(net->ipv4.iptable_raw);
98 return 0; 52 return 0;
@@ -100,7 +54,7 @@ static int __net_init iptable_raw_net_init(struct net *net)
100 54
101static void __net_exit iptable_raw_net_exit(struct net *net) 55static void __net_exit iptable_raw_net_exit(struct net *net)
102{ 56{
103 ipt_unregister_table(net->ipv4.iptable_raw); 57 ipt_unregister_table(net, net->ipv4.iptable_raw);
104} 58}
105 59
106static struct pernet_operations iptable_raw_net_ops = { 60static struct pernet_operations iptable_raw_net_ops = {
@@ -117,9 +71,11 @@ static int __init iptable_raw_init(void)
117 return ret; 71 return ret;
118 72
119 /* Register hooks */ 73 /* Register hooks */
120 ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); 74 rawtable_ops = xt_hook_link(&packet_raw, iptable_raw_hook);
121 if (ret < 0) 75 if (IS_ERR(rawtable_ops)) {
76 ret = PTR_ERR(rawtable_ops);
122 goto cleanup_table; 77 goto cleanup_table;
78 }
123 79
124 return ret; 80 return ret;
125 81
@@ -130,7 +86,7 @@ static int __init iptable_raw_init(void)
130 86
131static void __exit iptable_raw_fini(void) 87static void __exit iptable_raw_fini(void)
132{ 88{
133 nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); 89 xt_hook_unlink(&packet_raw, rawtable_ops);
134 unregister_pernet_subsys(&iptable_raw_net_ops); 90 unregister_pernet_subsys(&iptable_raw_net_ops);
135} 91}
136 92
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index 3bd3d6388da5..cce2f64e6f21 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -27,109 +27,44 @@ MODULE_DESCRIPTION("iptables security table, for MAC rules");
27 (1 << NF_INET_FORWARD) | \ 27 (1 << NF_INET_FORWARD) | \
28 (1 << NF_INET_LOCAL_OUT) 28 (1 << NF_INET_LOCAL_OUT)
29 29
30static const struct
31{
32 struct ipt_replace repl;
33 struct ipt_standard entries[3];
34 struct ipt_error term;
35} initial_table __net_initdata = {
36 .repl = {
37 .name = "security",
38 .valid_hooks = SECURITY_VALID_HOOKS,
39 .num_entries = 4,
40 .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
41 .hook_entry = {
42 [NF_INET_LOCAL_IN] = 0,
43 [NF_INET_FORWARD] = sizeof(struct ipt_standard),
44 [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
45 },
46 .underflow = {
47 [NF_INET_LOCAL_IN] = 0,
48 [NF_INET_FORWARD] = sizeof(struct ipt_standard),
49 [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
50 },
51 },
52 .entries = {
53 IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */
54 IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */
55 IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */
56 },
57 .term = IPT_ERROR_INIT, /* ERROR */
58};
59
60static const struct xt_table security_table = { 30static const struct xt_table security_table = {
61 .name = "security", 31 .name = "security",
62 .valid_hooks = SECURITY_VALID_HOOKS, 32 .valid_hooks = SECURITY_VALID_HOOKS,
63 .me = THIS_MODULE, 33 .me = THIS_MODULE,
64 .af = NFPROTO_IPV4, 34 .af = NFPROTO_IPV4,
35 .priority = NF_IP_PRI_SECURITY,
65}; 36};
66 37
67static unsigned int 38static unsigned int
68ipt_local_in_hook(unsigned int hook, 39iptable_security_hook(unsigned int hook, struct sk_buff *skb,
69 struct sk_buff *skb, 40 const struct net_device *in,
70 const struct net_device *in, 41 const struct net_device *out,
71 const struct net_device *out, 42 int (*okfn)(struct sk_buff *))
72 int (*okfn)(struct sk_buff *))
73{
74 return ipt_do_table(skb, hook, in, out,
75 dev_net(in)->ipv4.iptable_security);
76}
77
78static unsigned int
79ipt_forward_hook(unsigned int hook,
80 struct sk_buff *skb,
81 const struct net_device *in,
82 const struct net_device *out,
83 int (*okfn)(struct sk_buff *))
84{ 43{
85 return ipt_do_table(skb, hook, in, out, 44 const struct net *net;
86 dev_net(in)->ipv4.iptable_security);
87}
88 45
89static unsigned int 46 if (hook == NF_INET_LOCAL_OUT &&
90ipt_local_out_hook(unsigned int hook, 47 (skb->len < sizeof(struct iphdr) ||
91 struct sk_buff *skb, 48 ip_hdrlen(skb) < sizeof(struct iphdr)))
92 const struct net_device *in, 49 /* Somebody is playing with raw sockets. */
93 const struct net_device *out,
94 int (*okfn)(struct sk_buff *))
95{
96 /* Somebody is playing with raw sockets. */
97 if (skb->len < sizeof(struct iphdr) ||
98 ip_hdrlen(skb) < sizeof(struct iphdr))
99 return NF_ACCEPT; 50 return NF_ACCEPT;
100 return ipt_do_table(skb, hook, in, out, 51
101 dev_net(out)->ipv4.iptable_security); 52 net = dev_net((in != NULL) ? in : out);
53 return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_security);
102} 54}
103 55
104static struct nf_hook_ops ipt_ops[] __read_mostly = { 56static struct nf_hook_ops *sectbl_ops __read_mostly;
105 {
106 .hook = ipt_local_in_hook,
107 .owner = THIS_MODULE,
108 .pf = NFPROTO_IPV4,
109 .hooknum = NF_INET_LOCAL_IN,
110 .priority = NF_IP_PRI_SECURITY,
111 },
112 {
113 .hook = ipt_forward_hook,
114 .owner = THIS_MODULE,
115 .pf = NFPROTO_IPV4,
116 .hooknum = NF_INET_FORWARD,
117 .priority = NF_IP_PRI_SECURITY,
118 },
119 {
120 .hook = ipt_local_out_hook,
121 .owner = THIS_MODULE,
122 .pf = NFPROTO_IPV4,
123 .hooknum = NF_INET_LOCAL_OUT,
124 .priority = NF_IP_PRI_SECURITY,
125 },
126};
127 57
128static int __net_init iptable_security_net_init(struct net *net) 58static int __net_init iptable_security_net_init(struct net *net)
129{ 59{
130 net->ipv4.iptable_security = 60 struct ipt_replace *repl;
131 ipt_register_table(net, &security_table, &initial_table.repl);
132 61
62 repl = ipt_alloc_initial_table(&security_table);
63 if (repl == NULL)
64 return -ENOMEM;
65 net->ipv4.iptable_security =
66 ipt_register_table(net, &security_table, repl);
67 kfree(repl);
133 if (IS_ERR(net->ipv4.iptable_security)) 68 if (IS_ERR(net->ipv4.iptable_security))
134 return PTR_ERR(net->ipv4.iptable_security); 69 return PTR_ERR(net->ipv4.iptable_security);
135 70
@@ -138,7 +73,7 @@ static int __net_init iptable_security_net_init(struct net *net)
138 73
139static void __net_exit iptable_security_net_exit(struct net *net) 74static void __net_exit iptable_security_net_exit(struct net *net)
140{ 75{
141 ipt_unregister_table(net->ipv4.iptable_security); 76 ipt_unregister_table(net, net->ipv4.iptable_security);
142} 77}
143 78
144static struct pernet_operations iptable_security_net_ops = { 79static struct pernet_operations iptable_security_net_ops = {
@@ -154,9 +89,11 @@ static int __init iptable_security_init(void)
154 if (ret < 0) 89 if (ret < 0)
155 return ret; 90 return ret;
156 91
157 ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); 92 sectbl_ops = xt_hook_link(&security_table, iptable_security_hook);
158 if (ret < 0) 93 if (IS_ERR(sectbl_ops)) {
94 ret = PTR_ERR(sectbl_ops);
159 goto cleanup_table; 95 goto cleanup_table;
96 }
160 97
161 return ret; 98 return ret;
162 99
@@ -167,7 +104,7 @@ cleanup_table:
167 104
168static void __exit iptable_security_fini(void) 105static void __exit iptable_security_fini(void)
169{ 106{
170 nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); 107 xt_hook_unlink(&security_table, sectbl_ops);
171 unregister_pernet_subsys(&iptable_security_net_ops); 108 unregister_pernet_subsys(&iptable_security_net_ops);
172} 109}
173 110
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index d1ea38a7c490..2bb1f87051c4 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -22,6 +22,7 @@
22#include <net/netfilter/nf_conntrack_helper.h> 22#include <net/netfilter/nf_conntrack_helper.h>
23#include <net/netfilter/nf_conntrack_l4proto.h> 23#include <net/netfilter/nf_conntrack_l4proto.h>
24#include <net/netfilter/nf_conntrack_l3proto.h> 24#include <net/netfilter/nf_conntrack_l3proto.h>
25#include <net/netfilter/nf_conntrack_zones.h>
25#include <net/netfilter/nf_conntrack_core.h> 26#include <net/netfilter/nf_conntrack_core.h>
26#include <net/netfilter/ipv4/nf_conntrack_ipv4.h> 27#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
27#include <net/netfilter/nf_nat_helper.h> 28#include <net/netfilter/nf_nat_helper.h>
@@ -266,7 +267,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
266 return -EINVAL; 267 return -EINVAL;
267 } 268 }
268 269
269 h = nf_conntrack_find_get(sock_net(sk), &tuple); 270 h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple);
270 if (h) { 271 if (h) {
271 struct sockaddr_in sin; 272 struct sockaddr_in sin;
272 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 273 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 7afd39b5b781..7404bde95994 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -18,6 +18,7 @@
18#include <net/netfilter/nf_conntrack_tuple.h> 18#include <net/netfilter/nf_conntrack_tuple.h>
19#include <net/netfilter/nf_conntrack_l4proto.h> 19#include <net/netfilter/nf_conntrack_l4proto.h>
20#include <net/netfilter/nf_conntrack_core.h> 20#include <net/netfilter/nf_conntrack_core.h>
21#include <net/netfilter/nf_conntrack_zones.h>
21#include <net/netfilter/nf_log.h> 22#include <net/netfilter/nf_log.h>
22 23
23static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ; 24static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ;
@@ -114,13 +115,14 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
114 115
115/* Returns conntrack if it dealt with ICMP, and filled in skb fields */ 116/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
116static int 117static int
117icmp_error_message(struct net *net, struct sk_buff *skb, 118icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
118 enum ip_conntrack_info *ctinfo, 119 enum ip_conntrack_info *ctinfo,
119 unsigned int hooknum) 120 unsigned int hooknum)
120{ 121{
121 struct nf_conntrack_tuple innertuple, origtuple; 122 struct nf_conntrack_tuple innertuple, origtuple;
122 const struct nf_conntrack_l4proto *innerproto; 123 const struct nf_conntrack_l4proto *innerproto;
123 const struct nf_conntrack_tuple_hash *h; 124 const struct nf_conntrack_tuple_hash *h;
125 u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
124 126
125 NF_CT_ASSERT(skb->nfct == NULL); 127 NF_CT_ASSERT(skb->nfct == NULL);
126 128
@@ -146,7 +148,7 @@ icmp_error_message(struct net *net, struct sk_buff *skb,
146 148
147 *ctinfo = IP_CT_RELATED; 149 *ctinfo = IP_CT_RELATED;
148 150
149 h = nf_conntrack_find_get(net, &innertuple); 151 h = nf_conntrack_find_get(net, zone, &innertuple);
150 if (!h) { 152 if (!h) {
151 pr_debug("icmp_error_message: no match\n"); 153 pr_debug("icmp_error_message: no match\n");
152 return -NF_ACCEPT; 154 return -NF_ACCEPT;
@@ -163,7 +165,8 @@ icmp_error_message(struct net *net, struct sk_buff *skb,
163 165
164/* Small and modified version of icmp_rcv */ 166/* Small and modified version of icmp_rcv */
165static int 167static int
166icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, 168icmp_error(struct net *net, struct nf_conn *tmpl,
169 struct sk_buff *skb, unsigned int dataoff,
167 enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) 170 enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum)
168{ 171{
169 const struct icmphdr *icmph; 172 const struct icmphdr *icmph;
@@ -208,7 +211,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
208 icmph->type != ICMP_REDIRECT) 211 icmph->type != ICMP_REDIRECT)
209 return NF_ACCEPT; 212 return NF_ACCEPT;
210 213
211 return icmp_error_message(net, skb, ctinfo, hooknum); 214 return icmp_error_message(net, tmpl, skb, ctinfo, hooknum);
212} 215}
213 216
214#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 217#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 331ead3ebd1b..cb763ae9ed90 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -17,6 +17,10 @@
17#include <linux/netfilter_bridge.h> 17#include <linux/netfilter_bridge.h>
18#include <linux/netfilter_ipv4.h> 18#include <linux/netfilter_ipv4.h>
19#include <net/netfilter/ipv4/nf_defrag_ipv4.h> 19#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
20#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
21#include <net/netfilter/nf_conntrack.h>
22#endif
23#include <net/netfilter/nf_conntrack_zones.h>
20 24
21/* Returns new sk_buff, or NULL */ 25/* Returns new sk_buff, or NULL */
22static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) 26static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
@@ -38,15 +42,22 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
38static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, 42static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum,
39 struct sk_buff *skb) 43 struct sk_buff *skb)
40{ 44{
45 u16 zone = NF_CT_DEFAULT_ZONE;
46
47#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
48 if (skb->nfct)
49 zone = nf_ct_zone((struct nf_conn *)skb->nfct);
50#endif
51
41#ifdef CONFIG_BRIDGE_NETFILTER 52#ifdef CONFIG_BRIDGE_NETFILTER
42 if (skb->nf_bridge && 53 if (skb->nf_bridge &&
43 skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) 54 skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
44 return IP_DEFRAG_CONNTRACK_BRIDGE_IN; 55 return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
45#endif 56#endif
46 if (hooknum == NF_INET_PRE_ROUTING) 57 if (hooknum == NF_INET_PRE_ROUTING)
47 return IP_DEFRAG_CONNTRACK_IN; 58 return IP_DEFRAG_CONNTRACK_IN + zone;
48 else 59 else
49 return IP_DEFRAG_CONNTRACK_OUT; 60 return IP_DEFRAG_CONNTRACK_OUT + zone;
50} 61}
51 62
52static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, 63static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
@@ -59,7 +70,7 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
59#if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE) 70#if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE)
60 /* Previously seen (loopback)? Ignore. Do this before 71 /* Previously seen (loopback)? Ignore. Do this before
61 fragment check. */ 72 fragment check. */
62 if (skb->nfct) 73 if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
63 return NF_ACCEPT; 74 return NF_ACCEPT;
64#endif 75#endif
65#endif 76#endif
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 26066a2327ad..4595281c2863 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -30,6 +30,7 @@
30#include <net/netfilter/nf_conntrack_helper.h> 30#include <net/netfilter/nf_conntrack_helper.h>
31#include <net/netfilter/nf_conntrack_l3proto.h> 31#include <net/netfilter/nf_conntrack_l3proto.h>
32#include <net/netfilter/nf_conntrack_l4proto.h> 32#include <net/netfilter/nf_conntrack_l4proto.h>
33#include <net/netfilter/nf_conntrack_zones.h>
33 34
34static DEFINE_SPINLOCK(nf_nat_lock); 35static DEFINE_SPINLOCK(nf_nat_lock);
35 36
@@ -69,13 +70,14 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_put);
69 70
70/* We keep an extra hash for each conntrack, for fast searching. */ 71/* We keep an extra hash for each conntrack, for fast searching. */
71static inline unsigned int 72static inline unsigned int
72hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple) 73hash_by_src(const struct net *net, u16 zone,
74 const struct nf_conntrack_tuple *tuple)
73{ 75{
74 unsigned int hash; 76 unsigned int hash;
75 77
76 /* Original src, to ensure we map it consistently if poss. */ 78 /* Original src, to ensure we map it consistently if poss. */
77 hash = jhash_3words((__force u32)tuple->src.u3.ip, 79 hash = jhash_3words((__force u32)tuple->src.u3.ip,
78 (__force u32)tuple->src.u.all, 80 (__force u32)tuple->src.u.all ^ zone,
79 tuple->dst.protonum, 0); 81 tuple->dst.protonum, 0);
80 return ((u64)hash * net->ipv4.nat_htable_size) >> 32; 82 return ((u64)hash * net->ipv4.nat_htable_size) >> 32;
81} 83}
@@ -139,12 +141,12 @@ same_src(const struct nf_conn *ct,
139 141
140/* Only called for SRC manip */ 142/* Only called for SRC manip */
141static int 143static int
142find_appropriate_src(struct net *net, 144find_appropriate_src(struct net *net, u16 zone,
143 const struct nf_conntrack_tuple *tuple, 145 const struct nf_conntrack_tuple *tuple,
144 struct nf_conntrack_tuple *result, 146 struct nf_conntrack_tuple *result,
145 const struct nf_nat_range *range) 147 const struct nf_nat_range *range)
146{ 148{
147 unsigned int h = hash_by_src(net, tuple); 149 unsigned int h = hash_by_src(net, zone, tuple);
148 const struct nf_conn_nat *nat; 150 const struct nf_conn_nat *nat;
149 const struct nf_conn *ct; 151 const struct nf_conn *ct;
150 const struct hlist_node *n; 152 const struct hlist_node *n;
@@ -152,7 +154,7 @@ find_appropriate_src(struct net *net,
152 rcu_read_lock(); 154 rcu_read_lock();
153 hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) { 155 hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) {
154 ct = nat->ct; 156 ct = nat->ct;
155 if (same_src(ct, tuple)) { 157 if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) {
156 /* Copy source part from reply tuple. */ 158 /* Copy source part from reply tuple. */
157 nf_ct_invert_tuplepr(result, 159 nf_ct_invert_tuplepr(result,
158 &ct->tuplehash[IP_CT_DIR_REPLY].tuple); 160 &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
@@ -175,7 +177,7 @@ find_appropriate_src(struct net *net,
175 the ip with the lowest src-ip/dst-ip/proto usage. 177 the ip with the lowest src-ip/dst-ip/proto usage.
176*/ 178*/
177static void 179static void
178find_best_ips_proto(struct nf_conntrack_tuple *tuple, 180find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
179 const struct nf_nat_range *range, 181 const struct nf_nat_range *range,
180 const struct nf_conn *ct, 182 const struct nf_conn *ct,
181 enum nf_nat_manip_type maniptype) 183 enum nf_nat_manip_type maniptype)
@@ -209,7 +211,7 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple,
209 maxip = ntohl(range->max_ip); 211 maxip = ntohl(range->max_ip);
210 j = jhash_2words((__force u32)tuple->src.u3.ip, 212 j = jhash_2words((__force u32)tuple->src.u3.ip,
211 range->flags & IP_NAT_RANGE_PERSISTENT ? 213 range->flags & IP_NAT_RANGE_PERSISTENT ?
212 0 : (__force u32)tuple->dst.u3.ip, 0); 214 0 : (__force u32)tuple->dst.u3.ip ^ zone, 0);
213 j = ((u64)j * (maxip - minip + 1)) >> 32; 215 j = ((u64)j * (maxip - minip + 1)) >> 32;
214 *var_ipp = htonl(minip + j); 216 *var_ipp = htonl(minip + j);
215} 217}
@@ -229,6 +231,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
229{ 231{
230 struct net *net = nf_ct_net(ct); 232 struct net *net = nf_ct_net(ct);
231 const struct nf_nat_protocol *proto; 233 const struct nf_nat_protocol *proto;
234 u16 zone = nf_ct_zone(ct);
232 235
233 /* 1) If this srcip/proto/src-proto-part is currently mapped, 236 /* 1) If this srcip/proto/src-proto-part is currently mapped,
234 and that same mapping gives a unique tuple within the given 237 and that same mapping gives a unique tuple within the given
@@ -239,7 +242,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
239 manips not an issue. */ 242 manips not an issue. */
240 if (maniptype == IP_NAT_MANIP_SRC && 243 if (maniptype == IP_NAT_MANIP_SRC &&
241 !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { 244 !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
242 if (find_appropriate_src(net, orig_tuple, tuple, range)) { 245 if (find_appropriate_src(net, zone, orig_tuple, tuple, range)) {
243 pr_debug("get_unique_tuple: Found current src map\n"); 246 pr_debug("get_unique_tuple: Found current src map\n");
244 if (!nf_nat_used_tuple(tuple, ct)) 247 if (!nf_nat_used_tuple(tuple, ct))
245 return; 248 return;
@@ -249,7 +252,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
249 /* 2) Select the least-used IP/proto combination in the given 252 /* 2) Select the least-used IP/proto combination in the given
250 range. */ 253 range. */
251 *tuple = *orig_tuple; 254 *tuple = *orig_tuple;
252 find_best_ips_proto(tuple, range, ct, maniptype); 255 find_best_ips_proto(zone, tuple, range, ct, maniptype);
253 256
254 /* 3) The per-protocol part of the manip is made to map into 257 /* 3) The per-protocol part of the manip is made to map into
255 the range to make a unique tuple. */ 258 the range to make a unique tuple. */
@@ -327,7 +330,8 @@ nf_nat_setup_info(struct nf_conn *ct,
327 if (have_to_hash) { 330 if (have_to_hash) {
328 unsigned int srchash; 331 unsigned int srchash;
329 332
330 srchash = hash_by_src(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 333 srchash = hash_by_src(net, nf_ct_zone(ct),
334 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
331 spin_lock_bh(&nf_nat_lock); 335 spin_lock_bh(&nf_nat_lock);
332 /* nf_conntrack_alter_reply might re-allocate exntension aera */ 336 /* nf_conntrack_alter_reply might re-allocate exntension aera */
333 nat = nfct_nat(ct); 337 nat = nfct_nat(ct);
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c
index a1d5d58a58bf..86e0e84ff0a0 100644
--- a/net/ipv4/netfilter/nf_nat_ftp.c
+++ b/net/ipv4/netfilter/nf_nat_ftp.c
@@ -27,76 +27,29 @@ MODULE_ALIAS("ip_nat_ftp");
27 27
28/* FIXME: Time out? --RR */ 28/* FIXME: Time out? --RR */
29 29
30static int 30static int nf_nat_ftp_fmt_cmd(enum nf_ct_ftp_type type,
31mangle_rfc959_packet(struct sk_buff *skb, 31 char *buffer, size_t buflen,
32 __be32 newip, 32 __be32 addr, u16 port)
33 u_int16_t port,
34 unsigned int matchoff,
35 unsigned int matchlen,
36 struct nf_conn *ct,
37 enum ip_conntrack_info ctinfo)
38{ 33{
39 char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")]; 34 switch (type) {
40 35 case NF_CT_FTP_PORT:
41 sprintf(buffer, "%u,%u,%u,%u,%u,%u", 36 case NF_CT_FTP_PASV:
42 NIPQUAD(newip), port>>8, port&0xFF); 37 return snprintf(buffer, buflen, "%u,%u,%u,%u,%u,%u",
43 38 ((unsigned char *)&addr)[0],
44 pr_debug("calling nf_nat_mangle_tcp_packet\n"); 39 ((unsigned char *)&addr)[1],
45 40 ((unsigned char *)&addr)[2],
46 return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, 41 ((unsigned char *)&addr)[3],
47 matchlen, buffer, strlen(buffer)); 42 port >> 8,
48} 43 port & 0xFF);
49 44 case NF_CT_FTP_EPRT:
50/* |1|132.235.1.2|6275| */ 45 return snprintf(buffer, buflen, "|1|%pI4|%u|", &addr, port);
51static int 46 case NF_CT_FTP_EPSV:
52mangle_eprt_packet(struct sk_buff *skb, 47 return snprintf(buffer, buflen, "|||%u|", port);
53 __be32 newip, 48 }
54 u_int16_t port,
55 unsigned int matchoff,
56 unsigned int matchlen,
57 struct nf_conn *ct,
58 enum ip_conntrack_info ctinfo)
59{
60 char buffer[sizeof("|1|255.255.255.255|65535|")];
61
62 sprintf(buffer, "|1|%u.%u.%u.%u|%u|", NIPQUAD(newip), port);
63
64 pr_debug("calling nf_nat_mangle_tcp_packet\n");
65
66 return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff,
67 matchlen, buffer, strlen(buffer));
68}
69
70/* |1|132.235.1.2|6275| */
71static int
72mangle_epsv_packet(struct sk_buff *skb,
73 __be32 newip,
74 u_int16_t port,
75 unsigned int matchoff,
76 unsigned int matchlen,
77 struct nf_conn *ct,
78 enum ip_conntrack_info ctinfo)
79{
80 char buffer[sizeof("|||65535|")];
81
82 sprintf(buffer, "|||%u|", port);
83
84 pr_debug("calling nf_nat_mangle_tcp_packet\n");
85 49
86 return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, 50 return 0;
87 matchlen, buffer, strlen(buffer));
88} 51}
89 52
90static int (*mangle[])(struct sk_buff *, __be32, u_int16_t,
91 unsigned int, unsigned int, struct nf_conn *,
92 enum ip_conntrack_info)
93= {
94 [NF_CT_FTP_PORT] = mangle_rfc959_packet,
95 [NF_CT_FTP_PASV] = mangle_rfc959_packet,
96 [NF_CT_FTP_EPRT] = mangle_eprt_packet,
97 [NF_CT_FTP_EPSV] = mangle_epsv_packet
98};
99
100/* So, this packet has hit the connection tracking matching code. 53/* So, this packet has hit the connection tracking matching code.
101 Mangle it, and change the expectation to match the new version. */ 54 Mangle it, and change the expectation to match the new version. */
102static unsigned int nf_nat_ftp(struct sk_buff *skb, 55static unsigned int nf_nat_ftp(struct sk_buff *skb,
@@ -110,6 +63,8 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb,
110 u_int16_t port; 63 u_int16_t port;
111 int dir = CTINFO2DIR(ctinfo); 64 int dir = CTINFO2DIR(ctinfo);
112 struct nf_conn *ct = exp->master; 65 struct nf_conn *ct = exp->master;
66 char buffer[sizeof("|1|255.255.255.255|65535|")];
67 unsigned int buflen;
113 68
114 pr_debug("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen); 69 pr_debug("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen);
115 70
@@ -132,11 +87,21 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb,
132 if (port == 0) 87 if (port == 0)
133 return NF_DROP; 88 return NF_DROP;
134 89
135 if (!mangle[type](skb, newip, port, matchoff, matchlen, ct, ctinfo)) { 90 buflen = nf_nat_ftp_fmt_cmd(type, buffer, sizeof(buffer), newip, port);
136 nf_ct_unexpect_related(exp); 91 if (!buflen)
137 return NF_DROP; 92 goto out;
138 } 93
94 pr_debug("calling nf_nat_mangle_tcp_packet\n");
95
96 if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff,
97 matchlen, buffer, buflen))
98 goto out;
99
139 return NF_ACCEPT; 100 return NF_ACCEPT;
101
102out:
103 nf_ct_unexpect_related(exp);
104 return NF_DROP;
140} 105}
141 106
142static void __exit nf_nat_ftp_fini(void) 107static void __exit nf_nat_ftp_fini(void)
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 7f10a6be0191..4b6af4bb1f50 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -141,6 +141,17 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra)
141 return 1; 141 return 1;
142} 142}
143 143
144void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
145 __be32 seq, s16 off)
146{
147 if (!off)
148 return;
149 set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
150 adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo);
151 nf_conntrack_event_cache(IPCT_NATSEQADJ, ct);
152}
153EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust);
154
144/* Generic function for mangling variable-length address changes inside 155/* Generic function for mangling variable-length address changes inside
145 * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX 156 * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
146 * command in FTP). 157 * command in FTP).
@@ -149,14 +160,13 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra)
149 * skb enlargement, ... 160 * skb enlargement, ...
150 * 161 *
151 * */ 162 * */
152int 163int __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
153nf_nat_mangle_tcp_packet(struct sk_buff *skb, 164 struct nf_conn *ct,
154 struct nf_conn *ct, 165 enum ip_conntrack_info ctinfo,
155 enum ip_conntrack_info ctinfo, 166 unsigned int match_offset,
156 unsigned int match_offset, 167 unsigned int match_len,
157 unsigned int match_len, 168 const char *rep_buffer,
158 const char *rep_buffer, 169 unsigned int rep_len, bool adjust)
159 unsigned int rep_len)
160{ 170{
161 struct rtable *rt = skb_rtable(skb); 171 struct rtable *rt = skb_rtable(skb);
162 struct iphdr *iph; 172 struct iphdr *iph;
@@ -202,16 +212,13 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb,
202 inet_proto_csum_replace2(&tcph->check, skb, 212 inet_proto_csum_replace2(&tcph->check, skb,
203 htons(oldlen), htons(datalen), 1); 213 htons(oldlen), htons(datalen), 1);
204 214
205 if (rep_len != match_len) { 215 if (adjust && rep_len != match_len)
206 set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); 216 nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq,
207 adjust_tcp_sequence(ntohl(tcph->seq), 217 (int)rep_len - (int)match_len);
208 (int)rep_len - (int)match_len, 218
209 ct, ctinfo);
210 nf_conntrack_event_cache(IPCT_NATSEQADJ, ct);
211 }
212 return 1; 219 return 1;
213} 220}
214EXPORT_SYMBOL(nf_nat_mangle_tcp_packet); 221EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet);
215 222
216/* Generic function for mangling variable-length address changes inside 223/* Generic function for mangling variable-length address changes inside
217 * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX 224 * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 9eb171056c63..4c060038d29f 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -25,6 +25,7 @@
25#include <net/netfilter/nf_nat_rule.h> 25#include <net/netfilter/nf_nat_rule.h>
26#include <net/netfilter/nf_conntrack_helper.h> 26#include <net/netfilter/nf_conntrack_helper.h>
27#include <net/netfilter/nf_conntrack_expect.h> 27#include <net/netfilter/nf_conntrack_expect.h>
28#include <net/netfilter/nf_conntrack_zones.h>
28#include <linux/netfilter/nf_conntrack_proto_gre.h> 29#include <linux/netfilter/nf_conntrack_proto_gre.h>
29#include <linux/netfilter/nf_conntrack_pptp.h> 30#include <linux/netfilter/nf_conntrack_pptp.h>
30 31
@@ -74,7 +75,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
74 75
75 pr_debug("trying to unexpect other dir: "); 76 pr_debug("trying to unexpect other dir: ");
76 nf_ct_dump_tuple_ip(&t); 77 nf_ct_dump_tuple_ip(&t);
77 other_exp = nf_ct_expect_find_get(net, &t); 78 other_exp = nf_ct_expect_find_get(net, nf_ct_zone(ct), &t);
78 if (other_exp) { 79 if (other_exp) {
79 nf_ct_unexpect_related(other_exp); 80 nf_ct_unexpect_related(other_exp);
80 nf_ct_expect_put(other_exp); 81 nf_ct_expect_put(other_exp);
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 9e81e0dfb4ec..ab74cc0535e2 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -28,36 +28,6 @@
28 (1 << NF_INET_POST_ROUTING) | \ 28 (1 << NF_INET_POST_ROUTING) | \
29 (1 << NF_INET_LOCAL_OUT)) 29 (1 << NF_INET_LOCAL_OUT))
30 30
31static const struct
32{
33 struct ipt_replace repl;
34 struct ipt_standard entries[3];
35 struct ipt_error term;
36} nat_initial_table __net_initdata = {
37 .repl = {
38 .name = "nat",
39 .valid_hooks = NAT_VALID_HOOKS,
40 .num_entries = 4,
41 .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
42 .hook_entry = {
43 [NF_INET_PRE_ROUTING] = 0,
44 [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard),
45 [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2
46 },
47 .underflow = {
48 [NF_INET_PRE_ROUTING] = 0,
49 [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard),
50 [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2
51 },
52 },
53 .entries = {
54 IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */
55 IPT_STANDARD_INIT(NF_ACCEPT), /* POST_ROUTING */
56 IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */
57 },
58 .term = IPT_ERROR_INIT, /* ERROR */
59};
60
61static const struct xt_table nat_table = { 31static const struct xt_table nat_table = {
62 .name = "nat", 32 .name = "nat",
63 .valid_hooks = NAT_VALID_HOOKS, 33 .valid_hooks = NAT_VALID_HOOKS,
@@ -186,8 +156,13 @@ static struct xt_target ipt_dnat_reg __read_mostly = {
186 156
187static int __net_init nf_nat_rule_net_init(struct net *net) 157static int __net_init nf_nat_rule_net_init(struct net *net)
188{ 158{
189 net->ipv4.nat_table = ipt_register_table(net, &nat_table, 159 struct ipt_replace *repl;
190 &nat_initial_table.repl); 160
161 repl = ipt_alloc_initial_table(&nat_table);
162 if (repl == NULL)
163 return -ENOMEM;
164 net->ipv4.nat_table = ipt_register_table(net, &nat_table, repl);
165 kfree(repl);
191 if (IS_ERR(net->ipv4.nat_table)) 166 if (IS_ERR(net->ipv4.nat_table))
192 return PTR_ERR(net->ipv4.nat_table); 167 return PTR_ERR(net->ipv4.nat_table);
193 return 0; 168 return 0;
@@ -195,7 +170,7 @@ static int __net_init nf_nat_rule_net_init(struct net *net)
195 170
196static void __net_exit nf_nat_rule_net_exit(struct net *net) 171static void __net_exit nf_nat_rule_net_exit(struct net *net)
197{ 172{
198 ipt_unregister_table(net->ipv4.nat_table); 173 ipt_unregister_table(net, net->ipv4.nat_table);
199} 174}
200 175
201static struct pernet_operations nf_nat_rule_net_ops = { 176static struct pernet_operations nf_nat_rule_net_ops = {
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index 07d61a57613c..11b538deaaec 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -1,4 +1,4 @@
1/* SIP extension for UDP NAT alteration. 1/* SIP extension for NAT alteration.
2 * 2 *
3 * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar> 3 * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
4 * based on RR's ip_nat_ftp.c and other modules. 4 * based on RR's ip_nat_ftp.c and other modules.
@@ -15,6 +15,7 @@
15#include <linux/ip.h> 15#include <linux/ip.h>
16#include <net/ip.h> 16#include <net/ip.h>
17#include <linux/udp.h> 17#include <linux/udp.h>
18#include <linux/tcp.h>
18 19
19#include <net/netfilter/nf_nat.h> 20#include <net/netfilter/nf_nat.h>
20#include <net/netfilter/nf_nat_helper.h> 21#include <net/netfilter/nf_nat_helper.h>
@@ -29,25 +30,42 @@ MODULE_DESCRIPTION("SIP NAT helper");
29MODULE_ALIAS("ip_nat_sip"); 30MODULE_ALIAS("ip_nat_sip");
30 31
31 32
32static unsigned int mangle_packet(struct sk_buff *skb, 33static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff,
33 const char **dptr, unsigned int *datalen, 34 const char **dptr, unsigned int *datalen,
34 unsigned int matchoff, unsigned int matchlen, 35 unsigned int matchoff, unsigned int matchlen,
35 const char *buffer, unsigned int buflen) 36 const char *buffer, unsigned int buflen)
36{ 37{
37 enum ip_conntrack_info ctinfo; 38 enum ip_conntrack_info ctinfo;
38 struct nf_conn *ct = nf_ct_get(skb, &ctinfo); 39 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
39 40 struct tcphdr *th;
40 if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, matchoff, matchlen, 41 unsigned int baseoff;
41 buffer, buflen)) 42
42 return 0; 43 if (nf_ct_protonum(ct) == IPPROTO_TCP) {
44 th = (struct tcphdr *)(skb->data + ip_hdrlen(skb));
45 baseoff = ip_hdrlen(skb) + th->doff * 4;
46 matchoff += dataoff - baseoff;
47
48 if (!__nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
49 matchoff, matchlen,
50 buffer, buflen, false))
51 return 0;
52 } else {
53 baseoff = ip_hdrlen(skb) + sizeof(struct udphdr);
54 matchoff += dataoff - baseoff;
55
56 if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
57 matchoff, matchlen,
58 buffer, buflen))
59 return 0;
60 }
43 61
44 /* Reload data pointer and adjust datalen value */ 62 /* Reload data pointer and adjust datalen value */
45 *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); 63 *dptr = skb->data + dataoff;
46 *datalen += buflen - matchlen; 64 *datalen += buflen - matchlen;
47 return 1; 65 return 1;
48} 66}
49 67
50static int map_addr(struct sk_buff *skb, 68static int map_addr(struct sk_buff *skb, unsigned int dataoff,
51 const char **dptr, unsigned int *datalen, 69 const char **dptr, unsigned int *datalen,
52 unsigned int matchoff, unsigned int matchlen, 70 unsigned int matchoff, unsigned int matchlen,
53 union nf_inet_addr *addr, __be16 port) 71 union nf_inet_addr *addr, __be16 port)
@@ -76,11 +94,11 @@ static int map_addr(struct sk_buff *skb,
76 94
77 buflen = sprintf(buffer, "%pI4:%u", &newaddr, ntohs(newport)); 95 buflen = sprintf(buffer, "%pI4:%u", &newaddr, ntohs(newport));
78 96
79 return mangle_packet(skb, dptr, datalen, matchoff, matchlen, 97 return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen,
80 buffer, buflen); 98 buffer, buflen);
81} 99}
82 100
83static int map_sip_addr(struct sk_buff *skb, 101static int map_sip_addr(struct sk_buff *skb, unsigned int dataoff,
84 const char **dptr, unsigned int *datalen, 102 const char **dptr, unsigned int *datalen,
85 enum sip_header_types type) 103 enum sip_header_types type)
86{ 104{
@@ -93,16 +111,18 @@ static int map_sip_addr(struct sk_buff *skb,
93 if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL, 111 if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL,
94 &matchoff, &matchlen, &addr, &port) <= 0) 112 &matchoff, &matchlen, &addr, &port) <= 0)
95 return 1; 113 return 1;
96 return map_addr(skb, dptr, datalen, matchoff, matchlen, &addr, port); 114 return map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
115 &addr, port);
97} 116}
98 117
99static unsigned int ip_nat_sip(struct sk_buff *skb, 118static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
100 const char **dptr, unsigned int *datalen) 119 const char **dptr, unsigned int *datalen)
101{ 120{
102 enum ip_conntrack_info ctinfo; 121 enum ip_conntrack_info ctinfo;
103 struct nf_conn *ct = nf_ct_get(skb, &ctinfo); 122 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
104 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 123 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
105 unsigned int dataoff, matchoff, matchlen; 124 unsigned int coff, matchoff, matchlen;
125 enum sip_header_types hdr;
106 union nf_inet_addr addr; 126 union nf_inet_addr addr;
107 __be16 port; 127 __be16 port;
108 int request, in_header; 128 int request, in_header;
@@ -112,16 +132,21 @@ static unsigned int ip_nat_sip(struct sk_buff *skb,
112 if (ct_sip_parse_request(ct, *dptr, *datalen, 132 if (ct_sip_parse_request(ct, *dptr, *datalen,
113 &matchoff, &matchlen, 133 &matchoff, &matchlen,
114 &addr, &port) > 0 && 134 &addr, &port) > 0 &&
115 !map_addr(skb, dptr, datalen, matchoff, matchlen, 135 !map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
116 &addr, port)) 136 &addr, port))
117 return NF_DROP; 137 return NF_DROP;
118 request = 1; 138 request = 1;
119 } else 139 } else
120 request = 0; 140 request = 0;
121 141
142 if (nf_ct_protonum(ct) == IPPROTO_TCP)
143 hdr = SIP_HDR_VIA_TCP;
144 else
145 hdr = SIP_HDR_VIA_UDP;
146
122 /* Translate topmost Via header and parameters */ 147 /* Translate topmost Via header and parameters */
123 if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, 148 if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
124 SIP_HDR_VIA, NULL, &matchoff, &matchlen, 149 hdr, NULL, &matchoff, &matchlen,
125 &addr, &port) > 0) { 150 &addr, &port) > 0) {
126 unsigned int matchend, poff, plen, buflen, n; 151 unsigned int matchend, poff, plen, buflen, n;
127 char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; 152 char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
@@ -138,7 +163,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb,
138 goto next; 163 goto next;
139 } 164 }
140 165
141 if (!map_addr(skb, dptr, datalen, matchoff, matchlen, 166 if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
142 &addr, port)) 167 &addr, port))
143 return NF_DROP; 168 return NF_DROP;
144 169
@@ -153,8 +178,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb,
153 addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) { 178 addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) {
154 buflen = sprintf(buffer, "%pI4", 179 buflen = sprintf(buffer, "%pI4",
155 &ct->tuplehash[!dir].tuple.dst.u3.ip); 180 &ct->tuplehash[!dir].tuple.dst.u3.ip);
156 if (!mangle_packet(skb, dptr, datalen, poff, plen, 181 if (!mangle_packet(skb, dataoff, dptr, datalen,
157 buffer, buflen)) 182 poff, plen, buffer, buflen))
158 return NF_DROP; 183 return NF_DROP;
159 } 184 }
160 185
@@ -167,8 +192,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb,
167 addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { 192 addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) {
168 buflen = sprintf(buffer, "%pI4", 193 buflen = sprintf(buffer, "%pI4",
169 &ct->tuplehash[!dir].tuple.src.u3.ip); 194 &ct->tuplehash[!dir].tuple.src.u3.ip);
170 if (!mangle_packet(skb, dptr, datalen, poff, plen, 195 if (!mangle_packet(skb, dataoff, dptr, datalen,
171 buffer, buflen)) 196 poff, plen, buffer, buflen))
172 return NF_DROP; 197 return NF_DROP;
173 } 198 }
174 199
@@ -181,31 +206,45 @@ static unsigned int ip_nat_sip(struct sk_buff *skb,
181 htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) { 206 htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) {
182 __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port; 207 __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port;
183 buflen = sprintf(buffer, "%u", ntohs(p)); 208 buflen = sprintf(buffer, "%u", ntohs(p));
184 if (!mangle_packet(skb, dptr, datalen, poff, plen, 209 if (!mangle_packet(skb, dataoff, dptr, datalen,
185 buffer, buflen)) 210 poff, plen, buffer, buflen))
186 return NF_DROP; 211 return NF_DROP;
187 } 212 }
188 } 213 }
189 214
190next: 215next:
191 /* Translate Contact headers */ 216 /* Translate Contact headers */
192 dataoff = 0; 217 coff = 0;
193 in_header = 0; 218 in_header = 0;
194 while (ct_sip_parse_header_uri(ct, *dptr, &dataoff, *datalen, 219 while (ct_sip_parse_header_uri(ct, *dptr, &coff, *datalen,
195 SIP_HDR_CONTACT, &in_header, 220 SIP_HDR_CONTACT, &in_header,
196 &matchoff, &matchlen, 221 &matchoff, &matchlen,
197 &addr, &port) > 0) { 222 &addr, &port) > 0) {
198 if (!map_addr(skb, dptr, datalen, matchoff, matchlen, 223 if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
199 &addr, port)) 224 &addr, port))
200 return NF_DROP; 225 return NF_DROP;
201 } 226 }
202 227
203 if (!map_sip_addr(skb, dptr, datalen, SIP_HDR_FROM) || 228 if (!map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_FROM) ||
204 !map_sip_addr(skb, dptr, datalen, SIP_HDR_TO)) 229 !map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_TO))
205 return NF_DROP; 230 return NF_DROP;
231
206 return NF_ACCEPT; 232 return NF_ACCEPT;
207} 233}
208 234
235static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off)
236{
237 enum ip_conntrack_info ctinfo;
238 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
239 const struct tcphdr *th;
240
241 if (nf_ct_protonum(ct) != IPPROTO_TCP || off == 0)
242 return;
243
244 th = (struct tcphdr *)(skb->data + ip_hdrlen(skb));
245 nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off);
246}
247
209/* Handles expected signalling connections and media streams */ 248/* Handles expected signalling connections and media streams */
210static void ip_nat_sip_expected(struct nf_conn *ct, 249static void ip_nat_sip_expected(struct nf_conn *ct,
211 struct nf_conntrack_expect *exp) 250 struct nf_conntrack_expect *exp)
@@ -232,7 +271,7 @@ static void ip_nat_sip_expected(struct nf_conn *ct,
232 } 271 }
233} 272}
234 273
235static unsigned int ip_nat_sip_expect(struct sk_buff *skb, 274static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff,
236 const char **dptr, unsigned int *datalen, 275 const char **dptr, unsigned int *datalen,
237 struct nf_conntrack_expect *exp, 276 struct nf_conntrack_expect *exp,
238 unsigned int matchoff, 277 unsigned int matchoff,
@@ -279,8 +318,8 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb,
279 if (exp->tuple.dst.u3.ip != exp->saved_ip || 318 if (exp->tuple.dst.u3.ip != exp->saved_ip ||
280 exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) { 319 exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) {
281 buflen = sprintf(buffer, "%pI4:%u", &newip, port); 320 buflen = sprintf(buffer, "%pI4:%u", &newip, port);
282 if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen, 321 if (!mangle_packet(skb, dataoff, dptr, datalen,
283 buffer, buflen)) 322 matchoff, matchlen, buffer, buflen))
284 goto err; 323 goto err;
285 } 324 }
286 return NF_ACCEPT; 325 return NF_ACCEPT;
@@ -290,7 +329,7 @@ err:
290 return NF_DROP; 329 return NF_DROP;
291} 330}
292 331
293static int mangle_content_len(struct sk_buff *skb, 332static int mangle_content_len(struct sk_buff *skb, unsigned int dataoff,
294 const char **dptr, unsigned int *datalen) 333 const char **dptr, unsigned int *datalen)
295{ 334{
296 enum ip_conntrack_info ctinfo; 335 enum ip_conntrack_info ctinfo;
@@ -312,12 +351,13 @@ static int mangle_content_len(struct sk_buff *skb,
312 return 0; 351 return 0;
313 352
314 buflen = sprintf(buffer, "%u", c_len); 353 buflen = sprintf(buffer, "%u", c_len);
315 return mangle_packet(skb, dptr, datalen, matchoff, matchlen, 354 return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen,
316 buffer, buflen); 355 buffer, buflen);
317} 356}
318 357
319static int mangle_sdp_packet(struct sk_buff *skb, const char **dptr, 358static int mangle_sdp_packet(struct sk_buff *skb, unsigned int dataoff,
320 unsigned int dataoff, unsigned int *datalen, 359 const char **dptr, unsigned int *datalen,
360 unsigned int sdpoff,
321 enum sdp_header_types type, 361 enum sdp_header_types type,
322 enum sdp_header_types term, 362 enum sdp_header_types term,
323 char *buffer, int buflen) 363 char *buffer, int buflen)
@@ -326,16 +366,16 @@ static int mangle_sdp_packet(struct sk_buff *skb, const char **dptr,
326 struct nf_conn *ct = nf_ct_get(skb, &ctinfo); 366 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
327 unsigned int matchlen, matchoff; 367 unsigned int matchlen, matchoff;
328 368
329 if (ct_sip_get_sdp_header(ct, *dptr, dataoff, *datalen, type, term, 369 if (ct_sip_get_sdp_header(ct, *dptr, sdpoff, *datalen, type, term,
330 &matchoff, &matchlen) <= 0) 370 &matchoff, &matchlen) <= 0)
331 return -ENOENT; 371 return -ENOENT;
332 return mangle_packet(skb, dptr, datalen, matchoff, matchlen, 372 return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen,
333 buffer, buflen) ? 0 : -EINVAL; 373 buffer, buflen) ? 0 : -EINVAL;
334} 374}
335 375
336static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr, 376static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, unsigned int dataoff,
337 unsigned int dataoff, 377 const char **dptr, unsigned int *datalen,
338 unsigned int *datalen, 378 unsigned int sdpoff,
339 enum sdp_header_types type, 379 enum sdp_header_types type,
340 enum sdp_header_types term, 380 enum sdp_header_types term,
341 const union nf_inet_addr *addr) 381 const union nf_inet_addr *addr)
@@ -344,16 +384,15 @@ static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr,
344 unsigned int buflen; 384 unsigned int buflen;
345 385
346 buflen = sprintf(buffer, "%pI4", &addr->ip); 386 buflen = sprintf(buffer, "%pI4", &addr->ip);
347 if (mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term, 387 if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, type, term,
348 buffer, buflen)) 388 buffer, buflen))
349 return 0; 389 return 0;
350 390
351 return mangle_content_len(skb, dptr, datalen); 391 return mangle_content_len(skb, dataoff, dptr, datalen);
352} 392}
353 393
354static unsigned int ip_nat_sdp_port(struct sk_buff *skb, 394static unsigned int ip_nat_sdp_port(struct sk_buff *skb, unsigned int dataoff,
355 const char **dptr, 395 const char **dptr, unsigned int *datalen,
356 unsigned int *datalen,
357 unsigned int matchoff, 396 unsigned int matchoff,
358 unsigned int matchlen, 397 unsigned int matchlen,
359 u_int16_t port) 398 u_int16_t port)
@@ -362,16 +401,16 @@ static unsigned int ip_nat_sdp_port(struct sk_buff *skb,
362 unsigned int buflen; 401 unsigned int buflen;
363 402
364 buflen = sprintf(buffer, "%u", port); 403 buflen = sprintf(buffer, "%u", port);
365 if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen, 404 if (!mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen,
366 buffer, buflen)) 405 buffer, buflen))
367 return 0; 406 return 0;
368 407
369 return mangle_content_len(skb, dptr, datalen); 408 return mangle_content_len(skb, dataoff, dptr, datalen);
370} 409}
371 410
372static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr, 411static unsigned int ip_nat_sdp_session(struct sk_buff *skb, unsigned int dataoff,
373 unsigned int dataoff, 412 const char **dptr, unsigned int *datalen,
374 unsigned int *datalen, 413 unsigned int sdpoff,
375 const union nf_inet_addr *addr) 414 const union nf_inet_addr *addr)
376{ 415{
377 char buffer[sizeof("nnn.nnn.nnn.nnn")]; 416 char buffer[sizeof("nnn.nnn.nnn.nnn")];
@@ -379,12 +418,12 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr,
379 418
380 /* Mangle session description owner and contact addresses */ 419 /* Mangle session description owner and contact addresses */
381 buflen = sprintf(buffer, "%pI4", &addr->ip); 420 buflen = sprintf(buffer, "%pI4", &addr->ip);
382 if (mangle_sdp_packet(skb, dptr, dataoff, datalen, 421 if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff,
383 SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA, 422 SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA,
384 buffer, buflen)) 423 buffer, buflen))
385 return 0; 424 return 0;
386 425
387 switch (mangle_sdp_packet(skb, dptr, dataoff, datalen, 426 switch (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff,
388 SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA, 427 SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA,
389 buffer, buflen)) { 428 buffer, buflen)) {
390 case 0: 429 case 0:
@@ -401,14 +440,13 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr,
401 return 0; 440 return 0;
402 } 441 }
403 442
404 return mangle_content_len(skb, dptr, datalen); 443 return mangle_content_len(skb, dataoff, dptr, datalen);
405} 444}
406 445
407/* So, this packet has hit the connection tracking matching code. 446/* So, this packet has hit the connection tracking matching code.
408 Mangle it, and change the expectation to match the new version. */ 447 Mangle it, and change the expectation to match the new version. */
409static unsigned int ip_nat_sdp_media(struct sk_buff *skb, 448static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff,
410 const char **dptr, 449 const char **dptr, unsigned int *datalen,
411 unsigned int *datalen,
412 struct nf_conntrack_expect *rtp_exp, 450 struct nf_conntrack_expect *rtp_exp,
413 struct nf_conntrack_expect *rtcp_exp, 451 struct nf_conntrack_expect *rtcp_exp,
414 unsigned int mediaoff, 452 unsigned int mediaoff,
@@ -456,7 +494,8 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb,
456 494
457 /* Update media port. */ 495 /* Update media port. */
458 if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port && 496 if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port &&
459 !ip_nat_sdp_port(skb, dptr, datalen, mediaoff, medialen, port)) 497 !ip_nat_sdp_port(skb, dataoff, dptr, datalen,
498 mediaoff, medialen, port))
460 goto err2; 499 goto err2;
461 500
462 return NF_ACCEPT; 501 return NF_ACCEPT;
@@ -471,6 +510,7 @@ err1:
471static void __exit nf_nat_sip_fini(void) 510static void __exit nf_nat_sip_fini(void)
472{ 511{
473 rcu_assign_pointer(nf_nat_sip_hook, NULL); 512 rcu_assign_pointer(nf_nat_sip_hook, NULL);
513 rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, NULL);
474 rcu_assign_pointer(nf_nat_sip_expect_hook, NULL); 514 rcu_assign_pointer(nf_nat_sip_expect_hook, NULL);
475 rcu_assign_pointer(nf_nat_sdp_addr_hook, NULL); 515 rcu_assign_pointer(nf_nat_sdp_addr_hook, NULL);
476 rcu_assign_pointer(nf_nat_sdp_port_hook, NULL); 516 rcu_assign_pointer(nf_nat_sdp_port_hook, NULL);
@@ -482,12 +522,14 @@ static void __exit nf_nat_sip_fini(void)
482static int __init nf_nat_sip_init(void) 522static int __init nf_nat_sip_init(void)
483{ 523{
484 BUG_ON(nf_nat_sip_hook != NULL); 524 BUG_ON(nf_nat_sip_hook != NULL);
525 BUG_ON(nf_nat_sip_seq_adjust_hook != NULL);
485 BUG_ON(nf_nat_sip_expect_hook != NULL); 526 BUG_ON(nf_nat_sip_expect_hook != NULL);
486 BUG_ON(nf_nat_sdp_addr_hook != NULL); 527 BUG_ON(nf_nat_sdp_addr_hook != NULL);
487 BUG_ON(nf_nat_sdp_port_hook != NULL); 528 BUG_ON(nf_nat_sdp_port_hook != NULL);
488 BUG_ON(nf_nat_sdp_session_hook != NULL); 529 BUG_ON(nf_nat_sdp_session_hook != NULL);
489 BUG_ON(nf_nat_sdp_media_hook != NULL); 530 BUG_ON(nf_nat_sdp_media_hook != NULL);
490 rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip); 531 rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip);
532 rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, ip_nat_sip_seq_adjust);
491 rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect); 533 rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect);
492 rcu_assign_pointer(nf_nat_sdp_addr_hook, ip_nat_sdp_addr); 534 rcu_assign_pointer(nf_nat_sdp_addr_hook, ip_nat_sdp_addr);
493 rcu_assign_pointer(nf_nat_sdp_port_hook, ip_nat_sdp_port); 535 rcu_assign_pointer(nf_nat_sdp_port_hook, ip_nat_sdp_port);
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index d9521f6f9ed0..0b9c7ce3d6c5 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1038,7 +1038,7 @@ static int snmp_parse_mangle(unsigned char *msg,
1038 unsigned int cls, con, tag, vers, pdutype; 1038 unsigned int cls, con, tag, vers, pdutype;
1039 struct asn1_ctx ctx; 1039 struct asn1_ctx ctx;
1040 struct asn1_octstr comm; 1040 struct asn1_octstr comm;
1041 struct snmp_object **obj; 1041 struct snmp_object *obj;
1042 1042
1043 if (debug > 1) 1043 if (debug > 1)
1044 hex_dump(msg, len); 1044 hex_dump(msg, len);
@@ -1148,43 +1148,34 @@ static int snmp_parse_mangle(unsigned char *msg,
1148 if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ) 1148 if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
1149 return 0; 1149 return 0;
1150 1150
1151 obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
1152 if (obj == NULL) {
1153 if (net_ratelimit())
1154 printk(KERN_WARNING "OOM in bsalg(%d)\n", __LINE__);
1155 return 0;
1156 }
1157
1158 while (!asn1_eoc_decode(&ctx, eoc)) { 1151 while (!asn1_eoc_decode(&ctx, eoc)) {
1159 unsigned int i; 1152 unsigned int i;
1160 1153
1161 if (!snmp_object_decode(&ctx, obj)) { 1154 if (!snmp_object_decode(&ctx, &obj)) {
1162 if (*obj) { 1155 if (obj) {
1163 kfree((*obj)->id); 1156 kfree(obj->id);
1164 kfree(*obj); 1157 kfree(obj);
1165 } 1158 }
1166 kfree(obj);
1167 return 0; 1159 return 0;
1168 } 1160 }
1169 1161
1170 if (debug > 1) { 1162 if (debug > 1) {
1171 printk(KERN_DEBUG "bsalg: object: "); 1163 printk(KERN_DEBUG "bsalg: object: ");
1172 for (i = 0; i < (*obj)->id_len; i++) { 1164 for (i = 0; i < obj->id_len; i++) {
1173 if (i > 0) 1165 if (i > 0)
1174 printk("."); 1166 printk(".");
1175 printk("%lu", (*obj)->id[i]); 1167 printk("%lu", obj->id[i]);
1176 } 1168 }
1177 printk(": type=%u\n", (*obj)->type); 1169 printk(": type=%u\n", obj->type);
1178 1170
1179 } 1171 }
1180 1172
1181 if ((*obj)->type == SNMP_IPADDR) 1173 if (obj->type == SNMP_IPADDR)
1182 mangle_address(ctx.begin, ctx.pointer - 4 , map, check); 1174 mangle_address(ctx.begin, ctx.pointer - 4 , map, check);
1183 1175
1184 kfree((*obj)->id); 1176 kfree(obj->id);
1185 kfree(*obj); 1177 kfree(obj);
1186 } 1178 }
1187 kfree(obj);
1188 1179
1189 if (!asn1_eoc_decode(&ctx, eoc)) 1180 if (!asn1_eoc_decode(&ctx, eoc))
1190 return 0; 1181 return 0;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index f25542c48b7d..242ed2307370 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -127,8 +127,8 @@ static const struct snmp_mib snmp4_ipextstats_list[] = {
127 SNMP_MIB_SENTINEL 127 SNMP_MIB_SENTINEL
128}; 128};
129 129
130static struct { 130static const struct {
131 char *name; 131 const char *name;
132 int index; 132 int index;
133} icmpmibmap[] = { 133} icmpmibmap[] = {
134 { "DestUnreachs", ICMP_DEST_UNREACH }, 134 { "DestUnreachs", ICMP_DEST_UNREACH },
@@ -280,7 +280,7 @@ static void icmpmsg_put(struct seq_file *seq)
280 280
281 count = 0; 281 count = 0;
282 for (i = 0; i < ICMPMSG_MIB_MAX; i++) { 282 for (i = 0; i < ICMPMSG_MIB_MAX; i++) {
283 val = snmp_fold_field((void **) net->mib.icmpmsg_statistics, i); 283 val = snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, i);
284 if (val) { 284 if (val) {
285 type[count] = i; 285 type[count] = i;
286 vals[count++] = val; 286 vals[count++] = val;
@@ -307,18 +307,18 @@ static void icmp_put(struct seq_file *seq)
307 for (i=0; icmpmibmap[i].name != NULL; i++) 307 for (i=0; icmpmibmap[i].name != NULL; i++)
308 seq_printf(seq, " Out%s", icmpmibmap[i].name); 308 seq_printf(seq, " Out%s", icmpmibmap[i].name);
309 seq_printf(seq, "\nIcmp: %lu %lu", 309 seq_printf(seq, "\nIcmp: %lu %lu",
310 snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_INMSGS), 310 snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS),
311 snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_INERRORS)); 311 snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS));
312 for (i=0; icmpmibmap[i].name != NULL; i++) 312 for (i=0; icmpmibmap[i].name != NULL; i++)
313 seq_printf(seq, " %lu", 313 seq_printf(seq, " %lu",
314 snmp_fold_field((void **) net->mib.icmpmsg_statistics, 314 snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics,
315 icmpmibmap[i].index)); 315 icmpmibmap[i].index));
316 seq_printf(seq, " %lu %lu", 316 seq_printf(seq, " %lu %lu",
317 snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), 317 snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
318 snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); 318 snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
319 for (i=0; icmpmibmap[i].name != NULL; i++) 319 for (i=0; icmpmibmap[i].name != NULL; i++)
320 seq_printf(seq, " %lu", 320 seq_printf(seq, " %lu",
321 snmp_fold_field((void **) net->mib.icmpmsg_statistics, 321 snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics,
322 icmpmibmap[i].index | 0x100)); 322 icmpmibmap[i].index | 0x100));
323} 323}
324 324
@@ -341,7 +341,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
341 341
342 for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) 342 for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
343 seq_printf(seq, " %lu", 343 seq_printf(seq, " %lu",
344 snmp_fold_field((void **)net->mib.ip_statistics, 344 snmp_fold_field((void __percpu **)net->mib.ip_statistics,
345 snmp4_ipstats_list[i].entry)); 345 snmp4_ipstats_list[i].entry));
346 346
347 icmp_put(seq); /* RFC 2011 compatibility */ 347 icmp_put(seq); /* RFC 2011 compatibility */
@@ -356,11 +356,11 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
356 /* MaxConn field is signed, RFC 2012 */ 356 /* MaxConn field is signed, RFC 2012 */
357 if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) 357 if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
358 seq_printf(seq, " %ld", 358 seq_printf(seq, " %ld",
359 snmp_fold_field((void **)net->mib.tcp_statistics, 359 snmp_fold_field((void __percpu **)net->mib.tcp_statistics,
360 snmp4_tcp_list[i].entry)); 360 snmp4_tcp_list[i].entry));
361 else 361 else
362 seq_printf(seq, " %lu", 362 seq_printf(seq, " %lu",
363 snmp_fold_field((void **)net->mib.tcp_statistics, 363 snmp_fold_field((void __percpu **)net->mib.tcp_statistics,
364 snmp4_tcp_list[i].entry)); 364 snmp4_tcp_list[i].entry));
365 } 365 }
366 366
@@ -371,7 +371,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
371 seq_puts(seq, "\nUdp:"); 371 seq_puts(seq, "\nUdp:");
372 for (i = 0; snmp4_udp_list[i].name != NULL; i++) 372 for (i = 0; snmp4_udp_list[i].name != NULL; i++)
373 seq_printf(seq, " %lu", 373 seq_printf(seq, " %lu",
374 snmp_fold_field((void **)net->mib.udp_statistics, 374 snmp_fold_field((void __percpu **)net->mib.udp_statistics,
375 snmp4_udp_list[i].entry)); 375 snmp4_udp_list[i].entry));
376 376
377 /* the UDP and UDP-Lite MIBs are the same */ 377 /* the UDP and UDP-Lite MIBs are the same */
@@ -382,7 +382,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
382 seq_puts(seq, "\nUdpLite:"); 382 seq_puts(seq, "\nUdpLite:");
383 for (i = 0; snmp4_udp_list[i].name != NULL; i++) 383 for (i = 0; snmp4_udp_list[i].name != NULL; i++)
384 seq_printf(seq, " %lu", 384 seq_printf(seq, " %lu",
385 snmp_fold_field((void **)net->mib.udplite_statistics, 385 snmp_fold_field((void __percpu **)net->mib.udplite_statistics,
386 snmp4_udp_list[i].entry)); 386 snmp4_udp_list[i].entry));
387 387
388 seq_putc(seq, '\n'); 388 seq_putc(seq, '\n');
@@ -419,7 +419,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
419 seq_puts(seq, "\nTcpExt:"); 419 seq_puts(seq, "\nTcpExt:");
420 for (i = 0; snmp4_net_list[i].name != NULL; i++) 420 for (i = 0; snmp4_net_list[i].name != NULL; i++)
421 seq_printf(seq, " %lu", 421 seq_printf(seq, " %lu",
422 snmp_fold_field((void **)net->mib.net_statistics, 422 snmp_fold_field((void __percpu **)net->mib.net_statistics,
423 snmp4_net_list[i].entry)); 423 snmp4_net_list[i].entry));
424 424
425 seq_puts(seq, "\nIpExt:"); 425 seq_puts(seq, "\nIpExt:");
@@ -429,7 +429,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
429 seq_puts(seq, "\nIpExt:"); 429 seq_puts(seq, "\nIpExt:");
430 for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) 430 for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++)
431 seq_printf(seq, " %lu", 431 seq_printf(seq, " %lu",
432 snmp_fold_field((void **)net->mib.ip_statistics, 432 snmp_fold_field((void __percpu **)net->mib.ip_statistics,
433 snmp4_ipextstats_list[i].entry)); 433 snmp4_ipextstats_list[i].entry));
434 434
435 seq_putc(seq, '\n'); 435 seq_putc(seq, '\n');
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d62b05d33384..04762d3bef71 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1990,8 +1990,13 @@ static int __mkroute_input(struct sk_buff *skb,
1990 if (skb->protocol != htons(ETH_P_IP)) { 1990 if (skb->protocol != htons(ETH_P_IP)) {
1991 /* Not IP (i.e. ARP). Do not create route, if it is 1991 /* Not IP (i.e. ARP). Do not create route, if it is
1992 * invalid for proxy arp. DNAT routes are always valid. 1992 * invalid for proxy arp. DNAT routes are always valid.
1993 *
1994 * Proxy arp feature have been extended to allow, ARP
1995 * replies back to the same interface, to support
1996 * Private VLAN switch technologies. See arp.c.
1993 */ 1997 */
1994 if (out_dev == in_dev) { 1998 if (out_dev == in_dev &&
1999 IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
1995 err = -EINVAL; 2000 err = -EINVAL;
1996 goto cleanup; 2001 goto cleanup;
1997 } 2002 }
@@ -3329,7 +3334,7 @@ static __net_initdata struct pernet_operations rt_secret_timer_ops = {
3329 3334
3330 3335
3331#ifdef CONFIG_NET_CLS_ROUTE 3336#ifdef CONFIG_NET_CLS_ROUTE
3332struct ip_rt_acct *ip_rt_acct __read_mostly; 3337struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
3333#endif /* CONFIG_NET_CLS_ROUTE */ 3338#endif /* CONFIG_NET_CLS_ROUTE */
3334 3339
3335static __initdata unsigned long rhash_entries; 3340static __initdata unsigned long rhash_entries;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 66fd80ef2473..5c24db4a3c91 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -358,7 +358,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
358 358
359 tcp_select_initial_window(tcp_full_space(sk), req->mss, 359 tcp_select_initial_window(tcp_full_space(sk), req->mss,
360 &req->rcv_wnd, &req->window_clamp, 360 &req->rcv_wnd, &req->window_clamp,
361 ireq->wscale_ok, &rcv_wscale); 361 ireq->wscale_ok, &rcv_wscale,
362 dst_metric(&rt->u.dst, RTAX_INITRWND));
362 363
363 ireq->rcv_wscale = rcv_wscale; 364 ireq->rcv_wscale = rcv_wscale;
364 365
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 7e3712ce3994..c1bc074f61b7 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -576,6 +576,20 @@ static struct ctl_table ipv4_table[] = {
576 .proc_handler = proc_dointvec 576 .proc_handler = proc_dointvec
577 }, 577 },
578 { 578 {
579 .procname = "tcp_thin_linear_timeouts",
580 .data = &sysctl_tcp_thin_linear_timeouts,
581 .maxlen = sizeof(int),
582 .mode = 0644,
583 .proc_handler = proc_dointvec
584 },
585 {
586 .procname = "tcp_thin_dupack",
587 .data = &sysctl_tcp_thin_dupack,
588 .maxlen = sizeof(int),
589 .mode = 0644,
590 .proc_handler = proc_dointvec
591 },
592 {
579 .procname = "udp_mem", 593 .procname = "udp_mem",
580 .data = &sysctl_udp_mem, 594 .data = &sysctl_udp_mem,
581 .maxlen = sizeof(sysctl_udp_mem), 595 .maxlen = sizeof(sysctl_udp_mem),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b0a26bb25e2e..5901010fad55 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -536,8 +536,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
536 tp->nonagle &= ~TCP_NAGLE_PUSH; 536 tp->nonagle &= ~TCP_NAGLE_PUSH;
537} 537}
538 538
539static inline void tcp_mark_urg(struct tcp_sock *tp, int flags, 539static inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
540 struct sk_buff *skb)
541{ 540{
542 if (flags & MSG_OOB) 541 if (flags & MSG_OOB)
543 tp->snd_up = tp->write_seq; 542 tp->snd_up = tp->write_seq;
@@ -546,13 +545,13 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags,
546static inline void tcp_push(struct sock *sk, int flags, int mss_now, 545static inline void tcp_push(struct sock *sk, int flags, int mss_now,
547 int nonagle) 546 int nonagle)
548{ 547{
549 struct tcp_sock *tp = tcp_sk(sk);
550
551 if (tcp_send_head(sk)) { 548 if (tcp_send_head(sk)) {
552 struct sk_buff *skb = tcp_write_queue_tail(sk); 549 struct tcp_sock *tp = tcp_sk(sk);
550
553 if (!(flags & MSG_MORE) || forced_push(tp)) 551 if (!(flags & MSG_MORE) || forced_push(tp))
554 tcp_mark_push(tp, skb); 552 tcp_mark_push(tp, tcp_write_queue_tail(sk));
555 tcp_mark_urg(tp, flags, skb); 553
554 tcp_mark_urg(tp, flags);
556 __tcp_push_pending_frames(sk, mss_now, 555 __tcp_push_pending_frames(sk, mss_now,
557 (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle); 556 (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle);
558 } 557 }
@@ -877,12 +876,12 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
877#define TCP_PAGE(sk) (sk->sk_sndmsg_page) 876#define TCP_PAGE(sk) (sk->sk_sndmsg_page)
878#define TCP_OFF(sk) (sk->sk_sndmsg_off) 877#define TCP_OFF(sk) (sk->sk_sndmsg_off)
879 878
880static inline int select_size(struct sock *sk) 879static inline int select_size(struct sock *sk, int sg)
881{ 880{
882 struct tcp_sock *tp = tcp_sk(sk); 881 struct tcp_sock *tp = tcp_sk(sk);
883 int tmp = tp->mss_cache; 882 int tmp = tp->mss_cache;
884 883
885 if (sk->sk_route_caps & NETIF_F_SG) { 884 if (sg) {
886 if (sk_can_gso(sk)) 885 if (sk_can_gso(sk))
887 tmp = 0; 886 tmp = 0;
888 else { 887 else {
@@ -906,7 +905,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
906 struct sk_buff *skb; 905 struct sk_buff *skb;
907 int iovlen, flags; 906 int iovlen, flags;
908 int mss_now, size_goal; 907 int mss_now, size_goal;
909 int err, copied; 908 int sg, err, copied;
910 long timeo; 909 long timeo;
911 910
912 lock_sock(sk); 911 lock_sock(sk);
@@ -934,6 +933,8 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
934 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) 933 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
935 goto out_err; 934 goto out_err;
936 935
936 sg = sk->sk_route_caps & NETIF_F_SG;
937
937 while (--iovlen >= 0) { 938 while (--iovlen >= 0) {
938 int seglen = iov->iov_len; 939 int seglen = iov->iov_len;
939 unsigned char __user *from = iov->iov_base; 940 unsigned char __user *from = iov->iov_base;
@@ -959,8 +960,9 @@ new_segment:
959 if (!sk_stream_memory_free(sk)) 960 if (!sk_stream_memory_free(sk))
960 goto wait_for_sndbuf; 961 goto wait_for_sndbuf;
961 962
962 skb = sk_stream_alloc_skb(sk, select_size(sk), 963 skb = sk_stream_alloc_skb(sk,
963 sk->sk_allocation); 964 select_size(sk, sg),
965 sk->sk_allocation);
964 if (!skb) 966 if (!skb)
965 goto wait_for_memory; 967 goto wait_for_memory;
966 968
@@ -997,9 +999,7 @@ new_segment:
997 /* We can extend the last page 999 /* We can extend the last page
998 * fragment. */ 1000 * fragment. */
999 merge = 1; 1001 merge = 1;
1000 } else if (i == MAX_SKB_FRAGS || 1002 } else if (i == MAX_SKB_FRAGS || !sg) {
1001 (!i &&
1002 !(sk->sk_route_caps & NETIF_F_SG))) {
1003 /* Need to add new fragment and cannot 1003 /* Need to add new fragment and cannot
1004 * do this because interface is non-SG, 1004 * do this because interface is non-SG,
1005 * or because all the page slots are 1005 * or because all the page slots are
@@ -2229,6 +2229,20 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2229 } 2229 }
2230 break; 2230 break;
2231 2231
2232 case TCP_THIN_LINEAR_TIMEOUTS:
2233 if (val < 0 || val > 1)
2234 err = -EINVAL;
2235 else
2236 tp->thin_lto = val;
2237 break;
2238
2239 case TCP_THIN_DUPACK:
2240 if (val < 0 || val > 1)
2241 err = -EINVAL;
2242 else
2243 tp->thin_dupack = val;
2244 break;
2245
2232 case TCP_CORK: 2246 case TCP_CORK:
2233 /* When set indicates to always queue non-full frames. 2247 /* When set indicates to always queue non-full frames.
2234 * Later the user clears this option and we transmit 2248 * Later the user clears this option and we transmit
@@ -2788,10 +2802,10 @@ EXPORT_SYMBOL(tcp_gro_complete);
2788 2802
2789#ifdef CONFIG_TCP_MD5SIG 2803#ifdef CONFIG_TCP_MD5SIG
2790static unsigned long tcp_md5sig_users; 2804static unsigned long tcp_md5sig_users;
2791static struct tcp_md5sig_pool **tcp_md5sig_pool; 2805static struct tcp_md5sig_pool * __percpu *tcp_md5sig_pool;
2792static DEFINE_SPINLOCK(tcp_md5sig_pool_lock); 2806static DEFINE_SPINLOCK(tcp_md5sig_pool_lock);
2793 2807
2794static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool) 2808static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool)
2795{ 2809{
2796 int cpu; 2810 int cpu;
2797 for_each_possible_cpu(cpu) { 2811 for_each_possible_cpu(cpu) {
@@ -2808,7 +2822,7 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool)
2808 2822
2809void tcp_free_md5sig_pool(void) 2823void tcp_free_md5sig_pool(void)
2810{ 2824{
2811 struct tcp_md5sig_pool **pool = NULL; 2825 struct tcp_md5sig_pool * __percpu *pool = NULL;
2812 2826
2813 spin_lock_bh(&tcp_md5sig_pool_lock); 2827 spin_lock_bh(&tcp_md5sig_pool_lock);
2814 if (--tcp_md5sig_users == 0) { 2828 if (--tcp_md5sig_users == 0) {
@@ -2822,10 +2836,11 @@ void tcp_free_md5sig_pool(void)
2822 2836
2823EXPORT_SYMBOL(tcp_free_md5sig_pool); 2837EXPORT_SYMBOL(tcp_free_md5sig_pool);
2824 2838
2825static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(struct sock *sk) 2839static struct tcp_md5sig_pool * __percpu *
2840__tcp_alloc_md5sig_pool(struct sock *sk)
2826{ 2841{
2827 int cpu; 2842 int cpu;
2828 struct tcp_md5sig_pool **pool; 2843 struct tcp_md5sig_pool * __percpu *pool;
2829 2844
2830 pool = alloc_percpu(struct tcp_md5sig_pool *); 2845 pool = alloc_percpu(struct tcp_md5sig_pool *);
2831 if (!pool) 2846 if (!pool)
@@ -2852,9 +2867,9 @@ out_free:
2852 return NULL; 2867 return NULL;
2853} 2868}
2854 2869
2855struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(struct sock *sk) 2870struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *sk)
2856{ 2871{
2857 struct tcp_md5sig_pool **pool; 2872 struct tcp_md5sig_pool * __percpu *pool;
2858 int alloc = 0; 2873 int alloc = 0;
2859 2874
2860retry: 2875retry:
@@ -2873,7 +2888,9 @@ retry:
2873 2888
2874 if (alloc) { 2889 if (alloc) {
2875 /* we cannot hold spinlock here because this may sleep. */ 2890 /* we cannot hold spinlock here because this may sleep. */
2876 struct tcp_md5sig_pool **p = __tcp_alloc_md5sig_pool(sk); 2891 struct tcp_md5sig_pool * __percpu *p;
2892
2893 p = __tcp_alloc_md5sig_pool(sk);
2877 spin_lock_bh(&tcp_md5sig_pool_lock); 2894 spin_lock_bh(&tcp_md5sig_pool_lock);
2878 if (!p) { 2895 if (!p) {
2879 tcp_md5sig_users--; 2896 tcp_md5sig_users--;
@@ -2897,7 +2914,7 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
2897 2914
2898struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu) 2915struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu)
2899{ 2916{
2900 struct tcp_md5sig_pool **p; 2917 struct tcp_md5sig_pool * __percpu *p;
2901 spin_lock_bh(&tcp_md5sig_pool_lock); 2918 spin_lock_bh(&tcp_md5sig_pool_lock);
2902 p = tcp_md5sig_pool; 2919 p = tcp_md5sig_pool;
2903 if (p) 2920 if (p)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3fddc69ccccc..788851ca8c5d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -89,6 +89,8 @@ int sysctl_tcp_frto __read_mostly = 2;
89int sysctl_tcp_frto_response __read_mostly; 89int sysctl_tcp_frto_response __read_mostly;
90int sysctl_tcp_nometrics_save __read_mostly; 90int sysctl_tcp_nometrics_save __read_mostly;
91 91
92int sysctl_tcp_thin_dupack __read_mostly;
93
92int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; 94int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
93int sysctl_tcp_abc __read_mostly; 95int sysctl_tcp_abc __read_mostly;
94 96
@@ -2447,6 +2449,16 @@ static int tcp_time_to_recover(struct sock *sk)
2447 return 1; 2449 return 1;
2448 } 2450 }
2449 2451
2452 /* If a thin stream is detected, retransmit after first
2453 * received dupack. Employ only if SACK is supported in order
2454 * to avoid possible corner-case series of spurious retransmissions
2455 * Use only if there are no unsent data.
2456 */
2457 if ((tp->thin_dupack || sysctl_tcp_thin_dupack) &&
2458 tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 &&
2459 tcp_is_sack(tp) && !tcp_send_head(sk))
2460 return 1;
2461
2450 return 0; 2462 return 0;
2451} 2463}
2452 2464
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 65b8ebfd078a..c3588b4fd979 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -742,9 +742,9 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
742 * This still operates on a request_sock only, not on a big 742 * This still operates on a request_sock only, not on a big
743 * socket. 743 * socket.
744 */ 744 */
745static int __tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, 745static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
746 struct request_sock *req, 746 struct request_sock *req,
747 struct request_values *rvp) 747 struct request_values *rvp)
748{ 748{
749 const struct inet_request_sock *ireq = inet_rsk(req); 749 const struct inet_request_sock *ireq = inet_rsk(req);
750 int err = -1; 750 int err = -1;
@@ -775,10 +775,11 @@ static int __tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
775 return err; 775 return err;
776} 776}
777 777
778static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, 778static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
779 struct request_values *rvp) 779 struct request_values *rvp)
780{ 780{
781 return __tcp_v4_send_synack(sk, NULL, req, rvp); 781 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
782 return tcp_v4_send_synack(sk, NULL, req, rvp);
782} 783}
783 784
784/* 785/*
@@ -1192,10 +1193,11 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
1192struct request_sock_ops tcp_request_sock_ops __read_mostly = { 1193struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1193 .family = PF_INET, 1194 .family = PF_INET,
1194 .obj_size = sizeof(struct tcp_request_sock), 1195 .obj_size = sizeof(struct tcp_request_sock),
1195 .rtx_syn_ack = tcp_v4_send_synack, 1196 .rtx_syn_ack = tcp_v4_rtx_synack,
1196 .send_ack = tcp_v4_reqsk_send_ack, 1197 .send_ack = tcp_v4_reqsk_send_ack,
1197 .destructor = tcp_v4_reqsk_destructor, 1198 .destructor = tcp_v4_reqsk_destructor,
1198 .send_reset = tcp_v4_send_reset, 1199 .send_reset = tcp_v4_send_reset,
1200 .syn_ack_timeout = tcp_syn_ack_timeout,
1199}; 1201};
1200 1202
1201#ifdef CONFIG_TCP_MD5SIG 1203#ifdef CONFIG_TCP_MD5SIG
@@ -1373,8 +1375,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1373 } 1375 }
1374 tcp_rsk(req)->snt_isn = isn; 1376 tcp_rsk(req)->snt_isn = isn;
1375 1377
1376 if (__tcp_v4_send_synack(sk, dst, req, 1378 if (tcp_v4_send_synack(sk, dst, req,
1377 (struct request_values *)&tmp_ext) || 1379 (struct request_values *)&tmp_ext) ||
1378 want_cookie) 1380 want_cookie)
1379 goto drop_and_free; 1381 goto drop_and_free;
1380 1382
@@ -1649,6 +1651,9 @@ int tcp_v4_rcv(struct sk_buff *skb)
1649 if (!sk) 1651 if (!sk)
1650 goto no_tcp_socket; 1652 goto no_tcp_socket;
1651 1653
1654 if (iph->ttl < inet_sk(sk)->min_ttl)
1655 goto discard_and_relse;
1656
1652process: 1657process:
1653 if (sk->sk_state == TCP_TIME_WAIT) 1658 if (sk->sk_state == TCP_TIME_WAIT)
1654 goto do_time_wait; 1659 goto do_time_wait;
@@ -2425,12 +2430,12 @@ static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2425 }, 2430 },
2426}; 2431};
2427 2432
2428static int tcp4_proc_init_net(struct net *net) 2433static int __net_init tcp4_proc_init_net(struct net *net)
2429{ 2434{
2430 return tcp_proc_register(net, &tcp4_seq_afinfo); 2435 return tcp_proc_register(net, &tcp4_seq_afinfo);
2431} 2436}
2432 2437
2433static void tcp4_proc_exit_net(struct net *net) 2438static void __net_exit tcp4_proc_exit_net(struct net *net)
2434{ 2439{
2435 tcp_proc_unregister(net, &tcp4_seq_afinfo); 2440 tcp_proc_unregister(net, &tcp4_seq_afinfo);
2436} 2441}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 383ce237640f..4a1605d3f909 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -183,7 +183,8 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
183 */ 183 */
184void tcp_select_initial_window(int __space, __u32 mss, 184void tcp_select_initial_window(int __space, __u32 mss,
185 __u32 *rcv_wnd, __u32 *window_clamp, 185 __u32 *rcv_wnd, __u32 *window_clamp,
186 int wscale_ok, __u8 *rcv_wscale) 186 int wscale_ok, __u8 *rcv_wscale,
187 __u32 init_rcv_wnd)
187{ 188{
188 unsigned int space = (__space < 0 ? 0 : __space); 189 unsigned int space = (__space < 0 ? 0 : __space);
189 190
@@ -232,7 +233,13 @@ void tcp_select_initial_window(int __space, __u32 mss,
232 init_cwnd = 2; 233 init_cwnd = 2;
233 else if (mss > 1460) 234 else if (mss > 1460)
234 init_cwnd = 3; 235 init_cwnd = 3;
235 if (*rcv_wnd > init_cwnd * mss) 236 /* when initializing use the value from init_rcv_wnd
237 * rather than the default from above
238 */
239 if (init_rcv_wnd &&
240 (*rcv_wnd > init_rcv_wnd * mss))
241 *rcv_wnd = init_rcv_wnd * mss;
242 else if (*rcv_wnd > init_cwnd * mss)
236 *rcv_wnd = init_cwnd * mss; 243 *rcv_wnd = init_cwnd * mss;
237 } 244 }
238 245
@@ -1794,11 +1801,6 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1794void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, 1801void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
1795 int nonagle) 1802 int nonagle)
1796{ 1803{
1797 struct sk_buff *skb = tcp_send_head(sk);
1798
1799 if (!skb)
1800 return;
1801
1802 /* If we are closed, the bytes will have to remain here. 1804 /* If we are closed, the bytes will have to remain here.
1803 * In time closedown will finish, we empty the write queue and 1805 * In time closedown will finish, we empty the write queue and
1804 * all will be happy. 1806 * all will be happy.
@@ -2422,7 +2424,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2422 &req->rcv_wnd, 2424 &req->rcv_wnd,
2423 &req->window_clamp, 2425 &req->window_clamp,
2424 ireq->wscale_ok, 2426 ireq->wscale_ok,
2425 &rcv_wscale); 2427 &rcv_wscale,
2428 dst_metric(dst, RTAX_INITRWND));
2426 ireq->rcv_wscale = rcv_wscale; 2429 ireq->rcv_wscale = rcv_wscale;
2427 } 2430 }
2428 2431
@@ -2549,7 +2552,8 @@ static void tcp_connect_init(struct sock *sk)
2549 &tp->rcv_wnd, 2552 &tp->rcv_wnd,
2550 &tp->window_clamp, 2553 &tp->window_clamp,
2551 sysctl_tcp_window_scaling, 2554 sysctl_tcp_window_scaling,
2552 &rcv_wscale); 2555 &rcv_wscale,
2556 dst_metric(dst, RTAX_INITRWND));
2553 2557
2554 tp->rx_opt.rcv_wscale = rcv_wscale; 2558 tp->rx_opt.rcv_wscale = rcv_wscale;
2555 tp->rcv_ssthresh = tp->rcv_wnd; 2559 tp->rcv_ssthresh = tp->rcv_wnd;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 8816a20c2597..a17629b8912e 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -29,6 +29,7 @@ int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL;
29int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; 29int sysctl_tcp_retries1 __read_mostly = TCP_RETR1;
30int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; 30int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
31int sysctl_tcp_orphan_retries __read_mostly; 31int sysctl_tcp_orphan_retries __read_mostly;
32int sysctl_tcp_thin_linear_timeouts __read_mostly;
32 33
33static void tcp_write_timer(unsigned long); 34static void tcp_write_timer(unsigned long);
34static void tcp_delack_timer(unsigned long); 35static void tcp_delack_timer(unsigned long);
@@ -415,7 +416,25 @@ void tcp_retransmit_timer(struct sock *sk)
415 icsk->icsk_retransmits++; 416 icsk->icsk_retransmits++;
416 417
417out_reset_timer: 418out_reset_timer:
418 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); 419 /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is
420 * used to reset timer, set to 0. Recalculate 'icsk_rto' as this
421 * might be increased if the stream oscillates between thin and thick,
422 * thus the old value might already be too high compared to the value
423 * set by 'tcp_set_rto' in tcp_input.c which resets the rto without
424 * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating
425 * exponential backoff behaviour to avoid continue hammering
426 * linear-timeout retransmissions into a black hole
427 */
428 if (sk->sk_state == TCP_ESTABLISHED &&
429 (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) &&
430 tcp_stream_is_thin(tp) &&
431 icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
432 icsk->icsk_backoff = 0;
433 icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
434 } else {
435 /* Use normal (exponential) backoff */
436 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
437 }
419 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); 438 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
420 if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) 439 if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1))
421 __sk_dst_reset(sk); 440 __sk_dst_reset(sk);
@@ -474,6 +493,12 @@ static void tcp_synack_timer(struct sock *sk)
474 TCP_TIMEOUT_INIT, TCP_RTO_MAX); 493 TCP_TIMEOUT_INIT, TCP_RTO_MAX);
475} 494}
476 495
496void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req)
497{
498 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEOUTS);
499}
500EXPORT_SYMBOL(tcp_syn_ack_timeout);
501
477void tcp_set_keepalive(struct sock *sk, int val) 502void tcp_set_keepalive(struct sock *sk, int val)
478{ 503{
479 if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) 504 if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f0126fdd7e04..608a5446d05b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1117,7 +1117,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1117 struct inet_sock *inet = inet_sk(sk); 1117 struct inet_sock *inet = inet_sk(sk);
1118 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; 1118 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
1119 struct sk_buff *skb; 1119 struct sk_buff *skb;
1120 unsigned int ulen, copied; 1120 unsigned int ulen;
1121 int peeked; 1121 int peeked;
1122 int err; 1122 int err;
1123 int is_udplite = IS_UDPLITE(sk); 1123 int is_udplite = IS_UDPLITE(sk);
@@ -1138,10 +1138,9 @@ try_again:
1138 goto out; 1138 goto out;
1139 1139
1140 ulen = skb->len - sizeof(struct udphdr); 1140 ulen = skb->len - sizeof(struct udphdr);
1141 copied = len; 1141 if (len > ulen)
1142 if (copied > ulen) 1142 len = ulen;
1143 copied = ulen; 1143 else if (len < ulen)
1144 else if (copied < ulen)
1145 msg->msg_flags |= MSG_TRUNC; 1144 msg->msg_flags |= MSG_TRUNC;
1146 1145
1147 /* 1146 /*
@@ -1150,14 +1149,14 @@ try_again:
1150 * coverage checksum (UDP-Lite), do it before the copy. 1149 * coverage checksum (UDP-Lite), do it before the copy.
1151 */ 1150 */
1152 1151
1153 if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { 1152 if (len < ulen || UDP_SKB_CB(skb)->partial_cov) {
1154 if (udp_lib_checksum_complete(skb)) 1153 if (udp_lib_checksum_complete(skb))
1155 goto csum_copy_err; 1154 goto csum_copy_err;
1156 } 1155 }
1157 1156
1158 if (skb_csum_unnecessary(skb)) 1157 if (skb_csum_unnecessary(skb))
1159 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), 1158 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
1160 msg->msg_iov, copied); 1159 msg->msg_iov, len);
1161 else { 1160 else {
1162 err = skb_copy_and_csum_datagram_iovec(skb, 1161 err = skb_copy_and_csum_datagram_iovec(skb,
1163 sizeof(struct udphdr), 1162 sizeof(struct udphdr),
@@ -1186,7 +1185,7 @@ try_again:
1186 if (inet->cmsg_flags) 1185 if (inet->cmsg_flags)
1187 ip_cmsg_recv(msg, skb); 1186 ip_cmsg_recv(msg, skb);
1188 1187
1189 err = copied; 1188 err = len;
1190 if (flags & MSG_TRUNC) 1189 if (flags & MSG_TRUNC)
1191 err = ulen; 1190 err = ulen;
1192 1191
@@ -2027,12 +2026,12 @@ static struct udp_seq_afinfo udp4_seq_afinfo = {
2027 }, 2026 },
2028}; 2027};
2029 2028
2030static int udp4_proc_init_net(struct net *net) 2029static int __net_init udp4_proc_init_net(struct net *net)
2031{ 2030{
2032 return udp_proc_register(net, &udp4_seq_afinfo); 2031 return udp_proc_register(net, &udp4_seq_afinfo);
2033} 2032}
2034 2033
2035static void udp4_proc_exit_net(struct net *net) 2034static void __net_exit udp4_proc_exit_net(struct net *net)
2036{ 2035{
2037 udp_proc_unregister(net, &udp4_seq_afinfo); 2036 udp_proc_unregister(net, &udp4_seq_afinfo);
2038} 2037}
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 66f79513f4a5..6610bf76369f 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -81,12 +81,12 @@ static struct udp_seq_afinfo udplite4_seq_afinfo = {
81 }, 81 },
82}; 82};
83 83
84static int udplite4_proc_init_net(struct net *net) 84static int __net_init udplite4_proc_init_net(struct net *net)
85{ 85{
86 return udp_proc_register(net, &udplite4_seq_afinfo); 86 return udp_proc_register(net, &udplite4_seq_afinfo);
87} 87}
88 88
89static void udplite4_proc_exit_net(struct net *net) 89static void __net_exit udplite4_proc_exit_net(struct net *net)
90{ 90{
91 udp_proc_unregister(net, &udplite4_seq_afinfo); 91 udp_proc_unregister(net, &udplite4_seq_afinfo);
92} 92}