aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorAnton Altaparmakov <aia21@cantab.net>2006-01-19 11:39:33 -0500
committerAnton Altaparmakov <aia21@cantab.net>2006-01-19 11:39:33 -0500
commit944d79559d154c12becde0dab327016cf438f46c (patch)
tree50c101806f4d3b6585222dda060559eb4f3e005a /net/ipv4
parentd087e4bdd24ebe3ae3d0b265b6573ec901af4b4b (diff)
parent0f36b018b2e314d45af86449f1a97facb1fbe300 (diff)
Merge branch 'master' of /usr/src/ntfs-2.6/
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Kconfig8
-rw-r--r--net/ipv4/Makefile5
-rw-r--r--net/ipv4/af_inet.c20
-rw-r--r--net/ipv4/ah4.c1
-rw-r--r--net/ipv4/arp.c2
-rw-r--r--net/ipv4/devinet.c2
-rw-r--r--net/ipv4/esp4.c1
-rw-r--r--net/ipv4/fib_frontend.c8
-rw-r--r--net/ipv4/fib_hash.c1
-rw-r--r--net/ipv4/fib_rules.c1
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/fib_trie.c8
-rw-r--r--net/ipv4/icmp.c4
-rw-r--r--net/ipv4/igmp.c169
-rw-r--r--net/ipv4/inet_connection_sock.c25
-rw-r--r--net/ipv4/inet_diag.c251
-rw-r--r--net/ipv4/inet_hashtables.c178
-rw-r--r--net/ipv4/inet_timewait_sock.c5
-rw-r--r--net/ipv4/inetpeer.c7
-rw-r--r--net/ipv4/ip_fragment.c70
-rw-r--r--net/ipv4/ip_gre.c40
-rw-r--r--net/ipv4/ip_input.c16
-rw-r--r--net/ipv4/ip_options.c2
-rw-r--r--net/ipv4/ip_output.c46
-rw-r--r--net/ipv4/ip_sockglue.c20
-rw-r--r--net/ipv4/ipcomp.c1
-rw-r--r--net/ipv4/ipconfig.c2
-rw-r--r--net/ipv4/ipip.c23
-rw-r--r--net/ipv4/ipmr.c27
-rw-r--r--net/ipv4/ipvs/ip_vs_app.c28
-rw-r--r--net/ipv4/ipvs/ip_vs_conn.c22
-rw-r--r--net/ipv4/ipvs/ip_vs_core.c7
-rw-r--r--net/ipv4/ipvs/ip_vs_ctl.c11
-rw-r--r--net/ipv4/ipvs/ip_vs_dh.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_est.c4
-rw-r--r--net/ipv4/ipvs/ip_vs_lblc.c29
-rw-r--r--net/ipv4/ipvs/ip_vs_lblcr.c29
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_ah.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_esp.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_tcp.c24
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_udp.c3
-rw-r--r--net/ipv4/ipvs/ip_vs_sched.c1
-rw-r--r--net/ipv4/ipvs/ip_vs_sh.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_sync.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_xmit.c2
-rw-r--r--net/ipv4/netfilter.c25
-rw-r--r--net/ipv4/netfilter/Kconfig256
-rw-r--r--net/ipv4/netfilter/Makefile23
-rw-r--r--net/ipv4/netfilter/arp_tables.c502
-rw-r--r--net/ipv4/netfilter/arpt_mangle.c7
-rw-r--r--net/ipv4/netfilter/arptable_filter.c1
-rw-r--r--net/ipv4/netfilter/ip_conntrack_amanda.c4
-rw-r--r--net/ipv4/netfilter/ip_conntrack_ftp.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_helper_pptp.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_irc.c10
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netbios_ns.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netlink.c36
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_generic.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_gre.c7
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_icmp.c50
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_sctp.c17
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_tcp.c25
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_udp.c6
-rw-r--r--net/ipv4/netfilter/ip_conntrack_standalone.c57
-rw-r--r--net/ipv4/netfilter/ip_nat_ftp.c2
-rw-r--r--net/ipv4/netfilter/ip_nat_helper_pptp.c81
-rw-r--r--net/ipv4/netfilter/ip_nat_irc.c2
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_gre.c38
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_icmp.c34
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_tcp.c36
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_udp.c36
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_unknown.c16
-rw-r--r--net/ipv4/netfilter/ip_nat_rule.c5
-rw-r--r--net/ipv4/netfilter/ip_nat_snmp_basic.c2
-rw-r--r--net/ipv4/netfilter/ip_nat_standalone.c126
-rw-r--r--net/ipv4/netfilter/ip_tables.c918
-rw-r--r--net/ipv4/netfilter/ipt_CLASSIFY.c90
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c3
-rw-r--r--net/ipv4/netfilter/ipt_CONNMARK.c122
-rw-r--r--net/ipv4/netfilter/ipt_DSCP.c2
-rw-r--r--net/ipv4/netfilter/ipt_ECN.c3
-rw-r--r--net/ipv4/netfilter/ipt_LOG.c2
-rw-r--r--net/ipv4/netfilter/ipt_MARK.c172
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c4
-rw-r--r--net/ipv4/netfilter/ipt_NETMAP.c2
-rw-r--r--net/ipv4/netfilter/ipt_NFQUEUE.c70
-rw-r--r--net/ipv4/netfilter/ipt_NOTRACK.c76
-rw-r--r--net/ipv4/netfilter/ipt_REDIRECT.c2
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c5
-rw-r--r--net/ipv4/netfilter/ipt_SAME.c2
-rw-r--r--net/ipv4/netfilter/ipt_TCPMSS.c3
-rw-r--r--net/ipv4/netfilter/ipt_TOS.c2
-rw-r--r--net/ipv4/netfilter/ipt_TTL.c2
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c12
-rw-r--r--net/ipv4/netfilter/ipt_addrtype.c4
-rw-r--r--net/ipv4/netfilter/ipt_ah.c6
-rw-r--r--net/ipv4/netfilter/ipt_comment.c59
-rw-r--r--net/ipv4/netfilter/ipt_connbytes.c161
-rw-r--r--net/ipv4/netfilter/ipt_connmark.c88
-rw-r--r--net/ipv4/netfilter/ipt_conntrack.c232
-rw-r--r--net/ipv4/netfilter/ipt_dccp.c176
-rw-r--r--net/ipv4/netfilter/ipt_dscp.c4
-rw-r--r--net/ipv4/netfilter/ipt_ecn.c5
-rw-r--r--net/ipv4/netfilter/ipt_esp.c6
-rw-r--r--net/ipv4/netfilter/ipt_hashlimit.c3
-rw-r--r--net/ipv4/netfilter/ipt_helper.c167
-rw-r--r--net/ipv4/netfilter/ipt_iprange.c4
-rw-r--r--net/ipv4/netfilter/ipt_length.c64
-rw-r--r--net/ipv4/netfilter/ipt_limit.c157
-rw-r--r--net/ipv4/netfilter/ipt_mac.c79
-rw-r--r--net/ipv4/netfilter/ipt_mark.c71
-rw-r--r--net/ipv4/netfilter/ipt_multiport.c10
-rw-r--r--net/ipv4/netfilter/ipt_owner.c3
-rw-r--r--net/ipv4/netfilter/ipt_physdev.c134
-rw-r--r--net/ipv4/netfilter/ipt_pkttype.c70
-rw-r--r--net/ipv4/netfilter/ipt_policy.c173
-rw-r--r--net/ipv4/netfilter/ipt_realm.c76
-rw-r--r--net/ipv4/netfilter/ipt_recent.c26
-rw-r--r--net/ipv4/netfilter/ipt_sctp.c203
-rw-r--r--net/ipv4/netfilter/ipt_state.c74
-rw-r--r--net/ipv4/netfilter/ipt_string.c91
-rw-r--r--net/ipv4/netfilter/ipt_tcpmss.c127
-rw-r--r--net/ipv4/netfilter/ipt_tos.c3
-rw-r--r--net/ipv4/netfilter/ipt_ttl.c4
-rw-r--r--net/ipv4/netfilter/iptable_filter.c3
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c1
-rw-r--r--net/ipv4/netfilter/iptable_raw.c3
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c83
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c97
-rw-r--r--net/ipv4/proc.c1
-rw-r--r--net/ipv4/raw.c3
-rw-r--r--net/ipv4/route.c15
-rw-r--r--net/ipv4/syncookies.c4
-rw-r--r--net/ipv4/sysctl_net_ipv4.c11
-rw-r--r--net/ipv4/tcp.c10
-rw-r--r--net/ipv4/tcp_bic.c85
-rw-r--r--net/ipv4/tcp_cong.c28
-rw-r--r--net/ipv4/tcp_cubic.c411
-rw-r--r--net/ipv4/tcp_input.c101
-rw-r--r--net/ipv4/tcp_ipv4.c270
-rw-r--r--net/ipv4/tcp_minisocks.c16
-rw-r--r--net/ipv4/tcp_output.c118
-rw-r--r--net/ipv4/tcp_vegas.c4
-rw-r--r--net/ipv4/udp.c24
-rw-r--r--net/ipv4/xfrm4_input.c31
-rw-r--r--net/ipv4/xfrm4_output.c72
-rw-r--r--net/ipv4/xfrm4_state.c15
147 files changed, 2509 insertions, 5196 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index e55136ae09f4..011cca7ae02b 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -456,6 +456,14 @@ config TCP_CONG_BIC
456 increase provides TCP friendliness. 456 increase provides TCP friendliness.
457 See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/ 457 See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/
458 458
459config TCP_CONG_CUBIC
460 tristate "CUBIC TCP"
461 default m
462 ---help---
463 This is version 2.0 of BIC-TCP which uses a cubic growth function
464 among other techniques.
465 See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf
466
459config TCP_CONG_WESTWOOD 467config TCP_CONG_WESTWOOD
460 tristate "TCP Westwood+" 468 tristate "TCP Westwood+"
461 default m 469 default m
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index f0435d00db6b..35e5f5999092 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -9,7 +9,7 @@ obj-y := route.o inetpeer.o protocol.o \
9 tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ 9 tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
10 tcp_minisocks.o tcp_cong.o \ 10 tcp_minisocks.o tcp_cong.o \
11 datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \ 11 datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \
12 sysctl_net_ipv4.o fib_frontend.o fib_semantics.o netfilter.o 12 sysctl_net_ipv4.o fib_frontend.o fib_semantics.o
13 13
14obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o 14obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
15obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o 15obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
@@ -28,12 +28,13 @@ obj-$(CONFIG_IP_ROUTE_MULTIPATH_RR) += multipath_rr.o
28obj-$(CONFIG_IP_ROUTE_MULTIPATH_RANDOM) += multipath_random.o 28obj-$(CONFIG_IP_ROUTE_MULTIPATH_RANDOM) += multipath_random.o
29obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o 29obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o
30obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o 30obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o
31obj-$(CONFIG_NETFILTER) += netfilter/ 31obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/
32obj-$(CONFIG_IP_VS) += ipvs/ 32obj-$(CONFIG_IP_VS) += ipvs/
33obj-$(CONFIG_INET_DIAG) += inet_diag.o 33obj-$(CONFIG_INET_DIAG) += inet_diag.o
34obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o 34obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o
35obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o 35obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
36obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o 36obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
37obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o
37obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o 38obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o
38obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o 39obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o
39obj-$(CONFIG_TCP_CONG_HYBLA) += tcp_hybla.o 40obj-$(CONFIG_TCP_CONG_HYBLA) += tcp_hybla.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d368cf249000..97c276f95b35 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -79,6 +79,7 @@
79#include <linux/string.h> 79#include <linux/string.h>
80#include <linux/sockios.h> 80#include <linux/sockios.h>
81#include <linux/net.h> 81#include <linux/net.h>
82#include <linux/capability.h>
82#include <linux/fcntl.h> 83#include <linux/fcntl.h>
83#include <linux/mm.h> 84#include <linux/mm.h>
84#include <linux/interrupt.h> 85#include <linux/interrupt.h>
@@ -93,6 +94,7 @@
93#include <linux/smp_lock.h> 94#include <linux/smp_lock.h>
94#include <linux/inet.h> 95#include <linux/inet.h>
95#include <linux/igmp.h> 96#include <linux/igmp.h>
97#include <linux/inetdevice.h>
96#include <linux/netdevice.h> 98#include <linux/netdevice.h>
97#include <net/ip.h> 99#include <net/ip.h>
98#include <net/protocol.h> 100#include <net/protocol.h>
@@ -302,6 +304,7 @@ lookup_protocol:
302 sk->sk_reuse = 1; 304 sk->sk_reuse = 1;
303 305
304 inet = inet_sk(sk); 306 inet = inet_sk(sk);
307 inet->is_icsk = INET_PROTOSW_ICSK & answer_flags;
305 308
306 if (SOCK_RAW == sock->type) { 309 if (SOCK_RAW == sock->type) {
307 inet->num = protocol; 310 inet->num = protocol;
@@ -775,16 +778,16 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
775 err = devinet_ioctl(cmd, (void __user *)arg); 778 err = devinet_ioctl(cmd, (void __user *)arg);
776 break; 779 break;
777 default: 780 default:
778 if (!sk->sk_prot->ioctl || 781 if (sk->sk_prot->ioctl)
779 (err = sk->sk_prot->ioctl(sk, cmd, arg)) == 782 err = sk->sk_prot->ioctl(sk, cmd, arg);
780 -ENOIOCTLCMD) 783 else
781 err = dev_ioctl(cmd, (void __user *)arg); 784 err = -ENOIOCTLCMD;
782 break; 785 break;
783 } 786 }
784 return err; 787 return err;
785} 788}
786 789
787struct proto_ops inet_stream_ops = { 790const struct proto_ops inet_stream_ops = {
788 .family = PF_INET, 791 .family = PF_INET,
789 .owner = THIS_MODULE, 792 .owner = THIS_MODULE,
790 .release = inet_release, 793 .release = inet_release,
@@ -805,7 +808,7 @@ struct proto_ops inet_stream_ops = {
805 .sendpage = tcp_sendpage 808 .sendpage = tcp_sendpage
806}; 809};
807 810
808struct proto_ops inet_dgram_ops = { 811const struct proto_ops inet_dgram_ops = {
809 .family = PF_INET, 812 .family = PF_INET,
810 .owner = THIS_MODULE, 813 .owner = THIS_MODULE,
811 .release = inet_release, 814 .release = inet_release,
@@ -830,7 +833,7 @@ struct proto_ops inet_dgram_ops = {
830 * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without 833 * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without
831 * udp_poll 834 * udp_poll
832 */ 835 */
833static struct proto_ops inet_sockraw_ops = { 836static const struct proto_ops inet_sockraw_ops = {
834 .family = PF_INET, 837 .family = PF_INET,
835 .owner = THIS_MODULE, 838 .owner = THIS_MODULE,
836 .release = inet_release, 839 .release = inet_release,
@@ -869,7 +872,8 @@ static struct inet_protosw inetsw_array[] =
869 .ops = &inet_stream_ops, 872 .ops = &inet_stream_ops,
870 .capability = -1, 873 .capability = -1,
871 .no_check = 0, 874 .no_check = 0,
872 .flags = INET_PROTOSW_PERMANENT, 875 .flags = INET_PROTOSW_PERMANENT |
876 INET_PROTOSW_ICSK,
873 }, 877 },
874 878
875 { 879 {
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 035ad2c9e1ba..aed537fa2c88 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -6,6 +6,7 @@
6#include <linux/crypto.h> 6#include <linux/crypto.h>
7#include <linux/pfkeyv2.h> 7#include <linux/pfkeyv2.h>
8#include <net/icmp.h> 8#include <net/icmp.h>
9#include <net/protocol.h>
9#include <asm/scatterlist.h> 10#include <asm/scatterlist.h>
10 11
11 12
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index b425748f02d7..accdefedfed7 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -79,6 +79,7 @@
79#include <linux/string.h> 79#include <linux/string.h>
80#include <linux/kernel.h> 80#include <linux/kernel.h>
81#include <linux/sched.h> 81#include <linux/sched.h>
82#include <linux/capability.h>
82#include <linux/config.h> 83#include <linux/config.h>
83#include <linux/socket.h> 84#include <linux/socket.h>
84#include <linux/sockios.h> 85#include <linux/sockios.h>
@@ -86,6 +87,7 @@
86#include <linux/in.h> 87#include <linux/in.h>
87#include <linux/mm.h> 88#include <linux/mm.h>
88#include <linux/inet.h> 89#include <linux/inet.h>
90#include <linux/inetdevice.h>
89#include <linux/netdevice.h> 91#include <linux/netdevice.h>
90#include <linux/etherdevice.h> 92#include <linux/etherdevice.h>
91#include <linux/fddidevice.h> 93#include <linux/fddidevice.h>
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 04a6fe3e95a2..95b9d81ac488 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -32,6 +32,7 @@
32#include <asm/uaccess.h> 32#include <asm/uaccess.h>
33#include <asm/system.h> 33#include <asm/system.h>
34#include <linux/bitops.h> 34#include <linux/bitops.h>
35#include <linux/capability.h>
35#include <linux/module.h> 36#include <linux/module.h>
36#include <linux/types.h> 37#include <linux/types.h>
37#include <linux/kernel.h> 38#include <linux/kernel.h>
@@ -58,6 +59,7 @@
58#endif 59#endif
59#include <linux/kmod.h> 60#include <linux/kmod.h>
60 61
62#include <net/arp.h>
61#include <net/ip.h> 63#include <net/ip.h>
62#include <net/route.h> 64#include <net/route.h>
63#include <net/ip_fib.h> 65#include <net/ip_fib.h>
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 1b18ce66e7b7..73bfcae8af9c 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -9,6 +9,7 @@
9#include <linux/pfkeyv2.h> 9#include <linux/pfkeyv2.h>
10#include <linux/random.h> 10#include <linux/random.h>
11#include <net/icmp.h> 11#include <net/icmp.h>
12#include <net/protocol.h>
12#include <net/udp.h> 13#include <net/udp.h>
13 14
14/* decapsulation data for use when post-processing */ 15/* decapsulation data for use when post-processing */
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 19b1b984d687..4e3d3811dea2 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -20,6 +20,7 @@
20#include <asm/uaccess.h> 20#include <asm/uaccess.h>
21#include <asm/system.h> 21#include <asm/system.h>
22#include <linux/bitops.h> 22#include <linux/bitops.h>
23#include <linux/capability.h>
23#include <linux/types.h> 24#include <linux/types.h>
24#include <linux/kernel.h> 25#include <linux/kernel.h>
25#include <linux/sched.h> 26#include <linux/sched.h>
@@ -30,6 +31,7 @@
30#include <linux/errno.h> 31#include <linux/errno.h>
31#include <linux/in.h> 32#include <linux/in.h>
32#include <linux/inet.h> 33#include <linux/inet.h>
34#include <linux/inetdevice.h>
33#include <linux/netdevice.h> 35#include <linux/netdevice.h>
34#include <linux/if_arp.h> 36#include <linux/if_arp.h>
35#include <linux/skbuff.h> 37#include <linux/skbuff.h>
@@ -287,13 +289,13 @@ static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
287{ 289{
288 int i; 290 int i;
289 291
290 for (i=1; i<=RTA_MAX; i++) { 292 for (i=1; i<=RTA_MAX; i++, rta++) {
291 struct rtattr *attr = rta[i-1]; 293 struct rtattr *attr = *rta;
292 if (attr) { 294 if (attr) {
293 if (RTA_PAYLOAD(attr) < 4) 295 if (RTA_PAYLOAD(attr) < 4)
294 return -EINVAL; 296 return -EINVAL;
295 if (i != RTA_MULTIPATH && i != RTA_METRICS) 297 if (i != RTA_MULTIPATH && i != RTA_METRICS)
296 rta[i-1] = (struct rtattr*)RTA_DATA(attr); 298 *rta = (struct rtattr*)RTA_DATA(attr);
297 } 299 }
298 } 300 }
299 return 0; 301 return 0;
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 7ea0209cb169..e2890ec8159e 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -29,6 +29,7 @@
29#include <linux/errno.h> 29#include <linux/errno.h>
30#include <linux/in.h> 30#include <linux/in.h>
31#include <linux/inet.h> 31#include <linux/inet.h>
32#include <linux/inetdevice.h>
32#include <linux/netdevice.h> 33#include <linux/netdevice.h>
33#include <linux/if_arp.h> 34#include <linux/if_arp.h>
34#include <linux/proc_fs.h> 35#include <linux/proc_fs.h>
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 0b298bbc1518..0dd4d06e456d 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -33,6 +33,7 @@
33#include <linux/errno.h> 33#include <linux/errno.h>
34#include <linux/in.h> 34#include <linux/in.h>
35#include <linux/inet.h> 35#include <linux/inet.h>
36#include <linux/inetdevice.h>
36#include <linux/netdevice.h> 37#include <linux/netdevice.h>
37#include <linux/if_arp.h> 38#include <linux/if_arp.h>
38#include <linux/proc_fs.h> 39#include <linux/proc_fs.h>
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 6d2a6ac070e3..ef4724de7350 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -29,6 +29,7 @@
29#include <linux/errno.h> 29#include <linux/errno.h>
30#include <linux/in.h> 30#include <linux/in.h>
31#include <linux/inet.h> 31#include <linux/inet.h>
32#include <linux/inetdevice.h>
32#include <linux/netdevice.h> 33#include <linux/netdevice.h>
33#include <linux/if_arp.h> 34#include <linux/if_arp.h>
34#include <linux/proc_fs.h> 35#include <linux/proc_fs.h>
@@ -36,6 +37,7 @@
36#include <linux/netlink.h> 37#include <linux/netlink.h>
37#include <linux/init.h> 38#include <linux/init.h>
38 39
40#include <net/arp.h>
39#include <net/ip.h> 41#include <net/ip.h>
40#include <net/protocol.h> 42#include <net/protocol.h>
41#include <net/route.h> 43#include <net/route.h>
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 705e3ce86df9..e320b32373e5 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -41,6 +41,13 @@
41 * modify it under the terms of the GNU General Public License 41 * modify it under the terms of the GNU General Public License
42 * as published by the Free Software Foundation; either version 42 * as published by the Free Software Foundation; either version
43 * 2 of the License, or (at your option) any later version. 43 * 2 of the License, or (at your option) any later version.
44 *
45 * Substantial contributions to this work comes from:
46 *
47 * David S. Miller, <davem@davemloft.net>
48 * Stephen Hemminger <shemminger@osdl.org>
49 * Paul E. McKenney <paulmck@us.ibm.com>
50 * Patrick McHardy <kaber@trash.net>
44 */ 51 */
45 52
46#define VERSION "0.404" 53#define VERSION "0.404"
@@ -59,6 +66,7 @@
59#include <linux/errno.h> 66#include <linux/errno.h>
60#include <linux/in.h> 67#include <linux/in.h>
61#include <linux/inet.h> 68#include <linux/inet.h>
69#include <linux/inetdevice.h>
62#include <linux/netdevice.h> 70#include <linux/netdevice.h>
63#include <linux/if_arp.h> 71#include <linux/if_arp.h>
64#include <linux/proc_fs.h> 72#include <linux/proc_fs.h>
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 92e23b2ad4d2..105039eb7629 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -73,6 +73,7 @@
73#include <linux/socket.h> 73#include <linux/socket.h>
74#include <linux/in.h> 74#include <linux/in.h>
75#include <linux/inet.h> 75#include <linux/inet.h>
76#include <linux/inetdevice.h>
76#include <linux/netdevice.h> 77#include <linux/netdevice.h>
77#include <linux/string.h> 78#include <linux/string.h>
78#include <linux/netfilter_ipv4.h> 79#include <linux/netfilter_ipv4.h>
@@ -898,8 +899,7 @@ static void icmp_address_reply(struct sk_buff *skb)
898 u32 _mask, *mp; 899 u32 _mask, *mp;
899 900
900 mp = skb_header_pointer(skb, 0, sizeof(_mask), &_mask); 901 mp = skb_header_pointer(skb, 0, sizeof(_mask), &_mask);
901 if (mp == NULL) 902 BUG_ON(mp == NULL);
902 BUG();
903 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { 903 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
904 if (*mp == ifa->ifa_mask && 904 if (*mp == ifa->ifa_mask &&
905 inet_ifa_match(rt->rt_src, ifa)) 905 inet_ifa_match(rt->rt_src, ifa))
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 4a195c724f01..d8ce7133cd8f 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -91,6 +91,8 @@
91#include <linux/if_arp.h> 91#include <linux/if_arp.h>
92#include <linux/rtnetlink.h> 92#include <linux/rtnetlink.h>
93#include <linux/times.h> 93#include <linux/times.h>
94
95#include <net/arp.h>
94#include <net/ip.h> 96#include <net/ip.h>
95#include <net/protocol.h> 97#include <net/protocol.h>
96#include <net/route.h> 98#include <net/route.h>
@@ -231,7 +233,18 @@ static int is_in(struct ip_mc_list *pmc, struct ip_sf_list *psf, int type,
231 case IGMPV3_MODE_IS_EXCLUDE: 233 case IGMPV3_MODE_IS_EXCLUDE:
232 if (gdeleted || sdeleted) 234 if (gdeleted || sdeleted)
233 return 0; 235 return 0;
234 return !(pmc->gsquery && !psf->sf_gsresp); 236 if (!(pmc->gsquery && !psf->sf_gsresp)) {
237 if (pmc->sfmode == MCAST_INCLUDE)
238 return 1;
239 /* don't include if this source is excluded
240 * in all filters
241 */
242 if (psf->sf_count[MCAST_INCLUDE])
243 return type == IGMPV3_MODE_IS_INCLUDE;
244 return pmc->sfcount[MCAST_EXCLUDE] ==
245 psf->sf_count[MCAST_EXCLUDE];
246 }
247 return 0;
235 case IGMPV3_CHANGE_TO_INCLUDE: 248 case IGMPV3_CHANGE_TO_INCLUDE:
236 if (gdeleted || sdeleted) 249 if (gdeleted || sdeleted)
237 return 0; 250 return 0;
@@ -383,7 +396,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
383 struct igmpv3_report *pih; 396 struct igmpv3_report *pih;
384 struct igmpv3_grec *pgr = NULL; 397 struct igmpv3_grec *pgr = NULL;
385 struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list; 398 struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list;
386 int scount, first, isquery, truncate; 399 int scount, stotal, first, isquery, truncate;
387 400
388 if (pmc->multiaddr == IGMP_ALL_HOSTS) 401 if (pmc->multiaddr == IGMP_ALL_HOSTS)
389 return skb; 402 return skb;
@@ -393,25 +406,13 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
393 truncate = type == IGMPV3_MODE_IS_EXCLUDE || 406 truncate = type == IGMPV3_MODE_IS_EXCLUDE ||
394 type == IGMPV3_CHANGE_TO_EXCLUDE; 407 type == IGMPV3_CHANGE_TO_EXCLUDE;
395 408
409 stotal = scount = 0;
410
396 psf_list = sdeleted ? &pmc->tomb : &pmc->sources; 411 psf_list = sdeleted ? &pmc->tomb : &pmc->sources;
397 412
398 if (!*psf_list) { 413 if (!*psf_list)
399 if (type == IGMPV3_ALLOW_NEW_SOURCES || 414 goto empty_source;
400 type == IGMPV3_BLOCK_OLD_SOURCES) 415
401 return skb;
402 if (pmc->crcount || isquery) {
403 /* make sure we have room for group header and at
404 * least one source.
405 */
406 if (skb && AVAILABLE(skb) < sizeof(struct igmpv3_grec)+
407 sizeof(__u32)) {
408 igmpv3_sendpack(skb);
409 skb = NULL; /* add_grhead will get a new one */
410 }
411 skb = add_grhead(skb, pmc, type, &pgr);
412 }
413 return skb;
414 }
415 pih = skb ? (struct igmpv3_report *)skb->h.igmph : NULL; 416 pih = skb ? (struct igmpv3_report *)skb->h.igmph : NULL;
416 417
417 /* EX and TO_EX get a fresh packet, if needed */ 418 /* EX and TO_EX get a fresh packet, if needed */
@@ -424,7 +425,6 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
424 } 425 }
425 } 426 }
426 first = 1; 427 first = 1;
427 scount = 0;
428 psf_prev = NULL; 428 psf_prev = NULL;
429 for (psf=*psf_list; psf; psf=psf_next) { 429 for (psf=*psf_list; psf; psf=psf_next) {
430 u32 *psrc; 430 u32 *psrc;
@@ -458,7 +458,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
458 } 458 }
459 psrc = (u32 *)skb_put(skb, sizeof(u32)); 459 psrc = (u32 *)skb_put(skb, sizeof(u32));
460 *psrc = psf->sf_inaddr; 460 *psrc = psf->sf_inaddr;
461 scount++; 461 scount++; stotal++;
462 if ((type == IGMPV3_ALLOW_NEW_SOURCES || 462 if ((type == IGMPV3_ALLOW_NEW_SOURCES ||
463 type == IGMPV3_BLOCK_OLD_SOURCES) && psf->sf_crcount) { 463 type == IGMPV3_BLOCK_OLD_SOURCES) && psf->sf_crcount) {
464 psf->sf_crcount--; 464 psf->sf_crcount--;
@@ -473,6 +473,21 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
473 } 473 }
474 psf_prev = psf; 474 psf_prev = psf;
475 } 475 }
476
477empty_source:
478 if (!stotal) {
479 if (type == IGMPV3_ALLOW_NEW_SOURCES ||
480 type == IGMPV3_BLOCK_OLD_SOURCES)
481 return skb;
482 if (pmc->crcount || isquery) {
483 /* make sure we have room for group header */
484 if (skb && AVAILABLE(skb)<sizeof(struct igmpv3_grec)) {
485 igmpv3_sendpack(skb);
486 skb = NULL; /* add_grhead will get a new one */
487 }
488 skb = add_grhead(skb, pmc, type, &pgr);
489 }
490 }
476 if (pgr) 491 if (pgr)
477 pgr->grec_nsrcs = htons(scount); 492 pgr->grec_nsrcs = htons(scount);
478 493
@@ -555,11 +570,11 @@ static void igmpv3_send_cr(struct in_device *in_dev)
555 skb = add_grec(skb, pmc, dtype, 1, 1); 570 skb = add_grec(skb, pmc, dtype, 1, 1);
556 } 571 }
557 if (pmc->crcount) { 572 if (pmc->crcount) {
558 pmc->crcount--;
559 if (pmc->sfmode == MCAST_EXCLUDE) { 573 if (pmc->sfmode == MCAST_EXCLUDE) {
560 type = IGMPV3_CHANGE_TO_INCLUDE; 574 type = IGMPV3_CHANGE_TO_INCLUDE;
561 skb = add_grec(skb, pmc, type, 1, 0); 575 skb = add_grec(skb, pmc, type, 1, 0);
562 } 576 }
577 pmc->crcount--;
563 if (pmc->crcount == 0) { 578 if (pmc->crcount == 0) {
564 igmpv3_clear_zeros(&pmc->tomb); 579 igmpv3_clear_zeros(&pmc->tomb);
565 igmpv3_clear_zeros(&pmc->sources); 580 igmpv3_clear_zeros(&pmc->sources);
@@ -592,12 +607,12 @@ static void igmpv3_send_cr(struct in_device *in_dev)
592 607
593 /* filter mode changes */ 608 /* filter mode changes */
594 if (pmc->crcount) { 609 if (pmc->crcount) {
595 pmc->crcount--;
596 if (pmc->sfmode == MCAST_EXCLUDE) 610 if (pmc->sfmode == MCAST_EXCLUDE)
597 type = IGMPV3_CHANGE_TO_EXCLUDE; 611 type = IGMPV3_CHANGE_TO_EXCLUDE;
598 else 612 else
599 type = IGMPV3_CHANGE_TO_INCLUDE; 613 type = IGMPV3_CHANGE_TO_INCLUDE;
600 skb = add_grec(skb, pmc, type, 0, 0); 614 skb = add_grec(skb, pmc, type, 0, 0);
615 pmc->crcount--;
601 } 616 }
602 spin_unlock_bh(&pmc->lock); 617 spin_unlock_bh(&pmc->lock);
603 } 618 }
@@ -733,7 +748,8 @@ static void igmp_timer_expire(unsigned long data)
733 ip_ma_put(im); 748 ip_ma_put(im);
734} 749}
735 750
736static void igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs) 751/* mark EXCLUDE-mode sources */
752static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
737{ 753{
738 struct ip_sf_list *psf; 754 struct ip_sf_list *psf;
739 int i, scount; 755 int i, scount;
@@ -742,6 +758,37 @@ static void igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
742 for (psf=pmc->sources; psf; psf=psf->sf_next) { 758 for (psf=pmc->sources; psf; psf=psf->sf_next) {
743 if (scount == nsrcs) 759 if (scount == nsrcs)
744 break; 760 break;
761 for (i=0; i<nsrcs; i++) {
762 /* skip inactive filters */
763 if (pmc->sfcount[MCAST_INCLUDE] ||
764 pmc->sfcount[MCAST_EXCLUDE] !=
765 psf->sf_count[MCAST_EXCLUDE])
766 continue;
767 if (srcs[i] == psf->sf_inaddr) {
768 scount++;
769 break;
770 }
771 }
772 }
773 pmc->gsquery = 0;
774 if (scount == nsrcs) /* all sources excluded */
775 return 0;
776 return 1;
777}
778
779static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
780{
781 struct ip_sf_list *psf;
782 int i, scount;
783
784 if (pmc->sfmode == MCAST_EXCLUDE)
785 return igmp_xmarksources(pmc, nsrcs, srcs);
786
787 /* mark INCLUDE-mode sources */
788 scount = 0;
789 for (psf=pmc->sources; psf; psf=psf->sf_next) {
790 if (scount == nsrcs)
791 break;
745 for (i=0; i<nsrcs; i++) 792 for (i=0; i<nsrcs; i++)
746 if (srcs[i] == psf->sf_inaddr) { 793 if (srcs[i] == psf->sf_inaddr) {
747 psf->sf_gsresp = 1; 794 psf->sf_gsresp = 1;
@@ -749,6 +796,12 @@ static void igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
749 break; 796 break;
750 } 797 }
751 } 798 }
799 if (!scount) {
800 pmc->gsquery = 0;
801 return 0;
802 }
803 pmc->gsquery = 1;
804 return 1;
752} 805}
753 806
754static void igmp_heard_report(struct in_device *in_dev, u32 group) 807static void igmp_heard_report(struct in_device *in_dev, u32 group)
@@ -843,6 +896,8 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
843 */ 896 */
844 read_lock(&in_dev->mc_list_lock); 897 read_lock(&in_dev->mc_list_lock);
845 for (im=in_dev->mc_list; im!=NULL; im=im->next) { 898 for (im=in_dev->mc_list; im!=NULL; im=im->next) {
899 int changed;
900
846 if (group && group != im->multiaddr) 901 if (group && group != im->multiaddr)
847 continue; 902 continue;
848 if (im->multiaddr == IGMP_ALL_HOSTS) 903 if (im->multiaddr == IGMP_ALL_HOSTS)
@@ -852,10 +907,11 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
852 im->gsquery = im->gsquery && mark; 907 im->gsquery = im->gsquery && mark;
853 else 908 else
854 im->gsquery = mark; 909 im->gsquery = mark;
855 if (im->gsquery) 910 changed = !im->gsquery ||
856 igmp_marksources(im, ntohs(ih3->nsrcs), ih3->srcs); 911 igmp_marksources(im, ntohs(ih3->nsrcs), ih3->srcs);
857 spin_unlock_bh(&im->lock); 912 spin_unlock_bh(&im->lock);
858 igmp_mod_timer(im, max_delay); 913 if (changed)
914 igmp_mod_timer(im, max_delay);
859 } 915 }
860 read_unlock(&in_dev->mc_list_lock); 916 read_unlock(&in_dev->mc_list_lock);
861} 917}
@@ -973,7 +1029,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
973 * for deleted items allows change reports to use common code with 1029 * for deleted items allows change reports to use common code with
974 * non-deleted or query-response MCA's. 1030 * non-deleted or query-response MCA's.
975 */ 1031 */
976 pmc = (struct ip_mc_list *)kmalloc(sizeof(*pmc), GFP_KERNEL); 1032 pmc = kmalloc(sizeof(*pmc), GFP_KERNEL);
977 if (!pmc) 1033 if (!pmc)
978 return; 1034 return;
979 memset(pmc, 0, sizeof(*pmc)); 1035 memset(pmc, 0, sizeof(*pmc));
@@ -1153,7 +1209,7 @@ void ip_mc_inc_group(struct in_device *in_dev, u32 addr)
1153 } 1209 }
1154 } 1210 }
1155 1211
1156 im = (struct ip_mc_list *)kmalloc(sizeof(*im), GFP_KERNEL); 1212 im = kmalloc(sizeof(*im), GFP_KERNEL);
1157 if (!im) 1213 if (!im)
1158 goto out; 1214 goto out;
1159 1215
@@ -1474,7 +1530,7 @@ static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode,
1474 psf_prev = psf; 1530 psf_prev = psf;
1475 } 1531 }
1476 if (!psf) { 1532 if (!psf) {
1477 psf = (struct ip_sf_list *)kmalloc(sizeof(*psf), GFP_ATOMIC); 1533 psf = kmalloc(sizeof(*psf), GFP_ATOMIC);
1478 if (!psf) 1534 if (!psf)
1479 return -ENOBUFS; 1535 return -ENOBUFS;
1480 memset(psf, 0, sizeof(*psf)); 1536 memset(psf, 0, sizeof(*psf));
@@ -1508,7 +1564,7 @@ static void sf_markstate(struct ip_mc_list *pmc)
1508 1564
1509static int sf_setstate(struct ip_mc_list *pmc) 1565static int sf_setstate(struct ip_mc_list *pmc)
1510{ 1566{
1511 struct ip_sf_list *psf; 1567 struct ip_sf_list *psf, *dpsf;
1512 int mca_xcount = pmc->sfcount[MCAST_EXCLUDE]; 1568 int mca_xcount = pmc->sfcount[MCAST_EXCLUDE];
1513 int qrv = pmc->interface->mr_qrv; 1569 int qrv = pmc->interface->mr_qrv;
1514 int new_in, rv; 1570 int new_in, rv;
@@ -1520,8 +1576,46 @@ static int sf_setstate(struct ip_mc_list *pmc)
1520 !psf->sf_count[MCAST_INCLUDE]; 1576 !psf->sf_count[MCAST_INCLUDE];
1521 } else 1577 } else
1522 new_in = psf->sf_count[MCAST_INCLUDE] != 0; 1578 new_in = psf->sf_count[MCAST_INCLUDE] != 0;
1523 if (new_in != psf->sf_oldin) { 1579 if (new_in) {
1524 psf->sf_crcount = qrv; 1580 if (!psf->sf_oldin) {
1581 struct ip_sf_list *prev = 0;
1582
1583 for (dpsf=pmc->tomb; dpsf; dpsf=dpsf->sf_next) {
1584 if (dpsf->sf_inaddr == psf->sf_inaddr)
1585 break;
1586 prev = dpsf;
1587 }
1588 if (dpsf) {
1589 if (prev)
1590 prev->sf_next = dpsf->sf_next;
1591 else
1592 pmc->tomb = dpsf->sf_next;
1593 kfree(dpsf);
1594 }
1595 psf->sf_crcount = qrv;
1596 rv++;
1597 }
1598 } else if (psf->sf_oldin) {
1599
1600 psf->sf_crcount = 0;
1601 /*
1602 * add or update "delete" records if an active filter
1603 * is now inactive
1604 */
1605 for (dpsf=pmc->tomb; dpsf; dpsf=dpsf->sf_next)
1606 if (dpsf->sf_inaddr == psf->sf_inaddr)
1607 break;
1608 if (!dpsf) {
1609 dpsf = (struct ip_sf_list *)
1610 kmalloc(sizeof(*dpsf), GFP_ATOMIC);
1611 if (!dpsf)
1612 continue;
1613 *dpsf = *psf;
1614 /* pmc->lock held by callers */
1615 dpsf->sf_next = pmc->tomb;
1616 pmc->tomb = dpsf;
1617 }
1618 dpsf->sf_crcount = qrv;
1525 rv++; 1619 rv++;
1526 } 1620 }
1527 } 1621 }
@@ -1657,7 +1751,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
1657 err = -ENOBUFS; 1751 err = -ENOBUFS;
1658 if (count >= sysctl_igmp_max_memberships) 1752 if (count >= sysctl_igmp_max_memberships)
1659 goto done; 1753 goto done;
1660 iml = (struct ip_mc_socklist *)sock_kmalloc(sk,sizeof(*iml),GFP_KERNEL); 1754 iml = sock_kmalloc(sk,sizeof(*iml),GFP_KERNEL);
1661 if (iml == NULL) 1755 if (iml == NULL)
1662 goto done; 1756 goto done;
1663 1757
@@ -1821,8 +1915,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
1821 1915
1822 if (psl) 1916 if (psl)
1823 count += psl->sl_max; 1917 count += psl->sl_max;
1824 newpsl = (struct ip_sf_socklist *)sock_kmalloc(sk, 1918 newpsl = sock_kmalloc(sk, IP_SFLSIZE(count), GFP_KERNEL);
1825 IP_SFLSIZE(count), GFP_KERNEL);
1826 if (!newpsl) { 1919 if (!newpsl) {
1827 err = -ENOBUFS; 1920 err = -ENOBUFS;
1828 goto done; 1921 goto done;
@@ -1905,8 +1998,8 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
1905 goto done; 1998 goto done;
1906 } 1999 }
1907 if (msf->imsf_numsrc) { 2000 if (msf->imsf_numsrc) {
1908 newpsl = (struct ip_sf_socklist *)sock_kmalloc(sk, 2001 newpsl = sock_kmalloc(sk, IP_SFLSIZE(msf->imsf_numsrc),
1909 IP_SFLSIZE(msf->imsf_numsrc), GFP_KERNEL); 2002 GFP_KERNEL);
1910 if (!newpsl) { 2003 if (!newpsl) {
1911 err = -ENOBUFS; 2004 err = -ENOBUFS;
1912 goto done; 2005 goto done;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 3fe021f1a566..ae20281d8deb 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -37,7 +37,8 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg);
37 */ 37 */
38int sysctl_local_port_range[2] = { 1024, 4999 }; 38int sysctl_local_port_range[2] = { 1024, 4999 };
39 39
40static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) 40int inet_csk_bind_conflict(const struct sock *sk,
41 const struct inet_bind_bucket *tb)
41{ 42{
42 const u32 sk_rcv_saddr = inet_rcv_saddr(sk); 43 const u32 sk_rcv_saddr = inet_rcv_saddr(sk);
43 struct sock *sk2; 44 struct sock *sk2;
@@ -62,11 +63,15 @@ static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucke
62 return node != NULL; 63 return node != NULL;
63} 64}
64 65
66EXPORT_SYMBOL_GPL(inet_csk_bind_conflict);
67
65/* Obtain a reference to a local port for the given sock, 68/* Obtain a reference to a local port for the given sock,
66 * if snum is zero it means select any available local port. 69 * if snum is zero it means select any available local port.
67 */ 70 */
68int inet_csk_get_port(struct inet_hashinfo *hashinfo, 71int inet_csk_get_port(struct inet_hashinfo *hashinfo,
69 struct sock *sk, unsigned short snum) 72 struct sock *sk, unsigned short snum,
73 int (*bind_conflict)(const struct sock *sk,
74 const struct inet_bind_bucket *tb))
70{ 75{
71 struct inet_bind_hashbucket *head; 76 struct inet_bind_hashbucket *head;
72 struct hlist_node *node; 77 struct hlist_node *node;
@@ -125,7 +130,7 @@ tb_found:
125 goto success; 130 goto success;
126 } else { 131 } else {
127 ret = 1; 132 ret = 1;
128 if (inet_csk_bind_conflict(sk, tb)) 133 if (bind_conflict(sk, tb))
129 goto fail_unlock; 134 goto fail_unlock;
130 } 135 }
131 } 136 }
@@ -380,7 +385,7 @@ struct request_sock *inet_csk_search_req(const struct sock *sk,
380EXPORT_SYMBOL_GPL(inet_csk_search_req); 385EXPORT_SYMBOL_GPL(inet_csk_search_req);
381 386
382void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, 387void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
383 const unsigned timeout) 388 unsigned long timeout)
384{ 389{
385 struct inet_connection_sock *icsk = inet_csk(sk); 390 struct inet_connection_sock *icsk = inet_csk(sk);
386 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; 391 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
@@ -631,3 +636,15 @@ void inet_csk_listen_stop(struct sock *sk)
631} 636}
632 637
633EXPORT_SYMBOL_GPL(inet_csk_listen_stop); 638EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
639
640void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
641{
642 struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
643 const struct inet_sock *inet = inet_sk(sk);
644
645 sin->sin_family = AF_INET;
646 sin->sin_addr.s_addr = inet->daddr;
647 sin->sin_port = inet->dport;
648}
649
650EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 39061ed53cfd..457db99c76df 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -50,9 +50,10 @@ static struct sock *idiagnl;
50#define INET_DIAG_PUT(skb, attrtype, attrlen) \ 50#define INET_DIAG_PUT(skb, attrtype, attrlen) \
51 RTA_DATA(__RTA_PUT(skb, attrtype, attrlen)) 51 RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
52 52
53static int inet_diag_fill(struct sk_buff *skb, struct sock *sk, 53static int inet_csk_diag_fill(struct sock *sk,
54 int ext, u32 pid, u32 seq, u16 nlmsg_flags, 54 struct sk_buff *skb,
55 const struct nlmsghdr *unlh) 55 int ext, u32 pid, u32 seq, u16 nlmsg_flags,
56 const struct nlmsghdr *unlh)
56{ 57{
57 const struct inet_sock *inet = inet_sk(sk); 58 const struct inet_sock *inet = inet_sk(sk);
58 const struct inet_connection_sock *icsk = inet_csk(sk); 59 const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -70,20 +71,22 @@ static int inet_diag_fill(struct sk_buff *skb, struct sock *sk,
70 nlh->nlmsg_flags = nlmsg_flags; 71 nlh->nlmsg_flags = nlmsg_flags;
71 72
72 r = NLMSG_DATA(nlh); 73 r = NLMSG_DATA(nlh);
73 if (sk->sk_state != TCP_TIME_WAIT) { 74 BUG_ON(sk->sk_state == TCP_TIME_WAIT);
74 if (ext & (1 << (INET_DIAG_MEMINFO - 1))) 75
75 minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, 76 if (ext & (1 << (INET_DIAG_MEMINFO - 1)))
76 sizeof(*minfo)); 77 minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, sizeof(*minfo));
77 if (ext & (1 << (INET_DIAG_INFO - 1))) 78
78 info = INET_DIAG_PUT(skb, INET_DIAG_INFO, 79 if (ext & (1 << (INET_DIAG_INFO - 1)))
79 handler->idiag_info_size); 80 info = INET_DIAG_PUT(skb, INET_DIAG_INFO,
80 81 handler->idiag_info_size);
81 if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) { 82
82 size_t len = strlen(icsk->icsk_ca_ops->name); 83 if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) {
83 strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1), 84 const size_t len = strlen(icsk->icsk_ca_ops->name);
84 icsk->icsk_ca_ops->name); 85
85 } 86 strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1),
87 icsk->icsk_ca_ops->name);
86 } 88 }
89
87 r->idiag_family = sk->sk_family; 90 r->idiag_family = sk->sk_family;
88 r->idiag_state = sk->sk_state; 91 r->idiag_state = sk->sk_state;
89 r->idiag_timer = 0; 92 r->idiag_timer = 0;
@@ -93,37 +96,6 @@ static int inet_diag_fill(struct sk_buff *skb, struct sock *sk,
93 r->id.idiag_cookie[0] = (u32)(unsigned long)sk; 96 r->id.idiag_cookie[0] = (u32)(unsigned long)sk;
94 r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); 97 r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
95 98
96 if (r->idiag_state == TCP_TIME_WAIT) {
97 const struct inet_timewait_sock *tw = inet_twsk(sk);
98 long tmo = tw->tw_ttd - jiffies;
99 if (tmo < 0)
100 tmo = 0;
101
102 r->id.idiag_sport = tw->tw_sport;
103 r->id.idiag_dport = tw->tw_dport;
104 r->id.idiag_src[0] = tw->tw_rcv_saddr;
105 r->id.idiag_dst[0] = tw->tw_daddr;
106 r->idiag_state = tw->tw_substate;
107 r->idiag_timer = 3;
108 r->idiag_expires = (tmo * 1000 + HZ - 1) / HZ;
109 r->idiag_rqueue = 0;
110 r->idiag_wqueue = 0;
111 r->idiag_uid = 0;
112 r->idiag_inode = 0;
113#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
114 if (r->idiag_family == AF_INET6) {
115 const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk);
116
117 ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
118 &tcp6tw->tw_v6_rcv_saddr);
119 ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
120 &tcp6tw->tw_v6_daddr);
121 }
122#endif
123 nlh->nlmsg_len = skb->tail - b;
124 return skb->len;
125 }
126
127 r->id.idiag_sport = inet->sport; 99 r->id.idiag_sport = inet->sport;
128 r->id.idiag_dport = inet->dport; 100 r->id.idiag_dport = inet->dport;
129 r->id.idiag_src[0] = inet->rcv_saddr; 101 r->id.idiag_src[0] = inet->rcv_saddr;
@@ -185,7 +157,75 @@ nlmsg_failure:
185 return -1; 157 return -1;
186} 158}
187 159
188static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) 160static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
161 struct sk_buff *skb, int ext, u32 pid,
162 u32 seq, u16 nlmsg_flags,
163 const struct nlmsghdr *unlh)
164{
165 long tmo;
166 struct inet_diag_msg *r;
167 const unsigned char *previous_tail = skb->tail;
168 struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq,
169 unlh->nlmsg_type, sizeof(*r));
170
171 r = NLMSG_DATA(nlh);
172 BUG_ON(tw->tw_state != TCP_TIME_WAIT);
173
174 nlh->nlmsg_flags = nlmsg_flags;
175
176 tmo = tw->tw_ttd - jiffies;
177 if (tmo < 0)
178 tmo = 0;
179
180 r->idiag_family = tw->tw_family;
181 r->idiag_state = tw->tw_state;
182 r->idiag_timer = 0;
183 r->idiag_retrans = 0;
184 r->id.idiag_if = tw->tw_bound_dev_if;
185 r->id.idiag_cookie[0] = (u32)(unsigned long)tw;
186 r->id.idiag_cookie[1] = (u32)(((unsigned long)tw >> 31) >> 1);
187 r->id.idiag_sport = tw->tw_sport;
188 r->id.idiag_dport = tw->tw_dport;
189 r->id.idiag_src[0] = tw->tw_rcv_saddr;
190 r->id.idiag_dst[0] = tw->tw_daddr;
191 r->idiag_state = tw->tw_substate;
192 r->idiag_timer = 3;
193 r->idiag_expires = (tmo * 1000 + HZ - 1) / HZ;
194 r->idiag_rqueue = 0;
195 r->idiag_wqueue = 0;
196 r->idiag_uid = 0;
197 r->idiag_inode = 0;
198#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
199 if (tw->tw_family == AF_INET6) {
200 const struct inet6_timewait_sock *tw6 =
201 inet6_twsk((struct sock *)tw);
202
203 ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
204 &tw6->tw_v6_rcv_saddr);
205 ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
206 &tw6->tw_v6_daddr);
207 }
208#endif
209 nlh->nlmsg_len = skb->tail - previous_tail;
210 return skb->len;
211nlmsg_failure:
212 skb_trim(skb, previous_tail - skb->data);
213 return -1;
214}
215
216static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
217 int ext, u32 pid, u32 seq, u16 nlmsg_flags,
218 const struct nlmsghdr *unlh)
219{
220 if (sk->sk_state == TCP_TIME_WAIT)
221 return inet_twsk_diag_fill((struct inet_timewait_sock *)sk,
222 skb, ext, pid, seq, nlmsg_flags,
223 unlh);
224 return inet_csk_diag_fill(sk, skb, ext, pid, seq, nlmsg_flags, unlh);
225}
226
227static int inet_diag_get_exact(struct sk_buff *in_skb,
228 const struct nlmsghdr *nlh)
189{ 229{
190 int err; 230 int err;
191 struct sock *sk; 231 struct sock *sk;
@@ -235,7 +275,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nl
235 if (!rep) 275 if (!rep)
236 goto out; 276 goto out;
237 277
238 if (inet_diag_fill(rep, sk, req->idiag_ext, 278 if (sk_diag_fill(sk, rep, req->idiag_ext,
239 NETLINK_CB(in_skb).pid, 279 NETLINK_CB(in_skb).pid,
240 nlh->nlmsg_seq, 0, nlh) <= 0) 280 nlh->nlmsg_seq, 0, nlh) <= 0)
241 BUG(); 281 BUG();
@@ -283,7 +323,7 @@ static int bitstring_match(const u32 *a1, const u32 *a2, int bits)
283 323
284 324
285static int inet_diag_bc_run(const void *bc, int len, 325static int inet_diag_bc_run(const void *bc, int len,
286 const struct inet_diag_entry *entry) 326 const struct inet_diag_entry *entry)
287{ 327{
288 while (len > 0) { 328 while (len > 0) {
289 int yes = 1; 329 int yes = 1;
@@ -322,7 +362,7 @@ static int inet_diag_bc_run(const void *bc, int len,
322 yes = 0; 362 yes = 0;
323 break; 363 break;
324 } 364 }
325 365
326 if (cond->prefix_len == 0) 366 if (cond->prefix_len == 0)
327 break; 367 break;
328 368
@@ -331,7 +371,8 @@ static int inet_diag_bc_run(const void *bc, int len,
331 else 371 else
332 addr = entry->daddr; 372 addr = entry->daddr;
333 373
334 if (bitstring_match(addr, cond->addr, cond->prefix_len)) 374 if (bitstring_match(addr, cond->addr,
375 cond->prefix_len))
335 break; 376 break;
336 if (entry->family == AF_INET6 && 377 if (entry->family == AF_INET6 &&
337 cond->family == AF_INET) { 378 cond->family == AF_INET) {
@@ -346,7 +387,7 @@ static int inet_diag_bc_run(const void *bc, int len,
346 } 387 }
347 } 388 }
348 389
349 if (yes) { 390 if (yes) {
350 len -= op->yes; 391 len -= op->yes;
351 bc += op->yes; 392 bc += op->yes;
352 } else { 393 } else {
@@ -407,14 +448,15 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
407 default: 448 default:
408 return -EINVAL; 449 return -EINVAL;
409 } 450 }
410 bc += op->yes; 451 bc += op->yes;
411 len -= op->yes; 452 len -= op->yes;
412 } 453 }
413 return len == 0 ? 0 : -EINVAL; 454 return len == 0 ? 0 : -EINVAL;
414} 455}
415 456
416static int inet_diag_dump_sock(struct sk_buff *skb, struct sock *sk, 457static int inet_csk_diag_dump(struct sock *sk,
417 struct netlink_callback *cb) 458 struct sk_buff *skb,
459 struct netlink_callback *cb)
418{ 460{
419 struct inet_diag_req *r = NLMSG_DATA(cb->nlh); 461 struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
420 462
@@ -444,14 +486,50 @@ static int inet_diag_dump_sock(struct sk_buff *skb, struct sock *sk,
444 return 0; 486 return 0;
445 } 487 }
446 488
447 return inet_diag_fill(skb, sk, r->idiag_ext, NETLINK_CB(cb->skb).pid, 489 return inet_csk_diag_fill(sk, skb, r->idiag_ext,
448 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); 490 NETLINK_CB(cb->skb).pid,
491 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
492}
493
494static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
495 struct sk_buff *skb,
496 struct netlink_callback *cb)
497{
498 struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
499
500 if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
501 struct inet_diag_entry entry;
502 struct rtattr *bc = (struct rtattr *)(r + 1);
503
504 entry.family = tw->tw_family;
505#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
506 if (tw->tw_family == AF_INET6) {
507 struct inet6_timewait_sock *tw6 =
508 inet6_twsk((struct sock *)tw);
509 entry.saddr = tw6->tw_v6_rcv_saddr.s6_addr32;
510 entry.daddr = tw6->tw_v6_daddr.s6_addr32;
511 } else
512#endif
513 {
514 entry.saddr = &tw->tw_rcv_saddr;
515 entry.daddr = &tw->tw_daddr;
516 }
517 entry.sport = tw->tw_num;
518 entry.dport = ntohs(tw->tw_dport);
519 entry.userlocks = 0;
520
521 if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
522 return 0;
523 }
524
525 return inet_twsk_diag_fill(tw, skb, r->idiag_ext,
526 NETLINK_CB(cb->skb).pid,
527 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
449} 528}
450 529
451static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, 530static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
452 struct request_sock *req, 531 struct request_sock *req, u32 pid, u32 seq,
453 u32 pid, u32 seq, 532 const struct nlmsghdr *unlh)
454 const struct nlmsghdr *unlh)
455{ 533{
456 const struct inet_request_sock *ireq = inet_rsk(req); 534 const struct inet_request_sock *ireq = inet_rsk(req);
457 struct inet_sock *inet = inet_sk(sk); 535 struct inet_sock *inet = inet_sk(sk);
@@ -489,9 +567,9 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
489#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 567#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
490 if (r->idiag_family == AF_INET6) { 568 if (r->idiag_family == AF_INET6) {
491 ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, 569 ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
492 &tcp6_rsk(req)->loc_addr); 570 &inet6_rsk(req)->loc_addr);
493 ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, 571 ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
494 &tcp6_rsk(req)->rmt_addr); 572 &inet6_rsk(req)->rmt_addr);
495 } 573 }
496#endif 574#endif
497 nlh->nlmsg_len = skb->tail - b; 575 nlh->nlmsg_len = skb->tail - b;
@@ -504,7 +582,7 @@ nlmsg_failure:
504} 582}
505 583
506static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, 584static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
507 struct netlink_callback *cb) 585 struct netlink_callback *cb)
508{ 586{
509 struct inet_diag_entry entry; 587 struct inet_diag_entry entry;
510 struct inet_diag_req *r = NLMSG_DATA(cb->nlh); 588 struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
@@ -553,13 +631,13 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
553 entry.saddr = 631 entry.saddr =
554#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 632#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
555 (entry.family == AF_INET6) ? 633 (entry.family == AF_INET6) ?
556 tcp6_rsk(req)->loc_addr.s6_addr32 : 634 inet6_rsk(req)->loc_addr.s6_addr32 :
557#endif 635#endif
558 &ireq->loc_addr; 636 &ireq->loc_addr;
559 entry.daddr = 637 entry.daddr =
560#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 638#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
561 (entry.family == AF_INET6) ? 639 (entry.family == AF_INET6) ?
562 tcp6_rsk(req)->rmt_addr.s6_addr32 : 640 inet6_rsk(req)->rmt_addr.s6_addr32 :
563#endif 641#endif
564 &ireq->rmt_addr; 642 &ireq->rmt_addr;
565 entry.dport = ntohs(ireq->rmt_port); 643 entry.dport = ntohs(ireq->rmt_port);
@@ -599,7 +677,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
599 handler = inet_diag_table[cb->nlh->nlmsg_type]; 677 handler = inet_diag_table[cb->nlh->nlmsg_type];
600 BUG_ON(handler == NULL); 678 BUG_ON(handler == NULL);
601 hashinfo = handler->idiag_hashinfo; 679 hashinfo = handler->idiag_hashinfo;
602 680
603 s_i = cb->args[1]; 681 s_i = cb->args[1];
604 s_num = num = cb->args[2]; 682 s_num = num = cb->args[2];
605 683
@@ -630,7 +708,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
630 cb->args[3] > 0) 708 cb->args[3] > 0)
631 goto syn_recv; 709 goto syn_recv;
632 710
633 if (inet_diag_dump_sock(skb, sk, cb) < 0) { 711 if (inet_csk_diag_dump(sk, skb, cb) < 0) {
634 inet_listen_unlock(hashinfo); 712 inet_listen_unlock(hashinfo);
635 goto done; 713 goto done;
636 } 714 }
@@ -672,7 +750,6 @@ skip_listen_ht:
672 s_num = 0; 750 s_num = 0;
673 751
674 read_lock_bh(&head->lock); 752 read_lock_bh(&head->lock);
675
676 num = 0; 753 num = 0;
677 sk_for_each(sk, node, &head->chain) { 754 sk_for_each(sk, node, &head->chain) {
678 struct inet_sock *inet = inet_sk(sk); 755 struct inet_sock *inet = inet_sk(sk);
@@ -684,9 +761,10 @@ skip_listen_ht:
684 if (r->id.idiag_sport != inet->sport && 761 if (r->id.idiag_sport != inet->sport &&
685 r->id.idiag_sport) 762 r->id.idiag_sport)
686 goto next_normal; 763 goto next_normal;
687 if (r->id.idiag_dport != inet->dport && r->id.idiag_dport) 764 if (r->id.idiag_dport != inet->dport &&
765 r->id.idiag_dport)
688 goto next_normal; 766 goto next_normal;
689 if (inet_diag_dump_sock(skb, sk, cb) < 0) { 767 if (inet_csk_diag_dump(sk, skb, cb) < 0) {
690 read_unlock_bh(&head->lock); 768 read_unlock_bh(&head->lock);
691 goto done; 769 goto done;
692 } 770 }
@@ -695,19 +773,20 @@ next_normal:
695 } 773 }
696 774
697 if (r->idiag_states & TCPF_TIME_WAIT) { 775 if (r->idiag_states & TCPF_TIME_WAIT) {
698 sk_for_each(sk, node, 776 struct inet_timewait_sock *tw;
777
778 inet_twsk_for_each(tw, node,
699 &hashinfo->ehash[i + hashinfo->ehash_size].chain) { 779 &hashinfo->ehash[i + hashinfo->ehash_size].chain) {
700 struct inet_sock *inet = inet_sk(sk);
701 780
702 if (num < s_num) 781 if (num < s_num)
703 goto next_dying; 782 goto next_dying;
704 if (r->id.idiag_sport != inet->sport && 783 if (r->id.idiag_sport != tw->tw_sport &&
705 r->id.idiag_sport) 784 r->id.idiag_sport)
706 goto next_dying; 785 goto next_dying;
707 if (r->id.idiag_dport != inet->dport && 786 if (r->id.idiag_dport != tw->tw_dport &&
708 r->id.idiag_dport) 787 r->id.idiag_dport)
709 goto next_dying; 788 goto next_dying;
710 if (inet_diag_dump_sock(skb, sk, cb) < 0) { 789 if (inet_twsk_diag_dump(tw, skb, cb) < 0) {
711 read_unlock_bh(&head->lock); 790 read_unlock_bh(&head->lock);
712 goto done; 791 goto done;
713 } 792 }
@@ -724,8 +803,7 @@ done:
724 return skb->len; 803 return skb->len;
725} 804}
726 805
727static __inline__ int 806static inline int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
728inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
729{ 807{
730 if (!(nlh->nlmsg_flags&NLM_F_REQUEST)) 808 if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
731 return 0; 809 return 0;
@@ -755,9 +833,8 @@ inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
755 } 833 }
756 return netlink_dump_start(idiagnl, skb, nlh, 834 return netlink_dump_start(idiagnl, skb, nlh,
757 inet_diag_dump, NULL); 835 inet_diag_dump, NULL);
758 } else { 836 } else
759 return inet_diag_get_exact(skb, nlh); 837 return inet_diag_get_exact(skb, nlh);
760 }
761 838
762err_inval: 839err_inval:
763 return -EINVAL; 840 return -EINVAL;
@@ -766,15 +843,15 @@ err_inval:
766 843
767static inline void inet_diag_rcv_skb(struct sk_buff *skb) 844static inline void inet_diag_rcv_skb(struct sk_buff *skb)
768{ 845{
769 int err;
770 struct nlmsghdr * nlh;
771
772 if (skb->len >= NLMSG_SPACE(0)) { 846 if (skb->len >= NLMSG_SPACE(0)) {
773 nlh = (struct nlmsghdr *)skb->data; 847 int err;
774 if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) 848 struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
849
850 if (nlh->nlmsg_len < sizeof(*nlh) ||
851 skb->len < nlh->nlmsg_len)
775 return; 852 return;
776 err = inet_diag_rcv_msg(skb, nlh); 853 err = inet_diag_rcv_msg(skb, nlh);
777 if (err || nlh->nlmsg_flags & NLM_F_ACK) 854 if (err || nlh->nlmsg_flags & NLM_F_ACK)
778 netlink_ack(skb, nlh, err); 855 netlink_ack(skb, nlh, err);
779 } 856 }
780} 857}
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index e8d29fe736d2..33228115cda4 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -15,12 +15,14 @@
15 15
16#include <linux/config.h> 16#include <linux/config.h>
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/random.h>
18#include <linux/sched.h> 19#include <linux/sched.h>
19#include <linux/slab.h> 20#include <linux/slab.h>
20#include <linux/wait.h> 21#include <linux/wait.h>
21 22
22#include <net/inet_connection_sock.h> 23#include <net/inet_connection_sock.h>
23#include <net/inet_hashtables.h> 24#include <net/inet_hashtables.h>
25#include <net/ip.h>
24 26
25/* 27/*
26 * Allocate and initialize a new local port bind bucket. 28 * Allocate and initialize a new local port bind bucket.
@@ -163,3 +165,179 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad
163} 165}
164 166
165EXPORT_SYMBOL_GPL(__inet_lookup_listener); 167EXPORT_SYMBOL_GPL(__inet_lookup_listener);
168
169/* called with local bh disabled */
170static int __inet_check_established(struct inet_timewait_death_row *death_row,
171 struct sock *sk, __u16 lport,
172 struct inet_timewait_sock **twp)
173{
174 struct inet_hashinfo *hinfo = death_row->hashinfo;
175 struct inet_sock *inet = inet_sk(sk);
176 u32 daddr = inet->rcv_saddr;
177 u32 saddr = inet->daddr;
178 int dif = sk->sk_bound_dev_if;
179 INET_ADDR_COOKIE(acookie, saddr, daddr)
180 const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
181 unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
182 struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
183 struct sock *sk2;
184 const struct hlist_node *node;
185 struct inet_timewait_sock *tw;
186
187 prefetch(head->chain.first);
188 write_lock(&head->lock);
189
190 /* Check TIME-WAIT sockets first. */
191 sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) {
192 tw = inet_twsk(sk2);
193
194 if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
195 if (twsk_unique(sk, sk2, twp))
196 goto unique;
197 else
198 goto not_unique;
199 }
200 }
201 tw = NULL;
202
203 /* And established part... */
204 sk_for_each(sk2, node, &head->chain) {
205 if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
206 goto not_unique;
207 }
208
209unique:
210 /* Must record num and sport now. Otherwise we will see
211 * in hash table socket with a funny identity. */
212 inet->num = lport;
213 inet->sport = htons(lport);
214 sk->sk_hash = hash;
215 BUG_TRAP(sk_unhashed(sk));
216 __sk_add_node(sk, &head->chain);
217 sock_prot_inc_use(sk->sk_prot);
218 write_unlock(&head->lock);
219
220 if (twp) {
221 *twp = tw;
222 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
223 } else if (tw) {
224 /* Silly. Should hash-dance instead... */
225 inet_twsk_deschedule(tw, death_row);
226 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
227
228 inet_twsk_put(tw);
229 }
230
231 return 0;
232
233not_unique:
234 write_unlock(&head->lock);
235 return -EADDRNOTAVAIL;
236}
237
238static inline u32 inet_sk_port_offset(const struct sock *sk)
239{
240 const struct inet_sock *inet = inet_sk(sk);
241 return secure_ipv4_port_ephemeral(inet->rcv_saddr, inet->daddr,
242 inet->dport);
243}
244
245/*
246 * Bind a port for a connect operation and hash it.
247 */
248int inet_hash_connect(struct inet_timewait_death_row *death_row,
249 struct sock *sk)
250{
251 struct inet_hashinfo *hinfo = death_row->hashinfo;
252 const unsigned short snum = inet_sk(sk)->num;
253 struct inet_bind_hashbucket *head;
254 struct inet_bind_bucket *tb;
255 int ret;
256
257 if (!snum) {
258 int low = sysctl_local_port_range[0];
259 int high = sysctl_local_port_range[1];
260 int range = high - low;
261 int i;
262 int port;
263 static u32 hint;
264 u32 offset = hint + inet_sk_port_offset(sk);
265 struct hlist_node *node;
266 struct inet_timewait_sock *tw = NULL;
267
268 local_bh_disable();
269 for (i = 1; i <= range; i++) {
270 port = low + (i + offset) % range;
271 head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
272 spin_lock(&head->lock);
273
274 /* Does not bother with rcv_saddr checks,
275 * because the established check is already
276 * unique enough.
277 */
278 inet_bind_bucket_for_each(tb, node, &head->chain) {
279 if (tb->port == port) {
280 BUG_TRAP(!hlist_empty(&tb->owners));
281 if (tb->fastreuse >= 0)
282 goto next_port;
283 if (!__inet_check_established(death_row,
284 sk, port,
285 &tw))
286 goto ok;
287 goto next_port;
288 }
289 }
290
291 tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port);
292 if (!tb) {
293 spin_unlock(&head->lock);
294 break;
295 }
296 tb->fastreuse = -1;
297 goto ok;
298
299 next_port:
300 spin_unlock(&head->lock);
301 }
302 local_bh_enable();
303
304 return -EADDRNOTAVAIL;
305
306ok:
307 hint += i;
308
309 /* Head lock still held and bh's disabled */
310 inet_bind_hash(sk, tb, port);
311 if (sk_unhashed(sk)) {
312 inet_sk(sk)->sport = htons(port);
313 __inet_hash(hinfo, sk, 0);
314 }
315 spin_unlock(&head->lock);
316
317 if (tw) {
318 inet_twsk_deschedule(tw, death_row);;
319 inet_twsk_put(tw);
320 }
321
322 ret = 0;
323 goto out;
324 }
325
326 head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
327 tb = inet_csk(sk)->icsk_bind_hash;
328 spin_lock_bh(&head->lock);
329 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
330 __inet_hash(hinfo, sk, 0);
331 spin_unlock_bh(&head->lock);
332 return 0;
333 } else {
334 spin_unlock(&head->lock);
335 /* No definite answer... Walk to established hash table */
336 ret = __inet_check_established(death_row, sk, snum, NULL);
337out:
338 local_bh_enable();
339 return ret;
340 }
341}
342
343EXPORT_SYMBOL_GPL(inet_hash_connect);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index a010e9a68811..417f126c749e 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -90,8 +90,9 @@ EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
90 90
91struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state) 91struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state)
92{ 92{
93 struct inet_timewait_sock *tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_slab, 93 struct inet_timewait_sock *tw =
94 SLAB_ATOMIC); 94 kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
95 SLAB_ATOMIC);
95 if (tw != NULL) { 96 if (tw != NULL) {
96 const struct inet_sock *inet = inet_sk(sk); 97 const struct inet_sock *inet = inet_sk(sk);
97 98
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 2fc3fd38924f..2160874ce7aa 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -304,8 +304,7 @@ static void unlink_from_pool(struct inet_peer *p)
304 /* look for a node to insert instead of p */ 304 /* look for a node to insert instead of p */
305 struct inet_peer *t; 305 struct inet_peer *t;
306 t = lookup_rightempty(p); 306 t = lookup_rightempty(p);
307 if (*stackptr[-1] != t) 307 BUG_ON(*stackptr[-1] != t);
308 BUG();
309 **--stackptr = t->avl_left; 308 **--stackptr = t->avl_left;
310 /* t is removed, t->v4daddr > x->v4daddr for any 309 /* t is removed, t->v4daddr > x->v4daddr for any
311 * x in p->avl_left subtree. 310 * x in p->avl_left subtree.
@@ -314,8 +313,7 @@ static void unlink_from_pool(struct inet_peer *p)
314 t->avl_left = p->avl_left; 313 t->avl_left = p->avl_left;
315 t->avl_right = p->avl_right; 314 t->avl_right = p->avl_right;
316 t->avl_height = p->avl_height; 315 t->avl_height = p->avl_height;
317 if (delp[1] != &p->avl_left) 316 BUG_ON(delp[1] != &p->avl_left);
318 BUG();
319 delp[1] = &t->avl_left; /* was &p->avl_left */ 317 delp[1] = &t->avl_left; /* was &p->avl_left */
320 } 318 }
321 peer_avl_rebalance(stack, stackptr); 319 peer_avl_rebalance(stack, stackptr);
@@ -401,6 +399,7 @@ struct inet_peer *inet_getpeer(__u32 daddr, int create)
401 return NULL; 399 return NULL;
402 n->v4daddr = daddr; 400 n->v4daddr = daddr;
403 atomic_set(&n->refcnt, 1); 401 atomic_set(&n->refcnt, 1);
402 atomic_set(&n->rid, 0);
404 n->ip_id_count = secure_ip_id(daddr); 403 n->ip_id_count = secure_ip_id(daddr);
405 n->tcp_ts_stamp = 0; 404 n->tcp_ts_stamp = 0;
406 405
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 8ce0ce2ee48e..2a8adda15e11 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -22,6 +22,7 @@
22 * Patrick McHardy : LRU queue of frag heads for evictor. 22 * Patrick McHardy : LRU queue of frag heads for evictor.
23 */ 23 */
24 24
25#include <linux/compiler.h>
25#include <linux/config.h> 26#include <linux/config.h>
26#include <linux/module.h> 27#include <linux/module.h>
27#include <linux/types.h> 28#include <linux/types.h>
@@ -38,6 +39,7 @@
38#include <net/ip.h> 39#include <net/ip.h>
39#include <net/icmp.h> 40#include <net/icmp.h>
40#include <net/checksum.h> 41#include <net/checksum.h>
42#include <net/inetpeer.h>
41#include <linux/tcp.h> 43#include <linux/tcp.h>
42#include <linux/udp.h> 44#include <linux/udp.h>
43#include <linux/inet.h> 45#include <linux/inet.h>
@@ -56,6 +58,8 @@
56int sysctl_ipfrag_high_thresh = 256*1024; 58int sysctl_ipfrag_high_thresh = 256*1024;
57int sysctl_ipfrag_low_thresh = 192*1024; 59int sysctl_ipfrag_low_thresh = 192*1024;
58 60
61int sysctl_ipfrag_max_dist = 64;
62
59/* Important NOTE! Fragment queue must be destroyed before MSL expires. 63/* Important NOTE! Fragment queue must be destroyed before MSL expires.
60 * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. 64 * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
61 */ 65 */
@@ -89,8 +93,10 @@ struct ipq {
89 spinlock_t lock; 93 spinlock_t lock;
90 atomic_t refcnt; 94 atomic_t refcnt;
91 struct timer_list timer; /* when will this queue expire? */ 95 struct timer_list timer; /* when will this queue expire? */
92 int iif;
93 struct timeval stamp; 96 struct timeval stamp;
97 int iif;
98 unsigned int rid;
99 struct inet_peer *peer;
94}; 100};
95 101
96/* Hash table. */ 102/* Hash table. */
@@ -195,6 +201,9 @@ static void ip_frag_destroy(struct ipq *qp, int *work)
195 BUG_TRAP(qp->last_in&COMPLETE); 201 BUG_TRAP(qp->last_in&COMPLETE);
196 BUG_TRAP(del_timer(&qp->timer) == 0); 202 BUG_TRAP(del_timer(&qp->timer) == 0);
197 203
204 if (qp->peer)
205 inet_putpeer(qp->peer);
206
198 /* Release all fragment data. */ 207 /* Release all fragment data. */
199 fp = qp->fragments; 208 fp = qp->fragments;
200 while (fp) { 209 while (fp) {
@@ -353,6 +362,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user)
353 qp->meat = 0; 362 qp->meat = 0;
354 qp->fragments = NULL; 363 qp->fragments = NULL;
355 qp->iif = 0; 364 qp->iif = 0;
365 qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL;
356 366
357 /* Initialize a timer for this entry. */ 367 /* Initialize a timer for this entry. */
358 init_timer(&qp->timer); 368 init_timer(&qp->timer);
@@ -373,7 +383,7 @@ out_nomem:
373 */ 383 */
374static inline struct ipq *ip_find(struct iphdr *iph, u32 user) 384static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
375{ 385{
376 __u16 id = iph->id; 386 __be16 id = iph->id;
377 __u32 saddr = iph->saddr; 387 __u32 saddr = iph->saddr;
378 __u32 daddr = iph->daddr; 388 __u32 daddr = iph->daddr;
379 __u8 protocol = iph->protocol; 389 __u8 protocol = iph->protocol;
@@ -398,6 +408,56 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
398 return ip_frag_create(hash, iph, user); 408 return ip_frag_create(hash, iph, user);
399} 409}
400 410
411/* Is the fragment too far ahead to be part of ipq? */
412static inline int ip_frag_too_far(struct ipq *qp)
413{
414 struct inet_peer *peer = qp->peer;
415 unsigned int max = sysctl_ipfrag_max_dist;
416 unsigned int start, end;
417
418 int rc;
419
420 if (!peer || !max)
421 return 0;
422
423 start = qp->rid;
424 end = atomic_inc_return(&peer->rid);
425 qp->rid = end;
426
427 rc = qp->fragments && (end - start) > max;
428
429 if (rc) {
430 IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
431 }
432
433 return rc;
434}
435
436static int ip_frag_reinit(struct ipq *qp)
437{
438 struct sk_buff *fp;
439
440 if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) {
441 atomic_inc(&qp->refcnt);
442 return -ETIMEDOUT;
443 }
444
445 fp = qp->fragments;
446 do {
447 struct sk_buff *xp = fp->next;
448 frag_kfree_skb(fp, NULL);
449 fp = xp;
450 } while (fp);
451
452 qp->last_in = 0;
453 qp->len = 0;
454 qp->meat = 0;
455 qp->fragments = NULL;
456 qp->iif = 0;
457
458 return 0;
459}
460
401/* Add new segment to existing queue. */ 461/* Add new segment to existing queue. */
402static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) 462static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
403{ 463{
@@ -408,6 +468,12 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
408 if (qp->last_in & COMPLETE) 468 if (qp->last_in & COMPLETE)
409 goto err; 469 goto err;
410 470
471 if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
472 unlikely(ip_frag_too_far(qp)) && unlikely(ip_frag_reinit(qp))) {
473 ipq_kill(qp);
474 goto err;
475 }
476
411 offset = ntohs(skb->nh.iph->frag_off); 477 offset = ntohs(skb->nh.iph->frag_off);
412 flags = offset & ~IP_OFFSET; 478 flags = offset & ~IP_OFFSET;
413 offset &= IP_OFFSET; 479 offset &= IP_OFFSET;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 46f9d9cf7a5f..abe23923e4e7 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -10,6 +10,7 @@
10 * 10 *
11 */ 11 */
12 12
13#include <linux/capability.h>
13#include <linux/config.h> 14#include <linux/config.h>
14#include <linux/module.h> 15#include <linux/module.h>
15#include <linux/types.h> 16#include <linux/types.h>
@@ -28,6 +29,7 @@
28#include <linux/inetdevice.h> 29#include <linux/inetdevice.h>
29#include <linux/igmp.h> 30#include <linux/igmp.h>
30#include <linux/netfilter_ipv4.h> 31#include <linux/netfilter_ipv4.h>
32#include <linux/if_ether.h>
31 33
32#include <net/sock.h> 34#include <net/sock.h>
33#include <net/ip.h> 35#include <net/ip.h>
@@ -187,7 +189,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(u32 remote, u32 local, u32 key)
187 } 189 }
188 190
189 if (ipgre_fb_tunnel_dev->flags&IFF_UP) 191 if (ipgre_fb_tunnel_dev->flags&IFF_UP)
190 return ipgre_fb_tunnel_dev->priv; 192 return netdev_priv(ipgre_fb_tunnel_dev);
191 return NULL; 193 return NULL;
192} 194}
193 195
@@ -277,7 +279,7 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int
277 return NULL; 279 return NULL;
278 280
279 dev->init = ipgre_tunnel_init; 281 dev->init = ipgre_tunnel_init;
280 nt = dev->priv; 282 nt = netdev_priv(dev);
281 nt->parms = *parms; 283 nt->parms = *parms;
282 284
283 if (register_netdevice(dev) < 0) { 285 if (register_netdevice(dev) < 0) {
@@ -285,9 +287,6 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int
285 goto failed; 287 goto failed;
286 } 288 }
287 289
288 nt = dev->priv;
289 nt->parms = *parms;
290
291 dev_hold(dev); 290 dev_hold(dev);
292 ipgre_tunnel_link(nt); 291 ipgre_tunnel_link(nt);
293 return nt; 292 return nt;
@@ -298,7 +297,7 @@ failed:
298 297
299static void ipgre_tunnel_uninit(struct net_device *dev) 298static void ipgre_tunnel_uninit(struct net_device *dev)
300{ 299{
301 ipgre_tunnel_unlink((struct ip_tunnel*)dev->priv); 300 ipgre_tunnel_unlink(netdev_priv(dev));
302 dev_put(dev); 301 dev_put(dev);
303} 302}
304 303
@@ -517,7 +516,7 @@ out:
517 skb2->dst->ops->update_pmtu(skb2->dst, rel_info); 516 skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
518 rel_info = htonl(rel_info); 517 rel_info = htonl(rel_info);
519 } else if (type == ICMP_TIME_EXCEEDED) { 518 } else if (type == ICMP_TIME_EXCEEDED) {
520 struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv; 519 struct ip_tunnel *t = netdev_priv(skb2->dev);
521 if (t->parms.iph.ttl) { 520 if (t->parms.iph.ttl) {
522 rel_type = ICMP_DEST_UNREACH; 521 rel_type = ICMP_DEST_UNREACH;
523 rel_code = ICMP_HOST_UNREACH; 522 rel_code = ICMP_HOST_UNREACH;
@@ -668,7 +667,7 @@ drop_nolock:
668 667
669static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 668static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
670{ 669{
671 struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; 670 struct ip_tunnel *tunnel = netdev_priv(dev);
672 struct net_device_stats *stats = &tunnel->stat; 671 struct net_device_stats *stats = &tunnel->stat;
673 struct iphdr *old_iph = skb->nh.iph; 672 struct iphdr *old_iph = skb->nh.iph;
674 struct iphdr *tiph; 673 struct iphdr *tiph;
@@ -831,6 +830,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
831 skb->h.raw = skb->nh.raw; 830 skb->h.raw = skb->nh.raw;
832 skb->nh.raw = skb_push(skb, gre_hlen); 831 skb->nh.raw = skb_push(skb, gre_hlen);
833 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 832 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
833 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED);
834 dst_release(skb->dst); 834 dst_release(skb->dst);
835 skb->dst = &rt->u.dst; 835 skb->dst = &rt->u.dst;
836 836
@@ -913,7 +913,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
913 t = ipgre_tunnel_locate(&p, 0); 913 t = ipgre_tunnel_locate(&p, 0);
914 } 914 }
915 if (t == NULL) 915 if (t == NULL)
916 t = (struct ip_tunnel*)dev->priv; 916 t = netdev_priv(dev);
917 memcpy(&p, &t->parms, sizeof(p)); 917 memcpy(&p, &t->parms, sizeof(p));
918 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) 918 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
919 err = -EFAULT; 919 err = -EFAULT;
@@ -953,7 +953,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
953 } else { 953 } else {
954 unsigned nflags=0; 954 unsigned nflags=0;
955 955
956 t = (struct ip_tunnel*)dev->priv; 956 t = netdev_priv(dev);
957 957
958 if (MULTICAST(p.iph.daddr)) 958 if (MULTICAST(p.iph.daddr))
959 nflags = IFF_BROADCAST; 959 nflags = IFF_BROADCAST;
@@ -1002,7 +1002,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1002 if ((t = ipgre_tunnel_locate(&p, 0)) == NULL) 1002 if ((t = ipgre_tunnel_locate(&p, 0)) == NULL)
1003 goto done; 1003 goto done;
1004 err = -EPERM; 1004 err = -EPERM;
1005 if (t == ipgre_fb_tunnel_dev->priv) 1005 if (t == netdev_priv(ipgre_fb_tunnel_dev))
1006 goto done; 1006 goto done;
1007 dev = t->dev; 1007 dev = t->dev;
1008 } 1008 }
@@ -1019,12 +1019,12 @@ done:
1019 1019
1020static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev) 1020static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
1021{ 1021{
1022 return &(((struct ip_tunnel*)dev->priv)->stat); 1022 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
1023} 1023}
1024 1024
1025static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) 1025static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1026{ 1026{
1027 struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; 1027 struct ip_tunnel *tunnel = netdev_priv(dev);
1028 if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen) 1028 if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
1029 return -EINVAL; 1029 return -EINVAL;
1030 dev->mtu = new_mtu; 1030 dev->mtu = new_mtu;
@@ -1064,7 +1064,7 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1064static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, 1064static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
1065 void *daddr, void *saddr, unsigned len) 1065 void *daddr, void *saddr, unsigned len)
1066{ 1066{
1067 struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; 1067 struct ip_tunnel *t = netdev_priv(dev);
1068 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); 1068 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1069 u16 *p = (u16*)(iph+1); 1069 u16 *p = (u16*)(iph+1);
1070 1070
@@ -1091,7 +1091,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned sh
1091 1091
1092static int ipgre_open(struct net_device *dev) 1092static int ipgre_open(struct net_device *dev)
1093{ 1093{
1094 struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; 1094 struct ip_tunnel *t = netdev_priv(dev);
1095 1095
1096 if (MULTICAST(t->parms.iph.daddr)) { 1096 if (MULTICAST(t->parms.iph.daddr)) {
1097 struct flowi fl = { .oif = t->parms.link, 1097 struct flowi fl = { .oif = t->parms.link,
@@ -1115,7 +1115,7 @@ static int ipgre_open(struct net_device *dev)
1115 1115
1116static int ipgre_close(struct net_device *dev) 1116static int ipgre_close(struct net_device *dev)
1117{ 1117{
1118 struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; 1118 struct ip_tunnel *t = netdev_priv(dev);
1119 if (MULTICAST(t->parms.iph.daddr) && t->mlink) { 1119 if (MULTICAST(t->parms.iph.daddr) && t->mlink) {
1120 struct in_device *in_dev = inetdev_by_index(t->mlink); 1120 struct in_device *in_dev = inetdev_by_index(t->mlink);
1121 if (in_dev) { 1121 if (in_dev) {
@@ -1140,7 +1140,7 @@ static void ipgre_tunnel_setup(struct net_device *dev)
1140 1140
1141 dev->type = ARPHRD_IPGRE; 1141 dev->type = ARPHRD_IPGRE;
1142 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4; 1142 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1143 dev->mtu = 1500 - sizeof(struct iphdr) - 4; 1143 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1144 dev->flags = IFF_NOARP; 1144 dev->flags = IFF_NOARP;
1145 dev->iflink = 0; 1145 dev->iflink = 0;
1146 dev->addr_len = 4; 1146 dev->addr_len = 4;
@@ -1152,10 +1152,10 @@ static int ipgre_tunnel_init(struct net_device *dev)
1152 struct ip_tunnel *tunnel; 1152 struct ip_tunnel *tunnel;
1153 struct iphdr *iph; 1153 struct iphdr *iph;
1154 int hlen = LL_MAX_HEADER; 1154 int hlen = LL_MAX_HEADER;
1155 int mtu = 1500; 1155 int mtu = ETH_DATA_LEN;
1156 int addend = sizeof(struct iphdr) + 4; 1156 int addend = sizeof(struct iphdr) + 4;
1157 1157
1158 tunnel = (struct ip_tunnel*)dev->priv; 1158 tunnel = netdev_priv(dev);
1159 iph = &tunnel->parms.iph; 1159 iph = &tunnel->parms.iph;
1160 1160
1161 tunnel->dev = dev; 1161 tunnel->dev = dev;
@@ -1219,7 +1219,7 @@ static int ipgre_tunnel_init(struct net_device *dev)
1219 1219
1220static int __init ipgre_fb_tunnel_init(struct net_device *dev) 1220static int __init ipgre_fb_tunnel_init(struct net_device *dev)
1221{ 1221{
1222 struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; 1222 struct ip_tunnel *tunnel = netdev_priv(dev);
1223 struct iphdr *iph = &tunnel->parms.iph; 1223 struct iphdr *iph = &tunnel->parms.iph;
1224 1224
1225 tunnel->dev = dev; 1225 tunnel->dev = dev;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 473d0f2b2e0d..18d7fad474d7 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -128,6 +128,7 @@
128#include <linux/sockios.h> 128#include <linux/sockios.h>
129#include <linux/in.h> 129#include <linux/in.h>
130#include <linux/inet.h> 130#include <linux/inet.h>
131#include <linux/inetdevice.h>
131#include <linux/netdevice.h> 132#include <linux/netdevice.h>
132#include <linux/etherdevice.h> 133#include <linux/etherdevice.h>
133 134
@@ -184,7 +185,6 @@ int ip_call_ra_chain(struct sk_buff *skb)
184 raw_rcv(last, skb2); 185 raw_rcv(last, skb2);
185 } 186 }
186 last = sk; 187 last = sk;
187 nf_reset(skb);
188 } 188 }
189 } 189 }
190 190
@@ -203,10 +203,6 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
203 203
204 __skb_pull(skb, ihl); 204 __skb_pull(skb, ihl);
205 205
206 /* Free reference early: we don't need it any more, and it may
207 hold ip_conntrack module loaded indefinitely. */
208 nf_reset(skb);
209
210 /* Point into the IP datagram, just past the header. */ 206 /* Point into the IP datagram, just past the header. */
211 skb->h.raw = skb->data; 207 skb->h.raw = skb->data;
212 208
@@ -231,10 +227,12 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
231 if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) { 227 if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) {
232 int ret; 228 int ret;
233 229
234 if (!ipprot->no_policy && 230 if (!ipprot->no_policy) {
235 !xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 231 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
236 kfree_skb(skb); 232 kfree_skb(skb);
237 goto out; 233 goto out;
234 }
235 nf_reset(skb);
238 } 236 }
239 ret = ipprot->handler(skb); 237 ret = ipprot->handler(skb);
240 if (ret < 0) { 238 if (ret < 0) {
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index dbe12da8d8b3..9bebad07bf2e 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -11,6 +11,7 @@
11 * 11 *
12 */ 12 */
13 13
14#include <linux/capability.h>
14#include <linux/module.h> 15#include <linux/module.h>
15#include <linux/types.h> 16#include <linux/types.h>
16#include <asm/uaccess.h> 17#include <asm/uaccess.h>
@@ -22,6 +23,7 @@
22#include <net/sock.h> 23#include <net/sock.h>
23#include <net/ip.h> 24#include <net/ip.h>
24#include <net/icmp.h> 25#include <net/icmp.h>
26#include <net/route.h>
25 27
26/* 28/*
27 * Write options to IP header, record destination address to 29 * Write options to IP header, record destination address to
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index eba64e2bd397..3324fbfe528a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -69,6 +69,7 @@
69#include <net/ip.h> 69#include <net/ip.h>
70#include <net/protocol.h> 70#include <net/protocol.h>
71#include <net/route.h> 71#include <net/route.h>
72#include <net/xfrm.h>
72#include <linux/skbuff.h> 73#include <linux/skbuff.h>
73#include <net/sock.h> 74#include <net/sock.h>
74#include <net/arp.h> 75#include <net/arp.h>
@@ -85,6 +86,8 @@
85 86
86int sysctl_ip_default_ttl = IPDEFTTL; 87int sysctl_ip_default_ttl = IPDEFTTL;
87 88
89static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*));
90
88/* Generate a checksum for an outgoing IP datagram. */ 91/* Generate a checksum for an outgoing IP datagram. */
89__inline__ void ip_send_check(struct iphdr *iph) 92__inline__ void ip_send_check(struct iphdr *iph)
90{ 93{
@@ -202,13 +205,16 @@ static inline int ip_finish_output2(struct sk_buff *skb)
202 205
203static inline int ip_finish_output(struct sk_buff *skb) 206static inline int ip_finish_output(struct sk_buff *skb)
204{ 207{
205 struct net_device *dev = skb->dst->dev; 208#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
206 209 /* Policy lookup after SNAT yielded a new policy */
207 skb->dev = dev; 210 if (skb->dst->xfrm != NULL)
208 skb->protocol = htons(ETH_P_IP); 211 return xfrm4_output_finish(skb);
209 212#endif
210 return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev, 213 if (skb->len > dst_mtu(skb->dst) &&
211 ip_finish_output2); 214 !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
215 return ip_fragment(skb, ip_finish_output2);
216 else
217 return ip_finish_output2(skb);
212} 218}
213 219
214int ip_mc_output(struct sk_buff *skb) 220int ip_mc_output(struct sk_buff *skb)
@@ -265,21 +271,21 @@ int ip_mc_output(struct sk_buff *skb)
265 newskb->dev, ip_dev_loopback_xmit); 271 newskb->dev, ip_dev_loopback_xmit);
266 } 272 }
267 273
268 if (skb->len > dst_mtu(&rt->u.dst)) 274 return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev,
269 return ip_fragment(skb, ip_finish_output); 275 ip_finish_output);
270 else
271 return ip_finish_output(skb);
272} 276}
273 277
274int ip_output(struct sk_buff *skb) 278int ip_output(struct sk_buff *skb)
275{ 279{
280 struct net_device *dev = skb->dst->dev;
281
276 IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS); 282 IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
277 283
278 if (skb->len > dst_mtu(skb->dst) && 284 skb->dev = dev;
279 !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size)) 285 skb->protocol = htons(ETH_P_IP);
280 return ip_fragment(skb, ip_finish_output); 286
281 else 287 return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
282 return ip_finish_output(skb); 288 ip_finish_output);
283} 289}
284 290
285int ip_queue_xmit(struct sk_buff *skb, int ipfragok) 291int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
@@ -411,7 +417,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
411 * single device frame, and queue such a frame for sending. 417 * single device frame, and queue such a frame for sending.
412 */ 418 */
413 419
414int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) 420static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
415{ 421{
416 struct iphdr *iph; 422 struct iphdr *iph;
417 int raw = 0; 423 int raw = 0;
@@ -420,7 +426,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
420 struct sk_buff *skb2; 426 struct sk_buff *skb2;
421 unsigned int mtu, hlen, left, len, ll_rs; 427 unsigned int mtu, hlen, left, len, ll_rs;
422 int offset; 428 int offset;
423 int not_last_frag; 429 __be16 not_last_frag;
424 struct rtable *rt = (struct rtable*)skb->dst; 430 struct rtable *rt = (struct rtable*)skb->dst;
425 int err = 0; 431 int err = 0;
426 432
@@ -445,6 +451,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
445 451
446 hlen = iph->ihl * 4; 452 hlen = iph->ihl * 4;
447 mtu = dst_mtu(&rt->u.dst) - hlen; /* Size of data space */ 453 mtu = dst_mtu(&rt->u.dst) - hlen; /* Size of data space */
454 IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
448 455
449 /* When frag_list is given, use it. First, check its validity: 456 /* When frag_list is given, use it. First, check its validity:
450 * some transformers could create wrong frag_list or break existing 457 * some transformers could create wrong frag_list or break existing
@@ -1181,7 +1188,7 @@ int ip_push_pending_frames(struct sock *sk)
1181 struct ip_options *opt = NULL; 1188 struct ip_options *opt = NULL;
1182 struct rtable *rt = inet->cork.rt; 1189 struct rtable *rt = inet->cork.rt;
1183 struct iphdr *iph; 1190 struct iphdr *iph;
1184 int df = 0; 1191 __be16 df = 0;
1185 __u8 ttl; 1192 __u8 ttl;
1186 int err = 0; 1193 int err = 0;
1187 1194
@@ -1392,7 +1399,6 @@ void __init ip_init(void)
1392#endif 1399#endif
1393} 1400}
1394 1401
1395EXPORT_SYMBOL(ip_fragment);
1396EXPORT_SYMBOL(ip_generic_getfrag); 1402EXPORT_SYMBOL(ip_generic_getfrag);
1397EXPORT_SYMBOL(ip_queue_xmit); 1403EXPORT_SYMBOL(ip_queue_xmit);
1398EXPORT_SYMBOL(ip_send_check); 1404EXPORT_SYMBOL(ip_send_check);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 4f2d87257309..2bf8d782f678 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -25,12 +25,12 @@
25#include <linux/skbuff.h> 25#include <linux/skbuff.h>
26#include <linux/ip.h> 26#include <linux/ip.h>
27#include <linux/icmp.h> 27#include <linux/icmp.h>
28#include <linux/inetdevice.h>
28#include <linux/netdevice.h> 29#include <linux/netdevice.h>
29#include <net/sock.h> 30#include <net/sock.h>
30#include <net/ip.h> 31#include <net/ip.h>
31#include <net/icmp.h> 32#include <net/icmp.h>
32#include <net/tcp.h> 33#include <net/tcp_states.h>
33#include <linux/tcp.h>
34#include <linux/udp.h> 34#include <linux/udp.h>
35#include <linux/igmp.h> 35#include <linux/igmp.h>
36#include <linux/netfilter.h> 36#include <linux/netfilter.h>
@@ -427,8 +427,8 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
427 err = ip_options_get_from_user(&opt, optval, optlen); 427 err = ip_options_get_from_user(&opt, optval, optlen);
428 if (err) 428 if (err)
429 break; 429 break;
430 if (sk->sk_type == SOCK_STREAM) { 430 if (inet->is_icsk) {
431 struct tcp_sock *tp = tcp_sk(sk); 431 struct inet_connection_sock *icsk = inet_csk(sk);
432#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 432#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
433 if (sk->sk_family == PF_INET || 433 if (sk->sk_family == PF_INET ||
434 (!((1 << sk->sk_state) & 434 (!((1 << sk->sk_state) &
@@ -436,10 +436,10 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
436 inet->daddr != LOOPBACK4_IPV6)) { 436 inet->daddr != LOOPBACK4_IPV6)) {
437#endif 437#endif
438 if (inet->opt) 438 if (inet->opt)
439 tp->ext_header_len -= inet->opt->optlen; 439 icsk->icsk_ext_hdr_len -= inet->opt->optlen;
440 if (opt) 440 if (opt)
441 tp->ext_header_len += opt->optlen; 441 icsk->icsk_ext_hdr_len += opt->optlen;
442 tcp_sync_mss(sk, tp->pmtu_cookie); 442 icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
443#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 443#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
444 } 444 }
445#endif 445#endif
@@ -621,7 +621,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
621 err = -ENOBUFS; 621 err = -ENOBUFS;
622 break; 622 break;
623 } 623 }
624 msf = (struct ip_msfilter *)kmalloc(optlen, GFP_KERNEL); 624 msf = kmalloc(optlen, GFP_KERNEL);
625 if (msf == 0) { 625 if (msf == 0) {
626 err = -ENOBUFS; 626 err = -ENOBUFS;
627 break; 627 break;
@@ -778,7 +778,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
778 err = -ENOBUFS; 778 err = -ENOBUFS;
779 break; 779 break;
780 } 780 }
781 gsf = (struct group_filter *)kmalloc(optlen,GFP_KERNEL); 781 gsf = kmalloc(optlen,GFP_KERNEL);
782 if (gsf == 0) { 782 if (gsf == 0) {
783 err = -ENOBUFS; 783 err = -ENOBUFS;
784 break; 784 break;
@@ -798,7 +798,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
798 goto mc_msf_out; 798 goto mc_msf_out;
799 } 799 }
800 msize = IP_MSFILTER_SIZE(gsf->gf_numsrc); 800 msize = IP_MSFILTER_SIZE(gsf->gf_numsrc);
801 msf = (struct ip_msfilter *)kmalloc(msize,GFP_KERNEL); 801 msf = kmalloc(msize,GFP_KERNEL);
802 if (msf == 0) { 802 if (msf == 0) {
803 err = -ENOBUFS; 803 err = -ENOBUFS;
804 goto mc_msf_out; 804 goto mc_msf_out;
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index fc718df17b40..d64e2ec8da7b 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -28,6 +28,7 @@
28#include <net/xfrm.h> 28#include <net/xfrm.h>
29#include <net/icmp.h> 29#include <net/icmp.h>
30#include <net/ipcomp.h> 30#include <net/ipcomp.h>
31#include <net/protocol.h>
31 32
32struct ipcomp_tfms { 33struct ipcomp_tfms {
33 struct list_head list; 34 struct list_head list;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index e8674baaa8d9..bb3613ec448c 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -42,6 +42,7 @@
42#include <linux/in.h> 42#include <linux/in.h>
43#include <linux/if.h> 43#include <linux/if.h>
44#include <linux/inet.h> 44#include <linux/inet.h>
45#include <linux/inetdevice.h>
45#include <linux/netdevice.h> 46#include <linux/netdevice.h>
46#include <linux/if_arp.h> 47#include <linux/if_arp.h>
47#include <linux/skbuff.h> 48#include <linux/skbuff.h>
@@ -58,6 +59,7 @@
58#include <net/arp.h> 59#include <net/arp.h>
59#include <net/ip.h> 60#include <net/ip.h>
60#include <net/ipconfig.h> 61#include <net/ipconfig.h>
62#include <net/route.h>
61 63
62#include <asm/uaccess.h> 64#include <asm/uaccess.h>
63#include <net/checksum.h> 65#include <net/checksum.h>
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index c05c1df0bb04..e5cbe72c6b80 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -93,6 +93,7 @@
93 */ 93 */
94 94
95 95
96#include <linux/capability.h>
96#include <linux/config.h> 97#include <linux/config.h>
97#include <linux/module.h> 98#include <linux/module.h>
98#include <linux/types.h> 99#include <linux/types.h>
@@ -108,6 +109,7 @@
108#include <linux/mroute.h> 109#include <linux/mroute.h>
109#include <linux/init.h> 110#include <linux/init.h>
110#include <linux/netfilter_ipv4.h> 111#include <linux/netfilter_ipv4.h>
112#include <linux/if_ether.h>
111 113
112#include <net/sock.h> 114#include <net/sock.h>
113#include <net/ip.h> 115#include <net/ip.h>
@@ -243,7 +245,7 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c
243 if (dev == NULL) 245 if (dev == NULL)
244 return NULL; 246 return NULL;
245 247
246 nt = dev->priv; 248 nt = netdev_priv(dev);
247 SET_MODULE_OWNER(dev); 249 SET_MODULE_OWNER(dev);
248 dev->init = ipip_tunnel_init; 250 dev->init = ipip_tunnel_init;
249 nt->parms = *parms; 251 nt->parms = *parms;
@@ -268,7 +270,7 @@ static void ipip_tunnel_uninit(struct net_device *dev)
268 tunnels_wc[0] = NULL; 270 tunnels_wc[0] = NULL;
269 write_unlock_bh(&ipip_lock); 271 write_unlock_bh(&ipip_lock);
270 } else 272 } else
271 ipip_tunnel_unlink((struct ip_tunnel*)dev->priv); 273 ipip_tunnel_unlink(netdev_priv(dev));
272 dev_put(dev); 274 dev_put(dev);
273} 275}
274 276
@@ -442,7 +444,7 @@ out:
442 skb2->dst->ops->update_pmtu(skb2->dst, rel_info); 444 skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
443 rel_info = htonl(rel_info); 445 rel_info = htonl(rel_info);
444 } else if (type == ICMP_TIME_EXCEEDED) { 446 } else if (type == ICMP_TIME_EXCEEDED) {
445 struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv; 447 struct ip_tunnel *t = netdev_priv(skb2->dev);
446 if (t->parms.iph.ttl) { 448 if (t->parms.iph.ttl) {
447 rel_type = ICMP_DEST_UNREACH; 449 rel_type = ICMP_DEST_UNREACH;
448 rel_code = ICMP_HOST_UNREACH; 450 rel_code = ICMP_HOST_UNREACH;
@@ -513,7 +515,7 @@ out:
513 515
514static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 516static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
515{ 517{
516 struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; 518 struct ip_tunnel *tunnel = netdev_priv(dev);
517 struct net_device_stats *stats = &tunnel->stat; 519 struct net_device_stats *stats = &tunnel->stat;
518 struct iphdr *tiph = &tunnel->parms.iph; 520 struct iphdr *tiph = &tunnel->parms.iph;
519 u8 tos = tunnel->parms.iph.tos; 521 u8 tos = tunnel->parms.iph.tos;
@@ -620,6 +622,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
620 skb->h.raw = skb->nh.raw; 622 skb->h.raw = skb->nh.raw;
621 skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); 623 skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
622 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 624 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
625 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED);
623 dst_release(skb->dst); 626 dst_release(skb->dst);
624 skb->dst = &rt->u.dst; 627 skb->dst = &rt->u.dst;
625 628
@@ -672,7 +675,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
672 t = ipip_tunnel_locate(&p, 0); 675 t = ipip_tunnel_locate(&p, 0);
673 } 676 }
674 if (t == NULL) 677 if (t == NULL)
675 t = (struct ip_tunnel*)dev->priv; 678 t = netdev_priv(dev);
676 memcpy(&p, &t->parms, sizeof(p)); 679 memcpy(&p, &t->parms, sizeof(p));
677 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) 680 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
678 err = -EFAULT; 681 err = -EFAULT;
@@ -709,7 +712,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
709 err = -EINVAL; 712 err = -EINVAL;
710 break; 713 break;
711 } 714 }
712 t = (struct ip_tunnel*)dev->priv; 715 t = netdev_priv(dev);
713 ipip_tunnel_unlink(t); 716 ipip_tunnel_unlink(t);
714 t->parms.iph.saddr = p.iph.saddr; 717 t->parms.iph.saddr = p.iph.saddr;
715 t->parms.iph.daddr = p.iph.daddr; 718 t->parms.iph.daddr = p.iph.daddr;
@@ -763,7 +766,7 @@ done:
763 766
764static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev) 767static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
765{ 768{
766 return &(((struct ip_tunnel*)dev->priv)->stat); 769 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
767} 770}
768 771
769static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) 772static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
@@ -786,7 +789,7 @@ static void ipip_tunnel_setup(struct net_device *dev)
786 789
787 dev->type = ARPHRD_TUNNEL; 790 dev->type = ARPHRD_TUNNEL;
788 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); 791 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
789 dev->mtu = 1500 - sizeof(struct iphdr); 792 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
790 dev->flags = IFF_NOARP; 793 dev->flags = IFF_NOARP;
791 dev->iflink = 0; 794 dev->iflink = 0;
792 dev->addr_len = 4; 795 dev->addr_len = 4;
@@ -798,7 +801,7 @@ static int ipip_tunnel_init(struct net_device *dev)
798 struct ip_tunnel *tunnel; 801 struct ip_tunnel *tunnel;
799 struct iphdr *iph; 802 struct iphdr *iph;
800 803
801 tunnel = (struct ip_tunnel*)dev->priv; 804 tunnel = netdev_priv(dev);
802 iph = &tunnel->parms.iph; 805 iph = &tunnel->parms.iph;
803 806
804 tunnel->dev = dev; 807 tunnel->dev = dev;
@@ -836,7 +839,7 @@ static int ipip_tunnel_init(struct net_device *dev)
836 839
837static int __init ipip_fb_tunnel_init(struct net_device *dev) 840static int __init ipip_fb_tunnel_init(struct net_device *dev)
838{ 841{
839 struct ip_tunnel *tunnel = dev->priv; 842 struct ip_tunnel *tunnel = netdev_priv(dev);
840 struct iphdr *iph = &tunnel->parms.iph; 843 struct iphdr *iph = &tunnel->parms.iph;
841 844
842 tunnel->dev = dev; 845 tunnel->dev = dev;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 302b7eb507c9..5c94c222e3f3 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -33,6 +33,7 @@
33#include <asm/uaccess.h> 33#include <asm/uaccess.h>
34#include <linux/types.h> 34#include <linux/types.h>
35#include <linux/sched.h> 35#include <linux/sched.h>
36#include <linux/capability.h>
36#include <linux/errno.h> 37#include <linux/errno.h>
37#include <linux/timer.h> 38#include <linux/timer.h>
38#include <linux/mm.h> 39#include <linux/mm.h>
@@ -49,9 +50,11 @@
49#include <linux/seq_file.h> 50#include <linux/seq_file.h>
50#include <linux/mroute.h> 51#include <linux/mroute.h>
51#include <linux/init.h> 52#include <linux/init.h>
53#include <linux/if_ether.h>
52#include <net/ip.h> 54#include <net/ip.h>
53#include <net/protocol.h> 55#include <net/protocol.h>
54#include <linux/skbuff.h> 56#include <linux/skbuff.h>
57#include <net/route.h>
55#include <net/sock.h> 58#include <net/sock.h>
56#include <net/icmp.h> 59#include <net/icmp.h>
57#include <net/udp.h> 60#include <net/udp.h>
@@ -176,8 +179,8 @@ static int reg_vif_num = -1;
176static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) 179static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
177{ 180{
178 read_lock(&mrt_lock); 181 read_lock(&mrt_lock);
179 ((struct net_device_stats*)dev->priv)->tx_bytes += skb->len; 182 ((struct net_device_stats*)netdev_priv(dev))->tx_bytes += skb->len;
180 ((struct net_device_stats*)dev->priv)->tx_packets++; 183 ((struct net_device_stats*)netdev_priv(dev))->tx_packets++;
181 ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT); 184 ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
182 read_unlock(&mrt_lock); 185 read_unlock(&mrt_lock);
183 kfree_skb(skb); 186 kfree_skb(skb);
@@ -186,13 +189,13 @@ static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
186 189
187static struct net_device_stats *reg_vif_get_stats(struct net_device *dev) 190static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
188{ 191{
189 return (struct net_device_stats*)dev->priv; 192 return (struct net_device_stats*)netdev_priv(dev);
190} 193}
191 194
192static void reg_vif_setup(struct net_device *dev) 195static void reg_vif_setup(struct net_device *dev)
193{ 196{
194 dev->type = ARPHRD_PIMREG; 197 dev->type = ARPHRD_PIMREG;
195 dev->mtu = 1500 - sizeof(struct iphdr) - 8; 198 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
196 dev->flags = IFF_NOARP; 199 dev->flags = IFF_NOARP;
197 dev->hard_start_xmit = reg_vif_xmit; 200 dev->hard_start_xmit = reg_vif_xmit;
198 dev->get_stats = reg_vif_get_stats; 201 dev->get_stats = reg_vif_get_stats;
@@ -1147,8 +1150,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1147 if (vif->flags & VIFF_REGISTER) { 1150 if (vif->flags & VIFF_REGISTER) {
1148 vif->pkt_out++; 1151 vif->pkt_out++;
1149 vif->bytes_out+=skb->len; 1152 vif->bytes_out+=skb->len;
1150 ((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len; 1153 ((struct net_device_stats*)netdev_priv(vif->dev))->tx_bytes += skb->len;
1151 ((struct net_device_stats*)vif->dev->priv)->tx_packets++; 1154 ((struct net_device_stats*)netdev_priv(vif->dev))->tx_packets++;
1152 ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT); 1155 ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
1153 kfree_skb(skb); 1156 kfree_skb(skb);
1154 return; 1157 return;
@@ -1208,8 +1211,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1208 if (vif->flags & VIFF_TUNNEL) { 1211 if (vif->flags & VIFF_TUNNEL) {
1209 ip_encap(skb, vif->local, vif->remote); 1212 ip_encap(skb, vif->local, vif->remote);
1210 /* FIXME: extra output firewall step used to be here. --RR */ 1213 /* FIXME: extra output firewall step used to be here. --RR */
1211 ((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++; 1214 ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_packets++;
1212 ((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb->len; 1215 ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_bytes+=skb->len;
1213 } 1216 }
1214 1217
1215 IPCB(skb)->flags |= IPSKB_FORWARDED; 1218 IPCB(skb)->flags |= IPSKB_FORWARDED;
@@ -1465,8 +1468,8 @@ int pim_rcv_v1(struct sk_buff * skb)
1465 skb->pkt_type = PACKET_HOST; 1468 skb->pkt_type = PACKET_HOST;
1466 dst_release(skb->dst); 1469 dst_release(skb->dst);
1467 skb->dst = NULL; 1470 skb->dst = NULL;
1468 ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len; 1471 ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
1469 ((struct net_device_stats*)reg_dev->priv)->rx_packets++; 1472 ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
1470 nf_reset(skb); 1473 nf_reset(skb);
1471 netif_rx(skb); 1474 netif_rx(skb);
1472 dev_put(reg_dev); 1475 dev_put(reg_dev);
@@ -1520,8 +1523,8 @@ static int pim_rcv(struct sk_buff * skb)
1520 skb->ip_summed = 0; 1523 skb->ip_summed = 0;
1521 skb->pkt_type = PACKET_HOST; 1524 skb->pkt_type = PACKET_HOST;
1522 dst_release(skb->dst); 1525 dst_release(skb->dst);
1523 ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len; 1526 ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
1524 ((struct net_device_stats*)reg_dev->priv)->rx_packets++; 1527 ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
1525 skb->dst = NULL; 1528 skb->dst = NULL;
1526 nf_reset(skb); 1529 nf_reset(skb);
1527 netif_rx(skb); 1530 netif_rx(skb);
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index d7eb680101c2..9b176a942ac5 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -224,34 +224,6 @@ void unregister_ip_vs_app(struct ip_vs_app *app)
224} 224}
225 225
226 226
227#if 0000
228/*
229 * Get reference to app by name (called from user context)
230 */
231struct ip_vs_app *ip_vs_app_get_by_name(char *appname)
232{
233 struct ip_vs_app *app, *a = NULL;
234
235 down(&__ip_vs_app_mutex);
236
237 list_for_each_entry(ent, &ip_vs_app_list, a_list) {
238 if (strcmp(app->name, appname))
239 continue;
240
241 /* softirq may call ip_vs_app_get too, so the caller
242 must disable softirq on the current CPU */
243 if (ip_vs_app_get(app))
244 a = app;
245 break;
246 }
247
248 up(&__ip_vs_app_mutex);
249
250 return a;
251}
252#endif
253
254
255/* 227/*
256 * Bind ip_vs_conn to its ip_vs_app (called by cp constructor) 228 * Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
257 */ 229 */
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 2a3a8c59c655..87b83813cf2c 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -24,7 +24,11 @@
24 * 24 *
25 */ 25 */
26 26
27#include <linux/interrupt.h>
28#include <linux/in.h>
29#include <linux/net.h>
27#include <linux/kernel.h> 30#include <linux/kernel.h>
31#include <linux/module.h>
28#include <linux/vmalloc.h> 32#include <linux/vmalloc.h>
29#include <linux/proc_fs.h> /* for proc_net_* */ 33#include <linux/proc_fs.h> /* for proc_net_* */
30#include <linux/seq_file.h> 34#include <linux/seq_file.h>
@@ -219,7 +223,7 @@ struct ip_vs_conn *ip_vs_conn_in_get
219 if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) 223 if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt))
220 cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port); 224 cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port);
221 225
222 IP_VS_DBG(7, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", 226 IP_VS_DBG(9, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
223 ip_vs_proto_name(protocol), 227 ip_vs_proto_name(protocol),
224 NIPQUAD(s_addr), ntohs(s_port), 228 NIPQUAD(s_addr), ntohs(s_port),
225 NIPQUAD(d_addr), ntohs(d_port), 229 NIPQUAD(d_addr), ntohs(d_port),
@@ -254,7 +258,7 @@ struct ip_vs_conn *ip_vs_ct_in_get
254 out: 258 out:
255 ct_read_unlock(hash); 259 ct_read_unlock(hash);
256 260
257 IP_VS_DBG(7, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", 261 IP_VS_DBG(9, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
258 ip_vs_proto_name(protocol), 262 ip_vs_proto_name(protocol),
259 NIPQUAD(s_addr), ntohs(s_port), 263 NIPQUAD(s_addr), ntohs(s_port),
260 NIPQUAD(d_addr), ntohs(d_port), 264 NIPQUAD(d_addr), ntohs(d_port),
@@ -295,7 +299,7 @@ struct ip_vs_conn *ip_vs_conn_out_get
295 299
296 ct_read_unlock(hash); 300 ct_read_unlock(hash);
297 301
298 IP_VS_DBG(7, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", 302 IP_VS_DBG(9, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
299 ip_vs_proto_name(protocol), 303 ip_vs_proto_name(protocol),
300 NIPQUAD(s_addr), ntohs(s_port), 304 NIPQUAD(s_addr), ntohs(s_port),
301 NIPQUAD(d_addr), ntohs(d_port), 305 NIPQUAD(d_addr), ntohs(d_port),
@@ -391,8 +395,9 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
391 cp->flags |= atomic_read(&dest->conn_flags); 395 cp->flags |= atomic_read(&dest->conn_flags);
392 cp->dest = dest; 396 cp->dest = dest;
393 397
394 IP_VS_DBG(9, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " 398 IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
395 "d:%u.%u.%u.%u:%d fwd:%c s:%u flg:%X cnt:%d destcnt:%d\n", 399 "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
400 "dest->refcnt:%d\n",
396 ip_vs_proto_name(cp->protocol), 401 ip_vs_proto_name(cp->protocol),
397 NIPQUAD(cp->caddr), ntohs(cp->cport), 402 NIPQUAD(cp->caddr), ntohs(cp->cport),
398 NIPQUAD(cp->vaddr), ntohs(cp->vport), 403 NIPQUAD(cp->vaddr), ntohs(cp->vport),
@@ -430,8 +435,9 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
430 if (!dest) 435 if (!dest)
431 return; 436 return;
432 437
433 IP_VS_DBG(9, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " 438 IP_VS_DBG(7, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
434 "d:%u.%u.%u.%u:%d fwd:%c s:%u flg:%X cnt:%d destcnt:%d\n", 439 "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
440 "dest->refcnt:%d\n",
435 ip_vs_proto_name(cp->protocol), 441 ip_vs_proto_name(cp->protocol),
436 NIPQUAD(cp->caddr), ntohs(cp->cport), 442 NIPQUAD(cp->caddr), ntohs(cp->cport),
437 NIPQUAD(cp->vaddr), ntohs(cp->vport), 443 NIPQUAD(cp->vaddr), ntohs(cp->vport),
@@ -571,7 +577,7 @@ static void ip_vs_conn_expire(unsigned long data)
571 ip_vs_conn_hash(cp); 577 ip_vs_conn_hash(cp);
572 578
573 expire_later: 579 expire_later:
574 IP_VS_DBG(7, "delayed: refcnt-1=%d conn.n_control=%d\n", 580 IP_VS_DBG(7, "delayed: conn->refcnt-1=%d conn->n_control=%d\n",
575 atomic_read(&cp->refcnt)-1, 581 atomic_read(&cp->refcnt)-1,
576 atomic_read(&cp->n_control)); 582 atomic_read(&cp->n_control));
577 583
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 1a0843cd58a9..3f47ad8e1cad 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -426,7 +426,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
426 return NULL; 426 return NULL;
427 427
428 IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u " 428 IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u "
429 "d:%u.%u.%u.%u:%u flg:%X cnt:%d\n", 429 "d:%u.%u.%u.%u:%u conn->flags:%X conn->refcnt:%d\n",
430 ip_vs_fwd_tag(cp), 430 ip_vs_fwd_tag(cp),
431 NIPQUAD(cp->caddr), ntohs(cp->cport), 431 NIPQUAD(cp->caddr), ntohs(cp->cport),
432 NIPQUAD(cp->vaddr), ntohs(cp->vport), 432 NIPQUAD(cp->vaddr), ntohs(cp->vport),
@@ -532,11 +532,8 @@ static unsigned int ip_vs_post_routing(unsigned int hooknum,
532{ 532{
533 if (!((*pskb)->ipvs_property)) 533 if (!((*pskb)->ipvs_property))
534 return NF_ACCEPT; 534 return NF_ACCEPT;
535
536 /* The packet was sent from IPVS, exit this chain */ 535 /* The packet was sent from IPVS, exit this chain */
537 (*okfn)(*pskb); 536 return NF_STOP;
538
539 return NF_STOLEN;
540} 537}
541 538
542u16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) 539u16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 9bdcf31b760e..7f0288b25fa1 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -23,6 +23,7 @@
23#include <linux/module.h> 23#include <linux/module.h>
24#include <linux/init.h> 24#include <linux/init.h>
25#include <linux/types.h> 25#include <linux/types.h>
26#include <linux/capability.h>
26#include <linux/fs.h> 27#include <linux/fs.h>
27#include <linux/sysctl.h> 28#include <linux/sysctl.h>
28#include <linux/proc_fs.h> 29#include <linux/proc_fs.h>
@@ -35,6 +36,7 @@
35#include <linux/netfilter_ipv4.h> 36#include <linux/netfilter_ipv4.h>
36 37
37#include <net/ip.h> 38#include <net/ip.h>
39#include <net/route.h>
38#include <net/sock.h> 40#include <net/sock.h>
39 41
40#include <asm/uaccess.h> 42#include <asm/uaccess.h>
@@ -447,7 +449,7 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport)
447 out: 449 out:
448 read_unlock(&__ip_vs_svc_lock); 450 read_unlock(&__ip_vs_svc_lock);
449 451
450 IP_VS_DBG(6, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n", 452 IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
451 fwmark, ip_vs_proto_name(protocol), 453 fwmark, ip_vs_proto_name(protocol),
452 NIPQUAD(vaddr), ntohs(vport), 454 NIPQUAD(vaddr), ntohs(vport),
453 svc?"hit":"not hit"); 455 svc?"hit":"not hit");
@@ -597,7 +599,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
597 */ 599 */
598 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { 600 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
599 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, " 601 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
600 "refcnt=%d\n", 602 "dest->refcnt=%d\n",
601 dest->vfwmark, 603 dest->vfwmark,
602 NIPQUAD(dest->addr), ntohs(dest->port), 604 NIPQUAD(dest->addr), ntohs(dest->port),
603 atomic_read(&dest->refcnt)); 605 atomic_read(&dest->refcnt));
@@ -804,7 +806,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
804 dest = ip_vs_trash_get_dest(svc, daddr, dport); 806 dest = ip_vs_trash_get_dest(svc, daddr, dport);
805 if (dest != NULL) { 807 if (dest != NULL) {
806 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, " 808 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
807 "refcnt=%d, service %u/%u.%u.%u.%u:%u\n", 809 "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
808 NIPQUAD(daddr), ntohs(dport), 810 NIPQUAD(daddr), ntohs(dport),
809 atomic_read(&dest->refcnt), 811 atomic_read(&dest->refcnt),
810 dest->vfwmark, 812 dest->vfwmark,
@@ -949,7 +951,8 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
949 atomic_dec(&dest->svc->refcnt); 951 atomic_dec(&dest->svc->refcnt);
950 kfree(dest); 952 kfree(dest);
951 } else { 953 } else {
952 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, refcnt=%d\n", 954 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
955 "dest->refcnt=%d\n",
953 NIPQUAD(dest->addr), ntohs(dest->port), 956 NIPQUAD(dest->addr), ntohs(dest->port),
954 atomic_read(&dest->refcnt)); 957 atomic_read(&dest->refcnt));
955 list_add(&dest->n_list, &ip_vs_dest_trash); 958 list_add(&dest->n_list, &ip_vs_dest_trash);
diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c
index f3bc320dce93..9fee19c4c617 100644
--- a/net/ipv4/ipvs/ip_vs_dh.c
+++ b/net/ipv4/ipvs/ip_vs_dh.c
@@ -37,8 +37,10 @@
37 * 37 *
38 */ 38 */
39 39
40#include <linux/ip.h>
40#include <linux/module.h> 41#include <linux/module.h>
41#include <linux/kernel.h> 42#include <linux/kernel.h>
43#include <linux/skbuff.h>
42 44
43#include <net/ip_vs.h> 45#include <net/ip_vs.h>
44 46
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
index 67b3e2fc1fa1..c453e1e57f4b 100644
--- a/net/ipv4/ipvs/ip_vs_est.c
+++ b/net/ipv4/ipvs/ip_vs_est.c
@@ -13,8 +13,12 @@
13 * Changes: 13 * Changes:
14 * 14 *
15 */ 15 */
16#include <linux/config.h>
16#include <linux/kernel.h> 17#include <linux/kernel.h>
18#include <linux/jiffies.h>
19#include <linux/slab.h>
17#include <linux/types.h> 20#include <linux/types.h>
21#include <linux/interrupt.h>
18 22
19#include <net/ip_vs.h> 23#include <net/ip_vs.h>
20 24
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index 561cda326fa8..6e5cb92a5c83 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -41,8 +41,10 @@
41 * me to write this module. 41 * me to write this module.
42 */ 42 */
43 43
44#include <linux/ip.h>
44#include <linux/module.h> 45#include <linux/module.h>
45#include <linux/kernel.h> 46#include <linux/kernel.h>
47#include <linux/skbuff.h>
46 48
47/* for sysctl */ 49/* for sysctl */
48#include <linux/fs.h> 50#include <linux/fs.h>
@@ -228,33 +230,6 @@ ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
228} 230}
229 231
230 232
231#if 0000
232/*
233 * Unhash ip_vs_lblc_entry from ip_vs_lblc_table.
234 * returns bool success.
235 */
236static int ip_vs_lblc_unhash(struct ip_vs_lblc_table *tbl,
237 struct ip_vs_lblc_entry *en)
238{
239 if (list_empty(&en->list)) {
240 IP_VS_ERR("ip_vs_lblc_unhash(): request for not hashed entry, "
241 "called from %p\n", __builtin_return_address(0));
242 return 0;
243 }
244
245 /*
246 * Remove it from the table
247 */
248 write_lock(&tbl->lock);
249 list_del(&en->list);
250 INIT_LIST_HEAD(&en->list);
251 write_unlock(&tbl->lock);
252
253 return 1;
254}
255#endif
256
257
258/* 233/*
259 * Get ip_vs_lblc_entry associated with supplied parameters. 234 * Get ip_vs_lblc_entry associated with supplied parameters.
260 */ 235 */
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index ce456dbf09a5..32ba37ba72d8 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -39,8 +39,10 @@
39 * 39 *
40 */ 40 */
41 41
42#include <linux/ip.h>
42#include <linux/module.h> 43#include <linux/module.h>
43#include <linux/kernel.h> 44#include <linux/kernel.h>
45#include <linux/skbuff.h>
44 46
45/* for sysctl */ 47/* for sysctl */
46#include <linux/fs.h> 48#include <linux/fs.h>
@@ -414,33 +416,6 @@ ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
414} 416}
415 417
416 418
417#if 0000
418/*
419 * Unhash ip_vs_lblcr_entry from ip_vs_lblcr_table.
420 * returns bool success.
421 */
422static int ip_vs_lblcr_unhash(struct ip_vs_lblcr_table *tbl,
423 struct ip_vs_lblcr_entry *en)
424{
425 if (list_empty(&en->list)) {
426 IP_VS_ERR("ip_vs_lblcr_unhash(): request for not hashed entry, "
427 "called from %p\n", __builtin_return_address(0));
428 return 0;
429 }
430
431 /*
432 * Remove it from the table
433 */
434 write_lock(&tbl->lock);
435 list_del(&en->list);
436 INIT_LIST_HEAD(&en->list);
437 write_unlock(&tbl->lock);
438
439 return 1;
440}
441#endif
442
443
444/* 419/*
445 * Get ip_vs_lblcr_entry associated with supplied parameters. 420 * Get ip_vs_lblcr_entry associated with supplied parameters.
446 */ 421 */
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c
index 453e94a0bbd7..8b0505b09317 100644
--- a/net/ipv4/ipvs/ip_vs_proto_ah.c
+++ b/net/ipv4/ipvs/ip_vs_proto_ah.c
@@ -12,6 +12,8 @@
12 * 12 *
13 */ 13 */
14 14
15#include <linux/in.h>
16#include <linux/ip.h>
15#include <linux/module.h> 17#include <linux/module.h>
16#include <linux/kernel.h> 18#include <linux/kernel.h>
17#include <linux/netfilter.h> 19#include <linux/netfilter.h>
diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c
index 478e5c7c7e8e..c36ccf057a19 100644
--- a/net/ipv4/ipvs/ip_vs_proto_esp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_esp.c
@@ -12,6 +12,8 @@
12 * 12 *
13 */ 13 */
14 14
15#include <linux/in.h>
16#include <linux/ip.h>
15#include <linux/module.h> 17#include <linux/module.h>
16#include <linux/kernel.h> 18#include <linux/kernel.h>
17#include <linux/netfilter.h> 19#include <linux/netfilter.h>
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index 0e878fd6215c..bc28b1160a3a 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -275,28 +275,6 @@ static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
275 [IP_VS_TCP_S_LAST] = 2*HZ, 275 [IP_VS_TCP_S_LAST] = 2*HZ,
276}; 276};
277 277
278
279#if 0
280
281/* FIXME: This is going to die */
282
283static int tcp_timeouts_dos[IP_VS_TCP_S_LAST+1] = {
284 [IP_VS_TCP_S_NONE] = 2*HZ,
285 [IP_VS_TCP_S_ESTABLISHED] = 8*60*HZ,
286 [IP_VS_TCP_S_SYN_SENT] = 60*HZ,
287 [IP_VS_TCP_S_SYN_RECV] = 10*HZ,
288 [IP_VS_TCP_S_FIN_WAIT] = 60*HZ,
289 [IP_VS_TCP_S_TIME_WAIT] = 60*HZ,
290 [IP_VS_TCP_S_CLOSE] = 10*HZ,
291 [IP_VS_TCP_S_CLOSE_WAIT] = 60*HZ,
292 [IP_VS_TCP_S_LAST_ACK] = 30*HZ,
293 [IP_VS_TCP_S_LISTEN] = 2*60*HZ,
294 [IP_VS_TCP_S_SYNACK] = 100*HZ,
295 [IP_VS_TCP_S_LAST] = 2*HZ,
296};
297
298#endif
299
300static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = { 278static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
301 [IP_VS_TCP_S_NONE] = "NONE", 279 [IP_VS_TCP_S_NONE] = "NONE",
302 [IP_VS_TCP_S_ESTABLISHED] = "ESTABLISHED", 280 [IP_VS_TCP_S_ESTABLISHED] = "ESTABLISHED",
@@ -448,7 +426,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
448 struct ip_vs_dest *dest = cp->dest; 426 struct ip_vs_dest *dest = cp->dest;
449 427
450 IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->" 428 IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->"
451 "%u.%u.%u.%u:%d state: %s->%s cnt:%d\n", 429 "%u.%u.%u.%u:%d state: %s->%s conn->refcnt:%d\n",
452 pp->name, 430 pp->name,
453 (state_off==TCP_DIR_OUTPUT)?"output ":"input ", 431 (state_off==TCP_DIR_OUTPUT)?"output ":"input ",
454 th->syn? 'S' : '.', 432 th->syn? 'S' : '.',
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 8ae5f2e0aefa..89d9175d8f28 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -15,8 +15,11 @@
15 * 15 *
16 */ 16 */
17 17
18#include <linux/in.h>
19#include <linux/ip.h>
18#include <linux/kernel.h> 20#include <linux/kernel.h>
19#include <linux/netfilter_ipv4.h> 21#include <linux/netfilter_ipv4.h>
22#include <linux/udp.h>
20 23
21#include <net/ip_vs.h> 24#include <net/ip_vs.h>
22 25
diff --git a/net/ipv4/ipvs/ip_vs_sched.c b/net/ipv4/ipvs/ip_vs_sched.c
index 0f7c56a225bd..8bc42b76223d 100644
--- a/net/ipv4/ipvs/ip_vs_sched.c
+++ b/net/ipv4/ipvs/ip_vs_sched.c
@@ -22,6 +22,7 @@
22#include <linux/module.h> 22#include <linux/module.h>
23#include <linux/sched.h> 23#include <linux/sched.h>
24#include <linux/spinlock.h> 24#include <linux/spinlock.h>
25#include <linux/interrupt.h>
25#include <asm/string.h> 26#include <asm/string.h>
26#include <linux/kmod.h> 27#include <linux/kmod.h>
27 28
diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c
index 6f7c50e44a39..7775e6cc68be 100644
--- a/net/ipv4/ipvs/ip_vs_sh.c
+++ b/net/ipv4/ipvs/ip_vs_sh.c
@@ -34,8 +34,10 @@
34 * 34 *
35 */ 35 */
36 36
37#include <linux/ip.h>
37#include <linux/module.h> 38#include <linux/module.h>
38#include <linux/kernel.h> 39#include <linux/kernel.h>
40#include <linux/skbuff.h>
39 41
40#include <net/ip_vs.h> 42#include <net/ip_vs.h>
41 43
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 2e5ced3d8062..1bca714bda3d 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -21,12 +21,14 @@
21 21
22#include <linux/module.h> 22#include <linux/module.h>
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/inetdevice.h>
24#include <linux/net.h> 25#include <linux/net.h>
25#include <linux/completion.h> 26#include <linux/completion.h>
26#include <linux/delay.h> 27#include <linux/delay.h>
27#include <linux/skbuff.h> 28#include <linux/skbuff.h>
28#include <linux/in.h> 29#include <linux/in.h>
29#include <linux/igmp.h> /* for ip_mc_join_group */ 30#include <linux/igmp.h> /* for ip_mc_join_group */
31#include <linux/udp.h>
30 32
31#include <net/ip.h> 33#include <net/ip.h>
32#include <net/sock.h> 34#include <net/sock.h>
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index 3b87482049cf..52c12e9edbbc 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -322,7 +322,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
322 struct net_device *tdev; /* Device to other host */ 322 struct net_device *tdev; /* Device to other host */
323 struct iphdr *old_iph = skb->nh.iph; 323 struct iphdr *old_iph = skb->nh.iph;
324 u8 tos = old_iph->tos; 324 u8 tos = old_iph->tos;
325 u16 df = old_iph->frag_off; 325 __be16 df = old_iph->frag_off;
326 struct iphdr *iph; /* Our new IP header */ 326 struct iphdr *iph; /* Our new IP header */
327 int max_headroom; /* The extra header space needed */ 327 int max_headroom; /* The extra header space needed */
328 int mtu; 328 int mtu;
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index ae0779d82c5d..52a3d7c57907 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -1,17 +1,11 @@
1/* IPv4 specific functions of netfilter core */ 1/* IPv4 specific functions of netfilter core */
2
3#include <linux/config.h>
4#ifdef CONFIG_NETFILTER
5
6#include <linux/kernel.h> 2#include <linux/kernel.h>
7#include <linux/netfilter.h> 3#include <linux/netfilter.h>
8#include <linux/netfilter_ipv4.h> 4#include <linux/netfilter_ipv4.h>
9
10#include <linux/tcp.h>
11#include <linux/udp.h>
12#include <linux/icmp.h>
13#include <net/route.h>
14#include <linux/ip.h> 5#include <linux/ip.h>
6#include <net/route.h>
7#include <net/xfrm.h>
8#include <net/ip.h>
15 9
16/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ 10/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
17int ip_route_me_harder(struct sk_buff **pskb) 11int ip_route_me_harder(struct sk_buff **pskb)
@@ -33,7 +27,6 @@ int ip_route_me_harder(struct sk_buff **pskb)
33#ifdef CONFIG_IP_ROUTE_FWMARK 27#ifdef CONFIG_IP_ROUTE_FWMARK
34 fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark; 28 fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
35#endif 29#endif
36 fl.proto = iph->protocol;
37 if (ip_route_output_key(&rt, &fl) != 0) 30 if (ip_route_output_key(&rt, &fl) != 0)
38 return -1; 31 return -1;
39 32
@@ -60,6 +53,13 @@ int ip_route_me_harder(struct sk_buff **pskb)
60 if ((*pskb)->dst->error) 53 if ((*pskb)->dst->error)
61 return -1; 54 return -1;
62 55
56#ifdef CONFIG_XFRM
57 if (!(IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) &&
58 xfrm_decode_session(*pskb, &fl, AF_INET) == 0)
59 if (xfrm_lookup(&(*pskb)->dst, &fl, (*pskb)->sk, 0))
60 return -1;
61#endif
62
63 /* Change in oif may mean change in hh_len. */ 63 /* Change in oif may mean change in hh_len. */
64 hh_len = (*pskb)->dst->dev->hard_header_len; 64 hh_len = (*pskb)->dst->dev->hard_header_len;
65 if (skb_headroom(*pskb) < hh_len) { 65 if (skb_headroom(*pskb) < hh_len) {
@@ -78,6 +78,9 @@ int ip_route_me_harder(struct sk_buff **pskb)
78} 78}
79EXPORT_SYMBOL(ip_route_me_harder); 79EXPORT_SYMBOL(ip_route_me_harder);
80 80
81void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *);
82EXPORT_SYMBOL(ip_nat_decode_session);
83
81/* 84/*
82 * Extra routing may needed on local out, as the QUEUE target never 85 * Extra routing may needed on local out, as the QUEUE target never
83 * returns control to the table. 86 * returns control to the table.
@@ -135,5 +138,3 @@ static void fini(void)
135 138
136module_init(init); 139module_init(init);
137module_exit(fini); 140module_exit(fini);
138
139#endif /* CONFIG_NETFILTER */
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 88a60650e6b8..db783036e4d8 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -182,6 +182,7 @@ config IP_NF_QUEUE
182 182
183config IP_NF_IPTABLES 183config IP_NF_IPTABLES
184 tristate "IP tables support (required for filtering/masq/NAT)" 184 tristate "IP tables support (required for filtering/masq/NAT)"
185 depends on NETFILTER_XTABLES
185 help 186 help
186 iptables is a general, extensible packet identification framework. 187 iptables is a general, extensible packet identification framework.
187 The packet filtering and full NAT (masquerading, port forwarding, 188 The packet filtering and full NAT (masquerading, port forwarding,
@@ -191,16 +192,6 @@ config IP_NF_IPTABLES
191 To compile it as a module, choose M here. If unsure, say N. 192 To compile it as a module, choose M here. If unsure, say N.
192 193
193# The matches. 194# The matches.
194config IP_NF_MATCH_LIMIT
195 tristate "limit match support"
196 depends on IP_NF_IPTABLES
197 help
198 limit matching allows you to control the rate at which a rule can be
199 matched: mainly useful in combination with the LOG target ("LOG
200 target support", below) and to avoid some Denial of Service attacks.
201
202 To compile it as a module, choose M here. If unsure, say N.
203
204config IP_NF_MATCH_IPRANGE 195config IP_NF_MATCH_IPRANGE
205 tristate "IP range match support" 196 tristate "IP range match support"
206 depends on IP_NF_IPTABLES 197 depends on IP_NF_IPTABLES
@@ -210,37 +201,6 @@ config IP_NF_MATCH_IPRANGE
210 201
211 To compile it as a module, choose M here. If unsure, say N. 202 To compile it as a module, choose M here. If unsure, say N.
212 203
213config IP_NF_MATCH_MAC
214 tristate "MAC address match support"
215 depends on IP_NF_IPTABLES
216 help
217 MAC matching allows you to match packets based on the source
218 Ethernet address of the packet.
219
220 To compile it as a module, choose M here. If unsure, say N.
221
222config IP_NF_MATCH_PKTTYPE
223 tristate "Packet type match support"
224 depends on IP_NF_IPTABLES
225 help
226 Packet type matching allows you to match a packet by
227 its "class", eg. BROADCAST, MULTICAST, ...
228
229 Typical usage:
230 iptables -A INPUT -m pkttype --pkt-type broadcast -j LOG
231
232 To compile it as a module, choose M here. If unsure, say N.
233
234config IP_NF_MATCH_MARK
235 tristate "netfilter MARK match support"
236 depends on IP_NF_IPTABLES
237 help
238 Netfilter mark matching allows you to match packets based on the
239 `nfmark' value in the packet. This can be set by the MARK target
240 (see below).
241
242 To compile it as a module, choose M here. If unsure, say N.
243
244config IP_NF_MATCH_MULTIPORT 204config IP_NF_MATCH_MULTIPORT
245 tristate "Multiple port match support" 205 tristate "Multiple port match support"
246 depends on IP_NF_IPTABLES 206 depends on IP_NF_IPTABLES
@@ -301,15 +261,6 @@ config IP_NF_MATCH_AH_ESP
301 261
302 To compile it as a module, choose M here. If unsure, say N. 262 To compile it as a module, choose M here. If unsure, say N.
303 263
304config IP_NF_MATCH_LENGTH
305 tristate "LENGTH match support"
306 depends on IP_NF_IPTABLES
307 help
308 This option allows you to match the length of a packet against a
309 specific value or range of values.
310
311 To compile it as a module, choose M here. If unsure, say N.
312
313config IP_NF_MATCH_TTL 264config IP_NF_MATCH_TTL
314 tristate "TTL match support" 265 tristate "TTL match support"
315 depends on IP_NF_IPTABLES 266 depends on IP_NF_IPTABLES
@@ -319,50 +270,6 @@ config IP_NF_MATCH_TTL
319 270
320 To compile it as a module, choose M here. If unsure, say N. 271 To compile it as a module, choose M here. If unsure, say N.
321 272
322config IP_NF_MATCH_TCPMSS
323 tristate "tcpmss match support"
324 depends on IP_NF_IPTABLES
325 help
326 This option adds a `tcpmss' match, which allows you to examine the
327 MSS value of TCP SYN packets, which control the maximum packet size
328 for that connection.
329
330 To compile it as a module, choose M here. If unsure, say N.
331
332config IP_NF_MATCH_HELPER
333 tristate "Helper match support"
334 depends on IP_NF_IPTABLES
335 depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
336 help
337 Helper matching allows you to match packets in dynamic connections
338 tracked by a conntrack-helper, ie. ip_conntrack_ftp
339
340 To compile it as a module, choose M here. If unsure, say Y.
341
342config IP_NF_MATCH_STATE
343 tristate "Connection state match support"
344 depends on IP_NF_IPTABLES
345 depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
346 help
347 Connection state matching allows you to match packets based on their
348 relationship to a tracked connection (ie. previous packets). This
349 is a powerful tool for packet classification.
350
351 To compile it as a module, choose M here. If unsure, say N.
352
353config IP_NF_MATCH_CONNTRACK
354 tristate "Connection tracking match support"
355 depends on IP_NF_IPTABLES
356 depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
357 help
358 This is a general conntrack match module, a superset of the state match.
359
360 It allows matching on additional conntrack information, which is
361 useful in complex configurations, such as NAT gateways with multiple
362 internet links or tunnels.
363
364 To compile it as a module, choose M here. If unsure, say N.
365
366config IP_NF_MATCH_OWNER 273config IP_NF_MATCH_OWNER
367 tristate "Owner match support" 274 tristate "Owner match support"
368 depends on IP_NF_IPTABLES 275 depends on IP_NF_IPTABLES
@@ -372,15 +279,6 @@ config IP_NF_MATCH_OWNER
372 279
373 To compile it as a module, choose M here. If unsure, say N. 280 To compile it as a module, choose M here. If unsure, say N.
374 281
375config IP_NF_MATCH_PHYSDEV
376 tristate "Physdev match support"
377 depends on IP_NF_IPTABLES && BRIDGE_NETFILTER
378 help
379 Physdev packet matching matches against the physical bridge ports
380 the IP packet arrived on or will leave by.
381
382 To compile it as a module, choose M here. If unsure, say N.
383
384config IP_NF_MATCH_ADDRTYPE 282config IP_NF_MATCH_ADDRTYPE
385 tristate 'address type match support' 283 tristate 'address type match support'
386 depends on IP_NF_IPTABLES 284 depends on IP_NF_IPTABLES
@@ -391,75 +289,6 @@ config IP_NF_MATCH_ADDRTYPE
391 If you want to compile it as a module, say M here and read 289 If you want to compile it as a module, say M here and read
392 <file:Documentation/modules.txt>. If unsure, say `N'. 290 <file:Documentation/modules.txt>. If unsure, say `N'.
393 291
394config IP_NF_MATCH_REALM
395 tristate 'realm match support'
396 depends on IP_NF_IPTABLES
397 select NET_CLS_ROUTE
398 help
399 This option adds a `realm' match, which allows you to use the realm
400 key from the routing subsystem inside iptables.
401
402 This match pretty much resembles the CONFIG_NET_CLS_ROUTE4 option
403 in tc world.
404
405 If you want to compile it as a module, say M here and read
406 <file:Documentation/modules.txt>. If unsure, say `N'.
407
408config IP_NF_MATCH_SCTP
409 tristate 'SCTP protocol match support'
410 depends on IP_NF_IPTABLES
411 help
412 With this option enabled, you will be able to use the iptables
413 `sctp' match in order to match on SCTP source/destination ports
414 and SCTP chunk types.
415
416 If you want to compile it as a module, say M here and read
417 <file:Documentation/modules.txt>. If unsure, say `N'.
418
419config IP_NF_MATCH_DCCP
420 tristate 'DCCP protocol match support'
421 depends on IP_NF_IPTABLES
422 help
423 With this option enabled, you will be able to use the iptables
424 `dccp' match in order to match on DCCP source/destination ports
425 and DCCP flags.
426
427 If you want to compile it as a module, say M here and read
428 <file:Documentation/modules.txt>. If unsure, say `N'.
429
430config IP_NF_MATCH_COMMENT
431 tristate 'comment match support'
432 depends on IP_NF_IPTABLES
433 help
434 This option adds a `comment' dummy-match, which allows you to put
435 comments in your iptables ruleset.
436
437 If you want to compile it as a module, say M here and read
438 <file:Documentation/modules.txt>. If unsure, say `N'.
439
440config IP_NF_MATCH_CONNMARK
441 tristate 'Connection mark match support'
442 depends on IP_NF_IPTABLES
443 depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
444 help
445 This option adds a `connmark' match, which allows you to match the
446 connection mark value previously set for the session by `CONNMARK'.
447
448 If you want to compile it as a module, say M here and read
449 <file:Documentation/modules.txt>. The module will be called
450 ipt_connmark.o. If unsure, say `N'.
451
452config IP_NF_MATCH_CONNBYTES
453 tristate 'Connection byte/packet counter match support'
454 depends on IP_NF_IPTABLES
455 depends on (IP_NF_CONNTRACK && IP_NF_CT_ACCT) || (NF_CT_ACCT && NF_CONNTRACK_IPV4)
456 help
457 This option adds a `connbytes' match, which allows you to match the
458 number of bytes and/or packets for each direction within a connection.
459
460 If you want to compile it as a module, say M here and read
461 <file:Documentation/modules.txt>. If unsure, say `N'.
462
463config IP_NF_MATCH_HASHLIMIT 292config IP_NF_MATCH_HASHLIMIT
464 tristate 'hashlimit match support' 293 tristate 'hashlimit match support'
465 depends on IP_NF_IPTABLES 294 depends on IP_NF_IPTABLES
@@ -474,18 +303,15 @@ config IP_NF_MATCH_HASHLIMIT
474 destination IP' or `500pps from any given source IP' with a single 303 destination IP' or `500pps from any given source IP' with a single
475 IPtables rule. 304 IPtables rule.
476 305
477config IP_NF_MATCH_STRING 306config IP_NF_MATCH_POLICY
478 tristate 'string match support' 307 tristate "IPsec policy match support"
479 depends on IP_NF_IPTABLES 308 depends on IP_NF_IPTABLES && XFRM
480 select TEXTSEARCH 309 help
481 select TEXTSEARCH_KMP 310 Policy matching allows you to match packets based on the
482 select TEXTSEARCH_BM 311 IPsec policy that was used during decapsulation/will
483 select TEXTSEARCH_FSM 312 be used during encapsulation.
484 help
485 This option adds a `string' match, which allows you to look for
486 pattern matchings in packets.
487 313
488 To compile it as a module, choose M here. If unsure, say N. 314 To compile it as a module, choose M here. If unsure, say N.
489 315
490# `filter', generic and specific targets 316# `filter', generic and specific targets
491config IP_NF_FILTER 317config IP_NF_FILTER
@@ -562,17 +388,6 @@ config IP_NF_TARGET_TCPMSS
562 388
563 To compile it as a module, choose M here. If unsure, say N. 389 To compile it as a module, choose M here. If unsure, say N.
564 390
565config IP_NF_TARGET_NFQUEUE
566 tristate "NFQUEUE Target Support"
567 depends on IP_NF_IPTABLES
568 help
569 This Target replaced the old obsolete QUEUE target.
570
571 As opposed to QUEUE, it supports 65535 different queues,
572 not just one.
573
574 To compile it as a module, choose M here. If unsure, say N.
575
576# NAT + specific targets 391# NAT + specific targets
577config IP_NF_NAT 392config IP_NF_NAT
578 tristate "Full NAT" 393 tristate "Full NAT"
@@ -725,31 +540,6 @@ config IP_NF_TARGET_DSCP
725 540
726 To compile it as a module, choose M here. If unsure, say N. 541 To compile it as a module, choose M here. If unsure, say N.
727 542
728config IP_NF_TARGET_MARK
729 tristate "MARK target support"
730 depends on IP_NF_MANGLE
731 help
732 This option adds a `MARK' target, which allows you to create rules
733 in the `mangle' table which alter the netfilter mark (nfmark) field
734 associated with the packet prior to routing. This can change
735 the routing method (see `Use netfilter MARK value as routing
736 key') and can also be used by other subsystems to change their
737 behavior.
738
739 To compile it as a module, choose M here. If unsure, say N.
740
741config IP_NF_TARGET_CLASSIFY
742 tristate "CLASSIFY target support"
743 depends on IP_NF_MANGLE
744 help
745 This option adds a `CLASSIFY' target, which enables the user to set
746 the priority of a packet. Some qdiscs can use this value for
747 classification, among these are:
748
749 atm, cbq, dsmark, pfifo_fast, htb, prio
750
751 To compile it as a module, choose M here. If unsure, say N.
752
753config IP_NF_TARGET_TTL 543config IP_NF_TARGET_TTL
754 tristate 'TTL target support' 544 tristate 'TTL target support'
755 depends on IP_NF_MANGLE 545 depends on IP_NF_MANGLE
@@ -764,19 +554,6 @@ config IP_NF_TARGET_TTL
764 554
765 To compile it as a module, choose M here. If unsure, say N. 555 To compile it as a module, choose M here. If unsure, say N.
766 556
767config IP_NF_TARGET_CONNMARK
768 tristate 'CONNMARK target support'
769 depends on IP_NF_MANGLE
770 depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
771 help
772 This option adds a `CONNMARK' target, which allows one to manipulate
773 the connection mark value. Similar to the MARK target, but
774 affects the connection mark value rather than the packet mark value.
775
776 If you want to compile it as a module, say M here and read
777 <file:Documentation/modules.txt>. The module will be called
778 ipt_CONNMARK.o. If unsure, say `N'.
779
780config IP_NF_TARGET_CLUSTERIP 557config IP_NF_TARGET_CLUSTERIP
781 tristate "CLUSTERIP target support (EXPERIMENTAL)" 558 tristate "CLUSTERIP target support (EXPERIMENTAL)"
782 depends on IP_NF_MANGLE && EXPERIMENTAL 559 depends on IP_NF_MANGLE && EXPERIMENTAL
@@ -800,23 +577,10 @@ config IP_NF_RAW
800 If you want to compile it as a module, say M here and read 577 If you want to compile it as a module, say M here and read
801 <file:Documentation/modules.txt>. If unsure, say `N'. 578 <file:Documentation/modules.txt>. If unsure, say `N'.
802 579
803config IP_NF_TARGET_NOTRACK
804 tristate 'NOTRACK target support'
805 depends on IP_NF_RAW
806 depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
807 help
808 The NOTRACK target allows a select rule to specify
809 which packets *not* to enter the conntrack/NAT
810 subsystem with all the consequences (no ICMP error tracking,
811 no protocol helpers for the selected packets).
812
813 If you want to compile it as a module, say M here and read
814 <file:Documentation/modules.txt>. If unsure, say `N'.
815
816
817# ARP tables 580# ARP tables
818config IP_NF_ARPTABLES 581config IP_NF_ARPTABLES
819 tristate "ARP tables support" 582 tristate "ARP tables support"
583 depends on NETFILTER_XTABLES
820 help 584 help
821 arptables is a general, extensible packet identification framework. 585 arptables is a general, extensible packet identification framework.
822 The ARP packet filtering and mangling (manipulation)subsystems 586 The ARP packet filtering and mangling (manipulation)subsystems
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index d0a447e520a2..e5c5b3202f02 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -46,15 +46,8 @@ obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
46obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o 46obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
47 47
48# matches 48# matches
49obj-$(CONFIG_IP_NF_MATCH_HELPER) += ipt_helper.o
50obj-$(CONFIG_IP_NF_MATCH_LIMIT) += ipt_limit.o
51obj-$(CONFIG_IP_NF_MATCH_HASHLIMIT) += ipt_hashlimit.o 49obj-$(CONFIG_IP_NF_MATCH_HASHLIMIT) += ipt_hashlimit.o
52obj-$(CONFIG_IP_NF_MATCH_SCTP) += ipt_sctp.o
53obj-$(CONFIG_IP_NF_MATCH_DCCP) += ipt_dccp.o
54obj-$(CONFIG_IP_NF_MATCH_MARK) += ipt_mark.o
55obj-$(CONFIG_IP_NF_MATCH_MAC) += ipt_mac.o
56obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o 50obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o
57obj-$(CONFIG_IP_NF_MATCH_PKTTYPE) += ipt_pkttype.o
58obj-$(CONFIG_IP_NF_MATCH_MULTIPORT) += ipt_multiport.o 51obj-$(CONFIG_IP_NF_MATCH_MULTIPORT) += ipt_multiport.o
59obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o 52obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o
60obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o 53obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o
@@ -62,39 +55,25 @@ obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o
62obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o 55obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
63obj-$(CONFIG_IP_NF_MATCH_DSCP) += ipt_dscp.o 56obj-$(CONFIG_IP_NF_MATCH_DSCP) += ipt_dscp.o
64obj-$(CONFIG_IP_NF_MATCH_AH_ESP) += ipt_ah.o ipt_esp.o 57obj-$(CONFIG_IP_NF_MATCH_AH_ESP) += ipt_ah.o ipt_esp.o
65obj-$(CONFIG_IP_NF_MATCH_LENGTH) += ipt_length.o
66obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o 58obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
67obj-$(CONFIG_IP_NF_MATCH_STATE) += ipt_state.o
68obj-$(CONFIG_IP_NF_MATCH_CONNMARK) += ipt_connmark.o
69obj-$(CONFIG_IP_NF_MATCH_CONNTRACK) += ipt_conntrack.o
70obj-$(CONFIG_IP_NF_MATCH_CONNBYTES) += ipt_connbytes.o
71obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o
72obj-$(CONFIG_IP_NF_MATCH_REALM) += ipt_realm.o
73obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o 59obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
74obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt_physdev.o 60obj-$(CONFIG_IP_NF_MATCH_POLICY) += ipt_policy.o
75obj-$(CONFIG_IP_NF_MATCH_COMMENT) += ipt_comment.o
76obj-$(CONFIG_IP_NF_MATCH_STRING) += ipt_string.o
77 61
78# targets 62# targets
79obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o 63obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
80obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o 64obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o
81obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o 65obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
82obj-$(CONFIG_IP_NF_TARGET_DSCP) += ipt_DSCP.o 66obj-$(CONFIG_IP_NF_TARGET_DSCP) += ipt_DSCP.o
83obj-$(CONFIG_IP_NF_TARGET_MARK) += ipt_MARK.o
84obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o 67obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
85obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o 68obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
86obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o 69obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
87obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o 70obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o
88obj-$(CONFIG_IP_NF_TARGET_CLASSIFY) += ipt_CLASSIFY.o
89obj-$(CONFIG_IP_NF_NAT_SNMP_BASIC) += ip_nat_snmp_basic.o 71obj-$(CONFIG_IP_NF_NAT_SNMP_BASIC) += ip_nat_snmp_basic.o
90obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o 72obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o
91obj-$(CONFIG_IP_NF_TARGET_CONNMARK) += ipt_CONNMARK.o
92obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o 73obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
93obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o 74obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o
94obj-$(CONFIG_IP_NF_TARGET_NOTRACK) += ipt_NOTRACK.o
95obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o 75obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
96obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o 76obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o
97obj-$(CONFIG_IP_NF_TARGET_NFQUEUE) += ipt_NFQUEUE.o
98 77
99# generic ARP tables 78# generic ARP tables
100obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o 79obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 3c2e9639bba6..afe3d8f8177d 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -13,6 +13,7 @@
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/skbuff.h> 14#include <linux/skbuff.h>
15#include <linux/netdevice.h> 15#include <linux/netdevice.h>
16#include <linux/capability.h>
16#include <linux/if_arp.h> 17#include <linux/if_arp.h>
17#include <linux/kmod.h> 18#include <linux/kmod.h>
18#include <linux/vmalloc.h> 19#include <linux/vmalloc.h>
@@ -23,6 +24,7 @@
23#include <asm/uaccess.h> 24#include <asm/uaccess.h>
24#include <asm/semaphore.h> 25#include <asm/semaphore.h>
25 26
27#include <linux/netfilter/x_tables.h>
26#include <linux/netfilter_arp/arp_tables.h> 28#include <linux/netfilter_arp/arp_tables.h>
27 29
28MODULE_LICENSE("GPL"); 30MODULE_LICENSE("GPL");
@@ -54,33 +56,9 @@ do { \
54#else 56#else
55#define ARP_NF_ASSERT(x) 57#define ARP_NF_ASSERT(x)
56#endif 58#endif
57#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
58 59
59static DECLARE_MUTEX(arpt_mutex);
60
61#define ASSERT_READ_LOCK(x) ARP_NF_ASSERT(down_trylock(&arpt_mutex) != 0)
62#define ASSERT_WRITE_LOCK(x) ARP_NF_ASSERT(down_trylock(&arpt_mutex) != 0)
63#include <linux/netfilter_ipv4/listhelp.h> 60#include <linux/netfilter_ipv4/listhelp.h>
64 61
65struct arpt_table_info {
66 unsigned int size;
67 unsigned int number;
68 unsigned int initial_entries;
69 unsigned int hook_entry[NF_ARP_NUMHOOKS];
70 unsigned int underflow[NF_ARP_NUMHOOKS];
71 char entries[0] __attribute__((aligned(SMP_CACHE_BYTES)));
72};
73
74static LIST_HEAD(arpt_target);
75static LIST_HEAD(arpt_tables);
76#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
77
78#ifdef CONFIG_SMP
79#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
80#else
81#define TABLE_OFFSET(t,p) 0
82#endif
83
84static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, 62static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
85 char *hdr_addr, int len) 63 char *hdr_addr, int len)
86{ 64{
@@ -227,9 +205,9 @@ static inline int arp_checkentry(const struct arpt_arp *arp)
227} 205}
228 206
229static unsigned int arpt_error(struct sk_buff **pskb, 207static unsigned int arpt_error(struct sk_buff **pskb,
230 unsigned int hooknum,
231 const struct net_device *in, 208 const struct net_device *in,
232 const struct net_device *out, 209 const struct net_device *out,
210 unsigned int hooknum,
233 const void *targinfo, 211 const void *targinfo,
234 void *userinfo) 212 void *userinfo)
235{ 213{
@@ -258,6 +236,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
258 struct arpt_entry *e, *back; 236 struct arpt_entry *e, *back;
259 const char *indev, *outdev; 237 const char *indev, *outdev;
260 void *table_base; 238 void *table_base;
239 struct xt_table_info *private = table->private;
261 240
262 /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ 241 /* ARP header, plus 2 device addresses, plus 2 IP addresses. */
263 if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) + 242 if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) +
@@ -269,11 +248,9 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
269 outdev = out ? out->name : nulldevname; 248 outdev = out ? out->name : nulldevname;
270 249
271 read_lock_bh(&table->lock); 250 read_lock_bh(&table->lock);
272 table_base = (void *)table->private->entries 251 table_base = (void *)private->entries[smp_processor_id()];
273 + TABLE_OFFSET(table->private, 252 e = get_entry(table_base, private->hook_entry[hook]);
274 smp_processor_id()); 253 back = get_entry(table_base, private->underflow[hook]);
275 e = get_entry(table_base, table->private->hook_entry[hook]);
276 back = get_entry(table_base, table->private->underflow[hook]);
277 254
278 arp = (*pskb)->nh.arph; 255 arp = (*pskb)->nh.arph;
279 do { 256 do {
@@ -321,8 +298,8 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
321 * abs. verdicts 298 * abs. verdicts
322 */ 299 */
323 verdict = t->u.kernel.target->target(pskb, 300 verdict = t->u.kernel.target->target(pskb,
324 hook,
325 in, out, 301 in, out,
302 hook,
326 t->data, 303 t->data,
327 userdata); 304 userdata);
328 305
@@ -347,106 +324,6 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
347 return verdict; 324 return verdict;
348} 325}
349 326
350/*
351 * These are weird, but module loading must not be done with mutex
352 * held (since they will register), and we have to have a single
353 * function to use try_then_request_module().
354 */
355
356/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */
357static inline struct arpt_table *find_table_lock(const char *name)
358{
359 struct arpt_table *t;
360
361 if (down_interruptible(&arpt_mutex) != 0)
362 return ERR_PTR(-EINTR);
363
364 list_for_each_entry(t, &arpt_tables, list)
365 if (strcmp(t->name, name) == 0 && try_module_get(t->me))
366 return t;
367 up(&arpt_mutex);
368 return NULL;
369}
370
371
372/* Find target, grabs ref. Returns ERR_PTR() on error. */
373static inline struct arpt_target *find_target(const char *name, u8 revision)
374{
375 struct arpt_target *t;
376 int err = 0;
377
378 if (down_interruptible(&arpt_mutex) != 0)
379 return ERR_PTR(-EINTR);
380
381 list_for_each_entry(t, &arpt_target, list) {
382 if (strcmp(t->name, name) == 0) {
383 if (t->revision == revision) {
384 if (try_module_get(t->me)) {
385 up(&arpt_mutex);
386 return t;
387 }
388 } else
389 err = -EPROTOTYPE; /* Found something. */
390 }
391 }
392 up(&arpt_mutex);
393 return ERR_PTR(err);
394}
395
396struct arpt_target *arpt_find_target(const char *name, u8 revision)
397{
398 struct arpt_target *target;
399
400 target = try_then_request_module(find_target(name, revision),
401 "arpt_%s", name);
402 if (IS_ERR(target) || !target)
403 return NULL;
404 return target;
405}
406
407static int target_revfn(const char *name, u8 revision, int *bestp)
408{
409 struct arpt_target *t;
410 int have_rev = 0;
411
412 list_for_each_entry(t, &arpt_target, list) {
413 if (strcmp(t->name, name) == 0) {
414 if (t->revision > *bestp)
415 *bestp = t->revision;
416 if (t->revision == revision)
417 have_rev =1;
418 }
419 }
420 return have_rev;
421}
422
423/* Returns true or false (if no such extension at all) */
424static inline int find_revision(const char *name, u8 revision,
425 int (*revfn)(const char *, u8, int *),
426 int *err)
427{
428 int have_rev, best = -1;
429
430 if (down_interruptible(&arpt_mutex) != 0) {
431 *err = -EINTR;
432 return 1;
433 }
434 have_rev = revfn(name, revision, &best);
435 up(&arpt_mutex);
436
437 /* Nothing at all? Return 0 to try loading module. */
438 if (best == -1) {
439 *err = -ENOENT;
440 return 0;
441 }
442
443 *err = best;
444 if (!have_rev)
445 *err = -EPROTONOSUPPORT;
446 return 1;
447}
448
449
450/* All zeroes == unconditional rule. */ 327/* All zeroes == unconditional rule. */
451static inline int unconditional(const struct arpt_arp *arp) 328static inline int unconditional(const struct arpt_arp *arp)
452{ 329{
@@ -462,7 +339,8 @@ static inline int unconditional(const struct arpt_arp *arp)
462/* Figures out from what hook each rule can be called: returns 0 if 339/* Figures out from what hook each rule can be called: returns 0 if
463 * there are loops. Puts hook bitmask in comefrom. 340 * there are loops. Puts hook bitmask in comefrom.
464 */ 341 */
465static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int valid_hooks) 342static int mark_source_chains(struct xt_table_info *newinfo,
343 unsigned int valid_hooks, void *entry0)
466{ 344{
467 unsigned int hook; 345 unsigned int hook;
468 346
@@ -472,7 +350,7 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali
472 for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) { 350 for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) {
473 unsigned int pos = newinfo->hook_entry[hook]; 351 unsigned int pos = newinfo->hook_entry[hook];
474 struct arpt_entry *e 352 struct arpt_entry *e
475 = (struct arpt_entry *)(newinfo->entries + pos); 353 = (struct arpt_entry *)(entry0 + pos);
476 354
477 if (!(valid_hooks & (1 << hook))) 355 if (!(valid_hooks & (1 << hook)))
478 continue; 356 continue;
@@ -514,13 +392,13 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali
514 goto next; 392 goto next;
515 393
516 e = (struct arpt_entry *) 394 e = (struct arpt_entry *)
517 (newinfo->entries + pos); 395 (entry0 + pos);
518 } while (oldpos == pos + e->next_offset); 396 } while (oldpos == pos + e->next_offset);
519 397
520 /* Move along one */ 398 /* Move along one */
521 size = e->next_offset; 399 size = e->next_offset;
522 e = (struct arpt_entry *) 400 e = (struct arpt_entry *)
523 (newinfo->entries + pos + size); 401 (entry0 + pos + size);
524 e->counters.pcnt = pos; 402 e->counters.pcnt = pos;
525 pos += size; 403 pos += size;
526 } else { 404 } else {
@@ -537,7 +415,7 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali
537 newpos = pos + e->next_offset; 415 newpos = pos + e->next_offset;
538 } 416 }
539 e = (struct arpt_entry *) 417 e = (struct arpt_entry *)
540 (newinfo->entries + newpos); 418 (entry0 + newpos);
541 e->counters.pcnt = pos; 419 e->counters.pcnt = pos;
542 pos = newpos; 420 pos = newpos;
543 } 421 }
@@ -592,8 +470,8 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i
592 } 470 }
593 471
594 t = arpt_get_target(e); 472 t = arpt_get_target(e);
595 target = try_then_request_module(find_target(t->u.user.name, 473 target = try_then_request_module(xt_find_target(NF_ARP, t->u.user.name,
596 t->u.user.revision), 474 t->u.user.revision),
597 "arpt_%s", t->u.user.name); 475 "arpt_%s", t->u.user.name);
598 if (IS_ERR(target) || !target) { 476 if (IS_ERR(target) || !target) {
599 duprintf("check_entry: `%s' not found\n", t->u.user.name); 477 duprintf("check_entry: `%s' not found\n", t->u.user.name);
@@ -627,7 +505,7 @@ out:
627} 505}
628 506
629static inline int check_entry_size_and_hooks(struct arpt_entry *e, 507static inline int check_entry_size_and_hooks(struct arpt_entry *e,
630 struct arpt_table_info *newinfo, 508 struct xt_table_info *newinfo,
631 unsigned char *base, 509 unsigned char *base,
632 unsigned char *limit, 510 unsigned char *limit,
633 const unsigned int *hook_entries, 511 const unsigned int *hook_entries,
@@ -661,7 +539,7 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
661 < 0 (not ARPT_RETURN). --RR */ 539 < 0 (not ARPT_RETURN). --RR */
662 540
663 /* Clear counters and comefrom */ 541 /* Clear counters and comefrom */
664 e->counters = ((struct arpt_counters) { 0, 0 }); 542 e->counters = ((struct xt_counters) { 0, 0 });
665 e->comefrom = 0; 543 e->comefrom = 0;
666 544
667 (*i)++; 545 (*i)++;
@@ -688,7 +566,8 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i)
688 */ 566 */
689static int translate_table(const char *name, 567static int translate_table(const char *name,
690 unsigned int valid_hooks, 568 unsigned int valid_hooks,
691 struct arpt_table_info *newinfo, 569 struct xt_table_info *newinfo,
570 void *entry0,
692 unsigned int size, 571 unsigned int size,
693 unsigned int number, 572 unsigned int number,
694 const unsigned int *hook_entries, 573 const unsigned int *hook_entries,
@@ -710,11 +589,11 @@ static int translate_table(const char *name,
710 i = 0; 589 i = 0;
711 590
712 /* Walk through entries, checking offsets. */ 591 /* Walk through entries, checking offsets. */
713 ret = ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, 592 ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size,
714 check_entry_size_and_hooks, 593 check_entry_size_and_hooks,
715 newinfo, 594 newinfo,
716 newinfo->entries, 595 entry0,
717 newinfo->entries + size, 596 entry0 + size,
718 hook_entries, underflows, &i); 597 hook_entries, underflows, &i);
719 duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); 598 duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
720 if (ret != 0) 599 if (ret != 0)
@@ -743,79 +622,78 @@ static int translate_table(const char *name,
743 } 622 }
744 } 623 }
745 624
746 if (!mark_source_chains(newinfo, valid_hooks)) { 625 if (!mark_source_chains(newinfo, valid_hooks, entry0)) {
747 duprintf("Looping hook\n"); 626 duprintf("Looping hook\n");
748 return -ELOOP; 627 return -ELOOP;
749 } 628 }
750 629
751 /* Finally, each sanity check must pass */ 630 /* Finally, each sanity check must pass */
752 i = 0; 631 i = 0;
753 ret = ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, 632 ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size,
754 check_entry, name, size, &i); 633 check_entry, name, size, &i);
755 634
756 if (ret != 0) { 635 if (ret != 0) {
757 ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, 636 ARPT_ENTRY_ITERATE(entry0, newinfo->size,
758 cleanup_entry, &i); 637 cleanup_entry, &i);
759 return ret; 638 return ret;
760 } 639 }
761 640
762 /* And one copy for every other CPU */ 641 /* And one copy for every other CPU */
763 for_each_cpu(i) { 642 for_each_cpu(i) {
764 if (i == 0) 643 if (newinfo->entries[i] && newinfo->entries[i] != entry0)
765 continue; 644 memcpy(newinfo->entries[i], entry0, newinfo->size);
766 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
767 newinfo->entries,
768 SMP_ALIGN(newinfo->size));
769 } 645 }
770 646
771 return ret; 647 return ret;
772} 648}
773 649
774static struct arpt_table_info *replace_table(struct arpt_table *table, 650/* Gets counters. */
775 unsigned int num_counters, 651static inline int add_entry_to_counter(const struct arpt_entry *e,
776 struct arpt_table_info *newinfo, 652 struct xt_counters total[],
777 int *error) 653 unsigned int *i)
778{ 654{
779 struct arpt_table_info *oldinfo; 655 ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
780
781 /* Do the substitution. */
782 write_lock_bh(&table->lock);
783 /* Check inside lock: is the old number correct? */
784 if (num_counters != table->private->number) {
785 duprintf("num_counters != table->private->number (%u/%u)\n",
786 num_counters, table->private->number);
787 write_unlock_bh(&table->lock);
788 *error = -EAGAIN;
789 return NULL;
790 }
791 oldinfo = table->private;
792 table->private = newinfo;
793 newinfo->initial_entries = oldinfo->initial_entries;
794 write_unlock_bh(&table->lock);
795 656
796 return oldinfo; 657 (*i)++;
658 return 0;
797} 659}
798 660
799/* Gets counters. */ 661static inline int set_entry_to_counter(const struct arpt_entry *e,
800static inline int add_entry_to_counter(const struct arpt_entry *e, 662 struct xt_counters total[],
801 struct arpt_counters total[],
802 unsigned int *i) 663 unsigned int *i)
803{ 664{
804 ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); 665 SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
805 666
806 (*i)++; 667 (*i)++;
807 return 0; 668 return 0;
808} 669}
809 670
810static void get_counters(const struct arpt_table_info *t, 671static void get_counters(const struct xt_table_info *t,
811 struct arpt_counters counters[]) 672 struct xt_counters counters[])
812{ 673{
813 unsigned int cpu; 674 unsigned int cpu;
814 unsigned int i; 675 unsigned int i;
676 unsigned int curcpu;
677
678 /* Instead of clearing (by a previous call to memset())
679 * the counters and using adds, we set the counters
680 * with data used by 'current' CPU
681 * We dont care about preemption here.
682 */
683 curcpu = raw_smp_processor_id();
684
685 i = 0;
686 ARPT_ENTRY_ITERATE(t->entries[curcpu],
687 t->size,
688 set_entry_to_counter,
689 counters,
690 &i);
815 691
816 for_each_cpu(cpu) { 692 for_each_cpu(cpu) {
693 if (cpu == curcpu)
694 continue;
817 i = 0; 695 i = 0;
818 ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), 696 ARPT_ENTRY_ITERATE(t->entries[cpu],
819 t->size, 697 t->size,
820 add_entry_to_counter, 698 add_entry_to_counter,
821 counters, 699 counters,
@@ -829,27 +707,29 @@ static int copy_entries_to_user(unsigned int total_size,
829{ 707{
830 unsigned int off, num, countersize; 708 unsigned int off, num, countersize;
831 struct arpt_entry *e; 709 struct arpt_entry *e;
832 struct arpt_counters *counters; 710 struct xt_counters *counters;
711 struct xt_table_info *private = table->private;
833 int ret = 0; 712 int ret = 0;
713 void *loc_cpu_entry;
834 714
835 /* We need atomic snapshot of counters: rest doesn't change 715 /* We need atomic snapshot of counters: rest doesn't change
836 * (other than comefrom, which userspace doesn't care 716 * (other than comefrom, which userspace doesn't care
837 * about). 717 * about).
838 */ 718 */
839 countersize = sizeof(struct arpt_counters) * table->private->number; 719 countersize = sizeof(struct xt_counters) * private->number;
840 counters = vmalloc(countersize); 720 counters = vmalloc_node(countersize, numa_node_id());
841 721
842 if (counters == NULL) 722 if (counters == NULL)
843 return -ENOMEM; 723 return -ENOMEM;
844 724
845 /* First, sum counters... */ 725 /* First, sum counters... */
846 memset(counters, 0, countersize);
847 write_lock_bh(&table->lock); 726 write_lock_bh(&table->lock);
848 get_counters(table->private, counters); 727 get_counters(private, counters);
849 write_unlock_bh(&table->lock); 728 write_unlock_bh(&table->lock);
850 729
851 /* ... then copy entire thing from CPU 0... */ 730 loc_cpu_entry = private->entries[raw_smp_processor_id()];
852 if (copy_to_user(userptr, table->private->entries, total_size) != 0) { 731 /* ... then copy entire thing ... */
732 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
853 ret = -EFAULT; 733 ret = -EFAULT;
854 goto free_counters; 734 goto free_counters;
855 } 735 }
@@ -859,7 +739,7 @@ static int copy_entries_to_user(unsigned int total_size,
859 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ 739 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
860 struct arpt_entry_target *t; 740 struct arpt_entry_target *t;
861 741
862 e = (struct arpt_entry *)(table->private->entries + off); 742 e = (struct arpt_entry *)(loc_cpu_entry + off);
863 if (copy_to_user(userptr + off 743 if (copy_to_user(userptr + off
864 + offsetof(struct arpt_entry, counters), 744 + offsetof(struct arpt_entry, counters),
865 &counters[num], 745 &counters[num],
@@ -890,21 +770,21 @@ static int get_entries(const struct arpt_get_entries *entries,
890 int ret; 770 int ret;
891 struct arpt_table *t; 771 struct arpt_table *t;
892 772
893 t = find_table_lock(entries->name); 773 t = xt_find_table_lock(NF_ARP, entries->name);
894 if (t || !IS_ERR(t)) { 774 if (t || !IS_ERR(t)) {
775 struct xt_table_info *private = t->private;
895 duprintf("t->private->number = %u\n", 776 duprintf("t->private->number = %u\n",
896 t->private->number); 777 private->number);
897 if (entries->size == t->private->size) 778 if (entries->size == private->size)
898 ret = copy_entries_to_user(t->private->size, 779 ret = copy_entries_to_user(private->size,
899 t, uptr->entrytable); 780 t, uptr->entrytable);
900 else { 781 else {
901 duprintf("get_entries: I've got %u not %u!\n", 782 duprintf("get_entries: I've got %u not %u!\n",
902 t->private->size, 783 private->size, entries->size);
903 entries->size);
904 ret = -EINVAL; 784 ret = -EINVAL;
905 } 785 }
906 module_put(t->me); 786 module_put(t->me);
907 up(&arpt_mutex); 787 xt_table_unlock(t);
908 } else 788 } else
909 ret = t ? PTR_ERR(t) : -ENOENT; 789 ret = t ? PTR_ERR(t) : -ENOENT;
910 790
@@ -916,8 +796,9 @@ static int do_replace(void __user *user, unsigned int len)
916 int ret; 796 int ret;
917 struct arpt_replace tmp; 797 struct arpt_replace tmp;
918 struct arpt_table *t; 798 struct arpt_table *t;
919 struct arpt_table_info *newinfo, *oldinfo; 799 struct xt_table_info *newinfo, *oldinfo;
920 struct arpt_counters *counters; 800 struct xt_counters *counters;
801 void *loc_cpu_entry, *loc_cpu_old_entry;
921 802
922 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 803 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
923 return -EFAULT; 804 return -EFAULT;
@@ -926,38 +807,33 @@ static int do_replace(void __user *user, unsigned int len)
926 if (len != sizeof(tmp) + tmp.size) 807 if (len != sizeof(tmp) + tmp.size)
927 return -ENOPROTOOPT; 808 return -ENOPROTOOPT;
928 809
929 /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ 810 newinfo = xt_alloc_table_info(tmp.size);
930 if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
931 return -ENOMEM;
932
933 newinfo = vmalloc(sizeof(struct arpt_table_info)
934 + SMP_ALIGN(tmp.size) *
935 (highest_possible_processor_id()+1));
936 if (!newinfo) 811 if (!newinfo)
937 return -ENOMEM; 812 return -ENOMEM;
938 813
939 if (copy_from_user(newinfo->entries, user + sizeof(tmp), 814 /* choose the copy that is on our node/cpu */
815 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
816 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
940 tmp.size) != 0) { 817 tmp.size) != 0) {
941 ret = -EFAULT; 818 ret = -EFAULT;
942 goto free_newinfo; 819 goto free_newinfo;
943 } 820 }
944 821
945 counters = vmalloc(tmp.num_counters * sizeof(struct arpt_counters)); 822 counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
946 if (!counters) { 823 if (!counters) {
947 ret = -ENOMEM; 824 ret = -ENOMEM;
948 goto free_newinfo; 825 goto free_newinfo;
949 } 826 }
950 memset(counters, 0, tmp.num_counters * sizeof(struct arpt_counters));
951 827
952 ret = translate_table(tmp.name, tmp.valid_hooks, 828 ret = translate_table(tmp.name, tmp.valid_hooks,
953 newinfo, tmp.size, tmp.num_entries, 829 newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
954 tmp.hook_entry, tmp.underflow); 830 tmp.hook_entry, tmp.underflow);
955 if (ret != 0) 831 if (ret != 0)
956 goto free_newinfo_counters; 832 goto free_newinfo_counters;
957 833
958 duprintf("arp_tables: Translated table\n"); 834 duprintf("arp_tables: Translated table\n");
959 835
960 t = try_then_request_module(find_table_lock(tmp.name), 836 t = try_then_request_module(xt_find_table_lock(NF_ARP, tmp.name),
961 "arptable_%s", tmp.name); 837 "arptable_%s", tmp.name);
962 if (!t || IS_ERR(t)) { 838 if (!t || IS_ERR(t)) {
963 ret = t ? PTR_ERR(t) : -ENOENT; 839 ret = t ? PTR_ERR(t) : -ENOENT;
@@ -972,7 +848,7 @@ static int do_replace(void __user *user, unsigned int len)
972 goto put_module; 848 goto put_module;
973 } 849 }
974 850
975 oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret); 851 oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
976 if (!oldinfo) 852 if (!oldinfo)
977 goto put_module; 853 goto put_module;
978 854
@@ -989,24 +865,26 @@ static int do_replace(void __user *user, unsigned int len)
989 /* Get the old counters. */ 865 /* Get the old counters. */
990 get_counters(oldinfo, counters); 866 get_counters(oldinfo, counters);
991 /* Decrease module usage counts and free resource */ 867 /* Decrease module usage counts and free resource */
992 ARPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL); 868 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
993 vfree(oldinfo); 869 ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
870
871 xt_free_table_info(oldinfo);
994 if (copy_to_user(tmp.counters, counters, 872 if (copy_to_user(tmp.counters, counters,
995 sizeof(struct arpt_counters) * tmp.num_counters) != 0) 873 sizeof(struct xt_counters) * tmp.num_counters) != 0)
996 ret = -EFAULT; 874 ret = -EFAULT;
997 vfree(counters); 875 vfree(counters);
998 up(&arpt_mutex); 876 xt_table_unlock(t);
999 return ret; 877 return ret;
1000 878
1001 put_module: 879 put_module:
1002 module_put(t->me); 880 module_put(t->me);
1003 up(&arpt_mutex); 881 xt_table_unlock(t);
1004 free_newinfo_counters_untrans: 882 free_newinfo_counters_untrans:
1005 ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry, NULL); 883 ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
1006 free_newinfo_counters: 884 free_newinfo_counters:
1007 vfree(counters); 885 vfree(counters);
1008 free_newinfo: 886 free_newinfo:
1009 vfree(newinfo); 887 xt_free_table_info(newinfo);
1010 return ret; 888 return ret;
1011} 889}
1012 890
@@ -1014,7 +892,7 @@ static int do_replace(void __user *user, unsigned int len)
1014 * and everything is OK. 892 * and everything is OK.
1015 */ 893 */
1016static inline int add_counter_to_entry(struct arpt_entry *e, 894static inline int add_counter_to_entry(struct arpt_entry *e,
1017 const struct arpt_counters addme[], 895 const struct xt_counters addme[],
1018 unsigned int *i) 896 unsigned int *i)
1019{ 897{
1020 898
@@ -1027,14 +905,16 @@ static inline int add_counter_to_entry(struct arpt_entry *e,
1027static int do_add_counters(void __user *user, unsigned int len) 905static int do_add_counters(void __user *user, unsigned int len)
1028{ 906{
1029 unsigned int i; 907 unsigned int i;
1030 struct arpt_counters_info tmp, *paddc; 908 struct xt_counters_info tmp, *paddc;
1031 struct arpt_table *t; 909 struct arpt_table *t;
910 struct xt_table_info *private;
1032 int ret = 0; 911 int ret = 0;
912 void *loc_cpu_entry;
1033 913
1034 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 914 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1035 return -EFAULT; 915 return -EFAULT;
1036 916
1037 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct arpt_counters)) 917 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
1038 return -EINVAL; 918 return -EINVAL;
1039 919
1040 paddc = vmalloc(len); 920 paddc = vmalloc(len);
@@ -1046,27 +926,30 @@ static int do_add_counters(void __user *user, unsigned int len)
1046 goto free; 926 goto free;
1047 } 927 }
1048 928
1049 t = find_table_lock(tmp.name); 929 t = xt_find_table_lock(NF_ARP, tmp.name);
1050 if (!t || IS_ERR(t)) { 930 if (!t || IS_ERR(t)) {
1051 ret = t ? PTR_ERR(t) : -ENOENT; 931 ret = t ? PTR_ERR(t) : -ENOENT;
1052 goto free; 932 goto free;
1053 } 933 }
1054 934
1055 write_lock_bh(&t->lock); 935 write_lock_bh(&t->lock);
1056 if (t->private->number != paddc->num_counters) { 936 private = t->private;
937 if (private->number != paddc->num_counters) {
1057 ret = -EINVAL; 938 ret = -EINVAL;
1058 goto unlock_up_free; 939 goto unlock_up_free;
1059 } 940 }
1060 941
1061 i = 0; 942 i = 0;
1062 ARPT_ENTRY_ITERATE(t->private->entries, 943 /* Choose the copy that is on our node */
1063 t->private->size, 944 loc_cpu_entry = private->entries[smp_processor_id()];
945 ARPT_ENTRY_ITERATE(loc_cpu_entry,
946 private->size,
1064 add_counter_to_entry, 947 add_counter_to_entry,
1065 paddc->counters, 948 paddc->counters,
1066 &i); 949 &i);
1067 unlock_up_free: 950 unlock_up_free:
1068 write_unlock_bh(&t->lock); 951 write_unlock_bh(&t->lock);
1069 up(&arpt_mutex); 952 xt_table_unlock(t);
1070 module_put(t->me); 953 module_put(t->me);
1071 free: 954 free:
1072 vfree(paddc); 955 vfree(paddc);
@@ -1123,25 +1006,26 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
1123 } 1006 }
1124 name[ARPT_TABLE_MAXNAMELEN-1] = '\0'; 1007 name[ARPT_TABLE_MAXNAMELEN-1] = '\0';
1125 1008
1126 t = try_then_request_module(find_table_lock(name), 1009 t = try_then_request_module(xt_find_table_lock(NF_ARP, name),
1127 "arptable_%s", name); 1010 "arptable_%s", name);
1128 if (t && !IS_ERR(t)) { 1011 if (t && !IS_ERR(t)) {
1129 struct arpt_getinfo info; 1012 struct arpt_getinfo info;
1013 struct xt_table_info *private = t->private;
1130 1014
1131 info.valid_hooks = t->valid_hooks; 1015 info.valid_hooks = t->valid_hooks;
1132 memcpy(info.hook_entry, t->private->hook_entry, 1016 memcpy(info.hook_entry, private->hook_entry,
1133 sizeof(info.hook_entry)); 1017 sizeof(info.hook_entry));
1134 memcpy(info.underflow, t->private->underflow, 1018 memcpy(info.underflow, private->underflow,
1135 sizeof(info.underflow)); 1019 sizeof(info.underflow));
1136 info.num_entries = t->private->number; 1020 info.num_entries = private->number;
1137 info.size = t->private->size; 1021 info.size = private->size;
1138 strcpy(info.name, name); 1022 strcpy(info.name, name);
1139 1023
1140 if (copy_to_user(user, &info, *len) != 0) 1024 if (copy_to_user(user, &info, *len) != 0)
1141 ret = -EFAULT; 1025 ret = -EFAULT;
1142 else 1026 else
1143 ret = 0; 1027 ret = 0;
1144 up(&arpt_mutex); 1028 xt_table_unlock(t);
1145 module_put(t->me); 1029 module_put(t->me);
1146 } else 1030 } else
1147 ret = t ? PTR_ERR(t) : -ENOENT; 1031 ret = t ? PTR_ERR(t) : -ENOENT;
@@ -1166,7 +1050,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
1166 } 1050 }
1167 1051
1168 case ARPT_SO_GET_REVISION_TARGET: { 1052 case ARPT_SO_GET_REVISION_TARGET: {
1169 struct arpt_get_revision rev; 1053 struct xt_get_revision rev;
1170 1054
1171 if (*len != sizeof(rev)) { 1055 if (*len != sizeof(rev)) {
1172 ret = -EINVAL; 1056 ret = -EINVAL;
@@ -1177,8 +1061,8 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
1177 break; 1061 break;
1178 } 1062 }
1179 1063
1180 try_then_request_module(find_revision(rev.name, rev.revision, 1064 try_then_request_module(xt_find_revision(NF_ARP, rev.name,
1181 target_revfn, &ret), 1065 rev.revision, 1, &ret),
1182 "arpt_%s", rev.name); 1066 "arpt_%s", rev.name);
1183 break; 1067 break;
1184 } 1068 }
@@ -1191,101 +1075,57 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
1191 return ret; 1075 return ret;
1192} 1076}
1193 1077
1194/* Registration hooks for targets. */
1195int arpt_register_target(struct arpt_target *target)
1196{
1197 int ret;
1198
1199 ret = down_interruptible(&arpt_mutex);
1200 if (ret != 0)
1201 return ret;
1202
1203 list_add(&target->list, &arpt_target);
1204 up(&arpt_mutex);
1205
1206 return ret;
1207}
1208
1209void arpt_unregister_target(struct arpt_target *target)
1210{
1211 down(&arpt_mutex);
1212 LIST_DELETE(&arpt_target, target);
1213 up(&arpt_mutex);
1214}
1215
1216int arpt_register_table(struct arpt_table *table, 1078int arpt_register_table(struct arpt_table *table,
1217 const struct arpt_replace *repl) 1079 const struct arpt_replace *repl)
1218{ 1080{
1219 int ret; 1081 int ret;
1220 struct arpt_table_info *newinfo; 1082 struct xt_table_info *newinfo;
1221 static struct arpt_table_info bootstrap 1083 static struct xt_table_info bootstrap
1222 = { 0, 0, 0, { 0 }, { 0 }, { } }; 1084 = { 0, 0, 0, { 0 }, { 0 }, { } };
1085 void *loc_cpu_entry;
1223 1086
1224 newinfo = vmalloc(sizeof(struct arpt_table_info) 1087 newinfo = xt_alloc_table_info(repl->size);
1225 + SMP_ALIGN(repl->size) *
1226 (highest_possible_processor_id()+1));
1227 if (!newinfo) { 1088 if (!newinfo) {
1228 ret = -ENOMEM; 1089 ret = -ENOMEM;
1229 return ret; 1090 return ret;
1230 } 1091 }
1231 memcpy(newinfo->entries, repl->entries, repl->size); 1092
1093 /* choose the copy on our node/cpu */
1094 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1095 memcpy(loc_cpu_entry, repl->entries, repl->size);
1232 1096
1233 ret = translate_table(table->name, table->valid_hooks, 1097 ret = translate_table(table->name, table->valid_hooks,
1234 newinfo, repl->size, 1098 newinfo, loc_cpu_entry, repl->size,
1235 repl->num_entries, 1099 repl->num_entries,
1236 repl->hook_entry, 1100 repl->hook_entry,
1237 repl->underflow); 1101 repl->underflow);
1102
1238 duprintf("arpt_register_table: translate table gives %d\n", ret); 1103 duprintf("arpt_register_table: translate table gives %d\n", ret);
1239 if (ret != 0) { 1104 if (ret != 0) {
1240 vfree(newinfo); 1105 xt_free_table_info(newinfo);
1241 return ret; 1106 return ret;
1242 } 1107 }
1243 1108
1244 ret = down_interruptible(&arpt_mutex); 1109 if (xt_register_table(table, &bootstrap, newinfo) != 0) {
1245 if (ret != 0) { 1110 xt_free_table_info(newinfo);
1246 vfree(newinfo);
1247 return ret; 1111 return ret;
1248 } 1112 }
1249 1113
1250 /* Don't autoload: we'd eat our tail... */ 1114 return 0;
1251 if (list_named_find(&arpt_tables, table->name)) {
1252 ret = -EEXIST;
1253 goto free_unlock;
1254 }
1255
1256 /* Simplifies replace_table code. */
1257 table->private = &bootstrap;
1258 if (!replace_table(table, 0, newinfo, &ret))
1259 goto free_unlock;
1260
1261 duprintf("table->private->number = %u\n",
1262 table->private->number);
1263
1264 /* save number of initial entries */
1265 table->private->initial_entries = table->private->number;
1266
1267 rwlock_init(&table->lock);
1268 list_prepend(&arpt_tables, table);
1269
1270 unlock:
1271 up(&arpt_mutex);
1272 return ret;
1273
1274 free_unlock:
1275 vfree(newinfo);
1276 goto unlock;
1277} 1115}
1278 1116
1279void arpt_unregister_table(struct arpt_table *table) 1117void arpt_unregister_table(struct arpt_table *table)
1280{ 1118{
1281 down(&arpt_mutex); 1119 struct xt_table_info *private;
1282 LIST_DELETE(&arpt_tables, table); 1120 void *loc_cpu_entry;
1283 up(&arpt_mutex); 1121
1122 private = xt_unregister_table(table);
1284 1123
1285 /* Decrease module usage counts and free resources */ 1124 /* Decrease module usage counts and free resources */
1286 ARPT_ENTRY_ITERATE(table->private->entries, table->private->size, 1125 loc_cpu_entry = private->entries[raw_smp_processor_id()];
1126 ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size,
1287 cleanup_entry, NULL); 1127 cleanup_entry, NULL);
1288 vfree(table->private); 1128 xt_free_table_info(private);
1289} 1129}
1290 1130
1291/* The built-in targets: standard (NULL) and error. */ 1131/* The built-in targets: standard (NULL) and error. */
@@ -1308,52 +1148,15 @@ static struct nf_sockopt_ops arpt_sockopts = {
1308 .get = do_arpt_get_ctl, 1148 .get = do_arpt_get_ctl,
1309}; 1149};
1310 1150
1311#ifdef CONFIG_PROC_FS
1312static inline int print_name(const struct arpt_table *t,
1313 off_t start_offset, char *buffer, int length,
1314 off_t *pos, unsigned int *count)
1315{
1316 if ((*count)++ >= start_offset) {
1317 unsigned int namelen;
1318
1319 namelen = sprintf(buffer + *pos, "%s\n", t->name);
1320 if (*pos + namelen > length) {
1321 /* Stop iterating */
1322 return 1;
1323 }
1324 *pos += namelen;
1325 }
1326 return 0;
1327}
1328
1329static int arpt_get_tables(char *buffer, char **start, off_t offset, int length)
1330{
1331 off_t pos = 0;
1332 unsigned int count = 0;
1333
1334 if (down_interruptible(&arpt_mutex) != 0)
1335 return 0;
1336
1337 LIST_FIND(&arpt_tables, print_name, struct arpt_table *,
1338 offset, buffer, length, &pos, &count);
1339
1340 up(&arpt_mutex);
1341
1342 /* `start' hack - see fs/proc/generic.c line ~105 */
1343 *start=(char *)((unsigned long)count-offset);
1344 return pos;
1345}
1346#endif /*CONFIG_PROC_FS*/
1347
1348static int __init init(void) 1151static int __init init(void)
1349{ 1152{
1350 int ret; 1153 int ret;
1351 1154
1155 xt_proto_init(NF_ARP);
1156
1352 /* Noone else will be downing sem now, so we won't sleep */ 1157 /* Noone else will be downing sem now, so we won't sleep */
1353 down(&arpt_mutex); 1158 xt_register_target(NF_ARP, &arpt_standard_target);
1354 list_append(&arpt_target, &arpt_standard_target); 1159 xt_register_target(NF_ARP, &arpt_error_target);
1355 list_append(&arpt_target, &arpt_error_target);
1356 up(&arpt_mutex);
1357 1160
1358 /* Register setsockopt */ 1161 /* Register setsockopt */
1359 ret = nf_register_sockopt(&arpt_sockopts); 1162 ret = nf_register_sockopt(&arpt_sockopts);
@@ -1362,19 +1165,6 @@ static int __init init(void)
1362 return ret; 1165 return ret;
1363 } 1166 }
1364 1167
1365#ifdef CONFIG_PROC_FS
1366 {
1367 struct proc_dir_entry *proc;
1368
1369 proc = proc_net_create("arp_tables_names", 0, arpt_get_tables);
1370 if (!proc) {
1371 nf_unregister_sockopt(&arpt_sockopts);
1372 return -ENOMEM;
1373 }
1374 proc->owner = THIS_MODULE;
1375 }
1376#endif
1377
1378 printk("arp_tables: (C) 2002 David S. Miller\n"); 1168 printk("arp_tables: (C) 2002 David S. Miller\n");
1379 return 0; 1169 return 0;
1380} 1170}
@@ -1382,16 +1172,12 @@ static int __init init(void)
1382static void __exit fini(void) 1172static void __exit fini(void)
1383{ 1173{
1384 nf_unregister_sockopt(&arpt_sockopts); 1174 nf_unregister_sockopt(&arpt_sockopts);
1385#ifdef CONFIG_PROC_FS 1175 xt_proto_fini(NF_ARP);
1386 proc_net_remove("arp_tables_names");
1387#endif
1388} 1176}
1389 1177
1390EXPORT_SYMBOL(arpt_register_table); 1178EXPORT_SYMBOL(arpt_register_table);
1391EXPORT_SYMBOL(arpt_unregister_table); 1179EXPORT_SYMBOL(arpt_unregister_table);
1392EXPORT_SYMBOL(arpt_do_table); 1180EXPORT_SYMBOL(arpt_do_table);
1393EXPORT_SYMBOL(arpt_register_target);
1394EXPORT_SYMBOL(arpt_unregister_target);
1395 1181
1396module_init(init); 1182module_init(init);
1397module_exit(fini); 1183module_exit(fini);
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index 3e592ec86482..c97650a16a5b 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -8,8 +8,9 @@ MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
8MODULE_DESCRIPTION("arptables arp payload mangle target"); 8MODULE_DESCRIPTION("arptables arp payload mangle target");
9 9
10static unsigned int 10static unsigned int
11target(struct sk_buff **pskb, unsigned int hooknum, const struct net_device *in, 11target(struct sk_buff **pskb, const struct net_device *in,
12 const struct net_device *out, const void *targinfo, void *userinfo) 12 const struct net_device *out, unsigned int hooknum, const void *targinfo,
13 void *userinfo)
13{ 14{
14 const struct arpt_mangle *mangle = targinfo; 15 const struct arpt_mangle *mangle = targinfo;
15 struct arphdr *arp; 16 struct arphdr *arp;
@@ -64,7 +65,7 @@ target(struct sk_buff **pskb, unsigned int hooknum, const struct net_device *in,
64} 65}
65 66
66static int 67static int
67checkentry(const char *tablename, const struct arpt_entry *e, void *targinfo, 68checkentry(const char *tablename, const void *e, void *targinfo,
68 unsigned int targinfosize, unsigned int hook_mask) 69 unsigned int targinfosize, unsigned int hook_mask)
69{ 70{
70 const struct arpt_mangle *mangle = targinfo; 71 const struct arpt_mangle *mangle = targinfo;
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 0d759f5a4ef0..f6ab45f48681 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -145,6 +145,7 @@ static struct arpt_table packet_filter = {
145 .lock = RW_LOCK_UNLOCKED, 145 .lock = RW_LOCK_UNLOCKED,
146 .private = NULL, 146 .private = NULL,
147 .me = THIS_MODULE, 147 .me = THIS_MODULE,
148 .af = NF_ARP,
148}; 149};
149 150
150/* The work comes in here from netfilter.c */ 151/* The work comes in here from netfilter.c */
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
index e52847fa10f5..84e4f79b7ffa 100644
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ b/net/ipv4/netfilter/ip_conntrack_amanda.c
@@ -18,11 +18,13 @@
18 * 18 *
19 */ 19 */
20 20
21#include <linux/in.h>
21#include <linux/kernel.h> 22#include <linux/kernel.h>
22#include <linux/module.h> 23#include <linux/module.h>
23#include <linux/netfilter.h> 24#include <linux/netfilter.h>
24#include <linux/ip.h> 25#include <linux/ip.h>
25#include <linux/moduleparam.h> 26#include <linux/moduleparam.h>
27#include <linux/udp.h>
26#include <net/checksum.h> 28#include <net/checksum.h>
27#include <net/udp.h> 29#include <net/udp.h>
28 30
@@ -34,7 +36,7 @@ static unsigned int master_timeout = 300;
34MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>"); 36MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
35MODULE_DESCRIPTION("Amanda connection tracking module"); 37MODULE_DESCRIPTION("Amanda connection tracking module");
36MODULE_LICENSE("GPL"); 38MODULE_LICENSE("GPL");
37module_param(master_timeout, int, 0600); 39module_param(master_timeout, uint, 0600);
38MODULE_PARM_DESC(master_timeout, "timeout for the master connection"); 40MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
39 41
40static const char *conns[] = { "DATA ", "MESG ", "INDEX " }; 42static const char *conns[] = { "DATA ", "MESG ", "INDEX " };
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
index 68b173bcda60..e627e5856172 100644
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_ftp.c
@@ -34,7 +34,7 @@ static int ports_c;
34module_param_array(ports, ushort, &ports_c, 0400); 34module_param_array(ports, ushort, &ports_c, 0400);
35 35
36static int loose; 36static int loose;
37module_param(loose, int, 0600); 37module_param(loose, bool, 0600);
38 38
39unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb, 39unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
40 enum ip_conntrack_info ctinfo, 40 enum ip_conntrack_info ctinfo,
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index 4108a5e12b3c..d716bba798f2 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -762,7 +762,7 @@ static struct ip_conntrack_helper pptp = {
762 .help = conntrack_pptp_help 762 .help = conntrack_pptp_help
763}; 763};
764 764
765extern void __exit ip_ct_proto_gre_fini(void); 765extern void ip_ct_proto_gre_fini(void);
766extern int __init ip_ct_proto_gre_init(void); 766extern int __init ip_ct_proto_gre_init(void);
767 767
768/* ip_conntrack_pptp initialization */ 768/* ip_conntrack_pptp initialization */
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c
index d7c40421d0d1..c51a2cf71b4b 100644
--- a/net/ipv4/netfilter/ip_conntrack_irc.c
+++ b/net/ipv4/netfilter/ip_conntrack_irc.c
@@ -36,7 +36,7 @@
36#define MAX_PORTS 8 36#define MAX_PORTS 8
37static unsigned short ports[MAX_PORTS]; 37static unsigned short ports[MAX_PORTS];
38static int ports_c; 38static int ports_c;
39static int max_dcc_channels = 8; 39static unsigned int max_dcc_channels = 8;
40static unsigned int dcc_timeout = 300; 40static unsigned int dcc_timeout = 300;
41/* This is slow, but it's simple. --RR */ 41/* This is slow, but it's simple. --RR */
42static char *irc_buffer; 42static char *irc_buffer;
@@ -54,9 +54,9 @@ MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
54MODULE_LICENSE("GPL"); 54MODULE_LICENSE("GPL");
55module_param_array(ports, ushort, &ports_c, 0400); 55module_param_array(ports, ushort, &ports_c, 0400);
56MODULE_PARM_DESC(ports, "port numbers of IRC servers"); 56MODULE_PARM_DESC(ports, "port numbers of IRC servers");
57module_param(max_dcc_channels, int, 0400); 57module_param(max_dcc_channels, uint, 0400);
58MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session"); 58MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session");
59module_param(dcc_timeout, int, 0400); 59module_param(dcc_timeout, uint, 0400);
60MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels"); 60MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels");
61 61
62static const char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " }; 62static const char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " };
@@ -254,10 +254,6 @@ static int __init init(void)
254 printk("ip_conntrack_irc: max_dcc_channels must be a positive integer\n"); 254 printk("ip_conntrack_irc: max_dcc_channels must be a positive integer\n");
255 return -EBUSY; 255 return -EBUSY;
256 } 256 }
257 if (dcc_timeout < 0) {
258 printk("ip_conntrack_irc: dcc_timeout must be a positive integer\n");
259 return -EBUSY;
260 }
261 257
262 irc_buffer = kmalloc(65536, GFP_KERNEL); 258 irc_buffer = kmalloc(65536, GFP_KERNEL);
263 if (!irc_buffer) 259 if (!irc_buffer)
diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
index 186646eb249f..4e68e16a2612 100644
--- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
+++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
@@ -37,7 +37,7 @@ MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper");
37MODULE_LICENSE("GPL"); 37MODULE_LICENSE("GPL");
38 38
39static unsigned int timeout = 3; 39static unsigned int timeout = 3;
40module_param(timeout, int, 0600); 40module_param(timeout, uint, 0400);
41MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); 41MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
42 42
43static int help(struct sk_buff **pskb, 43static int help(struct sk_buff **pskb,
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 91fe8f2e38ff..c9ebbe0d2d9c 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -79,6 +79,7 @@ ctnetlink_dump_tuples(struct sk_buff *skb,
79 const struct ip_conntrack_tuple *tuple) 79 const struct ip_conntrack_tuple *tuple)
80{ 80{
81 struct nfattr *nest_parms; 81 struct nfattr *nest_parms;
82 int ret;
82 83
83 nest_parms = NFA_NEST(skb, CTA_TUPLE_IP); 84 nest_parms = NFA_NEST(skb, CTA_TUPLE_IP);
84 NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), &tuple->src.ip); 85 NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), &tuple->src.ip);
@@ -86,10 +87,10 @@ ctnetlink_dump_tuples(struct sk_buff *skb,
86 NFA_NEST_END(skb, nest_parms); 87 NFA_NEST_END(skb, nest_parms);
87 88
88 nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO); 89 nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO);
89 ctnetlink_dump_tuples_proto(skb, tuple); 90 ret = ctnetlink_dump_tuples_proto(skb, tuple);
90 NFA_NEST_END(skb, nest_parms); 91 NFA_NEST_END(skb, nest_parms);
91 92
92 return 0; 93 return ret;
93 94
94nfattr_failure: 95nfattr_failure:
95 return -1; 96 return -1;
@@ -160,7 +161,7 @@ ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
160 return 0; 161 return 0;
161 162
162 nest_helper = NFA_NEST(skb, CTA_HELP); 163 nest_helper = NFA_NEST(skb, CTA_HELP);
163 NFA_PUT(skb, CTA_HELP_NAME, CTA_HELP_MAXNAMESIZE, &ct->helper->name); 164 NFA_PUT(skb, CTA_HELP_NAME, strlen(ct->helper->name), ct->helper->name);
164 165
165 if (ct->helper->to_nfattr) 166 if (ct->helper->to_nfattr)
166 ct->helper->to_nfattr(skb, ct); 167 ct->helper->to_nfattr(skb, ct);
@@ -229,7 +230,7 @@ nfattr_failure:
229static inline int 230static inline int
230ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct) 231ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct)
231{ 232{
232 unsigned int use = htonl(atomic_read(&ct->ct_general.use)); 233 u_int32_t use = htonl(atomic_read(&ct->ct_general.use));
233 234
234 NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use); 235 NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use);
235 return 0; 236 return 0;
@@ -311,29 +312,22 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
311 if (events & IPCT_DESTROY) { 312 if (events & IPCT_DESTROY) {
312 type = IPCTNL_MSG_CT_DELETE; 313 type = IPCTNL_MSG_CT_DELETE;
313 group = NFNLGRP_CONNTRACK_DESTROY; 314 group = NFNLGRP_CONNTRACK_DESTROY;
314 goto alloc_skb; 315 } else if (events & (IPCT_NEW | IPCT_RELATED)) {
315 }
316 if (events & (IPCT_NEW | IPCT_RELATED)) {
317 type = IPCTNL_MSG_CT_NEW; 316 type = IPCTNL_MSG_CT_NEW;
318 flags = NLM_F_CREATE|NLM_F_EXCL; 317 flags = NLM_F_CREATE|NLM_F_EXCL;
319 /* dump everything */ 318 /* dump everything */
320 events = ~0UL; 319 events = ~0UL;
321 group = NFNLGRP_CONNTRACK_NEW; 320 group = NFNLGRP_CONNTRACK_NEW;
322 goto alloc_skb; 321 } else if (events & (IPCT_STATUS |
323 }
324 if (events & (IPCT_STATUS |
325 IPCT_PROTOINFO | 322 IPCT_PROTOINFO |
326 IPCT_HELPER | 323 IPCT_HELPER |
327 IPCT_HELPINFO | 324 IPCT_HELPINFO |
328 IPCT_NATINFO)) { 325 IPCT_NATINFO)) {
329 type = IPCTNL_MSG_CT_NEW; 326 type = IPCTNL_MSG_CT_NEW;
330 group = NFNLGRP_CONNTRACK_UPDATE; 327 group = NFNLGRP_CONNTRACK_UPDATE;
331 goto alloc_skb; 328 } else
332 } 329 return NOTIFY_DONE;
333 330
334 return NOTIFY_DONE;
335
336alloc_skb:
337 /* FIXME: Check if there are any listeners before, don't hurt performance */ 331 /* FIXME: Check if there are any listeners before, don't hurt performance */
338 332
339 skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); 333 skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
@@ -1037,6 +1031,11 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
1037 return err; 1031 return err;
1038 } 1032 }
1039 1033
1034#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
1035 if (cda[CTA_MARK-1])
1036 ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
1037#endif
1038
1040 ct->helper = ip_conntrack_helper_find_get(rtuple); 1039 ct->helper = ip_conntrack_helper_find_get(rtuple);
1041 1040
1042 add_timer(&ct->timeout); 1041 add_timer(&ct->timeout);
@@ -1045,11 +1044,6 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
1045 if (ct->helper) 1044 if (ct->helper)
1046 ip_conntrack_helper_put(ct->helper); 1045 ip_conntrack_helper_put(ct->helper);
1047 1046
1048#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
1049 if (cda[CTA_MARK-1])
1050 ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
1051#endif
1052
1053 DEBUGP("conntrack with id %u inserted\n", ct->id); 1047 DEBUGP("conntrack with id %u inserted\n", ct->id);
1054 return 0; 1048 return 0;
1055 1049
@@ -1209,7 +1203,6 @@ static int ctnetlink_expect_event(struct notifier_block *this,
1209 unsigned int type; 1203 unsigned int type;
1210 unsigned char *b; 1204 unsigned char *b;
1211 int flags = 0; 1205 int flags = 0;
1212 u16 proto;
1213 1206
1214 if (events & IPEXP_NEW) { 1207 if (events & IPEXP_NEW) {
1215 type = IPCTNL_MSG_EXP_NEW; 1208 type = IPCTNL_MSG_EXP_NEW;
@@ -1236,7 +1229,6 @@ static int ctnetlink_expect_event(struct notifier_block *this,
1236 goto nfattr_failure; 1229 goto nfattr_failure;
1237 1230
1238 nlh->nlmsg_len = skb->tail - b; 1231 nlh->nlmsg_len = skb->tail - b;
1239 proto = exp->tuple.dst.protonum;
1240 nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0); 1232 nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0);
1241 return NOTIFY_DONE; 1233 return NOTIFY_DONE;
1242 1234
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
index 88c3712bd251..f891308b5e4c 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
@@ -12,7 +12,7 @@
12#include <linux/netfilter.h> 12#include <linux/netfilter.h>
13#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> 13#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
14 14
15unsigned long ip_ct_generic_timeout = 600*HZ; 15unsigned int ip_ct_generic_timeout = 600*HZ;
16 16
17static int generic_pkt_to_tuple(const struct sk_buff *skb, 17static int generic_pkt_to_tuple(const struct sk_buff *skb,
18 unsigned int dataoff, 18 unsigned int dataoff,
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
index 744abb9d377a..56794797d55b 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
@@ -31,6 +31,8 @@
31#include <linux/ip.h> 31#include <linux/ip.h>
32#include <linux/in.h> 32#include <linux/in.h>
33#include <linux/list.h> 33#include <linux/list.h>
34#include <linux/seq_file.h>
35#include <linux/interrupt.h>
34 36
35static DEFINE_RWLOCK(ip_ct_gre_lock); 37static DEFINE_RWLOCK(ip_ct_gre_lock);
36#define ASSERT_READ_LOCK(x) 38#define ASSERT_READ_LOCK(x)
@@ -308,7 +310,10 @@ int __init ip_ct_proto_gre_init(void)
308 return ip_conntrack_protocol_register(&gre); 310 return ip_conntrack_protocol_register(&gre);
309} 311}
310 312
311void __exit ip_ct_proto_gre_fini(void) 313/* This cannot be __exit, as it is invoked from ip_conntrack_helper_pptp.c's
314 * init() code on errors.
315 */
316void ip_ct_proto_gre_fini(void)
312{ 317{
313 struct list_head *pos, *n; 318 struct list_head *pos, *n;
314 319
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 5f9925db608e..3021af0910f1 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -16,13 +16,12 @@
16#include <linux/skbuff.h> 16#include <linux/skbuff.h>
17#include <net/ip.h> 17#include <net/ip.h>
18#include <net/checksum.h> 18#include <net/checksum.h>
19#include <linux/netfilter.h>
20#include <linux/netfilter_ipv4.h> 19#include <linux/netfilter_ipv4.h>
21#include <linux/netfilter_ipv4/ip_conntrack.h> 20#include <linux/netfilter_ipv4/ip_conntrack.h>
22#include <linux/netfilter_ipv4/ip_conntrack_core.h> 21#include <linux/netfilter_ipv4/ip_conntrack_core.h>
23#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> 22#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
24 23
25unsigned long ip_ct_icmp_timeout = 30*HZ; 24unsigned int ip_ct_icmp_timeout = 30*HZ;
26 25
27#if 0 26#if 0
28#define DEBUGP printk 27#define DEBUGP printk
@@ -47,20 +46,21 @@ static int icmp_pkt_to_tuple(const struct sk_buff *skb,
47 return 1; 46 return 1;
48} 47}
49 48
49/* Add 1; spaces filled with 0. */
50static const u_int8_t invmap[] = {
51 [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
52 [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
53 [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
54 [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
55 [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
56 [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
57 [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
58 [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
59};
60
50static int icmp_invert_tuple(struct ip_conntrack_tuple *tuple, 61static int icmp_invert_tuple(struct ip_conntrack_tuple *tuple,
51 const struct ip_conntrack_tuple *orig) 62 const struct ip_conntrack_tuple *orig)
52{ 63{
53 /* Add 1; spaces filled with 0. */
54 static const u_int8_t invmap[]
55 = { [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
56 [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
57 [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
58 [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
59 [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
60 [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
61 [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
62 [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1};
63
64 if (orig->dst.u.icmp.type >= sizeof(invmap) 64 if (orig->dst.u.icmp.type >= sizeof(invmap)
65 || !invmap[orig->dst.u.icmp.type]) 65 || !invmap[orig->dst.u.icmp.type])
66 return 0; 66 return 0;
@@ -110,17 +110,17 @@ static int icmp_packet(struct ip_conntrack *ct,
110 return NF_ACCEPT; 110 return NF_ACCEPT;
111} 111}
112 112
113static const u_int8_t valid_new[] = {
114 [ICMP_ECHO] = 1,
115 [ICMP_TIMESTAMP] = 1,
116 [ICMP_INFO_REQUEST] = 1,
117 [ICMP_ADDRESS] = 1
118};
119
120/* Called when a new connection for this protocol found. */ 113/* Called when a new connection for this protocol found. */
121static int icmp_new(struct ip_conntrack *conntrack, 114static int icmp_new(struct ip_conntrack *conntrack,
122 const struct sk_buff *skb) 115 const struct sk_buff *skb)
123{ 116{
117 static const u_int8_t valid_new[] = {
118 [ICMP_ECHO] = 1,
119 [ICMP_TIMESTAMP] = 1,
120 [ICMP_INFO_REQUEST] = 1,
121 [ICMP_ADDRESS] = 1
122 };
123
124 if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) 124 if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
125 || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { 125 || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
126 /* Can't create a new ICMP `conn' with this. */ 126 /* Can't create a new ICMP `conn' with this. */
@@ -279,10 +279,6 @@ static int icmp_tuple_to_nfattr(struct sk_buff *skb,
279 NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t), 279 NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t),
280 &t->dst.u.icmp.code); 280 &t->dst.u.icmp.code);
281 281
282 if (t->dst.u.icmp.type >= sizeof(valid_new)
283 || !valid_new[t->dst.u.icmp.type])
284 return -EINVAL;
285
286 return 0; 282 return 0;
287 283
288nfattr_failure: 284nfattr_failure:
@@ -295,7 +291,7 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[],
295 if (!tb[CTA_PROTO_ICMP_TYPE-1] 291 if (!tb[CTA_PROTO_ICMP_TYPE-1]
296 || !tb[CTA_PROTO_ICMP_CODE-1] 292 || !tb[CTA_PROTO_ICMP_CODE-1]
297 || !tb[CTA_PROTO_ICMP_ID-1]) 293 || !tb[CTA_PROTO_ICMP_ID-1])
298 return -1; 294 return -EINVAL;
299 295
300 tuple->dst.u.icmp.type = 296 tuple->dst.u.icmp.type =
301 *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]); 297 *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]);
@@ -304,6 +300,10 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[],
304 tuple->src.u.icmp.id = 300 tuple->src.u.icmp.id =
305 *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]); 301 *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
306 302
303 if (tuple->dst.u.icmp.type >= sizeof(invmap)
304 || !invmap[tuple->dst.u.icmp.type])
305 return -EINVAL;
306
307 return 0; 307 return 0;
308} 308}
309#endif 309#endif
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index 977fb59d4563..be602e8aeab0 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -16,6 +16,7 @@
16#include <linux/types.h> 16#include <linux/types.h>
17#include <linux/sched.h> 17#include <linux/sched.h>
18#include <linux/timer.h> 18#include <linux/timer.h>
19#include <linux/interrupt.h>
19#include <linux/netfilter.h> 20#include <linux/netfilter.h>
20#include <linux/module.h> 21#include <linux/module.h>
21#include <linux/in.h> 22#include <linux/in.h>
@@ -57,15 +58,15 @@ static const char *sctp_conntrack_names[] = {
57#define HOURS * 60 MINS 58#define HOURS * 60 MINS
58#define DAYS * 24 HOURS 59#define DAYS * 24 HOURS
59 60
60static unsigned long ip_ct_sctp_timeout_closed = 10 SECS; 61static unsigned int ip_ct_sctp_timeout_closed = 10 SECS;
61static unsigned long ip_ct_sctp_timeout_cookie_wait = 3 SECS; 62static unsigned int ip_ct_sctp_timeout_cookie_wait = 3 SECS;
62static unsigned long ip_ct_sctp_timeout_cookie_echoed = 3 SECS; 63static unsigned int ip_ct_sctp_timeout_cookie_echoed = 3 SECS;
63static unsigned long ip_ct_sctp_timeout_established = 5 DAYS; 64static unsigned int ip_ct_sctp_timeout_established = 5 DAYS;
64static unsigned long ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000; 65static unsigned int ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000;
65static unsigned long ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000; 66static unsigned int ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000;
66static unsigned long ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS; 67static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS;
67 68
68static const unsigned long * sctp_timeouts[] 69static const unsigned int * sctp_timeouts[]
69= { NULL, /* SCTP_CONNTRACK_NONE */ 70= { NULL, /* SCTP_CONNTRACK_NONE */
70 &ip_ct_sctp_timeout_closed, /* SCTP_CONNTRACK_CLOSED */ 71 &ip_ct_sctp_timeout_closed, /* SCTP_CONNTRACK_CLOSED */
71 &ip_ct_sctp_timeout_cookie_wait, /* SCTP_CONNTRACK_COOKIE_WAIT */ 72 &ip_ct_sctp_timeout_cookie_wait, /* SCTP_CONNTRACK_COOKIE_WAIT */
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index e7fa29e576dc..e0dc37063545 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -32,7 +32,6 @@
32 32
33#include <net/tcp.h> 33#include <net/tcp.h>
34 34
35#include <linux/netfilter.h>
36#include <linux/netfilter_ipv4.h> 35#include <linux/netfilter_ipv4.h>
37#include <linux/netfilter_ipv4/ip_conntrack.h> 36#include <linux/netfilter_ipv4/ip_conntrack.h>
38#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> 37#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
@@ -85,21 +84,21 @@ static const char *tcp_conntrack_names[] = {
85#define HOURS * 60 MINS 84#define HOURS * 60 MINS
86#define DAYS * 24 HOURS 85#define DAYS * 24 HOURS
87 86
88unsigned long ip_ct_tcp_timeout_syn_sent = 2 MINS; 87unsigned int ip_ct_tcp_timeout_syn_sent = 2 MINS;
89unsigned long ip_ct_tcp_timeout_syn_recv = 60 SECS; 88unsigned int ip_ct_tcp_timeout_syn_recv = 60 SECS;
90unsigned long ip_ct_tcp_timeout_established = 5 DAYS; 89unsigned int ip_ct_tcp_timeout_established = 5 DAYS;
91unsigned long ip_ct_tcp_timeout_fin_wait = 2 MINS; 90unsigned int ip_ct_tcp_timeout_fin_wait = 2 MINS;
92unsigned long ip_ct_tcp_timeout_close_wait = 60 SECS; 91unsigned int ip_ct_tcp_timeout_close_wait = 60 SECS;
93unsigned long ip_ct_tcp_timeout_last_ack = 30 SECS; 92unsigned int ip_ct_tcp_timeout_last_ack = 30 SECS;
94unsigned long ip_ct_tcp_timeout_time_wait = 2 MINS; 93unsigned int ip_ct_tcp_timeout_time_wait = 2 MINS;
95unsigned long ip_ct_tcp_timeout_close = 10 SECS; 94unsigned int ip_ct_tcp_timeout_close = 10 SECS;
96 95
97/* RFC1122 says the R2 limit should be at least 100 seconds. 96/* RFC1122 says the R2 limit should be at least 100 seconds.
98 Linux uses 15 packets as limit, which corresponds 97 Linux uses 15 packets as limit, which corresponds
99 to ~13-30min depending on RTO. */ 98 to ~13-30min depending on RTO. */
100unsigned long ip_ct_tcp_timeout_max_retrans = 5 MINS; 99unsigned int ip_ct_tcp_timeout_max_retrans = 5 MINS;
101 100
102static const unsigned long * tcp_timeouts[] 101static const unsigned int * tcp_timeouts[]
103= { NULL, /* TCP_CONNTRACK_NONE */ 102= { NULL, /* TCP_CONNTRACK_NONE */
104 &ip_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */ 103 &ip_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */
105 &ip_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */ 104 &ip_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */
@@ -995,7 +994,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
995 || (!test_bit(IPS_ASSURED_BIT, &conntrack->status) 994 || (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
996 && conntrack->proto.tcp.last_index == TCP_ACK_SET)) 995 && conntrack->proto.tcp.last_index == TCP_ACK_SET))
997 && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) { 996 && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
998 /* RST sent to invalid SYN or ACK we had let trough 997 /* RST sent to invalid SYN or ACK we had let through
999 * at a) and c) above: 998 * at a) and c) above:
1000 * 999 *
1001 * a) SYN was in window then 1000 * a) SYN was in window then
@@ -1006,7 +1005,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
1006 * segments we ignored. */ 1005 * segments we ignored. */
1007 goto in_window; 1006 goto in_window;
1008 } 1007 }
1009 /* Just fall trough */ 1008 /* Just fall through */
1010 default: 1009 default:
1011 /* Keep compilers happy. */ 1010 /* Keep compilers happy. */
1012 break; 1011 break;
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
index f2dcac7c7660..55b7d3210adf 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -11,15 +11,15 @@
11#include <linux/timer.h> 11#include <linux/timer.h>
12#include <linux/netfilter.h> 12#include <linux/netfilter.h>
13#include <linux/in.h> 13#include <linux/in.h>
14#include <linux/ip.h>
14#include <linux/udp.h> 15#include <linux/udp.h>
15#include <linux/seq_file.h> 16#include <linux/seq_file.h>
16#include <net/checksum.h> 17#include <net/checksum.h>
17#include <linux/netfilter.h>
18#include <linux/netfilter_ipv4.h> 18#include <linux/netfilter_ipv4.h>
19#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> 19#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
20 20
21unsigned long ip_ct_udp_timeout = 30*HZ; 21unsigned int ip_ct_udp_timeout = 30*HZ;
22unsigned long ip_ct_udp_timeout_stream = 180*HZ; 22unsigned int ip_ct_udp_timeout_stream = 180*HZ;
23 23
24static int udp_pkt_to_tuple(const struct sk_buff *skb, 24static int udp_pkt_to_tuple(const struct sk_buff *skb,
25 unsigned int dataoff, 25 unsigned int dataoff,
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index dd476b191f4b..833fcb4be5e7 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -27,6 +27,7 @@
27#endif 27#endif
28#include <net/checksum.h> 28#include <net/checksum.h>
29#include <net/ip.h> 29#include <net/ip.h>
30#include <net/route.h>
30 31
31#define ASSERT_READ_LOCK(x) 32#define ASSERT_READ_LOCK(x)
32#define ASSERT_WRITE_LOCK(x) 33#define ASSERT_WRITE_LOCK(x)
@@ -450,30 +451,6 @@ static unsigned int ip_conntrack_defrag(unsigned int hooknum,
450 return NF_ACCEPT; 451 return NF_ACCEPT;
451} 452}
452 453
453static unsigned int ip_refrag(unsigned int hooknum,
454 struct sk_buff **pskb,
455 const struct net_device *in,
456 const struct net_device *out,
457 int (*okfn)(struct sk_buff *))
458{
459 struct rtable *rt = (struct rtable *)(*pskb)->dst;
460
461 /* We've seen it coming out the other side: confirm */
462 if (ip_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT)
463 return NF_DROP;
464
465 /* Local packets are never produced too large for their
466 interface. We degfragment them at LOCAL_OUT, however,
467 so we have to refragment them here. */
468 if ((*pskb)->len > dst_mtu(&rt->u.dst) &&
469 !skb_shinfo(*pskb)->tso_size) {
470 /* No hook can be after us, so this should be OK. */
471 ip_fragment(*pskb, okfn);
472 return NF_STOLEN;
473 }
474 return NF_ACCEPT;
475}
476
477static unsigned int ip_conntrack_local(unsigned int hooknum, 454static unsigned int ip_conntrack_local(unsigned int hooknum,
478 struct sk_buff **pskb, 455 struct sk_buff **pskb,
479 const struct net_device *in, 456 const struct net_device *in,
@@ -543,7 +520,7 @@ static struct nf_hook_ops ip_conntrack_helper_in_ops = {
543 520
544/* Refragmenter; last chance. */ 521/* Refragmenter; last chance. */
545static struct nf_hook_ops ip_conntrack_out_ops = { 522static struct nf_hook_ops ip_conntrack_out_ops = {
546 .hook = ip_refrag, 523 .hook = ip_confirm,
547 .owner = THIS_MODULE, 524 .owner = THIS_MODULE,
548 .pf = PF_INET, 525 .pf = PF_INET,
549 .hooknum = NF_IP_POST_ROUTING, 526 .hooknum = NF_IP_POST_ROUTING,
@@ -567,28 +544,28 @@ extern int ip_conntrack_max;
567extern unsigned int ip_conntrack_htable_size; 544extern unsigned int ip_conntrack_htable_size;
568 545
569/* From ip_conntrack_proto_tcp.c */ 546/* From ip_conntrack_proto_tcp.c */
570extern unsigned long ip_ct_tcp_timeout_syn_sent; 547extern unsigned int ip_ct_tcp_timeout_syn_sent;
571extern unsigned long ip_ct_tcp_timeout_syn_recv; 548extern unsigned int ip_ct_tcp_timeout_syn_recv;
572extern unsigned long ip_ct_tcp_timeout_established; 549extern unsigned int ip_ct_tcp_timeout_established;
573extern unsigned long ip_ct_tcp_timeout_fin_wait; 550extern unsigned int ip_ct_tcp_timeout_fin_wait;
574extern unsigned long ip_ct_tcp_timeout_close_wait; 551extern unsigned int ip_ct_tcp_timeout_close_wait;
575extern unsigned long ip_ct_tcp_timeout_last_ack; 552extern unsigned int ip_ct_tcp_timeout_last_ack;
576extern unsigned long ip_ct_tcp_timeout_time_wait; 553extern unsigned int ip_ct_tcp_timeout_time_wait;
577extern unsigned long ip_ct_tcp_timeout_close; 554extern unsigned int ip_ct_tcp_timeout_close;
578extern unsigned long ip_ct_tcp_timeout_max_retrans; 555extern unsigned int ip_ct_tcp_timeout_max_retrans;
579extern int ip_ct_tcp_loose; 556extern int ip_ct_tcp_loose;
580extern int ip_ct_tcp_be_liberal; 557extern int ip_ct_tcp_be_liberal;
581extern int ip_ct_tcp_max_retrans; 558extern int ip_ct_tcp_max_retrans;
582 559
583/* From ip_conntrack_proto_udp.c */ 560/* From ip_conntrack_proto_udp.c */
584extern unsigned long ip_ct_udp_timeout; 561extern unsigned int ip_ct_udp_timeout;
585extern unsigned long ip_ct_udp_timeout_stream; 562extern unsigned int ip_ct_udp_timeout_stream;
586 563
587/* From ip_conntrack_proto_icmp.c */ 564/* From ip_conntrack_proto_icmp.c */
588extern unsigned long ip_ct_icmp_timeout; 565extern unsigned int ip_ct_icmp_timeout;
589 566
590/* From ip_conntrack_proto_icmp.c */ 567/* From ip_conntrack_proto_icmp.c */
591extern unsigned long ip_ct_generic_timeout; 568extern unsigned int ip_ct_generic_timeout;
592 569
593/* Log invalid packets of a given protocol */ 570/* Log invalid packets of a given protocol */
594static int log_invalid_proto_min = 0; 571static int log_invalid_proto_min = 0;
@@ -967,7 +944,7 @@ module_exit(fini);
967 944
968/* Some modules need us, but don't depend directly on any symbol. 945/* Some modules need us, but don't depend directly on any symbol.
969 They should call this. */ 946 They should call this. */
970void need_ip_conntrack(void) 947void need_conntrack(void)
971{ 948{
972} 949}
973 950
@@ -985,7 +962,7 @@ EXPORT_SYMBOL(ip_ct_get_tuple);
985EXPORT_SYMBOL(invert_tuplepr); 962EXPORT_SYMBOL(invert_tuplepr);
986EXPORT_SYMBOL(ip_conntrack_alter_reply); 963EXPORT_SYMBOL(ip_conntrack_alter_reply);
987EXPORT_SYMBOL(ip_conntrack_destroyed); 964EXPORT_SYMBOL(ip_conntrack_destroyed);
988EXPORT_SYMBOL(need_ip_conntrack); 965EXPORT_SYMBOL(need_conntrack);
989EXPORT_SYMBOL(ip_conntrack_helper_register); 966EXPORT_SYMBOL(ip_conntrack_helper_register);
990EXPORT_SYMBOL(ip_conntrack_helper_unregister); 967EXPORT_SYMBOL(ip_conntrack_helper_unregister);
991EXPORT_SYMBOL(ip_ct_iterate_cleanup); 968EXPORT_SYMBOL(ip_ct_iterate_cleanup);
diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c
index d83757a70d9f..b8daab3c64af 100644
--- a/net/ipv4/netfilter/ip_nat_ftp.c
+++ b/net/ipv4/netfilter/ip_nat_ftp.c
@@ -171,7 +171,7 @@ static int __init init(void)
171/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ 171/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
172static int warn_set(const char *val, struct kernel_param *kp) 172static int warn_set(const char *val, struct kernel_param *kp)
173{ 173{
174 printk(KERN_INFO __stringify(KBUILD_MODNAME) 174 printk(KERN_INFO KBUILD_MODNAME
175 ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); 175 ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
176 return 0; 176 return 0;
177} 177}
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
index e546203f5662..ac004895781a 100644
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -148,14 +148,14 @@ pptp_outbound_pkt(struct sk_buff **pskb,
148{ 148{
149 struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; 149 struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
150 struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; 150 struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
151 151 u_int16_t msg, new_callid;
152 u_int16_t msg, *cid = NULL, new_callid; 152 unsigned int cid_off;
153 153
154 new_callid = htons(ct_pptp_info->pns_call_id); 154 new_callid = htons(ct_pptp_info->pns_call_id);
155 155
156 switch (msg = ntohs(ctlh->messageType)) { 156 switch (msg = ntohs(ctlh->messageType)) {
157 case PPTP_OUT_CALL_REQUEST: 157 case PPTP_OUT_CALL_REQUEST:
158 cid = &pptpReq->ocreq.callID; 158 cid_off = offsetof(union pptp_ctrl_union, ocreq.callID);
159 /* FIXME: ideally we would want to reserve a call ID 159 /* FIXME: ideally we would want to reserve a call ID
160 * here. current netfilter NAT core is not able to do 160 * here. current netfilter NAT core is not able to do
161 * this :( For now we use TCP source port. This breaks 161 * this :( For now we use TCP source port. This breaks
@@ -172,10 +172,10 @@ pptp_outbound_pkt(struct sk_buff **pskb,
172 ct_pptp_info->pns_call_id = ntohs(new_callid); 172 ct_pptp_info->pns_call_id = ntohs(new_callid);
173 break; 173 break;
174 case PPTP_IN_CALL_REPLY: 174 case PPTP_IN_CALL_REPLY:
175 cid = &pptpReq->icreq.callID; 175 cid_off = offsetof(union pptp_ctrl_union, icreq.callID);
176 break; 176 break;
177 case PPTP_CALL_CLEAR_REQUEST: 177 case PPTP_CALL_CLEAR_REQUEST:
178 cid = &pptpReq->clrreq.callID; 178 cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
179 break; 179 break;
180 default: 180 default:
181 DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, 181 DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
@@ -197,18 +197,15 @@ pptp_outbound_pkt(struct sk_buff **pskb,
197 197
198 /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass 198 /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
199 * down to here */ 199 * down to here */
200
201 IP_NF_ASSERT(cid);
202
203 DEBUGP("altering call id from 0x%04x to 0x%04x\n", 200 DEBUGP("altering call id from 0x%04x to 0x%04x\n",
204 ntohs(*cid), ntohs(new_callid)); 201 ntohs(*(u_int16_t *)pptpReq + cid_off), ntohs(new_callid));
205 202
206 /* mangle packet */ 203 /* mangle packet */
207 if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, 204 if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
208 (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), 205 cid_off + sizeof(struct pptp_pkt_hdr) +
209 sizeof(new_callid), 206 sizeof(struct PptpControlHeader),
210 (char *)&new_callid, 207 sizeof(new_callid), (char *)&new_callid,
211 sizeof(new_callid)) == 0) 208 sizeof(new_callid)) == 0)
212 return NF_DROP; 209 return NF_DROP;
213 210
214 return NF_ACCEPT; 211 return NF_ACCEPT;
@@ -299,31 +296,30 @@ pptp_inbound_pkt(struct sk_buff **pskb,
299 union pptp_ctrl_union *pptpReq) 296 union pptp_ctrl_union *pptpReq)
300{ 297{
301 struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; 298 struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
302 u_int16_t msg, new_cid = 0, new_pcid, *pcid = NULL, *cid = NULL; 299 u_int16_t msg, new_cid = 0, new_pcid;
303 300 unsigned int pcid_off, cid_off = 0;
304 int ret = NF_ACCEPT, rv;
305 301
306 new_pcid = htons(nat_pptp_info->pns_call_id); 302 new_pcid = htons(nat_pptp_info->pns_call_id);
307 303
308 switch (msg = ntohs(ctlh->messageType)) { 304 switch (msg = ntohs(ctlh->messageType)) {
309 case PPTP_OUT_CALL_REPLY: 305 case PPTP_OUT_CALL_REPLY:
310 pcid = &pptpReq->ocack.peersCallID; 306 pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID);
311 cid = &pptpReq->ocack.callID; 307 cid_off = offsetof(union pptp_ctrl_union, ocack.callID);
312 break; 308 break;
313 case PPTP_IN_CALL_CONNECT: 309 case PPTP_IN_CALL_CONNECT:
314 pcid = &pptpReq->iccon.peersCallID; 310 pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID);
315 break; 311 break;
316 case PPTP_IN_CALL_REQUEST: 312 case PPTP_IN_CALL_REQUEST:
317 /* only need to nat in case PAC is behind NAT box */ 313 /* only need to nat in case PAC is behind NAT box */
318 break; 314 return NF_ACCEPT;
319 case PPTP_WAN_ERROR_NOTIFY: 315 case PPTP_WAN_ERROR_NOTIFY:
320 pcid = &pptpReq->wanerr.peersCallID; 316 pcid_off = offsetof(union pptp_ctrl_union, wanerr.peersCallID);
321 break; 317 break;
322 case PPTP_CALL_DISCONNECT_NOTIFY: 318 case PPTP_CALL_DISCONNECT_NOTIFY:
323 pcid = &pptpReq->disc.callID; 319 pcid_off = offsetof(union pptp_ctrl_union, disc.callID);
324 break; 320 break;
325 case PPTP_SET_LINK_INFO: 321 case PPTP_SET_LINK_INFO:
326 pcid = &pptpReq->setlink.peersCallID; 322 pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID);
327 break; 323 break;
328 324
329 default: 325 default:
@@ -345,35 +341,26 @@ pptp_inbound_pkt(struct sk_buff **pskb,
345 * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */ 341 * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */
346 342
347 /* mangle packet */ 343 /* mangle packet */
348 IP_NF_ASSERT(pcid);
349 DEBUGP("altering peer call id from 0x%04x to 0x%04x\n", 344 DEBUGP("altering peer call id from 0x%04x to 0x%04x\n",
350 ntohs(*pcid), ntohs(new_pcid)); 345 ntohs(*(u_int16_t *)pptpReq + pcid_off), ntohs(new_pcid));
351 346
352 rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, 347 if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
353 (void *)pcid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), 348 pcid_off + sizeof(struct pptp_pkt_hdr) +
354 sizeof(new_pcid), (char *)&new_pcid, 349 sizeof(struct PptpControlHeader),
355 sizeof(new_pcid)); 350 sizeof(new_pcid), (char *)&new_pcid,
356 if (rv != NF_ACCEPT) 351 sizeof(new_pcid)) == 0)
357 return rv; 352 return NF_DROP;
358 353
359 if (new_cid) { 354 if (new_cid) {
360 IP_NF_ASSERT(cid);
361 DEBUGP("altering call id from 0x%04x to 0x%04x\n", 355 DEBUGP("altering call id from 0x%04x to 0x%04x\n",
362 ntohs(*cid), ntohs(new_cid)); 356 ntohs(*(u_int16_t *)pptpReq + cid_off), ntohs(new_cid));
363 rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, 357 if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
364 (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), 358 cid_off + sizeof(struct pptp_pkt_hdr) +
365 sizeof(new_cid), 359 sizeof(struct PptpControlHeader),
366 (char *)&new_cid, 360 sizeof(new_cid), (char *)&new_cid,
367 sizeof(new_cid)); 361 sizeof(new_cid)) == 0)
368 if (rv != NF_ACCEPT) 362 return NF_DROP;
369 return rv;
370 } 363 }
371
372 /* check for earlier return value of 'switch' above */
373 if (ret != NF_ACCEPT)
374 return ret;
375
376 /* great, at least we don't need to resize packets */
377 return NF_ACCEPT; 364 return NF_ACCEPT;
378} 365}
379 366
diff --git a/net/ipv4/netfilter/ip_nat_irc.c b/net/ipv4/netfilter/ip_nat_irc.c
index de31942babe3..461c833eaca1 100644
--- a/net/ipv4/netfilter/ip_nat_irc.c
+++ b/net/ipv4/netfilter/ip_nat_irc.c
@@ -113,7 +113,7 @@ static int __init init(void)
113/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ 113/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
114static int warn_set(const char *val, struct kernel_param *kp) 114static int warn_set(const char *val, struct kernel_param *kp)
115{ 115{
116 printk(KERN_INFO __stringify(KBUILD_MODNAME) 116 printk(KERN_INFO KBUILD_MODNAME
117 ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); 117 ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
118 return 0; 118 return 0;
119} 119}
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
index f7cad7cf1aec..6c4899d8046a 100644
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ b/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -151,42 +151,6 @@ gre_manip_pkt(struct sk_buff **pskb,
151 return 1; 151 return 1;
152} 152}
153 153
154/* print out a nat tuple */
155static unsigned int
156gre_print(char *buffer,
157 const struct ip_conntrack_tuple *match,
158 const struct ip_conntrack_tuple *mask)
159{
160 unsigned int len = 0;
161
162 if (mask->src.u.gre.key)
163 len += sprintf(buffer + len, "srckey=0x%x ",
164 ntohl(match->src.u.gre.key));
165
166 if (mask->dst.u.gre.key)
167 len += sprintf(buffer + len, "dstkey=0x%x ",
168 ntohl(match->src.u.gre.key));
169
170 return len;
171}
172
173/* print a range of keys */
174static unsigned int
175gre_print_range(char *buffer, const struct ip_nat_range *range)
176{
177 if (range->min.gre.key != 0
178 || range->max.gre.key != 0xFFFF) {
179 if (range->min.gre.key == range->max.gre.key)
180 return sprintf(buffer, "key 0x%x ",
181 ntohl(range->min.gre.key));
182 else
183 return sprintf(buffer, "keys 0x%u-0x%u ",
184 ntohl(range->min.gre.key),
185 ntohl(range->max.gre.key));
186 } else
187 return 0;
188}
189
190/* nat helper struct */ 154/* nat helper struct */
191static struct ip_nat_protocol gre = { 155static struct ip_nat_protocol gre = {
192 .name = "GRE", 156 .name = "GRE",
@@ -194,8 +158,6 @@ static struct ip_nat_protocol gre = {
194 .manip_pkt = gre_manip_pkt, 158 .manip_pkt = gre_manip_pkt,
195 .in_range = gre_in_range, 159 .in_range = gre_in_range,
196 .unique_tuple = gre_unique_tuple, 160 .unique_tuple = gre_unique_tuple,
197 .print = gre_print,
198 .print_range = gre_print_range,
199#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ 161#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
200 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) 162 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
201 .range_to_nfattr = ip_nat_port_range_to_nfattr, 163 .range_to_nfattr = ip_nat_port_range_to_nfattr,
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c
index 938719043999..31a3f4ccb99c 100644
--- a/net/ipv4/netfilter/ip_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c
@@ -74,38 +74,6 @@ icmp_manip_pkt(struct sk_buff **pskb,
74 return 1; 74 return 1;
75} 75}
76 76
77static unsigned int
78icmp_print(char *buffer,
79 const struct ip_conntrack_tuple *match,
80 const struct ip_conntrack_tuple *mask)
81{
82 unsigned int len = 0;
83
84 if (mask->src.u.icmp.id)
85 len += sprintf(buffer + len, "id=%u ",
86 ntohs(match->src.u.icmp.id));
87
88 if (mask->dst.u.icmp.type)
89 len += sprintf(buffer + len, "type=%u ",
90 ntohs(match->dst.u.icmp.type));
91
92 if (mask->dst.u.icmp.code)
93 len += sprintf(buffer + len, "code=%u ",
94 ntohs(match->dst.u.icmp.code));
95
96 return len;
97}
98
99static unsigned int
100icmp_print_range(char *buffer, const struct ip_nat_range *range)
101{
102 if (range->min.icmp.id != 0 || range->max.icmp.id != 0xFFFF)
103 return sprintf(buffer, "id %u-%u ",
104 ntohs(range->min.icmp.id),
105 ntohs(range->max.icmp.id));
106 else return 0;
107}
108
109struct ip_nat_protocol ip_nat_protocol_icmp = { 77struct ip_nat_protocol ip_nat_protocol_icmp = {
110 .name = "ICMP", 78 .name = "ICMP",
111 .protonum = IPPROTO_ICMP, 79 .protonum = IPPROTO_ICMP,
@@ -113,8 +81,6 @@ struct ip_nat_protocol ip_nat_protocol_icmp = {
113 .manip_pkt = icmp_manip_pkt, 81 .manip_pkt = icmp_manip_pkt,
114 .in_range = icmp_in_range, 82 .in_range = icmp_in_range,
115 .unique_tuple = icmp_unique_tuple, 83 .unique_tuple = icmp_unique_tuple,
116 .print = icmp_print,
117 .print_range = icmp_print_range,
118#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ 84#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
119 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) 85 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
120 .range_to_nfattr = ip_nat_port_range_to_nfattr, 86 .range_to_nfattr = ip_nat_port_range_to_nfattr,
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
index 1d381bf68574..a3d14079eba6 100644
--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c
@@ -136,40 +136,6 @@ tcp_manip_pkt(struct sk_buff **pskb,
136 return 1; 136 return 1;
137} 137}
138 138
139static unsigned int
140tcp_print(char *buffer,
141 const struct ip_conntrack_tuple *match,
142 const struct ip_conntrack_tuple *mask)
143{
144 unsigned int len = 0;
145
146 if (mask->src.u.tcp.port)
147 len += sprintf(buffer + len, "srcpt=%u ",
148 ntohs(match->src.u.tcp.port));
149
150
151 if (mask->dst.u.tcp.port)
152 len += sprintf(buffer + len, "dstpt=%u ",
153 ntohs(match->dst.u.tcp.port));
154
155 return len;
156}
157
158static unsigned int
159tcp_print_range(char *buffer, const struct ip_nat_range *range)
160{
161 if (range->min.tcp.port != 0 || range->max.tcp.port != 0xFFFF) {
162 if (range->min.tcp.port == range->max.tcp.port)
163 return sprintf(buffer, "port %u ",
164 ntohs(range->min.tcp.port));
165 else
166 return sprintf(buffer, "ports %u-%u ",
167 ntohs(range->min.tcp.port),
168 ntohs(range->max.tcp.port));
169 }
170 else return 0;
171}
172
173struct ip_nat_protocol ip_nat_protocol_tcp = { 139struct ip_nat_protocol ip_nat_protocol_tcp = {
174 .name = "TCP", 140 .name = "TCP",
175 .protonum = IPPROTO_TCP, 141 .protonum = IPPROTO_TCP,
@@ -177,8 +143,6 @@ struct ip_nat_protocol ip_nat_protocol_tcp = {
177 .manip_pkt = tcp_manip_pkt, 143 .manip_pkt = tcp_manip_pkt,
178 .in_range = tcp_in_range, 144 .in_range = tcp_in_range,
179 .unique_tuple = tcp_unique_tuple, 145 .unique_tuple = tcp_unique_tuple,
180 .print = tcp_print,
181 .print_range = tcp_print_range,
182#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ 146#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
183 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) 147 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
184 .range_to_nfattr = ip_nat_port_range_to_nfattr, 148 .range_to_nfattr = ip_nat_port_range_to_nfattr,
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c
index c4906e1aa24a..ec6053fdc867 100644
--- a/net/ipv4/netfilter/ip_nat_proto_udp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_udp.c
@@ -122,40 +122,6 @@ udp_manip_pkt(struct sk_buff **pskb,
122 return 1; 122 return 1;
123} 123}
124 124
125static unsigned int
126udp_print(char *buffer,
127 const struct ip_conntrack_tuple *match,
128 const struct ip_conntrack_tuple *mask)
129{
130 unsigned int len = 0;
131
132 if (mask->src.u.udp.port)
133 len += sprintf(buffer + len, "srcpt=%u ",
134 ntohs(match->src.u.udp.port));
135
136
137 if (mask->dst.u.udp.port)
138 len += sprintf(buffer + len, "dstpt=%u ",
139 ntohs(match->dst.u.udp.port));
140
141 return len;
142}
143
144static unsigned int
145udp_print_range(char *buffer, const struct ip_nat_range *range)
146{
147 if (range->min.udp.port != 0 || range->max.udp.port != 0xFFFF) {
148 if (range->min.udp.port == range->max.udp.port)
149 return sprintf(buffer, "port %u ",
150 ntohs(range->min.udp.port));
151 else
152 return sprintf(buffer, "ports %u-%u ",
153 ntohs(range->min.udp.port),
154 ntohs(range->max.udp.port));
155 }
156 else return 0;
157}
158
159struct ip_nat_protocol ip_nat_protocol_udp = { 125struct ip_nat_protocol ip_nat_protocol_udp = {
160 .name = "UDP", 126 .name = "UDP",
161 .protonum = IPPROTO_UDP, 127 .protonum = IPPROTO_UDP,
@@ -163,8 +129,6 @@ struct ip_nat_protocol ip_nat_protocol_udp = {
163 .manip_pkt = udp_manip_pkt, 129 .manip_pkt = udp_manip_pkt,
164 .in_range = udp_in_range, 130 .in_range = udp_in_range,
165 .unique_tuple = udp_unique_tuple, 131 .unique_tuple = udp_unique_tuple,
166 .print = udp_print,
167 .print_range = udp_print_range,
168#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ 132#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
169 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) 133 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
170 .range_to_nfattr = ip_nat_port_range_to_nfattr, 134 .range_to_nfattr = ip_nat_port_range_to_nfattr,
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c
index f0099a646a0b..3bf049517246 100644
--- a/net/ipv4/netfilter/ip_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c
@@ -46,26 +46,10 @@ unknown_manip_pkt(struct sk_buff **pskb,
46 return 1; 46 return 1;
47} 47}
48 48
49static unsigned int
50unknown_print(char *buffer,
51 const struct ip_conntrack_tuple *match,
52 const struct ip_conntrack_tuple *mask)
53{
54 return 0;
55}
56
57static unsigned int
58unknown_print_range(char *buffer, const struct ip_nat_range *range)
59{
60 return 0;
61}
62
63struct ip_nat_protocol ip_nat_unknown_protocol = { 49struct ip_nat_protocol ip_nat_unknown_protocol = {
64 .name = "unknown", 50 .name = "unknown",
65 /* .me isn't set: getting a ref to this cannot fail. */ 51 /* .me isn't set: getting a ref to this cannot fail. */
66 .manip_pkt = unknown_manip_pkt, 52 .manip_pkt = unknown_manip_pkt,
67 .in_range = unknown_in_range, 53 .in_range = unknown_in_range,
68 .unique_tuple = unknown_unique_tuple, 54 .unique_tuple = unknown_unique_tuple,
69 .print = unknown_print,
70 .print_range = unknown_print_range
71}; 55};
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
index cb66b8bddeb3..1de86282d232 100644
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ b/net/ipv4/netfilter/ip_nat_rule.c
@@ -95,6 +95,7 @@ static struct ipt_table nat_table = {
95 .valid_hooks = NAT_VALID_HOOKS, 95 .valid_hooks = NAT_VALID_HOOKS,
96 .lock = RW_LOCK_UNLOCKED, 96 .lock = RW_LOCK_UNLOCKED,
97 .me = THIS_MODULE, 97 .me = THIS_MODULE,
98 .af = AF_INET,
98}; 99};
99 100
100/* Source NAT */ 101/* Source NAT */
@@ -168,7 +169,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb,
168} 169}
169 170
170static int ipt_snat_checkentry(const char *tablename, 171static int ipt_snat_checkentry(const char *tablename,
171 const struct ipt_entry *e, 172 const void *entry,
172 void *targinfo, 173 void *targinfo,
173 unsigned int targinfosize, 174 unsigned int targinfosize,
174 unsigned int hook_mask) 175 unsigned int hook_mask)
@@ -201,7 +202,7 @@ static int ipt_snat_checkentry(const char *tablename,
201} 202}
202 203
203static int ipt_dnat_checkentry(const char *tablename, 204static int ipt_dnat_checkentry(const char *tablename,
204 const struct ipt_entry *e, 205 const void *entry,
205 void *targinfo, 206 void *targinfo,
206 unsigned int targinfosize, 207 unsigned int targinfosize,
207 unsigned int hook_mask) 208 unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c
index 8acb7ed40b47..4f95d477805c 100644
--- a/net/ipv4/netfilter/ip_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c
@@ -44,6 +44,7 @@
44 * 44 *
45 */ 45 */
46#include <linux/config.h> 46#include <linux/config.h>
47#include <linux/in.h>
47#include <linux/module.h> 48#include <linux/module.h>
48#include <linux/types.h> 49#include <linux/types.h>
49#include <linux/kernel.h> 50#include <linux/kernel.h>
@@ -53,6 +54,7 @@
53#include <linux/netfilter_ipv4/ip_conntrack_helper.h> 54#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
54#include <linux/netfilter_ipv4/ip_nat_helper.h> 55#include <linux/netfilter_ipv4/ip_nat_helper.h>
55#include <linux/ip.h> 56#include <linux/ip.h>
57#include <linux/udp.h>
56#include <net/checksum.h> 58#include <net/checksum.h>
57#include <net/udp.h> 59#include <net/udp.h>
58#include <asm/uaccess.h> 60#include <asm/uaccess.h>
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 30cd4e18c129..ad438fb185b8 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -55,6 +55,44 @@
55 : ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" \ 55 : ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" \
56 : "*ERROR*"))) 56 : "*ERROR*")))
57 57
58#ifdef CONFIG_XFRM
59static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
60{
61 struct ip_conntrack *ct;
62 struct ip_conntrack_tuple *t;
63 enum ip_conntrack_info ctinfo;
64 enum ip_conntrack_dir dir;
65 unsigned long statusbit;
66
67 ct = ip_conntrack_get(skb, &ctinfo);
68 if (ct == NULL)
69 return;
70 dir = CTINFO2DIR(ctinfo);
71 t = &ct->tuplehash[dir].tuple;
72
73 if (dir == IP_CT_DIR_ORIGINAL)
74 statusbit = IPS_DST_NAT;
75 else
76 statusbit = IPS_SRC_NAT;
77
78 if (ct->status & statusbit) {
79 fl->fl4_dst = t->dst.ip;
80 if (t->dst.protonum == IPPROTO_TCP ||
81 t->dst.protonum == IPPROTO_UDP)
82 fl->fl_ip_dport = t->dst.u.tcp.port;
83 }
84
85 statusbit ^= IPS_NAT_MASK;
86
87 if (ct->status & statusbit) {
88 fl->fl4_src = t->src.ip;
89 if (t->dst.protonum == IPPROTO_TCP ||
90 t->dst.protonum == IPPROTO_UDP)
91 fl->fl_ip_sport = t->src.u.tcp.port;
92 }
93}
94#endif
95
58static unsigned int 96static unsigned int
59ip_nat_fn(unsigned int hooknum, 97ip_nat_fn(unsigned int hooknum,
60 struct sk_buff **pskb, 98 struct sk_buff **pskb,
@@ -162,18 +200,20 @@ ip_nat_in(unsigned int hooknum,
162 const struct net_device *out, 200 const struct net_device *out,
163 int (*okfn)(struct sk_buff *)) 201 int (*okfn)(struct sk_buff *))
164{ 202{
165 u_int32_t saddr, daddr; 203 struct ip_conntrack *ct;
204 enum ip_conntrack_info ctinfo;
166 unsigned int ret; 205 unsigned int ret;
167 206
168 saddr = (*pskb)->nh.iph->saddr;
169 daddr = (*pskb)->nh.iph->daddr;
170
171 ret = ip_nat_fn(hooknum, pskb, in, out, okfn); 207 ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
172 if (ret != NF_DROP && ret != NF_STOLEN 208 if (ret != NF_DROP && ret != NF_STOLEN
173 && ((*pskb)->nh.iph->saddr != saddr 209 && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
174 || (*pskb)->nh.iph->daddr != daddr)) { 210 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
175 dst_release((*pskb)->dst); 211
176 (*pskb)->dst = NULL; 212 if (ct->tuplehash[dir].tuple.src.ip !=
213 ct->tuplehash[!dir].tuple.dst.ip) {
214 dst_release((*pskb)->dst);
215 (*pskb)->dst = NULL;
216 }
177 } 217 }
178 return ret; 218 return ret;
179} 219}
@@ -185,29 +225,30 @@ ip_nat_out(unsigned int hooknum,
185 const struct net_device *out, 225 const struct net_device *out,
186 int (*okfn)(struct sk_buff *)) 226 int (*okfn)(struct sk_buff *))
187{ 227{
228 struct ip_conntrack *ct;
229 enum ip_conntrack_info ctinfo;
230 unsigned int ret;
231
188 /* root is playing with raw sockets. */ 232 /* root is playing with raw sockets. */
189 if ((*pskb)->len < sizeof(struct iphdr) 233 if ((*pskb)->len < sizeof(struct iphdr)
190 || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) 234 || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
191 return NF_ACCEPT; 235 return NF_ACCEPT;
192 236
193 /* We can hit fragment here; forwarded packets get 237 ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
194 defragmented by connection tracking coming in, then 238 if (ret != NF_DROP && ret != NF_STOLEN
195 fragmented (grr) by the forward code. 239 && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
196 240 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
197 In future: If we have nfct != NULL, AND we have NAT 241
198 initialized, AND there is no helper, then we can do full 242 if (ct->tuplehash[dir].tuple.src.ip !=
199 NAPT on the head, and IP-address-only NAT on the rest. 243 ct->tuplehash[!dir].tuple.dst.ip
200 244#ifdef CONFIG_XFRM
201 I'm starting to have nightmares about fragments. */ 245 || ct->tuplehash[dir].tuple.src.u.all !=
202 246 ct->tuplehash[!dir].tuple.dst.u.all
203 if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { 247#endif
204 *pskb = ip_ct_gather_frags(*pskb, IP_DEFRAG_NAT_OUT); 248 )
205 249 return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
206 if (!*pskb)
207 return NF_STOLEN;
208 } 250 }
209 251 return ret;
210 return ip_nat_fn(hooknum, pskb, in, out, okfn);
211} 252}
212 253
213static unsigned int 254static unsigned int
@@ -217,7 +258,8 @@ ip_nat_local_fn(unsigned int hooknum,
217 const struct net_device *out, 258 const struct net_device *out,
218 int (*okfn)(struct sk_buff *)) 259 int (*okfn)(struct sk_buff *))
219{ 260{
220 u_int32_t saddr, daddr; 261 struct ip_conntrack *ct;
262 enum ip_conntrack_info ctinfo;
221 unsigned int ret; 263 unsigned int ret;
222 264
223 /* root is playing with raw sockets. */ 265 /* root is playing with raw sockets. */
@@ -225,14 +267,20 @@ ip_nat_local_fn(unsigned int hooknum,
225 || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) 267 || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
226 return NF_ACCEPT; 268 return NF_ACCEPT;
227 269
228 saddr = (*pskb)->nh.iph->saddr;
229 daddr = (*pskb)->nh.iph->daddr;
230
231 ret = ip_nat_fn(hooknum, pskb, in, out, okfn); 270 ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
232 if (ret != NF_DROP && ret != NF_STOLEN 271 if (ret != NF_DROP && ret != NF_STOLEN
233 && ((*pskb)->nh.iph->saddr != saddr 272 && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
234 || (*pskb)->nh.iph->daddr != daddr)) 273 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
235 return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; 274
275 if (ct->tuplehash[dir].tuple.dst.ip !=
276 ct->tuplehash[!dir].tuple.src.ip
277#ifdef CONFIG_XFRM
278 || ct->tuplehash[dir].tuple.dst.u.all !=
279 ct->tuplehash[dir].tuple.src.u.all
280#endif
281 )
282 return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
283 }
236 return ret; 284 return ret;
237} 285}
238 286
@@ -316,14 +364,18 @@ static int init_or_cleanup(int init)
316{ 364{
317 int ret = 0; 365 int ret = 0;
318 366
319 need_ip_conntrack(); 367 need_conntrack();
320 368
321 if (!init) goto cleanup; 369 if (!init) goto cleanup;
322 370
371#ifdef CONFIG_XFRM
372 BUG_ON(ip_nat_decode_session != NULL);
373 ip_nat_decode_session = nat_decode_session;
374#endif
323 ret = ip_nat_rule_init(); 375 ret = ip_nat_rule_init();
324 if (ret < 0) { 376 if (ret < 0) {
325 printk("ip_nat_init: can't setup rules.\n"); 377 printk("ip_nat_init: can't setup rules.\n");
326 goto cleanup_nothing; 378 goto cleanup_decode_session;
327 } 379 }
328 ret = nf_register_hook(&ip_nat_in_ops); 380 ret = nf_register_hook(&ip_nat_in_ops);
329 if (ret < 0) { 381 if (ret < 0) {
@@ -371,7 +423,11 @@ static int init_or_cleanup(int init)
371 nf_unregister_hook(&ip_nat_in_ops); 423 nf_unregister_hook(&ip_nat_in_ops);
372 cleanup_rule_init: 424 cleanup_rule_init:
373 ip_nat_rule_cleanup(); 425 ip_nat_rule_cleanup();
374 cleanup_nothing: 426 cleanup_decode_session:
427#ifdef CONFIG_XFRM
428 ip_nat_decode_session = NULL;
429 synchronize_net();
430#endif
375 return ret; 431 return ret;
376} 432}
377 433
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 45886c8475e8..2371b2062c2d 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -2,7 +2,7 @@
2 * Packet matching code. 2 * Packet matching code.
3 * 3 *
4 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling 4 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5 * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org> 5 * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
6 * 6 *
7 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
@@ -11,16 +11,17 @@
11 * 19 Jan 2002 Harald Welte <laforge@gnumonks.org> 11 * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12 * - increase module usage count as soon as we have rules inside 12 * - increase module usage count as soon as we have rules inside
13 * a table 13 * a table
14 * 08 Oct 2005 Harald Welte <lafore@netfilter.org>
15 * - Generalize into "x_tables" layer and "{ip,ip6,arp}_tables"
14 */ 16 */
15#include <linux/config.h> 17#include <linux/config.h>
16#include <linux/cache.h> 18#include <linux/cache.h>
19#include <linux/capability.h>
17#include <linux/skbuff.h> 20#include <linux/skbuff.h>
18#include <linux/kmod.h> 21#include <linux/kmod.h>
19#include <linux/vmalloc.h> 22#include <linux/vmalloc.h>
20#include <linux/netdevice.h> 23#include <linux/netdevice.h>
21#include <linux/module.h> 24#include <linux/module.h>
22#include <linux/tcp.h>
23#include <linux/udp.h>
24#include <linux/icmp.h> 25#include <linux/icmp.h>
25#include <net/ip.h> 26#include <net/ip.h>
26#include <asm/uaccess.h> 27#include <asm/uaccess.h>
@@ -29,6 +30,7 @@
29#include <linux/err.h> 30#include <linux/err.h>
30#include <linux/cpumask.h> 31#include <linux/cpumask.h>
31 32
33#include <linux/netfilter/x_tables.h>
32#include <linux/netfilter_ipv4/ip_tables.h> 34#include <linux/netfilter_ipv4/ip_tables.h>
33 35
34MODULE_LICENSE("GPL"); 36MODULE_LICENSE("GPL");
@@ -61,14 +63,6 @@ do { \
61#else 63#else
62#define IP_NF_ASSERT(x) 64#define IP_NF_ASSERT(x)
63#endif 65#endif
64#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
65
66static DECLARE_MUTEX(ipt_mutex);
67
68/* Must have mutex */
69#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
70#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
71#include <linux/netfilter_ipv4/listhelp.h>
72 66
73#if 0 67#if 0
74/* All the better to debug you with... */ 68/* All the better to debug you with... */
@@ -83,48 +77,8 @@ static DECLARE_MUTEX(ipt_mutex);
83 context stops packets coming through and allows user context to read 77 context stops packets coming through and allows user context to read
84 the counters or update the rules. 78 the counters or update the rules.
85 79
86 To be cache friendly on SMP, we arrange them like so:
87 [ n-entries ]
88 ... cache-align padding ...
89 [ n-entries ]
90
91 Hence the start of any table is given by get_table() below. */ 80 Hence the start of any table is given by get_table() below. */
92 81
93/* The table itself */
94struct ipt_table_info
95{
96 /* Size per table */
97 unsigned int size;
98 /* Number of entries: FIXME. --RR */
99 unsigned int number;
100 /* Initial number of entries. Needed for module usage count */
101 unsigned int initial_entries;
102
103 /* Entry points and underflows */
104 unsigned int hook_entry[NF_IP_NUMHOOKS];
105 unsigned int underflow[NF_IP_NUMHOOKS];
106
107 /* ipt_entry tables: one per CPU */
108 char entries[0] ____cacheline_aligned;
109};
110
111static LIST_HEAD(ipt_target);
112static LIST_HEAD(ipt_match);
113static LIST_HEAD(ipt_tables);
114#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
115
116#ifdef CONFIG_SMP
117#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
118#else
119#define TABLE_OFFSET(t,p) 0
120#endif
121
122#if 0
123#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
124#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
125#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
126#endif
127
128/* Returns whether matches rule or not. */ 82/* Returns whether matches rule or not. */
129static inline int 83static inline int
130ip_packet_match(const struct iphdr *ip, 84ip_packet_match(const struct iphdr *ip,
@@ -243,7 +197,8 @@ int do_match(struct ipt_entry_match *m,
243 int *hotdrop) 197 int *hotdrop)
244{ 198{
245 /* Stop iteration if it doesn't match */ 199 /* Stop iteration if it doesn't match */
246 if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop)) 200 if (!m->u.kernel.match->match(skb, in, out, m->data, offset,
201 skb->nh.iph->ihl*4, hotdrop))
247 return 1; 202 return 1;
248 else 203 else
249 return 0; 204 return 0;
@@ -274,6 +229,7 @@ ipt_do_table(struct sk_buff **pskb,
274 const char *indev, *outdev; 229 const char *indev, *outdev;
275 void *table_base; 230 void *table_base;
276 struct ipt_entry *e, *back; 231 struct ipt_entry *e, *back;
232 struct xt_table_info *private = table->private;
277 233
278 /* Initialization */ 234 /* Initialization */
279 ip = (*pskb)->nh.iph; 235 ip = (*pskb)->nh.iph;
@@ -290,25 +246,11 @@ ipt_do_table(struct sk_buff **pskb,
290 246
291 read_lock_bh(&table->lock); 247 read_lock_bh(&table->lock);
292 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 248 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
293 table_base = (void *)table->private->entries 249 table_base = (void *)private->entries[smp_processor_id()];
294 + TABLE_OFFSET(table->private, smp_processor_id()); 250 e = get_entry(table_base, private->hook_entry[hook]);
295 e = get_entry(table_base, table->private->hook_entry[hook]);
296
297#ifdef CONFIG_NETFILTER_DEBUG
298 /* Check noone else using our table */
299 if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
300 && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
301 printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
302 smp_processor_id(),
303 table->name,
304 &((struct ipt_entry *)table_base)->comefrom,
305 ((struct ipt_entry *)table_base)->comefrom);
306 }
307 ((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
308#endif
309 251
310 /* For return from builtin chain */ 252 /* For return from builtin chain */
311 back = get_entry(table_base, table->private->underflow[hook]); 253 back = get_entry(table_base, private->underflow[hook]);
312 254
313 do { 255 do {
314 IP_NF_ASSERT(e); 256 IP_NF_ASSERT(e);
@@ -394,9 +336,6 @@ ipt_do_table(struct sk_buff **pskb,
394 } 336 }
395 } while (!hotdrop); 337 } while (!hotdrop);
396 338
397#ifdef CONFIG_NETFILTER_DEBUG
398 ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
399#endif
400 read_unlock_bh(&table->lock); 339 read_unlock_bh(&table->lock);
401 340
402#ifdef DEBUG_ALLOW_ALL 341#ifdef DEBUG_ALLOW_ALL
@@ -408,145 +347,6 @@ ipt_do_table(struct sk_buff **pskb,
408#endif 347#endif
409} 348}
410 349
411/*
412 * These are weird, but module loading must not be done with mutex
413 * held (since they will register), and we have to have a single
414 * function to use try_then_request_module().
415 */
416
417/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */
418static inline struct ipt_table *find_table_lock(const char *name)
419{
420 struct ipt_table *t;
421
422 if (down_interruptible(&ipt_mutex) != 0)
423 return ERR_PTR(-EINTR);
424
425 list_for_each_entry(t, &ipt_tables, list)
426 if (strcmp(t->name, name) == 0 && try_module_get(t->me))
427 return t;
428 up(&ipt_mutex);
429 return NULL;
430}
431
432/* Find match, grabs ref. Returns ERR_PTR() on error. */
433static inline struct ipt_match *find_match(const char *name, u8 revision)
434{
435 struct ipt_match *m;
436 int err = 0;
437
438 if (down_interruptible(&ipt_mutex) != 0)
439 return ERR_PTR(-EINTR);
440
441 list_for_each_entry(m, &ipt_match, list) {
442 if (strcmp(m->name, name) == 0) {
443 if (m->revision == revision) {
444 if (try_module_get(m->me)) {
445 up(&ipt_mutex);
446 return m;
447 }
448 } else
449 err = -EPROTOTYPE; /* Found something. */
450 }
451 }
452 up(&ipt_mutex);
453 return ERR_PTR(err);
454}
455
456/* Find target, grabs ref. Returns ERR_PTR() on error. */
457static inline struct ipt_target *find_target(const char *name, u8 revision)
458{
459 struct ipt_target *t;
460 int err = 0;
461
462 if (down_interruptible(&ipt_mutex) != 0)
463 return ERR_PTR(-EINTR);
464
465 list_for_each_entry(t, &ipt_target, list) {
466 if (strcmp(t->name, name) == 0) {
467 if (t->revision == revision) {
468 if (try_module_get(t->me)) {
469 up(&ipt_mutex);
470 return t;
471 }
472 } else
473 err = -EPROTOTYPE; /* Found something. */
474 }
475 }
476 up(&ipt_mutex);
477 return ERR_PTR(err);
478}
479
480struct ipt_target *ipt_find_target(const char *name, u8 revision)
481{
482 struct ipt_target *target;
483
484 target = try_then_request_module(find_target(name, revision),
485 "ipt_%s", name);
486 if (IS_ERR(target) || !target)
487 return NULL;
488 return target;
489}
490
491static int match_revfn(const char *name, u8 revision, int *bestp)
492{
493 struct ipt_match *m;
494 int have_rev = 0;
495
496 list_for_each_entry(m, &ipt_match, list) {
497 if (strcmp(m->name, name) == 0) {
498 if (m->revision > *bestp)
499 *bestp = m->revision;
500 if (m->revision == revision)
501 have_rev = 1;
502 }
503 }
504 return have_rev;
505}
506
507static int target_revfn(const char *name, u8 revision, int *bestp)
508{
509 struct ipt_target *t;
510 int have_rev = 0;
511
512 list_for_each_entry(t, &ipt_target, list) {
513 if (strcmp(t->name, name) == 0) {
514 if (t->revision > *bestp)
515 *bestp = t->revision;
516 if (t->revision == revision)
517 have_rev = 1;
518 }
519 }
520 return have_rev;
521}
522
523/* Returns true or false (if no such extension at all) */
524static inline int find_revision(const char *name, u8 revision,
525 int (*revfn)(const char *, u8, int *),
526 int *err)
527{
528 int have_rev, best = -1;
529
530 if (down_interruptible(&ipt_mutex) != 0) {
531 *err = -EINTR;
532 return 1;
533 }
534 have_rev = revfn(name, revision, &best);
535 up(&ipt_mutex);
536
537 /* Nothing at all? Return 0 to try loading module. */
538 if (best == -1) {
539 *err = -ENOENT;
540 return 0;
541 }
542
543 *err = best;
544 if (!have_rev)
545 *err = -EPROTONOSUPPORT;
546 return 1;
547}
548
549
550/* All zeroes == unconditional rule. */ 350/* All zeroes == unconditional rule. */
551static inline int 351static inline int
552unconditional(const struct ipt_ip *ip) 352unconditional(const struct ipt_ip *ip)
@@ -563,7 +363,8 @@ unconditional(const struct ipt_ip *ip)
563/* Figures out from what hook each rule can be called: returns 0 if 363/* Figures out from what hook each rule can be called: returns 0 if
564 there are loops. Puts hook bitmask in comefrom. */ 364 there are loops. Puts hook bitmask in comefrom. */
565static int 365static int
566mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks) 366mark_source_chains(struct xt_table_info *newinfo,
367 unsigned int valid_hooks, void *entry0)
567{ 368{
568 unsigned int hook; 369 unsigned int hook;
569 370
@@ -572,7 +373,7 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
572 for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) { 373 for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
573 unsigned int pos = newinfo->hook_entry[hook]; 374 unsigned int pos = newinfo->hook_entry[hook];
574 struct ipt_entry *e 375 struct ipt_entry *e
575 = (struct ipt_entry *)(newinfo->entries + pos); 376 = (struct ipt_entry *)(entry0 + pos);
576 377
577 if (!(valid_hooks & (1 << hook))) 378 if (!(valid_hooks & (1 << hook)))
578 continue; 379 continue;
@@ -622,13 +423,13 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
622 goto next; 423 goto next;
623 424
624 e = (struct ipt_entry *) 425 e = (struct ipt_entry *)
625 (newinfo->entries + pos); 426 (entry0 + pos);
626 } while (oldpos == pos + e->next_offset); 427 } while (oldpos == pos + e->next_offset);
627 428
628 /* Move along one */ 429 /* Move along one */
629 size = e->next_offset; 430 size = e->next_offset;
630 e = (struct ipt_entry *) 431 e = (struct ipt_entry *)
631 (newinfo->entries + pos + size); 432 (entry0 + pos + size);
632 e->counters.pcnt = pos; 433 e->counters.pcnt = pos;
633 pos += size; 434 pos += size;
634 } else { 435 } else {
@@ -645,7 +446,7 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
645 newpos = pos + e->next_offset; 446 newpos = pos + e->next_offset;
646 } 447 }
647 e = (struct ipt_entry *) 448 e = (struct ipt_entry *)
648 (newinfo->entries + newpos); 449 (entry0 + newpos);
649 e->counters.pcnt = pos; 450 e->counters.pcnt = pos;
650 pos = newpos; 451 pos = newpos;
651 } 452 }
@@ -708,7 +509,7 @@ check_match(struct ipt_entry_match *m,
708{ 509{
709 struct ipt_match *match; 510 struct ipt_match *match;
710 511
711 match = try_then_request_module(find_match(m->u.user.name, 512 match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
712 m->u.user.revision), 513 m->u.user.revision),
713 "ipt_%s", m->u.user.name); 514 "ipt_%s", m->u.user.name);
714 if (IS_ERR(match) || !match) { 515 if (IS_ERR(match) || !match) {
@@ -753,7 +554,8 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size,
753 goto cleanup_matches; 554 goto cleanup_matches;
754 555
755 t = ipt_get_target(e); 556 t = ipt_get_target(e);
756 target = try_then_request_module(find_target(t->u.user.name, 557 target = try_then_request_module(xt_find_target(AF_INET,
558 t->u.user.name,
757 t->u.user.revision), 559 t->u.user.revision),
758 "ipt_%s", t->u.user.name); 560 "ipt_%s", t->u.user.name);
759 if (IS_ERR(target) || !target) { 561 if (IS_ERR(target) || !target) {
@@ -790,7 +592,7 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size,
790 592
791static inline int 593static inline int
792check_entry_size_and_hooks(struct ipt_entry *e, 594check_entry_size_and_hooks(struct ipt_entry *e,
793 struct ipt_table_info *newinfo, 595 struct xt_table_info *newinfo,
794 unsigned char *base, 596 unsigned char *base,
795 unsigned char *limit, 597 unsigned char *limit,
796 const unsigned int *hook_entries, 598 const unsigned int *hook_entries,
@@ -824,7 +626,7 @@ check_entry_size_and_hooks(struct ipt_entry *e,
824 < 0 (not IPT_RETURN). --RR */ 626 < 0 (not IPT_RETURN). --RR */
825 627
826 /* Clear counters and comefrom */ 628 /* Clear counters and comefrom */
827 e->counters = ((struct ipt_counters) { 0, 0 }); 629 e->counters = ((struct xt_counters) { 0, 0 });
828 e->comefrom = 0; 630 e->comefrom = 0;
829 631
830 (*i)++; 632 (*i)++;
@@ -854,7 +656,8 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i)
854static int 656static int
855translate_table(const char *name, 657translate_table(const char *name,
856 unsigned int valid_hooks, 658 unsigned int valid_hooks,
857 struct ipt_table_info *newinfo, 659 struct xt_table_info *newinfo,
660 void *entry0,
858 unsigned int size, 661 unsigned int size,
859 unsigned int number, 662 unsigned int number,
860 const unsigned int *hook_entries, 663 const unsigned int *hook_entries,
@@ -875,11 +678,11 @@ translate_table(const char *name,
875 duprintf("translate_table: size %u\n", newinfo->size); 678 duprintf("translate_table: size %u\n", newinfo->size);
876 i = 0; 679 i = 0;
877 /* Walk through entries, checking offsets. */ 680 /* Walk through entries, checking offsets. */
878 ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, 681 ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
879 check_entry_size_and_hooks, 682 check_entry_size_and_hooks,
880 newinfo, 683 newinfo,
881 newinfo->entries, 684 entry0,
882 newinfo->entries + size, 685 entry0 + size,
883 hook_entries, underflows, &i); 686 hook_entries, underflows, &i);
884 if (ret != 0) 687 if (ret != 0)
885 return ret; 688 return ret;
@@ -907,95 +710,79 @@ translate_table(const char *name,
907 } 710 }
908 } 711 }
909 712
910 if (!mark_source_chains(newinfo, valid_hooks)) 713 if (!mark_source_chains(newinfo, valid_hooks, entry0))
911 return -ELOOP; 714 return -ELOOP;
912 715
913 /* Finally, each sanity check must pass */ 716 /* Finally, each sanity check must pass */
914 i = 0; 717 i = 0;
915 ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, 718 ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
916 check_entry, name, size, &i); 719 check_entry, name, size, &i);
917 720
918 if (ret != 0) { 721 if (ret != 0) {
919 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, 722 IPT_ENTRY_ITERATE(entry0, newinfo->size,
920 cleanup_entry, &i); 723 cleanup_entry, &i);
921 return ret; 724 return ret;
922 } 725 }
923 726
924 /* And one copy for every other CPU */ 727 /* And one copy for every other CPU */
925 for_each_cpu(i) { 728 for_each_cpu(i) {
926 if (i == 0) 729 if (newinfo->entries[i] && newinfo->entries[i] != entry0)
927 continue; 730 memcpy(newinfo->entries[i], entry0, newinfo->size);
928 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
929 newinfo->entries,
930 SMP_ALIGN(newinfo->size));
931 } 731 }
932 732
933 return ret; 733 return ret;
934} 734}
935 735
936static struct ipt_table_info * 736/* Gets counters. */
937replace_table(struct ipt_table *table, 737static inline int
938 unsigned int num_counters, 738add_entry_to_counter(const struct ipt_entry *e,
939 struct ipt_table_info *newinfo, 739 struct xt_counters total[],
940 int *error) 740 unsigned int *i)
941{ 741{
942 struct ipt_table_info *oldinfo; 742 ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
943
944#ifdef CONFIG_NETFILTER_DEBUG
945 {
946 struct ipt_entry *table_base;
947 unsigned int i;
948
949 for_each_cpu(i) {
950 table_base =
951 (void *)newinfo->entries
952 + TABLE_OFFSET(newinfo, i);
953
954 table_base->comefrom = 0xdead57ac;
955 }
956 }
957#endif
958
959 /* Do the substitution. */
960 write_lock_bh(&table->lock);
961 /* Check inside lock: is the old number correct? */
962 if (num_counters != table->private->number) {
963 duprintf("num_counters != table->private->number (%u/%u)\n",
964 num_counters, table->private->number);
965 write_unlock_bh(&table->lock);
966 *error = -EAGAIN;
967 return NULL;
968 }
969 oldinfo = table->private;
970 table->private = newinfo;
971 newinfo->initial_entries = oldinfo->initial_entries;
972 write_unlock_bh(&table->lock);
973 743
974 return oldinfo; 744 (*i)++;
745 return 0;
975} 746}
976 747
977/* Gets counters. */
978static inline int 748static inline int
979add_entry_to_counter(const struct ipt_entry *e, 749set_entry_to_counter(const struct ipt_entry *e,
980 struct ipt_counters total[], 750 struct ipt_counters total[],
981 unsigned int *i) 751 unsigned int *i)
982{ 752{
983 ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); 753 SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
984 754
985 (*i)++; 755 (*i)++;
986 return 0; 756 return 0;
987} 757}
988 758
989static void 759static void
990get_counters(const struct ipt_table_info *t, 760get_counters(const struct xt_table_info *t,
991 struct ipt_counters counters[]) 761 struct xt_counters counters[])
992{ 762{
993 unsigned int cpu; 763 unsigned int cpu;
994 unsigned int i; 764 unsigned int i;
765 unsigned int curcpu;
766
767 /* Instead of clearing (by a previous call to memset())
768 * the counters and using adds, we set the counters
769 * with data used by 'current' CPU
770 * We dont care about preemption here.
771 */
772 curcpu = raw_smp_processor_id();
773
774 i = 0;
775 IPT_ENTRY_ITERATE(t->entries[curcpu],
776 t->size,
777 set_entry_to_counter,
778 counters,
779 &i);
995 780
996 for_each_cpu(cpu) { 781 for_each_cpu(cpu) {
782 if (cpu == curcpu)
783 continue;
997 i = 0; 784 i = 0;
998 IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), 785 IPT_ENTRY_ITERATE(t->entries[cpu],
999 t->size, 786 t->size,
1000 add_entry_to_counter, 787 add_entry_to_counter,
1001 counters, 788 counters,
@@ -1010,26 +797,32 @@ copy_entries_to_user(unsigned int total_size,
1010{ 797{
1011 unsigned int off, num, countersize; 798 unsigned int off, num, countersize;
1012 struct ipt_entry *e; 799 struct ipt_entry *e;
1013 struct ipt_counters *counters; 800 struct xt_counters *counters;
801 struct xt_table_info *private = table->private;
1014 int ret = 0; 802 int ret = 0;
803 void *loc_cpu_entry;
1015 804
1016 /* We need atomic snapshot of counters: rest doesn't change 805 /* We need atomic snapshot of counters: rest doesn't change
1017 (other than comefrom, which userspace doesn't care 806 (other than comefrom, which userspace doesn't care
1018 about). */ 807 about). */
1019 countersize = sizeof(struct ipt_counters) * table->private->number; 808 countersize = sizeof(struct xt_counters) * private->number;
1020 counters = vmalloc(countersize); 809 counters = vmalloc_node(countersize, numa_node_id());
1021 810
1022 if (counters == NULL) 811 if (counters == NULL)
1023 return -ENOMEM; 812 return -ENOMEM;
1024 813
1025 /* First, sum counters... */ 814 /* First, sum counters... */
1026 memset(counters, 0, countersize);
1027 write_lock_bh(&table->lock); 815 write_lock_bh(&table->lock);
1028 get_counters(table->private, counters); 816 get_counters(private, counters);
1029 write_unlock_bh(&table->lock); 817 write_unlock_bh(&table->lock);
1030 818
1031 /* ... then copy entire thing from CPU 0... */ 819 /* choose the copy that is on our node/cpu, ...
1032 if (copy_to_user(userptr, table->private->entries, total_size) != 0) { 820 * This choice is lazy (because current thread is
821 * allowed to migrate to another cpu)
822 */
823 loc_cpu_entry = private->entries[raw_smp_processor_id()];
824 /* ... then copy entire thing ... */
825 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
1033 ret = -EFAULT; 826 ret = -EFAULT;
1034 goto free_counters; 827 goto free_counters;
1035 } 828 }
@@ -1041,7 +834,7 @@ copy_entries_to_user(unsigned int total_size,
1041 struct ipt_entry_match *m; 834 struct ipt_entry_match *m;
1042 struct ipt_entry_target *t; 835 struct ipt_entry_target *t;
1043 836
1044 e = (struct ipt_entry *)(table->private->entries + off); 837 e = (struct ipt_entry *)(loc_cpu_entry + off);
1045 if (copy_to_user(userptr + off 838 if (copy_to_user(userptr + off
1046 + offsetof(struct ipt_entry, counters), 839 + offsetof(struct ipt_entry, counters),
1047 &counters[num], 840 &counters[num],
@@ -1089,21 +882,22 @@ get_entries(const struct ipt_get_entries *entries,
1089 int ret; 882 int ret;
1090 struct ipt_table *t; 883 struct ipt_table *t;
1091 884
1092 t = find_table_lock(entries->name); 885 t = xt_find_table_lock(AF_INET, entries->name);
1093 if (t && !IS_ERR(t)) { 886 if (t && !IS_ERR(t)) {
887 struct xt_table_info *private = t->private;
1094 duprintf("t->private->number = %u\n", 888 duprintf("t->private->number = %u\n",
1095 t->private->number); 889 private->number);
1096 if (entries->size == t->private->size) 890 if (entries->size == private->size)
1097 ret = copy_entries_to_user(t->private->size, 891 ret = copy_entries_to_user(private->size,
1098 t, uptr->entrytable); 892 t, uptr->entrytable);
1099 else { 893 else {
1100 duprintf("get_entries: I've got %u not %u!\n", 894 duprintf("get_entries: I've got %u not %u!\n",
1101 t->private->size, 895 private->size,
1102 entries->size); 896 entries->size);
1103 ret = -EINVAL; 897 ret = -EINVAL;
1104 } 898 }
1105 module_put(t->me); 899 module_put(t->me);
1106 up(&ipt_mutex); 900 xt_table_unlock(t);
1107 } else 901 } else
1108 ret = t ? PTR_ERR(t) : -ENOENT; 902 ret = t ? PTR_ERR(t) : -ENOENT;
1109 903
@@ -1116,8 +910,9 @@ do_replace(void __user *user, unsigned int len)
1116 int ret; 910 int ret;
1117 struct ipt_replace tmp; 911 struct ipt_replace tmp;
1118 struct ipt_table *t; 912 struct ipt_table *t;
1119 struct ipt_table_info *newinfo, *oldinfo; 913 struct xt_table_info *newinfo, *oldinfo;
1120 struct ipt_counters *counters; 914 struct xt_counters *counters;
915 void *loc_cpu_entry, *loc_cpu_old_entry;
1121 916
1122 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 917 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1123 return -EFAULT; 918 return -EFAULT;
@@ -1126,38 +921,33 @@ do_replace(void __user *user, unsigned int len)
1126 if (len != sizeof(tmp) + tmp.size) 921 if (len != sizeof(tmp) + tmp.size)
1127 return -ENOPROTOOPT; 922 return -ENOPROTOOPT;
1128 923
1129 /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ 924 newinfo = xt_alloc_table_info(tmp.size);
1130 if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
1131 return -ENOMEM;
1132
1133 newinfo = vmalloc(sizeof(struct ipt_table_info)
1134 + SMP_ALIGN(tmp.size) *
1135 (highest_possible_processor_id()+1));
1136 if (!newinfo) 925 if (!newinfo)
1137 return -ENOMEM; 926 return -ENOMEM;
1138 927
1139 if (copy_from_user(newinfo->entries, user + sizeof(tmp), 928 /* choose the copy that is our node/cpu */
929 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
930 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1140 tmp.size) != 0) { 931 tmp.size) != 0) {
1141 ret = -EFAULT; 932 ret = -EFAULT;
1142 goto free_newinfo; 933 goto free_newinfo;
1143 } 934 }
1144 935
1145 counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters)); 936 counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
1146 if (!counters) { 937 if (!counters) {
1147 ret = -ENOMEM; 938 ret = -ENOMEM;
1148 goto free_newinfo; 939 goto free_newinfo;
1149 } 940 }
1150 memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
1151 941
1152 ret = translate_table(tmp.name, tmp.valid_hooks, 942 ret = translate_table(tmp.name, tmp.valid_hooks,
1153 newinfo, tmp.size, tmp.num_entries, 943 newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
1154 tmp.hook_entry, tmp.underflow); 944 tmp.hook_entry, tmp.underflow);
1155 if (ret != 0) 945 if (ret != 0)
1156 goto free_newinfo_counters; 946 goto free_newinfo_counters;
1157 947
1158 duprintf("ip_tables: Translated table\n"); 948 duprintf("ip_tables: Translated table\n");
1159 949
1160 t = try_then_request_module(find_table_lock(tmp.name), 950 t = try_then_request_module(xt_find_table_lock(AF_INET, tmp.name),
1161 "iptable_%s", tmp.name); 951 "iptable_%s", tmp.name);
1162 if (!t || IS_ERR(t)) { 952 if (!t || IS_ERR(t)) {
1163 ret = t ? PTR_ERR(t) : -ENOENT; 953 ret = t ? PTR_ERR(t) : -ENOENT;
@@ -1172,7 +962,7 @@ do_replace(void __user *user, unsigned int len)
1172 goto put_module; 962 goto put_module;
1173 } 963 }
1174 964
1175 oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret); 965 oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
1176 if (!oldinfo) 966 if (!oldinfo)
1177 goto put_module; 967 goto put_module;
1178 968
@@ -1189,24 +979,25 @@ do_replace(void __user *user, unsigned int len)
1189 /* Get the old counters. */ 979 /* Get the old counters. */
1190 get_counters(oldinfo, counters); 980 get_counters(oldinfo, counters);
1191 /* Decrease module usage counts and free resource */ 981 /* Decrease module usage counts and free resource */
1192 IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL); 982 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
1193 vfree(oldinfo); 983 IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
984 xt_free_table_info(oldinfo);
1194 if (copy_to_user(tmp.counters, counters, 985 if (copy_to_user(tmp.counters, counters,
1195 sizeof(struct ipt_counters) * tmp.num_counters) != 0) 986 sizeof(struct xt_counters) * tmp.num_counters) != 0)
1196 ret = -EFAULT; 987 ret = -EFAULT;
1197 vfree(counters); 988 vfree(counters);
1198 up(&ipt_mutex); 989 xt_table_unlock(t);
1199 return ret; 990 return ret;
1200 991
1201 put_module: 992 put_module:
1202 module_put(t->me); 993 module_put(t->me);
1203 up(&ipt_mutex); 994 xt_table_unlock(t);
1204 free_newinfo_counters_untrans: 995 free_newinfo_counters_untrans:
1205 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL); 996 IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
1206 free_newinfo_counters: 997 free_newinfo_counters:
1207 vfree(counters); 998 vfree(counters);
1208 free_newinfo: 999 free_newinfo:
1209 vfree(newinfo); 1000 xt_free_table_info(newinfo);
1210 return ret; 1001 return ret;
1211} 1002}
1212 1003
@@ -1214,7 +1005,7 @@ do_replace(void __user *user, unsigned int len)
1214 * and everything is OK. */ 1005 * and everything is OK. */
1215static inline int 1006static inline int
1216add_counter_to_entry(struct ipt_entry *e, 1007add_counter_to_entry(struct ipt_entry *e,
1217 const struct ipt_counters addme[], 1008 const struct xt_counters addme[],
1218 unsigned int *i) 1009 unsigned int *i)
1219{ 1010{
1220#if 0 1011#if 0
@@ -1236,17 +1027,19 @@ static int
1236do_add_counters(void __user *user, unsigned int len) 1027do_add_counters(void __user *user, unsigned int len)
1237{ 1028{
1238 unsigned int i; 1029 unsigned int i;
1239 struct ipt_counters_info tmp, *paddc; 1030 struct xt_counters_info tmp, *paddc;
1240 struct ipt_table *t; 1031 struct ipt_table *t;
1032 struct xt_table_info *private;
1241 int ret = 0; 1033 int ret = 0;
1034 void *loc_cpu_entry;
1242 1035
1243 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 1036 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1244 return -EFAULT; 1037 return -EFAULT;
1245 1038
1246 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters)) 1039 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
1247 return -EINVAL; 1040 return -EINVAL;
1248 1041
1249 paddc = vmalloc(len); 1042 paddc = vmalloc_node(len, numa_node_id());
1250 if (!paddc) 1043 if (!paddc)
1251 return -ENOMEM; 1044 return -ENOMEM;
1252 1045
@@ -1255,27 +1048,30 @@ do_add_counters(void __user *user, unsigned int len)
1255 goto free; 1048 goto free;
1256 } 1049 }
1257 1050
1258 t = find_table_lock(tmp.name); 1051 t = xt_find_table_lock(AF_INET, tmp.name);
1259 if (!t || IS_ERR(t)) { 1052 if (!t || IS_ERR(t)) {
1260 ret = t ? PTR_ERR(t) : -ENOENT; 1053 ret = t ? PTR_ERR(t) : -ENOENT;
1261 goto free; 1054 goto free;
1262 } 1055 }
1263 1056
1264 write_lock_bh(&t->lock); 1057 write_lock_bh(&t->lock);
1265 if (t->private->number != paddc->num_counters) { 1058 private = t->private;
1059 if (private->number != paddc->num_counters) {
1266 ret = -EINVAL; 1060 ret = -EINVAL;
1267 goto unlock_up_free; 1061 goto unlock_up_free;
1268 } 1062 }
1269 1063
1270 i = 0; 1064 i = 0;
1271 IPT_ENTRY_ITERATE(t->private->entries, 1065 /* Choose the copy that is on our node */
1272 t->private->size, 1066 loc_cpu_entry = private->entries[raw_smp_processor_id()];
1067 IPT_ENTRY_ITERATE(loc_cpu_entry,
1068 private->size,
1273 add_counter_to_entry, 1069 add_counter_to_entry,
1274 paddc->counters, 1070 paddc->counters,
1275 &i); 1071 &i);
1276 unlock_up_free: 1072 unlock_up_free:
1277 write_unlock_bh(&t->lock); 1073 write_unlock_bh(&t->lock);
1278 up(&ipt_mutex); 1074 xt_table_unlock(t);
1279 module_put(t->me); 1075 module_put(t->me);
1280 free: 1076 free:
1281 vfree(paddc); 1077 vfree(paddc);
@@ -1334,25 +1130,26 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1334 } 1130 }
1335 name[IPT_TABLE_MAXNAMELEN-1] = '\0'; 1131 name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1336 1132
1337 t = try_then_request_module(find_table_lock(name), 1133 t = try_then_request_module(xt_find_table_lock(AF_INET, name),
1338 "iptable_%s", name); 1134 "iptable_%s", name);
1339 if (t && !IS_ERR(t)) { 1135 if (t && !IS_ERR(t)) {
1340 struct ipt_getinfo info; 1136 struct ipt_getinfo info;
1137 struct xt_table_info *private = t->private;
1341 1138
1342 info.valid_hooks = t->valid_hooks; 1139 info.valid_hooks = t->valid_hooks;
1343 memcpy(info.hook_entry, t->private->hook_entry, 1140 memcpy(info.hook_entry, private->hook_entry,
1344 sizeof(info.hook_entry)); 1141 sizeof(info.hook_entry));
1345 memcpy(info.underflow, t->private->underflow, 1142 memcpy(info.underflow, private->underflow,
1346 sizeof(info.underflow)); 1143 sizeof(info.underflow));
1347 info.num_entries = t->private->number; 1144 info.num_entries = private->number;
1348 info.size = t->private->size; 1145 info.size = private->size;
1349 memcpy(info.name, name, sizeof(info.name)); 1146 memcpy(info.name, name, sizeof(info.name));
1350 1147
1351 if (copy_to_user(user, &info, *len) != 0) 1148 if (copy_to_user(user, &info, *len) != 0)
1352 ret = -EFAULT; 1149 ret = -EFAULT;
1353 else 1150 else
1354 ret = 0; 1151 ret = 0;
1355 up(&ipt_mutex); 1152 xt_table_unlock(t);
1356 module_put(t->me); 1153 module_put(t->me);
1357 } else 1154 } else
1358 ret = t ? PTR_ERR(t) : -ENOENT; 1155 ret = t ? PTR_ERR(t) : -ENOENT;
@@ -1379,7 +1176,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1379 case IPT_SO_GET_REVISION_MATCH: 1176 case IPT_SO_GET_REVISION_MATCH:
1380 case IPT_SO_GET_REVISION_TARGET: { 1177 case IPT_SO_GET_REVISION_TARGET: {
1381 struct ipt_get_revision rev; 1178 struct ipt_get_revision rev;
1382 int (*revfn)(const char *, u8, int *); 1179 int target;
1383 1180
1384 if (*len != sizeof(rev)) { 1181 if (*len != sizeof(rev)) {
1385 ret = -EINVAL; 1182 ret = -EINVAL;
@@ -1391,12 +1188,13 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1391 } 1188 }
1392 1189
1393 if (cmd == IPT_SO_GET_REVISION_TARGET) 1190 if (cmd == IPT_SO_GET_REVISION_TARGET)
1394 revfn = target_revfn; 1191 target = 1;
1395 else 1192 else
1396 revfn = match_revfn; 1193 target = 0;
1397 1194
1398 try_then_request_module(find_revision(rev.name, rev.revision, 1195 try_then_request_module(xt_find_revision(AF_INET, rev.name,
1399 revfn, &ret), 1196 rev.revision,
1197 target, &ret),
1400 "ipt_%s", rev.name); 1198 "ipt_%s", rev.name);
1401 break; 1199 break;
1402 } 1200 }
@@ -1409,309 +1207,53 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1409 return ret; 1207 return ret;
1410} 1208}
1411 1209
1412/* Registration hooks for targets. */ 1210int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
1413int
1414ipt_register_target(struct ipt_target *target)
1415{ 1211{
1416 int ret; 1212 int ret;
1417 1213 struct xt_table_info *newinfo;
1418 ret = down_interruptible(&ipt_mutex); 1214 static struct xt_table_info bootstrap
1419 if (ret != 0)
1420 return ret;
1421 list_add(&target->list, &ipt_target);
1422 up(&ipt_mutex);
1423 return ret;
1424}
1425
1426void
1427ipt_unregister_target(struct ipt_target *target)
1428{
1429 down(&ipt_mutex);
1430 LIST_DELETE(&ipt_target, target);
1431 up(&ipt_mutex);
1432}
1433
1434int
1435ipt_register_match(struct ipt_match *match)
1436{
1437 int ret;
1438
1439 ret = down_interruptible(&ipt_mutex);
1440 if (ret != 0)
1441 return ret;
1442
1443 list_add(&match->list, &ipt_match);
1444 up(&ipt_mutex);
1445
1446 return ret;
1447}
1448
1449void
1450ipt_unregister_match(struct ipt_match *match)
1451{
1452 down(&ipt_mutex);
1453 LIST_DELETE(&ipt_match, match);
1454 up(&ipt_mutex);
1455}
1456
1457int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
1458{
1459 int ret;
1460 struct ipt_table_info *newinfo;
1461 static struct ipt_table_info bootstrap
1462 = { 0, 0, 0, { 0 }, { 0 }, { } }; 1215 = { 0, 0, 0, { 0 }, { 0 }, { } };
1216 void *loc_cpu_entry;
1463 1217
1464 newinfo = vmalloc(sizeof(struct ipt_table_info) 1218 newinfo = xt_alloc_table_info(repl->size);
1465 + SMP_ALIGN(repl->size) *
1466 (highest_possible_processor_id()+1));
1467 if (!newinfo) 1219 if (!newinfo)
1468 return -ENOMEM; 1220 return -ENOMEM;
1469 1221
1470 memcpy(newinfo->entries, repl->entries, repl->size); 1222 /* choose the copy on our node/cpu
1223 * but dont care of preemption
1224 */
1225 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1226 memcpy(loc_cpu_entry, repl->entries, repl->size);
1471 1227
1472 ret = translate_table(table->name, table->valid_hooks, 1228 ret = translate_table(table->name, table->valid_hooks,
1473 newinfo, repl->size, 1229 newinfo, loc_cpu_entry, repl->size,
1474 repl->num_entries, 1230 repl->num_entries,
1475 repl->hook_entry, 1231 repl->hook_entry,
1476 repl->underflow); 1232 repl->underflow);
1477 if (ret != 0) { 1233 if (ret != 0) {
1478 vfree(newinfo); 1234 xt_free_table_info(newinfo);
1479 return ret; 1235 return ret;
1480 } 1236 }
1481 1237
1482 ret = down_interruptible(&ipt_mutex); 1238 if (xt_register_table(table, &bootstrap, newinfo) != 0) {
1483 if (ret != 0) { 1239 xt_free_table_info(newinfo);
1484 vfree(newinfo);
1485 return ret; 1240 return ret;
1486 } 1241 }
1487 1242
1488 /* Don't autoload: we'd eat our tail... */ 1243 return 0;
1489 if (list_named_find(&ipt_tables, table->name)) {
1490 ret = -EEXIST;
1491 goto free_unlock;
1492 }
1493
1494 /* Simplifies replace_table code. */
1495 table->private = &bootstrap;
1496 if (!replace_table(table, 0, newinfo, &ret))
1497 goto free_unlock;
1498
1499 duprintf("table->private->number = %u\n",
1500 table->private->number);
1501
1502 /* save number of initial entries */
1503 table->private->initial_entries = table->private->number;
1504
1505 rwlock_init(&table->lock);
1506 list_prepend(&ipt_tables, table);
1507
1508 unlock:
1509 up(&ipt_mutex);
1510 return ret;
1511
1512 free_unlock:
1513 vfree(newinfo);
1514 goto unlock;
1515} 1244}
1516 1245
1517void ipt_unregister_table(struct ipt_table *table) 1246void ipt_unregister_table(struct ipt_table *table)
1518{ 1247{
1519 down(&ipt_mutex); 1248 struct xt_table_info *private;
1520 LIST_DELETE(&ipt_tables, table); 1249 void *loc_cpu_entry;
1521 up(&ipt_mutex);
1522 1250
1523 /* Decrease module usage counts and free resources */ 1251 private = xt_unregister_table(table);
1524 IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
1525 cleanup_entry, NULL);
1526 vfree(table->private);
1527}
1528
1529/* Returns 1 if the port is matched by the range, 0 otherwise */
1530static inline int
1531port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
1532{
1533 int ret;
1534
1535 ret = (port >= min && port <= max) ^ invert;
1536 return ret;
1537}
1538
1539static int
1540tcp_find_option(u_int8_t option,
1541 const struct sk_buff *skb,
1542 unsigned int optlen,
1543 int invert,
1544 int *hotdrop)
1545{
1546 /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
1547 u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
1548 unsigned int i;
1549
1550 duprintf("tcp_match: finding option\n");
1551
1552 if (!optlen)
1553 return invert;
1554
1555 /* If we don't have the whole header, drop packet. */
1556 op = skb_header_pointer(skb,
1557 skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
1558 optlen, _opt);
1559 if (op == NULL) {
1560 *hotdrop = 1;
1561 return 0;
1562 }
1563
1564 for (i = 0; i < optlen; ) {
1565 if (op[i] == option) return !invert;
1566 if (op[i] < 2) i++;
1567 else i += op[i+1]?:1;
1568 }
1569
1570 return invert;
1571}
1572
1573static int
1574tcp_match(const struct sk_buff *skb,
1575 const struct net_device *in,
1576 const struct net_device *out,
1577 const void *matchinfo,
1578 int offset,
1579 int *hotdrop)
1580{
1581 struct tcphdr _tcph, *th;
1582 const struct ipt_tcp *tcpinfo = matchinfo;
1583
1584 if (offset) {
1585 /* To quote Alan:
1586
1587 Don't allow a fragment of TCP 8 bytes in. Nobody normal
1588 causes this. Its a cracker trying to break in by doing a
1589 flag overwrite to pass the direction checks.
1590 */
1591 if (offset == 1) {
1592 duprintf("Dropping evil TCP offset=1 frag.\n");
1593 *hotdrop = 1;
1594 }
1595 /* Must not be a fragment. */
1596 return 0;
1597 }
1598
1599#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
1600
1601 th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1602 sizeof(_tcph), &_tcph);
1603 if (th == NULL) {
1604 /* We've been asked to examine this packet, and we
1605 can't. Hence, no choice but to drop. */
1606 duprintf("Dropping evil TCP offset=0 tinygram.\n");
1607 *hotdrop = 1;
1608 return 0;
1609 }
1610
1611 if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
1612 ntohs(th->source),
1613 !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
1614 return 0;
1615 if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
1616 ntohs(th->dest),
1617 !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
1618 return 0;
1619 if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
1620 == tcpinfo->flg_cmp,
1621 IPT_TCP_INV_FLAGS))
1622 return 0;
1623 if (tcpinfo->option) {
1624 if (th->doff * 4 < sizeof(_tcph)) {
1625 *hotdrop = 1;
1626 return 0;
1627 }
1628 if (!tcp_find_option(tcpinfo->option, skb,
1629 th->doff*4 - sizeof(_tcph),
1630 tcpinfo->invflags & IPT_TCP_INV_OPTION,
1631 hotdrop))
1632 return 0;
1633 }
1634 return 1;
1635}
1636
1637/* Called when user tries to insert an entry of this type. */
1638static int
1639tcp_checkentry(const char *tablename,
1640 const struct ipt_ip *ip,
1641 void *matchinfo,
1642 unsigned int matchsize,
1643 unsigned int hook_mask)
1644{
1645 const struct ipt_tcp *tcpinfo = matchinfo;
1646
1647 /* Must specify proto == TCP, and no unknown invflags */
1648 return ip->proto == IPPROTO_TCP
1649 && !(ip->invflags & IPT_INV_PROTO)
1650 && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
1651 && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
1652}
1653
1654static int
1655udp_match(const struct sk_buff *skb,
1656 const struct net_device *in,
1657 const struct net_device *out,
1658 const void *matchinfo,
1659 int offset,
1660 int *hotdrop)
1661{
1662 struct udphdr _udph, *uh;
1663 const struct ipt_udp *udpinfo = matchinfo;
1664
1665 /* Must not be a fragment. */
1666 if (offset)
1667 return 0;
1668
1669 uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1670 sizeof(_udph), &_udph);
1671 if (uh == NULL) {
1672 /* We've been asked to examine this packet, and we
1673 can't. Hence, no choice but to drop. */
1674 duprintf("Dropping evil UDP tinygram.\n");
1675 *hotdrop = 1;
1676 return 0;
1677 }
1678 1252
1679 return port_match(udpinfo->spts[0], udpinfo->spts[1], 1253 /* Decrease module usage counts and free resources */
1680 ntohs(uh->source), 1254 loc_cpu_entry = private->entries[raw_smp_processor_id()];
1681 !!(udpinfo->invflags & IPT_UDP_INV_SRCPT)) 1255 IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
1682 && port_match(udpinfo->dpts[0], udpinfo->dpts[1], 1256 xt_free_table_info(private);
1683 ntohs(uh->dest),
1684 !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
1685}
1686
1687/* Called when user tries to insert an entry of this type. */
1688static int
1689udp_checkentry(const char *tablename,
1690 const struct ipt_ip *ip,
1691 void *matchinfo,
1692 unsigned int matchinfosize,
1693 unsigned int hook_mask)
1694{
1695 const struct ipt_udp *udpinfo = matchinfo;
1696
1697 /* Must specify proto == UDP, and no unknown invflags */
1698 if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
1699 duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
1700 IPPROTO_UDP);
1701 return 0;
1702 }
1703 if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
1704 duprintf("ipt_udp: matchsize %u != %u\n",
1705 matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
1706 return 0;
1707 }
1708 if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
1709 duprintf("ipt_udp: unknown flags %X\n",
1710 udpinfo->invflags);
1711 return 0;
1712 }
1713
1714 return 1;
1715} 1257}
1716 1258
1717/* Returns 1 if the type and code is matched by the range, 0 otherwise */ 1259/* Returns 1 if the type and code is matched by the range, 0 otherwise */
@@ -1730,6 +1272,7 @@ icmp_match(const struct sk_buff *skb,
1730 const struct net_device *out, 1272 const struct net_device *out,
1731 const void *matchinfo, 1273 const void *matchinfo,
1732 int offset, 1274 int offset,
1275 unsigned int protoff,
1733 int *hotdrop) 1276 int *hotdrop)
1734{ 1277{
1735 struct icmphdr _icmph, *ic; 1278 struct icmphdr _icmph, *ic;
@@ -1739,8 +1282,7 @@ icmp_match(const struct sk_buff *skb,
1739 if (offset) 1282 if (offset)
1740 return 0; 1283 return 0;
1741 1284
1742 ic = skb_header_pointer(skb, skb->nh.iph->ihl*4, 1285 ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph);
1743 sizeof(_icmph), &_icmph);
1744 if (ic == NULL) { 1286 if (ic == NULL) {
1745 /* We've been asked to examine this packet, and we 1287 /* We've been asked to examine this packet, and we
1746 * can't. Hence, no choice but to drop. 1288 * can't. Hence, no choice but to drop.
@@ -1760,11 +1302,12 @@ icmp_match(const struct sk_buff *skb,
1760/* Called when user tries to insert an entry of this type. */ 1302/* Called when user tries to insert an entry of this type. */
1761static int 1303static int
1762icmp_checkentry(const char *tablename, 1304icmp_checkentry(const char *tablename,
1763 const struct ipt_ip *ip, 1305 const void *info,
1764 void *matchinfo, 1306 void *matchinfo,
1765 unsigned int matchsize, 1307 unsigned int matchsize,
1766 unsigned int hook_mask) 1308 unsigned int hook_mask)
1767{ 1309{
1310 const struct ipt_ip *ip = info;
1768 const struct ipt_icmp *icmpinfo = matchinfo; 1311 const struct ipt_icmp *icmpinfo = matchinfo;
1769 1312
1770 /* Must specify proto == ICMP, and no unknown invflags */ 1313 /* Must specify proto == ICMP, and no unknown invflags */
@@ -1794,123 +1337,22 @@ static struct nf_sockopt_ops ipt_sockopts = {
1794 .get = do_ipt_get_ctl, 1337 .get = do_ipt_get_ctl,
1795}; 1338};
1796 1339
1797static struct ipt_match tcp_matchstruct = {
1798 .name = "tcp",
1799 .match = &tcp_match,
1800 .checkentry = &tcp_checkentry,
1801};
1802
1803static struct ipt_match udp_matchstruct = {
1804 .name = "udp",
1805 .match = &udp_match,
1806 .checkentry = &udp_checkentry,
1807};
1808
1809static struct ipt_match icmp_matchstruct = { 1340static struct ipt_match icmp_matchstruct = {
1810 .name = "icmp", 1341 .name = "icmp",
1811 .match = &icmp_match, 1342 .match = &icmp_match,
1812 .checkentry = &icmp_checkentry, 1343 .checkentry = &icmp_checkentry,
1813}; 1344};
1814 1345
1815#ifdef CONFIG_PROC_FS
1816static inline int print_name(const char *i,
1817 off_t start_offset, char *buffer, int length,
1818 off_t *pos, unsigned int *count)
1819{
1820 if ((*count)++ >= start_offset) {
1821 unsigned int namelen;
1822
1823 namelen = sprintf(buffer + *pos, "%s\n",
1824 i + sizeof(struct list_head));
1825 if (*pos + namelen > length) {
1826 /* Stop iterating */
1827 return 1;
1828 }
1829 *pos += namelen;
1830 }
1831 return 0;
1832}
1833
1834static inline int print_target(const struct ipt_target *t,
1835 off_t start_offset, char *buffer, int length,
1836 off_t *pos, unsigned int *count)
1837{
1838 if (t == &ipt_standard_target || t == &ipt_error_target)
1839 return 0;
1840 return print_name((char *)t, start_offset, buffer, length, pos, count);
1841}
1842
1843static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
1844{
1845 off_t pos = 0;
1846 unsigned int count = 0;
1847
1848 if (down_interruptible(&ipt_mutex) != 0)
1849 return 0;
1850
1851 LIST_FIND(&ipt_tables, print_name, void *,
1852 offset, buffer, length, &pos, &count);
1853
1854 up(&ipt_mutex);
1855
1856 /* `start' hack - see fs/proc/generic.c line ~105 */
1857 *start=(char *)((unsigned long)count-offset);
1858 return pos;
1859}
1860
1861static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
1862{
1863 off_t pos = 0;
1864 unsigned int count = 0;
1865
1866 if (down_interruptible(&ipt_mutex) != 0)
1867 return 0;
1868
1869 LIST_FIND(&ipt_target, print_target, struct ipt_target *,
1870 offset, buffer, length, &pos, &count);
1871
1872 up(&ipt_mutex);
1873
1874 *start = (char *)((unsigned long)count - offset);
1875 return pos;
1876}
1877
1878static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
1879{
1880 off_t pos = 0;
1881 unsigned int count = 0;
1882
1883 if (down_interruptible(&ipt_mutex) != 0)
1884 return 0;
1885
1886 LIST_FIND(&ipt_match, print_name, void *,
1887 offset, buffer, length, &pos, &count);
1888
1889 up(&ipt_mutex);
1890
1891 *start = (char *)((unsigned long)count - offset);
1892 return pos;
1893}
1894
1895static const struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
1896{ { "ip_tables_names", ipt_get_tables },
1897 { "ip_tables_targets", ipt_get_targets },
1898 { "ip_tables_matches", ipt_get_matches },
1899 { NULL, NULL} };
1900#endif /*CONFIG_PROC_FS*/
1901
1902static int __init init(void) 1346static int __init init(void)
1903{ 1347{
1904 int ret; 1348 int ret;
1905 1349
1350 xt_proto_init(AF_INET);
1351
1906 /* Noone else will be downing sem now, so we won't sleep */ 1352 /* Noone else will be downing sem now, so we won't sleep */
1907 down(&ipt_mutex); 1353 xt_register_target(AF_INET, &ipt_standard_target);
1908 list_append(&ipt_target, &ipt_standard_target); 1354 xt_register_target(AF_INET, &ipt_error_target);
1909 list_append(&ipt_target, &ipt_error_target); 1355 xt_register_match(AF_INET, &icmp_matchstruct);
1910 list_append(&ipt_match, &tcp_matchstruct);
1911 list_append(&ipt_match, &udp_matchstruct);
1912 list_append(&ipt_match, &icmp_matchstruct);
1913 up(&ipt_mutex);
1914 1356
1915 /* Register setsockopt */ 1357 /* Register setsockopt */
1916 ret = nf_register_sockopt(&ipt_sockopts); 1358 ret = nf_register_sockopt(&ipt_sockopts);
@@ -1919,49 +1361,23 @@ static int __init init(void)
1919 return ret; 1361 return ret;
1920 } 1362 }
1921 1363
1922#ifdef CONFIG_PROC_FS 1364 printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n");
1923 {
1924 struct proc_dir_entry *proc;
1925 int i;
1926
1927 for (i = 0; ipt_proc_entry[i].name; i++) {
1928 proc = proc_net_create(ipt_proc_entry[i].name, 0,
1929 ipt_proc_entry[i].get_info);
1930 if (!proc) {
1931 while (--i >= 0)
1932 proc_net_remove(ipt_proc_entry[i].name);
1933 nf_unregister_sockopt(&ipt_sockopts);
1934 return -ENOMEM;
1935 }
1936 proc->owner = THIS_MODULE;
1937 }
1938 }
1939#endif
1940
1941 printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
1942 return 0; 1365 return 0;
1943} 1366}
1944 1367
1945static void __exit fini(void) 1368static void __exit fini(void)
1946{ 1369{
1947 nf_unregister_sockopt(&ipt_sockopts); 1370 nf_unregister_sockopt(&ipt_sockopts);
1948#ifdef CONFIG_PROC_FS 1371
1949 { 1372 xt_unregister_match(AF_INET, &icmp_matchstruct);
1950 int i; 1373 xt_unregister_target(AF_INET, &ipt_error_target);
1951 for (i = 0; ipt_proc_entry[i].name; i++) 1374 xt_unregister_target(AF_INET, &ipt_standard_target);
1952 proc_net_remove(ipt_proc_entry[i].name); 1375
1953 } 1376 xt_proto_fini(AF_INET);
1954#endif
1955} 1377}
1956 1378
1957EXPORT_SYMBOL(ipt_register_table); 1379EXPORT_SYMBOL(ipt_register_table);
1958EXPORT_SYMBOL(ipt_unregister_table); 1380EXPORT_SYMBOL(ipt_unregister_table);
1959EXPORT_SYMBOL(ipt_register_match);
1960EXPORT_SYMBOL(ipt_unregister_match);
1961EXPORT_SYMBOL(ipt_do_table); 1381EXPORT_SYMBOL(ipt_do_table);
1962EXPORT_SYMBOL(ipt_register_target);
1963EXPORT_SYMBOL(ipt_unregister_target);
1964EXPORT_SYMBOL(ipt_find_target);
1965
1966module_init(init); 1382module_init(init);
1967module_exit(fini); 1383module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_CLASSIFY.c b/net/ipv4/netfilter/ipt_CLASSIFY.c
deleted file mode 100644
index dab78d8bd494..000000000000
--- a/net/ipv4/netfilter/ipt_CLASSIFY.c
+++ /dev/null
@@ -1,90 +0,0 @@
1/*
2 * This is a module which is used for setting the skb->priority field
3 * of an skb for qdisc classification.
4 */
5
6/* (C) 2001-2002 Patrick McHardy <kaber@trash.net>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/ip.h>
16#include <net/checksum.h>
17
18#include <linux/netfilter_ipv4/ip_tables.h>
19#include <linux/netfilter_ipv4/ipt_CLASSIFY.h>
20
21MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
22MODULE_LICENSE("GPL");
23MODULE_DESCRIPTION("iptables qdisc classification target module");
24
25static unsigned int
26target(struct sk_buff **pskb,
27 const struct net_device *in,
28 const struct net_device *out,
29 unsigned int hooknum,
30 const void *targinfo,
31 void *userinfo)
32{
33 const struct ipt_classify_target_info *clinfo = targinfo;
34
35 if((*pskb)->priority != clinfo->priority)
36 (*pskb)->priority = clinfo->priority;
37
38 return IPT_CONTINUE;
39}
40
41static int
42checkentry(const char *tablename,
43 const struct ipt_entry *e,
44 void *targinfo,
45 unsigned int targinfosize,
46 unsigned int hook_mask)
47{
48 if (targinfosize != IPT_ALIGN(sizeof(struct ipt_classify_target_info))){
49 printk(KERN_ERR "CLASSIFY: invalid size (%u != %Zu).\n",
50 targinfosize,
51 IPT_ALIGN(sizeof(struct ipt_classify_target_info)));
52 return 0;
53 }
54
55 if (hook_mask & ~((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) |
56 (1 << NF_IP_POST_ROUTING))) {
57 printk(KERN_ERR "CLASSIFY: only valid in LOCAL_OUT, FORWARD "
58 "and POST_ROUTING.\n");
59 return 0;
60 }
61
62 if (strcmp(tablename, "mangle") != 0) {
63 printk(KERN_ERR "CLASSIFY: can only be called from "
64 "\"mangle\" table, not \"%s\".\n",
65 tablename);
66 return 0;
67 }
68
69 return 1;
70}
71
72static struct ipt_target ipt_classify_reg = {
73 .name = "CLASSIFY",
74 .target = target,
75 .checkentry = checkentry,
76 .me = THIS_MODULE,
77};
78
79static int __init init(void)
80{
81 return ipt_register_target(&ipt_classify_reg);
82}
83
84static void __exit fini(void)
85{
86 ipt_unregister_target(&ipt_classify_reg);
87}
88
89module_init(init);
90module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 45c52d8f4d99..d9bc971f03af 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -379,12 +379,13 @@ target(struct sk_buff **pskb,
379 379
380static int 380static int
381checkentry(const char *tablename, 381checkentry(const char *tablename,
382 const struct ipt_entry *e, 382 const void *e_void,
383 void *targinfo, 383 void *targinfo,
384 unsigned int targinfosize, 384 unsigned int targinfosize,
385 unsigned int hook_mask) 385 unsigned int hook_mask)
386{ 386{
387 struct ipt_clusterip_tgt_info *cipinfo = targinfo; 387 struct ipt_clusterip_tgt_info *cipinfo = targinfo;
388 const struct ipt_entry *e = e_void;
388 389
389 struct clusterip_config *config; 390 struct clusterip_config *config;
390 391
diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/ipv4/netfilter/ipt_CONNMARK.c
deleted file mode 100644
index 8acac5a40a92..000000000000
--- a/net/ipv4/netfilter/ipt_CONNMARK.c
+++ /dev/null
@@ -1,122 +0,0 @@
1/* This kernel module is used to modify the connection mark values, or
2 * to optionally restore the skb nfmark from the connection mark
3 *
4 * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
5 * by Henrik Nordstrom <hno@marasystems.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21#include <linux/module.h>
22#include <linux/skbuff.h>
23#include <linux/ip.h>
24#include <net/checksum.h>
25
26MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>");
27MODULE_DESCRIPTION("IP tables CONNMARK matching module");
28MODULE_LICENSE("GPL");
29
30#include <linux/netfilter_ipv4/ip_tables.h>
31#include <linux/netfilter_ipv4/ipt_CONNMARK.h>
32#include <net/netfilter/nf_conntrack_compat.h>
33
34static unsigned int
35target(struct sk_buff **pskb,
36 const struct net_device *in,
37 const struct net_device *out,
38 unsigned int hooknum,
39 const void *targinfo,
40 void *userinfo)
41{
42 const struct ipt_connmark_target_info *markinfo = targinfo;
43 u_int32_t diff;
44 u_int32_t nfmark;
45 u_int32_t newmark;
46 u_int32_t ctinfo;
47 u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo);
48
49 if (ctmark) {
50 switch(markinfo->mode) {
51 case IPT_CONNMARK_SET:
52 newmark = (*ctmark & ~markinfo->mask) | markinfo->mark;
53 if (newmark != *ctmark)
54 *ctmark = newmark;
55 break;
56 case IPT_CONNMARK_SAVE:
57 newmark = (*ctmark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask);
58 if (*ctmark != newmark)
59 *ctmark = newmark;
60 break;
61 case IPT_CONNMARK_RESTORE:
62 nfmark = (*pskb)->nfmark;
63 diff = (*ctmark ^ nfmark) & markinfo->mask;
64 if (diff != 0)
65 (*pskb)->nfmark = nfmark ^ diff;
66 break;
67 }
68 }
69
70 return IPT_CONTINUE;
71}
72
73static int
74checkentry(const char *tablename,
75 const struct ipt_entry *e,
76 void *targinfo,
77 unsigned int targinfosize,
78 unsigned int hook_mask)
79{
80 struct ipt_connmark_target_info *matchinfo = targinfo;
81 if (targinfosize != IPT_ALIGN(sizeof(struct ipt_connmark_target_info))) {
82 printk(KERN_WARNING "CONNMARK: targinfosize %u != %Zu\n",
83 targinfosize,
84 IPT_ALIGN(sizeof(struct ipt_connmark_target_info)));
85 return 0;
86 }
87
88 if (matchinfo->mode == IPT_CONNMARK_RESTORE) {
89 if (strcmp(tablename, "mangle") != 0) {
90 printk(KERN_WARNING "CONNMARK: restore can only be called from \"mangle\" table, not \"%s\"\n", tablename);
91 return 0;
92 }
93 }
94
95 if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) {
96 printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n");
97 return 0;
98 }
99
100 return 1;
101}
102
103static struct ipt_target ipt_connmark_reg = {
104 .name = "CONNMARK",
105 .target = &target,
106 .checkentry = &checkentry,
107 .me = THIS_MODULE
108};
109
110static int __init init(void)
111{
112 need_ip_conntrack();
113 return ipt_register_target(&ipt_connmark_reg);
114}
115
116static void __exit fini(void)
117{
118 ipt_unregister_target(&ipt_connmark_reg);
119}
120
121module_init(init);
122module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c
index 6e319570a28c..898cdf79ce18 100644
--- a/net/ipv4/netfilter/ipt_DSCP.c
+++ b/net/ipv4/netfilter/ipt_DSCP.c
@@ -57,7 +57,7 @@ target(struct sk_buff **pskb,
57 57
58static int 58static int
59checkentry(const char *tablename, 59checkentry(const char *tablename,
60 const struct ipt_entry *e, 60 const void *e_void,
61 void *targinfo, 61 void *targinfo,
62 unsigned int targinfosize, 62 unsigned int targinfosize,
63 unsigned int hook_mask) 63 unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index a1319693f648..706445426a6d 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -113,12 +113,13 @@ target(struct sk_buff **pskb,
113 113
114static int 114static int
115checkentry(const char *tablename, 115checkentry(const char *tablename,
116 const struct ipt_entry *e, 116 const void *e_void,
117 void *targinfo, 117 void *targinfo,
118 unsigned int targinfosize, 118 unsigned int targinfosize,
119 unsigned int hook_mask) 119 unsigned int hook_mask)
120{ 120{
121 const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo; 121 const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo;
122 const struct ipt_entry *e = e_void;
122 123
123 if (targinfosize != IPT_ALIGN(sizeof(struct ipt_ECN_info))) { 124 if (targinfosize != IPT_ALIGN(sizeof(struct ipt_ECN_info))) {
124 printk(KERN_WARNING "ECN: targinfosize %u != %Zu\n", 125 printk(KERN_WARNING "ECN: targinfosize %u != %Zu\n",
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 30be0f1dae37..6606ddb66a29 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -431,7 +431,7 @@ ipt_log_target(struct sk_buff **pskb,
431} 431}
432 432
433static int ipt_log_checkentry(const char *tablename, 433static int ipt_log_checkentry(const char *tablename,
434 const struct ipt_entry *e, 434 const void *e,
435 void *targinfo, 435 void *targinfo,
436 unsigned int targinfosize, 436 unsigned int targinfosize,
437 unsigned int hook_mask) 437 unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_MARK.c b/net/ipv4/netfilter/ipt_MARK.c
deleted file mode 100644
index 52b4f2c296bf..000000000000
--- a/net/ipv4/netfilter/ipt_MARK.c
+++ /dev/null
@@ -1,172 +0,0 @@
1/* This is a module which is used for setting the NFMARK field of an skb. */
2
3/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/module.h>
11#include <linux/skbuff.h>
12#include <linux/ip.h>
13#include <net/checksum.h>
14
15#include <linux/netfilter_ipv4/ip_tables.h>
16#include <linux/netfilter_ipv4/ipt_MARK.h>
17
18MODULE_LICENSE("GPL");
19MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
20MODULE_DESCRIPTION("iptables MARK modification module");
21
22static unsigned int
23target_v0(struct sk_buff **pskb,
24 const struct net_device *in,
25 const struct net_device *out,
26 unsigned int hooknum,
27 const void *targinfo,
28 void *userinfo)
29{
30 const struct ipt_mark_target_info *markinfo = targinfo;
31
32 if((*pskb)->nfmark != markinfo->mark)
33 (*pskb)->nfmark = markinfo->mark;
34
35 return IPT_CONTINUE;
36}
37
38static unsigned int
39target_v1(struct sk_buff **pskb,
40 const struct net_device *in,
41 const struct net_device *out,
42 unsigned int hooknum,
43 const void *targinfo,
44 void *userinfo)
45{
46 const struct ipt_mark_target_info_v1 *markinfo = targinfo;
47 int mark = 0;
48
49 switch (markinfo->mode) {
50 case IPT_MARK_SET:
51 mark = markinfo->mark;
52 break;
53
54 case IPT_MARK_AND:
55 mark = (*pskb)->nfmark & markinfo->mark;
56 break;
57
58 case IPT_MARK_OR:
59 mark = (*pskb)->nfmark | markinfo->mark;
60 break;
61 }
62
63 if((*pskb)->nfmark != mark)
64 (*pskb)->nfmark = mark;
65
66 return IPT_CONTINUE;
67}
68
69
70static int
71checkentry_v0(const char *tablename,
72 const struct ipt_entry *e,
73 void *targinfo,
74 unsigned int targinfosize,
75 unsigned int hook_mask)
76{
77 struct ipt_mark_target_info *markinfo = targinfo;
78
79 if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info))) {
80 printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n",
81 targinfosize,
82 IPT_ALIGN(sizeof(struct ipt_mark_target_info)));
83 return 0;
84 }
85
86 if (strcmp(tablename, "mangle") != 0) {
87 printk(KERN_WARNING "MARK: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
88 return 0;
89 }
90
91 if (markinfo->mark > 0xffffffff) {
92 printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
93 return 0;
94 }
95
96 return 1;
97}
98
99static int
100checkentry_v1(const char *tablename,
101 const struct ipt_entry *e,
102 void *targinfo,
103 unsigned int targinfosize,
104 unsigned int hook_mask)
105{
106 struct ipt_mark_target_info_v1 *markinfo = targinfo;
107
108 if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info_v1))){
109 printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n",
110 targinfosize,
111 IPT_ALIGN(sizeof(struct ipt_mark_target_info_v1)));
112 return 0;
113 }
114
115 if (strcmp(tablename, "mangle") != 0) {
116 printk(KERN_WARNING "MARK: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
117 return 0;
118 }
119
120 if (markinfo->mode != IPT_MARK_SET
121 && markinfo->mode != IPT_MARK_AND
122 && markinfo->mode != IPT_MARK_OR) {
123 printk(KERN_WARNING "MARK: unknown mode %u\n",
124 markinfo->mode);
125 return 0;
126 }
127
128 if (markinfo->mark > 0xffffffff) {
129 printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
130 return 0;
131 }
132
133 return 1;
134}
135
136static struct ipt_target ipt_mark_reg_v0 = {
137 .name = "MARK",
138 .target = target_v0,
139 .checkentry = checkentry_v0,
140 .me = THIS_MODULE,
141 .revision = 0,
142};
143
144static struct ipt_target ipt_mark_reg_v1 = {
145 .name = "MARK",
146 .target = target_v1,
147 .checkentry = checkentry_v1,
148 .me = THIS_MODULE,
149 .revision = 1,
150};
151
152static int __init init(void)
153{
154 int err;
155
156 err = ipt_register_target(&ipt_mark_reg_v0);
157 if (!err) {
158 err = ipt_register_target(&ipt_mark_reg_v1);
159 if (err)
160 ipt_unregister_target(&ipt_mark_reg_v0);
161 }
162 return err;
163}
164
165static void __exit fini(void)
166{
167 ipt_unregister_target(&ipt_mark_reg_v0);
168 ipt_unregister_target(&ipt_mark_reg_v1);
169}
170
171module_init(init);
172module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 275a174c6fe6..12c56d3343ca 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -11,6 +11,7 @@
11 11
12#include <linux/config.h> 12#include <linux/config.h>
13#include <linux/types.h> 13#include <linux/types.h>
14#include <linux/inetdevice.h>
14#include <linux/ip.h> 15#include <linux/ip.h>
15#include <linux/timer.h> 16#include <linux/timer.h>
16#include <linux/module.h> 17#include <linux/module.h>
@@ -18,6 +19,7 @@
18#include <net/protocol.h> 19#include <net/protocol.h>
19#include <net/ip.h> 20#include <net/ip.h>
20#include <net/checksum.h> 21#include <net/checksum.h>
22#include <net/route.h>
21#include <linux/netfilter_ipv4.h> 23#include <linux/netfilter_ipv4.h>
22#include <linux/netfilter_ipv4/ip_nat_rule.h> 24#include <linux/netfilter_ipv4/ip_nat_rule.h>
23#include <linux/netfilter_ipv4/ip_tables.h> 25#include <linux/netfilter_ipv4/ip_tables.h>
@@ -38,7 +40,7 @@ static DEFINE_RWLOCK(masq_lock);
38/* FIXME: Multiple targets. --RR */ 40/* FIXME: Multiple targets. --RR */
39static int 41static int
40masquerade_check(const char *tablename, 42masquerade_check(const char *tablename,
41 const struct ipt_entry *e, 43 const void *e,
42 void *targinfo, 44 void *targinfo,
43 unsigned int targinfosize, 45 unsigned int targinfosize,
44 unsigned int hook_mask) 46 unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index e6e7b6095363..b074467fe67b 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -31,7 +31,7 @@ MODULE_DESCRIPTION("iptables 1:1 NAT mapping of IP networks target");
31 31
32static int 32static int
33check(const char *tablename, 33check(const char *tablename,
34 const struct ipt_entry *e, 34 const void *e,
35 void *targinfo, 35 void *targinfo,
36 unsigned int targinfosize, 36 unsigned int targinfosize,
37 unsigned int hook_mask) 37 unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_NFQUEUE.c b/net/ipv4/netfilter/ipt_NFQUEUE.c
deleted file mode 100644
index 3cedc9be8807..000000000000
--- a/net/ipv4/netfilter/ipt_NFQUEUE.c
+++ /dev/null
@@ -1,70 +0,0 @@
1/* iptables module for using new netfilter netlink queue
2 *
3 * (C) 2005 by Harald Welte <laforge@netfilter.org>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 */
10
11#include <linux/module.h>
12#include <linux/skbuff.h>
13
14#include <linux/netfilter.h>
15#include <linux/netfilter_ipv4/ip_tables.h>
16#include <linux/netfilter_ipv4/ipt_NFQUEUE.h>
17
18MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
19MODULE_DESCRIPTION("iptables NFQUEUE target");
20MODULE_LICENSE("GPL");
21
22static unsigned int
23target(struct sk_buff **pskb,
24 const struct net_device *in,
25 const struct net_device *out,
26 unsigned int hooknum,
27 const void *targinfo,
28 void *userinfo)
29{
30 const struct ipt_NFQ_info *tinfo = targinfo;
31
32 return NF_QUEUE_NR(tinfo->queuenum);
33}
34
35static int
36checkentry(const char *tablename,
37 const struct ipt_entry *e,
38 void *targinfo,
39 unsigned int targinfosize,
40 unsigned int hook_mask)
41{
42 if (targinfosize != IPT_ALIGN(sizeof(struct ipt_NFQ_info))) {
43 printk(KERN_WARNING "NFQUEUE: targinfosize %u != %Zu\n",
44 targinfosize,
45 IPT_ALIGN(sizeof(struct ipt_NFQ_info)));
46 return 0;
47 }
48
49 return 1;
50}
51
52static struct ipt_target ipt_NFQ_reg = {
53 .name = "NFQUEUE",
54 .target = target,
55 .checkentry = checkentry,
56 .me = THIS_MODULE,
57};
58
59static int __init init(void)
60{
61 return ipt_register_target(&ipt_NFQ_reg);
62}
63
64static void __exit fini(void)
65{
66 ipt_unregister_target(&ipt_NFQ_reg);
67}
68
69module_init(init);
70module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_NOTRACK.c b/net/ipv4/netfilter/ipt_NOTRACK.c
deleted file mode 100644
index e3c69d072c6e..000000000000
--- a/net/ipv4/netfilter/ipt_NOTRACK.c
+++ /dev/null
@@ -1,76 +0,0 @@
1/* This is a module which is used for setting up fake conntracks
2 * on packets so that they are not seen by the conntrack/NAT code.
3 */
4#include <linux/module.h>
5#include <linux/skbuff.h>
6
7#include <linux/netfilter_ipv4/ip_tables.h>
8#include <net/netfilter/nf_conntrack_compat.h>
9
10static unsigned int
11target(struct sk_buff **pskb,
12 const struct net_device *in,
13 const struct net_device *out,
14 unsigned int hooknum,
15 const void *targinfo,
16 void *userinfo)
17{
18 /* Previously seen (loopback)? Ignore. */
19 if ((*pskb)->nfct != NULL)
20 return IPT_CONTINUE;
21
22 /* Attach fake conntrack entry.
23 If there is a real ct entry correspondig to this packet,
24 it'll hang aroun till timing out. We don't deal with it
25 for performance reasons. JK */
26 nf_ct_untrack(*pskb);
27 (*pskb)->nfctinfo = IP_CT_NEW;
28 nf_conntrack_get((*pskb)->nfct);
29
30 return IPT_CONTINUE;
31}
32
33static int
34checkentry(const char *tablename,
35 const struct ipt_entry *e,
36 void *targinfo,
37 unsigned int targinfosize,
38 unsigned int hook_mask)
39{
40 if (targinfosize != 0) {
41 printk(KERN_WARNING "NOTRACK: targinfosize %u != 0\n",
42 targinfosize);
43 return 0;
44 }
45
46 if (strcmp(tablename, "raw") != 0) {
47 printk(KERN_WARNING "NOTRACK: can only be called from \"raw\" table, not \"%s\"\n", tablename);
48 return 0;
49 }
50
51 return 1;
52}
53
54static struct ipt_target ipt_notrack_reg = {
55 .name = "NOTRACK",
56 .target = target,
57 .checkentry = checkentry,
58 .me = THIS_MODULE
59};
60
61static int __init init(void)
62{
63 if (ipt_register_target(&ipt_notrack_reg))
64 return -EINVAL;
65
66 return 0;
67}
68
69static void __exit fini(void)
70{
71 ipt_unregister_target(&ipt_notrack_reg);
72}
73
74module_init(init);
75module_exit(fini);
76MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 5245bfd33d52..140be51f2f01 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -33,7 +33,7 @@ MODULE_DESCRIPTION("iptables REDIRECT target module");
33/* FIXME: Take multiple ranges --RR */ 33/* FIXME: Take multiple ranges --RR */
34static int 34static int
35redirect_check(const char *tablename, 35redirect_check(const char *tablename,
36 const struct ipt_entry *e, 36 const void *e,
37 void *targinfo, 37 void *targinfo,
38 unsigned int targinfosize, 38 unsigned int targinfosize,
39 unsigned int hook_mask) 39 unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index f057025a719e..3eb47aae78c5 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -203,7 +203,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
203 sizeof(struct tcphdr), 0)); 203 sizeof(struct tcphdr), 0));
204 204
205 /* Adjust IP TTL, DF */ 205 /* Adjust IP TTL, DF */
206 nskb->nh.iph->ttl = MAXTTL; 206 nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
207 /* Set DF, id = 0 */ 207 /* Set DF, id = 0 */
208 nskb->nh.iph->frag_off = htons(IP_DF); 208 nskb->nh.iph->frag_off = htons(IP_DF);
209 nskb->nh.iph->id = 0; 209 nskb->nh.iph->id = 0;
@@ -282,12 +282,13 @@ static unsigned int reject(struct sk_buff **pskb,
282} 282}
283 283
284static int check(const char *tablename, 284static int check(const char *tablename,
285 const struct ipt_entry *e, 285 const void *e_void,
286 void *targinfo, 286 void *targinfo,
287 unsigned int targinfosize, 287 unsigned int targinfosize,
288 unsigned int hook_mask) 288 unsigned int hook_mask)
289{ 289{
290 const struct ipt_reject_info *rejinfo = targinfo; 290 const struct ipt_reject_info *rejinfo = targinfo;
291 const struct ipt_entry *e = e_void;
291 292
292 if (targinfosize != IPT_ALIGN(sizeof(struct ipt_reject_info))) { 293 if (targinfosize != IPT_ALIGN(sizeof(struct ipt_reject_info))) {
293 DEBUGP("REJECT: targinfosize %u != 0\n", targinfosize); 294 DEBUGP("REJECT: targinfosize %u != 0\n", targinfosize);
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index 7a0536d864ac..a22de59bba0e 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -49,7 +49,7 @@ MODULE_DESCRIPTION("iptables special SNAT module for consistent sourceip");
49 49
50static int 50static int
51same_check(const char *tablename, 51same_check(const char *tablename,
52 const struct ipt_entry *e, 52 const void *e,
53 void *targinfo, 53 void *targinfo,
54 unsigned int targinfosize, 54 unsigned int targinfosize,
55 unsigned int hook_mask) 55 unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
index 8db70d6908c3..c122841e182c 100644
--- a/net/ipv4/netfilter/ipt_TCPMSS.c
+++ b/net/ipv4/netfilter/ipt_TCPMSS.c
@@ -210,12 +210,13 @@ static inline int find_syn_match(const struct ipt_entry_match *m)
210/* Must specify -p tcp --syn/--tcp-flags SYN */ 210/* Must specify -p tcp --syn/--tcp-flags SYN */
211static int 211static int
212ipt_tcpmss_checkentry(const char *tablename, 212ipt_tcpmss_checkentry(const char *tablename,
213 const struct ipt_entry *e, 213 const void *e_void,
214 void *targinfo, 214 void *targinfo,
215 unsigned int targinfosize, 215 unsigned int targinfosize,
216 unsigned int hook_mask) 216 unsigned int hook_mask)
217{ 217{
218 const struct ipt_tcpmss_info *tcpmssinfo = targinfo; 218 const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
219 const struct ipt_entry *e = e_void;
219 220
220 if (targinfosize != IPT_ALIGN(sizeof(struct ipt_tcpmss_info))) { 221 if (targinfosize != IPT_ALIGN(sizeof(struct ipt_tcpmss_info))) {
221 DEBUGP("ipt_tcpmss_checkentry: targinfosize %u != %u\n", 222 DEBUGP("ipt_tcpmss_checkentry: targinfosize %u != %u\n",
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index deadb36d4428..3a44a56db239 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -52,7 +52,7 @@ target(struct sk_buff **pskb,
52 52
53static int 53static int
54checkentry(const char *tablename, 54checkentry(const char *tablename,
55 const struct ipt_entry *e, 55 const void *e_void,
56 void *targinfo, 56 void *targinfo,
57 unsigned int targinfosize, 57 unsigned int targinfosize,
58 unsigned int hook_mask) 58 unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index b9ae6a9382f3..b769eb231970 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -66,7 +66,7 @@ ipt_ttl_target(struct sk_buff **pskb, const struct net_device *in,
66} 66}
67 67
68static int ipt_ttl_checkentry(const char *tablename, 68static int ipt_ttl_checkentry(const char *tablename,
69 const struct ipt_entry *e, 69 const void *e,
70 void *targinfo, 70 void *targinfo,
71 unsigned int targinfosize, 71 unsigned int targinfosize,
72 unsigned int hook_mask) 72 unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 2883ccd8a91d..641dbc477650 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -77,15 +77,15 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG);
77#define PRINTR(format, args...) do { if (net_ratelimit()) printk(format , ## args); } while (0) 77#define PRINTR(format, args...) do { if (net_ratelimit()) printk(format , ## args); } while (0)
78 78
79static unsigned int nlbufsiz = 4096; 79static unsigned int nlbufsiz = 4096;
80module_param(nlbufsiz, uint, 0600); /* FIXME: Check size < 128k --RR */ 80module_param(nlbufsiz, uint, 0400);
81MODULE_PARM_DESC(nlbufsiz, "netlink buffer size"); 81MODULE_PARM_DESC(nlbufsiz, "netlink buffer size");
82 82
83static unsigned int flushtimeout = 10; 83static unsigned int flushtimeout = 10;
84module_param(flushtimeout, int, 0600); 84module_param(flushtimeout, uint, 0600);
85MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)"); 85MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)");
86 86
87static unsigned int nflog = 1; 87static int nflog = 1;
88module_param(nflog, int, 0400); 88module_param(nflog, bool, 0400);
89MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); 89MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
90 90
91/* global data structures */ 91/* global data structures */
@@ -330,7 +330,7 @@ static void ipt_logfn(unsigned int pf,
330} 330}
331 331
332static int ipt_ulog_checkentry(const char *tablename, 332static int ipt_ulog_checkentry(const char *tablename,
333 const struct ipt_entry *e, 333 const void *e,
334 void *targinfo, 334 void *targinfo,
335 unsigned int targinfosize, 335 unsigned int targinfosize,
336 unsigned int hookmask) 336 unsigned int hookmask)
@@ -376,7 +376,7 @@ static int __init init(void)
376 376
377 DEBUGP("ipt_ULOG: init module\n"); 377 DEBUGP("ipt_ULOG: init module\n");
378 378
379 if (nlbufsiz >= 128*1024) { 379 if (nlbufsiz > 128*1024) {
380 printk("Netlink buffer has to be <= 128kB\n"); 380 printk("Netlink buffer has to be <= 128kB\n");
381 return -EINVAL; 381 return -EINVAL;
382 } 382 }
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index e19c2a52d00c..d6b83a976518 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -29,7 +29,7 @@ static inline int match_type(u_int32_t addr, u_int16_t mask)
29 29
30static int match(const struct sk_buff *skb, const struct net_device *in, 30static int match(const struct sk_buff *skb, const struct net_device *in,
31 const struct net_device *out, const void *matchinfo, 31 const struct net_device *out, const void *matchinfo,
32 int offset, int *hotdrop) 32 int offset, unsigned int protoff, int *hotdrop)
33{ 33{
34 const struct ipt_addrtype_info *info = matchinfo; 34 const struct ipt_addrtype_info *info = matchinfo;
35 const struct iphdr *iph = skb->nh.iph; 35 const struct iphdr *iph = skb->nh.iph;
@@ -43,7 +43,7 @@ static int match(const struct sk_buff *skb, const struct net_device *in,
43 return ret; 43 return ret;
44} 44}
45 45
46static int checkentry(const char *tablename, const struct ipt_ip *ip, 46static int checkentry(const char *tablename, const void *ip,
47 void *matchinfo, unsigned int matchsize, 47 void *matchinfo, unsigned int matchsize,
48 unsigned int hook_mask) 48 unsigned int hook_mask)
49{ 49{
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index a0fea847cb72..144adfec13cc 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -41,6 +41,7 @@ match(const struct sk_buff *skb,
41 const struct net_device *out, 41 const struct net_device *out,
42 const void *matchinfo, 42 const void *matchinfo,
43 int offset, 43 int offset,
44 unsigned int protoff,
44 int *hotdrop) 45 int *hotdrop)
45{ 46{
46 struct ip_auth_hdr _ahdr, *ah; 47 struct ip_auth_hdr _ahdr, *ah;
@@ -50,7 +51,7 @@ match(const struct sk_buff *skb,
50 if (offset) 51 if (offset)
51 return 0; 52 return 0;
52 53
53 ah = skb_header_pointer(skb, skb->nh.iph->ihl * 4, 54 ah = skb_header_pointer(skb, protoff,
54 sizeof(_ahdr), &_ahdr); 55 sizeof(_ahdr), &_ahdr);
55 if (ah == NULL) { 56 if (ah == NULL) {
56 /* We've been asked to examine this packet, and we 57 /* We've been asked to examine this packet, and we
@@ -69,12 +70,13 @@ match(const struct sk_buff *skb,
69/* Called when user tries to insert an entry of this type. */ 70/* Called when user tries to insert an entry of this type. */
70static int 71static int
71checkentry(const char *tablename, 72checkentry(const char *tablename,
72 const struct ipt_ip *ip, 73 const void *ip_void,
73 void *matchinfo, 74 void *matchinfo,
74 unsigned int matchinfosize, 75 unsigned int matchinfosize,
75 unsigned int hook_mask) 76 unsigned int hook_mask)
76{ 77{
77 const struct ipt_ah *ahinfo = matchinfo; 78 const struct ipt_ah *ahinfo = matchinfo;
79 const struct ipt_ip *ip = ip_void;
78 80
79 /* Must specify proto == AH, and no unknown invflags */ 81 /* Must specify proto == AH, and no unknown invflags */
80 if (ip->proto != IPPROTO_AH || (ip->invflags & IPT_INV_PROTO)) { 82 if (ip->proto != IPPROTO_AH || (ip->invflags & IPT_INV_PROTO)) {
diff --git a/net/ipv4/netfilter/ipt_comment.c b/net/ipv4/netfilter/ipt_comment.c
deleted file mode 100644
index 6b76a1ea5245..000000000000
--- a/net/ipv4/netfilter/ipt_comment.c
+++ /dev/null
@@ -1,59 +0,0 @@
1/*
2 * Implements a dummy match to allow attaching comments to rules
3 *
4 * 2003-05-13 Brad Fisher (brad@info-link.net)
5 */
6
7#include <linux/module.h>
8#include <linux/skbuff.h>
9#include <linux/netfilter_ipv4/ip_tables.h>
10#include <linux/netfilter_ipv4/ipt_comment.h>
11
12MODULE_AUTHOR("Brad Fisher <brad@info-link.net>");
13MODULE_DESCRIPTION("iptables comment match module");
14MODULE_LICENSE("GPL");
15
/*
 * Match callback of the "comment" match.
 *
 * None of the arguments are inspected: the function unconditionally
 * reports a match (returns 1) for every packet.
 */
static int
match(const struct sk_buff *skb,
      const struct net_device *in,
      const struct net_device *out,
      const void *matchinfo,
      int offset,
      int *hotdrop)
{
	return 1;
}
27
28static int
29checkentry(const char *tablename,
30 const struct ipt_ip *ip,
31 void *matchinfo,
32 unsigned int matchsize,
33 unsigned int hook_mask)
34{
35 /* Check the size */
36 if (matchsize != IPT_ALIGN(sizeof(struct ipt_comment_info)))
37 return 0;
38 return 1;
39}
40
41static struct ipt_match comment_match = {
42 .name = "comment",
43 .match = match,
44 .checkentry = checkentry,
45 .me = THIS_MODULE
46};
47
48static int __init init(void)
49{
50 return ipt_register_match(&comment_match);
51}
52
53static void __exit fini(void)
54{
55 ipt_unregister_match(&comment_match);
56}
57
58module_init(init);
59module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_connbytes.c b/net/ipv4/netfilter/ipt_connbytes.c
deleted file mode 100644
index d68a048b7176..000000000000
--- a/net/ipv4/netfilter/ipt_connbytes.c
+++ /dev/null
@@ -1,161 +0,0 @@
1/* Kernel module to match connection tracking byte counter.
2 * GPL (C) 2002 Martin Devera (devik@cdi.cz).
3 *
4 * 2004-07-20 Harald Welte <laforge@netfilter.org>
5 * - reimplemented to use per-connection accounting counters
6 * - add functionality to match number of packets
7 * - add functionality to match average packet size
8 * - add support to match directions separately
9 *
10 */
11#include <linux/module.h>
12#include <linux/skbuff.h>
13#include <net/netfilter/nf_conntrack_compat.h>
14#include <linux/netfilter_ipv4/ip_tables.h>
15#include <linux/netfilter_ipv4/ipt_connbytes.h>
16
17#include <asm/div64.h>
18#include <asm/bitops.h>
19
20MODULE_LICENSE("GPL");
21MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
22MODULE_DESCRIPTION("iptables match for matching number of pkts/bytes per connection");
23
/*
 * 64bit divisor, dividend and result, with dynamic precision.
 *
 * do_div() is handed a 32bit divisor.  When the divisor does not fit into
 * 32 bits, both operands are shifted right by fls(divisor >> 32) first, so
 * low-order bits of both are discarded before dividing.
 */
static u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
{
	unsigned int shift = 0;
	u_int32_t low_divisor;

	if (divisor > 0xffffffffULL)
		shift = fls(divisor >> 32);

	low_divisor = divisor >> shift;
	dividend >>= shift;

	do_div(dividend, low_divisor);
	return dividend;
}
39
40static int
41match(const struct sk_buff *skb,
42 const struct net_device *in,
43 const struct net_device *out,
44 const void *matchinfo,
45 int offset,
46 int *hotdrop)
47{
48 const struct ipt_connbytes_info *sinfo = matchinfo;
49 u_int64_t what = 0; /* initialize to make gcc happy */
50 const struct ip_conntrack_counter *counters;
51
52 if (!(counters = nf_ct_get_counters(skb)))
53 return 0; /* no match */
54
55 switch (sinfo->what) {
56 case IPT_CONNBYTES_PKTS:
57 switch (sinfo->direction) {
58 case IPT_CONNBYTES_DIR_ORIGINAL:
59 what = counters[IP_CT_DIR_ORIGINAL].packets;
60 break;
61 case IPT_CONNBYTES_DIR_REPLY:
62 what = counters[IP_CT_DIR_REPLY].packets;
63 break;
64 case IPT_CONNBYTES_DIR_BOTH:
65 what = counters[IP_CT_DIR_ORIGINAL].packets;
66 what += counters[IP_CT_DIR_REPLY].packets;
67 break;
68 }
69 break;
70 case IPT_CONNBYTES_BYTES:
71 switch (sinfo->direction) {
72 case IPT_CONNBYTES_DIR_ORIGINAL:
73 what = counters[IP_CT_DIR_ORIGINAL].bytes;
74 break;
75 case IPT_CONNBYTES_DIR_REPLY:
76 what = counters[IP_CT_DIR_REPLY].bytes;
77 break;
78 case IPT_CONNBYTES_DIR_BOTH:
79 what = counters[IP_CT_DIR_ORIGINAL].bytes;
80 what += counters[IP_CT_DIR_REPLY].bytes;
81 break;
82 }
83 break;
84 case IPT_CONNBYTES_AVGPKT:
85 switch (sinfo->direction) {
86 case IPT_CONNBYTES_DIR_ORIGINAL:
87 what = div64_64(counters[IP_CT_DIR_ORIGINAL].bytes,
88 counters[IP_CT_DIR_ORIGINAL].packets);
89 break;
90 case IPT_CONNBYTES_DIR_REPLY:
91 what = div64_64(counters[IP_CT_DIR_REPLY].bytes,
92 counters[IP_CT_DIR_REPLY].packets);
93 break;
94 case IPT_CONNBYTES_DIR_BOTH:
95 {
96 u_int64_t bytes;
97 u_int64_t pkts;
98 bytes = counters[IP_CT_DIR_ORIGINAL].bytes +
99 counters[IP_CT_DIR_REPLY].bytes;
100 pkts = counters[IP_CT_DIR_ORIGINAL].packets+
101 counters[IP_CT_DIR_REPLY].packets;
102
103 /* FIXME_THEORETICAL: what to do if sum
104 * overflows ? */
105
106 what = div64_64(bytes, pkts);
107 }
108 break;
109 }
110 break;
111 }
112
113 if (sinfo->count.to)
114 return (what <= sinfo->count.to && what >= sinfo->count.from);
115 else
116 return (what >= sinfo->count.from);
117}
118
119static int check(const char *tablename,
120 const struct ipt_ip *ip,
121 void *matchinfo,
122 unsigned int matchsize,
123 unsigned int hook_mask)
124{
125 const struct ipt_connbytes_info *sinfo = matchinfo;
126
127 if (matchsize != IPT_ALIGN(sizeof(struct ipt_connbytes_info)))
128 return 0;
129
130 if (sinfo->what != IPT_CONNBYTES_PKTS &&
131 sinfo->what != IPT_CONNBYTES_BYTES &&
132 sinfo->what != IPT_CONNBYTES_AVGPKT)
133 return 0;
134
135 if (sinfo->direction != IPT_CONNBYTES_DIR_ORIGINAL &&
136 sinfo->direction != IPT_CONNBYTES_DIR_REPLY &&
137 sinfo->direction != IPT_CONNBYTES_DIR_BOTH)
138 return 0;
139
140 return 1;
141}
142
143static struct ipt_match state_match = {
144 .name = "connbytes",
145 .match = &match,
146 .checkentry = &check,
147 .me = THIS_MODULE
148};
149
150static int __init init(void)
151{
152 return ipt_register_match(&state_match);
153}
154
155static void __exit fini(void)
156{
157 ipt_unregister_match(&state_match);
158}
159
160module_init(init);
161module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_connmark.c b/net/ipv4/netfilter/ipt_connmark.c
deleted file mode 100644
index 5306ef293b92..000000000000
--- a/net/ipv4/netfilter/ipt_connmark.c
+++ /dev/null
@@ -1,88 +0,0 @@
1/* This kernel module matches connection mark values set by the
2 * CONNMARK target
3 *
4 * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
5 * by Henrik Nordstrom <hno@marasystems.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22#include <linux/module.h>
23#include <linux/skbuff.h>
24
25MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>");
26MODULE_DESCRIPTION("IP tables connmark match module");
27MODULE_LICENSE("GPL");
28
29#include <linux/netfilter_ipv4/ip_tables.h>
30#include <linux/netfilter_ipv4/ipt_connmark.h>
31#include <net/netfilter/nf_conntrack_compat.h>
32
33static int
34match(const struct sk_buff *skb,
35 const struct net_device *in,
36 const struct net_device *out,
37 const void *matchinfo,
38 int offset,
39 int *hotdrop)
40{
41 const struct ipt_connmark_info *info = matchinfo;
42 u_int32_t ctinfo;
43 const u_int32_t *ctmark = nf_ct_get_mark(skb, &ctinfo);
44 if (!ctmark)
45 return 0;
46
47 return (((*ctmark) & info->mask) == info->mark) ^ info->invert;
48}
49
50static int
51checkentry(const char *tablename,
52 const struct ipt_ip *ip,
53 void *matchinfo,
54 unsigned int matchsize,
55 unsigned int hook_mask)
56{
57 struct ipt_connmark_info *cm =
58 (struct ipt_connmark_info *)matchinfo;
59 if (matchsize != IPT_ALIGN(sizeof(struct ipt_connmark_info)))
60 return 0;
61
62 if (cm->mark > 0xffffffff || cm->mask > 0xffffffff) {
63 printk(KERN_WARNING "connmark: only support 32bit mark\n");
64 return 0;
65 }
66
67 return 1;
68}
69
70static struct ipt_match connmark_match = {
71 .name = "connmark",
72 .match = &match,
73 .checkentry = &checkentry,
74 .me = THIS_MODULE
75};
76
77static int __init init(void)
78{
79 return ipt_register_match(&connmark_match);
80}
81
82static void __exit fini(void)
83{
84 ipt_unregister_match(&connmark_match);
85}
86
87module_init(init);
88module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_conntrack.c b/net/ipv4/netfilter/ipt_conntrack.c
deleted file mode 100644
index c8d18705469b..000000000000
--- a/net/ipv4/netfilter/ipt_conntrack.c
+++ /dev/null
@@ -1,232 +0,0 @@
1/* Kernel module to match connection tracking information.
2 * Superset of Rusty's minimalistic state match.
3 *
4 * (C) 2001 Marc Boucher (marc@mbsi.ca).
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/skbuff.h>
13
14#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
15#include <linux/netfilter_ipv4/ip_conntrack.h>
16#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
17#else
18#include <net/netfilter/nf_conntrack.h>
19#endif
20
21#include <linux/netfilter_ipv4/ip_tables.h>
22#include <linux/netfilter_ipv4/ipt_conntrack.h>
23
24MODULE_LICENSE("GPL");
25MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
26MODULE_DESCRIPTION("iptables connection tracking match module");
27
28#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
29
30static int
31match(const struct sk_buff *skb,
32 const struct net_device *in,
33 const struct net_device *out,
34 const void *matchinfo,
35 int offset,
36 int *hotdrop)
37{
38 const struct ipt_conntrack_info *sinfo = matchinfo;
39 struct ip_conntrack *ct;
40 enum ip_conntrack_info ctinfo;
41 unsigned int statebit;
42
43 ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
44
45#define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg))
46
47 if (ct == &ip_conntrack_untracked)
48 statebit = IPT_CONNTRACK_STATE_UNTRACKED;
49 else if (ct)
50 statebit = IPT_CONNTRACK_STATE_BIT(ctinfo);
51 else
52 statebit = IPT_CONNTRACK_STATE_INVALID;
53
54 if(sinfo->flags & IPT_CONNTRACK_STATE) {
55 if (ct) {
56 if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip !=
57 ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip)
58 statebit |= IPT_CONNTRACK_STATE_SNAT;
59
60 if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip !=
61 ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip)
62 statebit |= IPT_CONNTRACK_STATE_DNAT;
63 }
64
65 if (FWINV((statebit & sinfo->statemask) == 0, IPT_CONNTRACK_STATE))
66 return 0;
67 }
68
69 if(sinfo->flags & IPT_CONNTRACK_PROTO) {
70 if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, IPT_CONNTRACK_PROTO))
71 return 0;
72 }
73
74 if(sinfo->flags & IPT_CONNTRACK_ORIGSRC) {
75 if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, IPT_CONNTRACK_ORIGSRC))
76 return 0;
77 }
78
79 if(sinfo->flags & IPT_CONNTRACK_ORIGDST) {
80 if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, IPT_CONNTRACK_ORIGDST))
81 return 0;
82 }
83
84 if(sinfo->flags & IPT_CONNTRACK_REPLSRC) {
85 if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, IPT_CONNTRACK_REPLSRC))
86 return 0;
87 }
88
89 if(sinfo->flags & IPT_CONNTRACK_REPLDST) {
90 if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, IPT_CONNTRACK_REPLDST))
91 return 0;
92 }
93
94 if(sinfo->flags & IPT_CONNTRACK_STATUS) {
95 if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, IPT_CONNTRACK_STATUS))
96 return 0;
97 }
98
99 if(sinfo->flags & IPT_CONNTRACK_EXPIRES) {
100 unsigned long expires;
101
102 if(!ct)
103 return 0;
104
105 expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0;
106
107 if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), IPT_CONNTRACK_EXPIRES))
108 return 0;
109 }
110
111 return 1;
112}
113
114#else /* CONFIG_IP_NF_CONNTRACK */
115static int
116match(const struct sk_buff *skb,
117 const struct net_device *in,
118 const struct net_device *out,
119 const void *matchinfo,
120 int offset,
121 int *hotdrop)
122{
123 const struct ipt_conntrack_info *sinfo = matchinfo;
124 struct nf_conn *ct;
125 enum ip_conntrack_info ctinfo;
126 unsigned int statebit;
127
128 ct = nf_ct_get((struct sk_buff *)skb, &ctinfo);
129
130#define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg))
131
132 if (ct == &nf_conntrack_untracked)
133 statebit = IPT_CONNTRACK_STATE_UNTRACKED;
134 else if (ct)
135 statebit = IPT_CONNTRACK_STATE_BIT(ctinfo);
136 else
137 statebit = IPT_CONNTRACK_STATE_INVALID;
138
139 if(sinfo->flags & IPT_CONNTRACK_STATE) {
140 if (ct) {
141 if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip !=
142 ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip)
143 statebit |= IPT_CONNTRACK_STATE_SNAT;
144
145 if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip !=
146 ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip)
147 statebit |= IPT_CONNTRACK_STATE_DNAT;
148 }
149
150 if (FWINV((statebit & sinfo->statemask) == 0, IPT_CONNTRACK_STATE))
151 return 0;
152 }
153
154 if(sinfo->flags & IPT_CONNTRACK_PROTO) {
155 if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, IPT_CONNTRACK_PROTO))
156 return 0;
157 }
158
159 if(sinfo->flags & IPT_CONNTRACK_ORIGSRC) {
160 if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, IPT_CONNTRACK_ORIGSRC))
161 return 0;
162 }
163
164 if(sinfo->flags & IPT_CONNTRACK_ORIGDST) {
165 if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, IPT_CONNTRACK_ORIGDST))
166 return 0;
167 }
168
169 if(sinfo->flags & IPT_CONNTRACK_REPLSRC) {
170 if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, IPT_CONNTRACK_REPLSRC))
171 return 0;
172 }
173
174 if(sinfo->flags & IPT_CONNTRACK_REPLDST) {
175 if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, IPT_CONNTRACK_REPLDST))
176 return 0;
177 }
178
179 if(sinfo->flags & IPT_CONNTRACK_STATUS) {
180 if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, IPT_CONNTRACK_STATUS))
181 return 0;
182 }
183
184 if(sinfo->flags & IPT_CONNTRACK_EXPIRES) {
185 unsigned long expires;
186
187 if(!ct)
188 return 0;
189
190 expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0;
191
192 if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), IPT_CONNTRACK_EXPIRES))
193 return 0;
194 }
195
196 return 1;
197}
198
199#endif /* CONFIG_IP_NF_CONNTRACK */
200
201static int check(const char *tablename,
202 const struct ipt_ip *ip,
203 void *matchinfo,
204 unsigned int matchsize,
205 unsigned int hook_mask)
206{
207 if (matchsize != IPT_ALIGN(sizeof(struct ipt_conntrack_info)))
208 return 0;
209
210 return 1;
211}
212
213static struct ipt_match conntrack_match = {
214 .name = "conntrack",
215 .match = &match,
216 .checkentry = &check,
217 .me = THIS_MODULE,
218};
219
220static int __init init(void)
221{
222 need_ip_conntrack();
223 return ipt_register_match(&conntrack_match);
224}
225
226static void __exit fini(void)
227{
228 ipt_unregister_match(&conntrack_match);
229}
230
231module_init(init);
232module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_dccp.c b/net/ipv4/netfilter/ipt_dccp.c
deleted file mode 100644
index ad3278bba6c1..000000000000
--- a/net/ipv4/netfilter/ipt_dccp.c
+++ /dev/null
@@ -1,176 +0,0 @@
1/*
2 * iptables module for DCCP protocol header matching
3 *
4 * (C) 2005 by Harald Welte <laforge@netfilter.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/skbuff.h>
13#include <linux/spinlock.h>
14#include <net/ip.h>
15#include <linux/dccp.h>
16
17#include <linux/netfilter_ipv4/ip_tables.h>
18#include <linux/netfilter_ipv4/ipt_dccp.h>
19
20#define DCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \
21 || (!!((invflag) & (option)) ^ (cond)))
22
23static unsigned char *dccp_optbuf;
24static DEFINE_SPINLOCK(dccp_buflock);
25
26static inline int
27dccp_find_option(u_int8_t option,
28 const struct sk_buff *skb,
29 const struct dccp_hdr *dh,
30 int *hotdrop)
31{
32 /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
33 unsigned char *op;
34 unsigned int optoff = __dccp_hdr_len(dh);
35 unsigned int optlen = dh->dccph_doff*4 - __dccp_hdr_len(dh);
36 unsigned int i;
37
38 if (dh->dccph_doff * 4 < __dccp_hdr_len(dh)) {
39 *hotdrop = 1;
40 return 0;
41 }
42
43 if (!optlen)
44 return 0;
45
46 spin_lock_bh(&dccp_buflock);
47 op = skb_header_pointer(skb,
48 skb->nh.iph->ihl*4 + optoff,
49 optlen, dccp_optbuf);
50 if (op == NULL) {
51 /* If we don't have the whole header, drop packet. */
52 spin_unlock_bh(&dccp_buflock);
53 *hotdrop = 1;
54 return 0;
55 }
56
57 for (i = 0; i < optlen; ) {
58 if (op[i] == option) {
59 spin_unlock_bh(&dccp_buflock);
60 return 1;
61 }
62
63 if (op[i] < 2)
64 i++;
65 else
66 i += op[i+1]?:1;
67 }
68
69 spin_unlock_bh(&dccp_buflock);
70 return 0;
71}
72
73
74static inline int
75match_types(const struct dccp_hdr *dh, u_int16_t typemask)
76{
77 return (typemask & (1 << dh->dccph_type));
78}
79
80static inline int
81match_option(u_int8_t option, const struct sk_buff *skb,
82 const struct dccp_hdr *dh, int *hotdrop)
83{
84 return dccp_find_option(option, skb, dh, hotdrop);
85}
86
87static int
88match(const struct sk_buff *skb,
89 const struct net_device *in,
90 const struct net_device *out,
91 const void *matchinfo,
92 int offset,
93 int *hotdrop)
94{
95 const struct ipt_dccp_info *info =
96 (const struct ipt_dccp_info *)matchinfo;
97 struct dccp_hdr _dh, *dh;
98
99 if (offset)
100 return 0;
101
102 dh = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_dh), &_dh);
103 if (dh == NULL) {
104 *hotdrop = 1;
105 return 0;
106 }
107
108 return DCCHECK(((ntohs(dh->dccph_sport) >= info->spts[0])
109 && (ntohs(dh->dccph_sport) <= info->spts[1])),
110 IPT_DCCP_SRC_PORTS, info->flags, info->invflags)
111 && DCCHECK(((ntohs(dh->dccph_dport) >= info->dpts[0])
112 && (ntohs(dh->dccph_dport) <= info->dpts[1])),
113 IPT_DCCP_DEST_PORTS, info->flags, info->invflags)
114 && DCCHECK(match_types(dh, info->typemask),
115 IPT_DCCP_TYPE, info->flags, info->invflags)
116 && DCCHECK(match_option(info->option, skb, dh, hotdrop),
117 IPT_DCCP_OPTION, info->flags, info->invflags);
118}
119
120static int
121checkentry(const char *tablename,
122 const struct ipt_ip *ip,
123 void *matchinfo,
124 unsigned int matchsize,
125 unsigned int hook_mask)
126{
127 const struct ipt_dccp_info *info;
128
129 info = (const struct ipt_dccp_info *)matchinfo;
130
131 return ip->proto == IPPROTO_DCCP
132 && !(ip->invflags & IPT_INV_PROTO)
133 && matchsize == IPT_ALIGN(sizeof(struct ipt_dccp_info))
134 && !(info->flags & ~IPT_DCCP_VALID_FLAGS)
135 && !(info->invflags & ~IPT_DCCP_VALID_FLAGS)
136 && !(info->invflags & ~info->flags);
137}
138
139static struct ipt_match dccp_match =
140{
141 .name = "dccp",
142 .match = &match,
143 .checkentry = &checkentry,
144 .me = THIS_MODULE,
145};
146
147static int __init init(void)
148{
149 int ret;
150
151 /* doff is 8 bits, so the maximum option size is (4*256). Don't put
152 * this in BSS since DaveM is worried about locked TLB's for kernel
153 * BSS. */
154 dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL);
155 if (!dccp_optbuf)
156 return -ENOMEM;
157 ret = ipt_register_match(&dccp_match);
158 if (ret)
159 kfree(dccp_optbuf);
160
161 return ret;
162}
163
164static void __exit fini(void)
165{
166 ipt_unregister_match(&dccp_match);
167 kfree(dccp_optbuf);
168}
169
170module_init(init);
171module_exit(fini);
172
173MODULE_LICENSE("GPL");
174MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
175MODULE_DESCRIPTION("Match for DCCP protocol packets");
176
diff --git a/net/ipv4/netfilter/ipt_dscp.c b/net/ipv4/netfilter/ipt_dscp.c
index 5df52a64a5d4..92063b4f8602 100644
--- a/net/ipv4/netfilter/ipt_dscp.c
+++ b/net/ipv4/netfilter/ipt_dscp.c
@@ -21,7 +21,7 @@ MODULE_LICENSE("GPL");
21 21
22static int match(const struct sk_buff *skb, const struct net_device *in, 22static int match(const struct sk_buff *skb, const struct net_device *in,
23 const struct net_device *out, const void *matchinfo, 23 const struct net_device *out, const void *matchinfo,
24 int offset, int *hotdrop) 24 int offset, unsigned int protoff, int *hotdrop)
25{ 25{
26 const struct ipt_dscp_info *info = matchinfo; 26 const struct ipt_dscp_info *info = matchinfo;
27 const struct iphdr *iph = skb->nh.iph; 27 const struct iphdr *iph = skb->nh.iph;
@@ -31,7 +31,7 @@ static int match(const struct sk_buff *skb, const struct net_device *in,
31 return ((iph->tos&IPT_DSCP_MASK) == sh_dscp) ^ info->invert; 31 return ((iph->tos&IPT_DSCP_MASK) == sh_dscp) ^ info->invert;
32} 32}
33 33
34static int checkentry(const char *tablename, const struct ipt_ip *ip, 34static int checkentry(const char *tablename, const void *ip,
35 void *matchinfo, unsigned int matchsize, 35 void *matchinfo, unsigned int matchsize,
36 unsigned int hook_mask) 36 unsigned int hook_mask)
37{ 37{
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index b6f7181e89cc..e68b0c7981f0 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -67,7 +67,7 @@ static inline int match_tcp(const struct sk_buff *skb,
67 67
68static int match(const struct sk_buff *skb, const struct net_device *in, 68static int match(const struct sk_buff *skb, const struct net_device *in,
69 const struct net_device *out, const void *matchinfo, 69 const struct net_device *out, const void *matchinfo,
70 int offset, int *hotdrop) 70 int offset, unsigned int protoff, int *hotdrop)
71{ 71{
72 const struct ipt_ecn_info *info = matchinfo; 72 const struct ipt_ecn_info *info = matchinfo;
73 73
@@ -85,11 +85,12 @@ static int match(const struct sk_buff *skb, const struct net_device *in,
85 return 1; 85 return 1;
86} 86}
87 87
88static int checkentry(const char *tablename, const struct ipt_ip *ip, 88static int checkentry(const char *tablename, const void *ip_void,
89 void *matchinfo, unsigned int matchsize, 89 void *matchinfo, unsigned int matchsize,
90 unsigned int hook_mask) 90 unsigned int hook_mask)
91{ 91{
92 const struct ipt_ecn_info *info = matchinfo; 92 const struct ipt_ecn_info *info = matchinfo;
93 const struct ipt_ip *ip = ip_void;
93 94
94 if (matchsize != IPT_ALIGN(sizeof(struct ipt_ecn_info))) 95 if (matchsize != IPT_ALIGN(sizeof(struct ipt_ecn_info)))
95 return 0; 96 return 0;
diff --git a/net/ipv4/netfilter/ipt_esp.c b/net/ipv4/netfilter/ipt_esp.c
index e1d0dd31e117..9de191a8162d 100644
--- a/net/ipv4/netfilter/ipt_esp.c
+++ b/net/ipv4/netfilter/ipt_esp.c
@@ -42,6 +42,7 @@ match(const struct sk_buff *skb,
42 const struct net_device *out, 42 const struct net_device *out,
43 const void *matchinfo, 43 const void *matchinfo,
44 int offset, 44 int offset,
45 unsigned int protoff,
45 int *hotdrop) 46 int *hotdrop)
46{ 47{
47 struct ip_esp_hdr _esp, *eh; 48 struct ip_esp_hdr _esp, *eh;
@@ -51,7 +52,7 @@ match(const struct sk_buff *skb,
51 if (offset) 52 if (offset)
52 return 0; 53 return 0;
53 54
54 eh = skb_header_pointer(skb, skb->nh.iph->ihl * 4, 55 eh = skb_header_pointer(skb, protoff,
55 sizeof(_esp), &_esp); 56 sizeof(_esp), &_esp);
56 if (eh == NULL) { 57 if (eh == NULL) {
57 /* We've been asked to examine this packet, and we 58 /* We've been asked to examine this packet, and we
@@ -70,12 +71,13 @@ match(const struct sk_buff *skb,
70/* Called when user tries to insert an entry of this type. */ 71/* Called when user tries to insert an entry of this type. */
71static int 72static int
72checkentry(const char *tablename, 73checkentry(const char *tablename,
73 const struct ipt_ip *ip, 74 const void *ip_void,
74 void *matchinfo, 75 void *matchinfo,
75 unsigned int matchinfosize, 76 unsigned int matchinfosize,
76 unsigned int hook_mask) 77 unsigned int hook_mask)
77{ 78{
78 const struct ipt_esp *espinfo = matchinfo; 79 const struct ipt_esp *espinfo = matchinfo;
80 const struct ipt_ip *ip = ip_void;
79 81
80 /* Must specify proto == ESP, and no unknown invflags */ 82 /* Must specify proto == ESP, and no unknown invflags */
81 if (ip->proto != IPPROTO_ESP || (ip->invflags & IPT_INV_PROTO)) { 83 if (ip->proto != IPPROTO_ESP || (ip->invflags & IPT_INV_PROTO)) {
diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c
index 2dd1cccbdab9..4fe48c1bd5f3 100644
--- a/net/ipv4/netfilter/ipt_hashlimit.c
+++ b/net/ipv4/netfilter/ipt_hashlimit.c
@@ -429,6 +429,7 @@ hashlimit_match(const struct sk_buff *skb,
429 const struct net_device *out, 429 const struct net_device *out,
430 const void *matchinfo, 430 const void *matchinfo,
431 int offset, 431 int offset,
432 unsigned int protoff,
432 int *hotdrop) 433 int *hotdrop)
433{ 434{
434 struct ipt_hashlimit_info *r = 435 struct ipt_hashlimit_info *r =
@@ -504,7 +505,7 @@ hashlimit_match(const struct sk_buff *skb,
504 505
505static int 506static int
506hashlimit_checkentry(const char *tablename, 507hashlimit_checkentry(const char *tablename,
507 const struct ipt_ip *ip, 508 const void *inf,
508 void *matchinfo, 509 void *matchinfo,
509 unsigned int matchsize, 510 unsigned int matchsize,
510 unsigned int hook_mask) 511 unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_helper.c b/net/ipv4/netfilter/ipt_helper.c
deleted file mode 100644
index bf14e1c7798a..000000000000
--- a/net/ipv4/netfilter/ipt_helper.c
+++ /dev/null
@@ -1,167 +0,0 @@
1/* iptables module to match on related connections */
2/*
3 * (C) 2001 Martin Josefsson <gandalf@wlug.westbo.se>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * 19 Mar 2002 Harald Welte <laforge@gnumonks.org>:
10 * - Port to newnat infrastructure
11 */
12
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/netfilter.h>
16#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
17#include <linux/netfilter_ipv4/ip_conntrack.h>
18#include <linux/netfilter_ipv4/ip_conntrack_core.h>
19#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
20#else
21#include <net/netfilter/nf_conntrack.h>
22#include <net/netfilter/nf_conntrack_core.h>
23#include <net/netfilter/nf_conntrack_helper.h>
24#endif
25#include <linux/netfilter_ipv4/ip_tables.h>
26#include <linux/netfilter_ipv4/ipt_helper.h>
27
28MODULE_LICENSE("GPL");
29MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>");
30MODULE_DESCRIPTION("iptables helper match module");
31
32#if 0
33#define DEBUGP printk
34#else
35#define DEBUGP(format, args...)
36#endif
37
38#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
39static int
40match(const struct sk_buff *skb,
41 const struct net_device *in,
42 const struct net_device *out,
43 const void *matchinfo,
44 int offset,
45 int *hotdrop)
46{
47 const struct ipt_helper_info *info = matchinfo;
48 struct ip_conntrack *ct;
49 enum ip_conntrack_info ctinfo;
50 int ret = info->invert;
51
52 ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
53 if (!ct) {
54 DEBUGP("ipt_helper: Eek! invalid conntrack?\n");
55 return ret;
56 }
57
58 if (!ct->master) {
59 DEBUGP("ipt_helper: conntrack %p has no master\n", ct);
60 return ret;
61 }
62
63 read_lock_bh(&ip_conntrack_lock);
64 if (!ct->master->helper) {
65 DEBUGP("ipt_helper: master ct %p has no helper\n",
66 exp->expectant);
67 goto out_unlock;
68 }
69
70 DEBUGP("master's name = %s , info->name = %s\n",
71 ct->master->helper->name, info->name);
72
73 if (info->name[0] == '\0')
74 ret ^= 1;
75 else
76 ret ^= !strncmp(ct->master->helper->name, info->name,
77 strlen(ct->master->helper->name));
78out_unlock:
79 read_unlock_bh(&ip_conntrack_lock);
80 return ret;
81}
82
83#else /* CONFIG_IP_NF_CONNTRACK */
84
85static int
86match(const struct sk_buff *skb,
87 const struct net_device *in,
88 const struct net_device *out,
89 const void *matchinfo,
90 int offset,
91 int *hotdrop)
92{
93 const struct ipt_helper_info *info = matchinfo;
94 struct nf_conn *ct;
95 enum ip_conntrack_info ctinfo;
96 int ret = info->invert;
97
98 ct = nf_ct_get((struct sk_buff *)skb, &ctinfo);
99 if (!ct) {
100 DEBUGP("ipt_helper: Eek! invalid conntrack?\n");
101 return ret;
102 }
103
104 if (!ct->master) {
105 DEBUGP("ipt_helper: conntrack %p has no master\n", ct);
106 return ret;
107 }
108
109 read_lock_bh(&nf_conntrack_lock);
110 if (!ct->master->helper) {
111 DEBUGP("ipt_helper: master ct %p has no helper\n",
112 exp->expectant);
113 goto out_unlock;
114 }
115
116 DEBUGP("master's name = %s , info->name = %s\n",
117 ct->master->helper->name, info->name);
118
119 if (info->name[0] == '\0')
120 ret ^= 1;
121 else
122 ret ^= !strncmp(ct->master->helper->name, info->name,
123 strlen(ct->master->helper->name));
124out_unlock:
125 read_unlock_bh(&nf_conntrack_lock);
126 return ret;
127}
128#endif
129
130static int check(const char *tablename,
131 const struct ipt_ip *ip,
132 void *matchinfo,
133 unsigned int matchsize,
134 unsigned int hook_mask)
135{
136 struct ipt_helper_info *info = matchinfo;
137
138 info->name[29] = '\0';
139
140 /* verify size */
141 if (matchsize != IPT_ALIGN(sizeof(struct ipt_helper_info)))
142 return 0;
143
144 return 1;
145}
146
147static struct ipt_match helper_match = {
148 .name = "helper",
149 .match = &match,
150 .checkentry = &check,
151 .me = THIS_MODULE,
152};
153
154static int __init init(void)
155{
156 need_ip_conntrack();
157 return ipt_register_match(&helper_match);
158}
159
160static void __exit fini(void)
161{
162 ipt_unregister_match(&helper_match);
163}
164
165module_init(init);
166module_exit(fini);
167
diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c
index b835b7b2e560..13fb16fb7892 100644
--- a/net/ipv4/netfilter/ipt_iprange.c
+++ b/net/ipv4/netfilter/ipt_iprange.c
@@ -28,7 +28,7 @@ match(const struct sk_buff *skb,
28 const struct net_device *in, 28 const struct net_device *in,
29 const struct net_device *out, 29 const struct net_device *out,
30 const void *matchinfo, 30 const void *matchinfo,
31 int offset, int *hotdrop) 31 int offset, unsigned int protoff, int *hotdrop)
32{ 32{
33 const struct ipt_iprange_info *info = matchinfo; 33 const struct ipt_iprange_info *info = matchinfo;
34 const struct iphdr *iph = skb->nh.iph; 34 const struct iphdr *iph = skb->nh.iph;
@@ -63,7 +63,7 @@ match(const struct sk_buff *skb,
63} 63}
64 64
65static int check(const char *tablename, 65static int check(const char *tablename,
66 const struct ipt_ip *ip, 66 const void *inf,
67 void *matchinfo, 67 void *matchinfo,
68 unsigned int matchsize, 68 unsigned int matchsize,
69 unsigned int hook_mask) 69 unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_length.c b/net/ipv4/netfilter/ipt_length.c
deleted file mode 100644
index 4eabcfbda9d1..000000000000
--- a/net/ipv4/netfilter/ipt_length.c
+++ /dev/null
@@ -1,64 +0,0 @@
1/* Kernel module to match packet length. */
2/* (C) 1999-2001 James Morris <jmorris@intercode.com.au>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/module.h>
10#include <linux/skbuff.h>
11
12#include <linux/netfilter_ipv4/ipt_length.h>
13#include <linux/netfilter_ipv4/ip_tables.h>
14
15MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
16MODULE_DESCRIPTION("IP tables packet length matching module");
17MODULE_LICENSE("GPL");
18
19static int
20match(const struct sk_buff *skb,
21 const struct net_device *in,
22 const struct net_device *out,
23 const void *matchinfo,
24 int offset,
25 int *hotdrop)
26{
27 const struct ipt_length_info *info = matchinfo;
28 u_int16_t pktlen = ntohs(skb->nh.iph->tot_len);
29
30 return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
31}
32
33static int
34checkentry(const char *tablename,
35 const struct ipt_ip *ip,
36 void *matchinfo,
37 unsigned int matchsize,
38 unsigned int hook_mask)
39{
40 if (matchsize != IPT_ALIGN(sizeof(struct ipt_length_info)))
41 return 0;
42
43 return 1;
44}
45
46static struct ipt_match length_match = {
47 .name = "length",
48 .match = &match,
49 .checkentry = &checkentry,
50 .me = THIS_MODULE,
51};
52
53static int __init init(void)
54{
55 return ipt_register_match(&length_match);
56}
57
58static void __exit fini(void)
59{
60 ipt_unregister_match(&length_match);
61}
62
63module_init(init);
64module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_limit.c b/net/ipv4/netfilter/ipt_limit.c
deleted file mode 100644
index 0c24dcc703a5..000000000000
--- a/net/ipv4/netfilter/ipt_limit.c
+++ /dev/null
@@ -1,157 +0,0 @@
1/* Kernel module to control the rate
2 *
3 * 2 September 1999: Changed from the target RATE to the match
4 * `limit', removed logging. Did I mention that
5 * Alexey is a fucking genius?
6 * Rusty Russell (rusty@rustcorp.com.au). */
7
8/* (C) 1999 Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr>
9 * (C) 1999 Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation.
14 */
15
16#include <linux/module.h>
17#include <linux/skbuff.h>
18#include <linux/spinlock.h>
19#include <linux/interrupt.h>
20
21#include <linux/netfilter_ipv4/ip_tables.h>
22#include <linux/netfilter_ipv4/ipt_limit.h>
23
24MODULE_LICENSE("GPL");
25MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>");
26MODULE_DESCRIPTION("iptables rate limit match");
27
28/* The algorithm used is the Simple Token Bucket Filter (TBF)
29 * see net/sched/sch_tbf.c in the linux source tree
30 */
31
32static DEFINE_SPINLOCK(limit_lock);
33
34/* Rusty: This is my (non-mathematically-inclined) understanding of
35 this algorithm. The `average rate' in jiffies becomes your initial
36 amount of credit `credit' and the most credit you can ever have
37 `credit_cap'. The `peak rate' becomes the cost of passing the
38 test, `cost'.
39
40 `prev' tracks the last packet hit: you gain one credit per jiffy.
41 If you get credit balance more than this, the extra credit is
42 discarded. Every time the match passes, you lose `cost' credits;
43 if you don't have that many, the test fails.
44
45 See Alexey's formal explanation in net/sched/sch_tbf.c.
46
47 To get the maximum range, we multiply by this factor (ie. you get N
48 credits per jiffy). We want to allow a rate as low as 1 per day
49 (slowest userspace tool allows), which means
50 CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32. ie. */
51#define MAX_CPJ (0xFFFFFFFF / (HZ*60*60*24))
52
53/* Repeated shift and or gives us all 1s, final shift and add 1 gives
54 * us the power of 2 below the theoretical max, so GCC simply does a
55 * shift. */
56#define _POW2_BELOW2(x) ((x)|((x)>>1))
57#define _POW2_BELOW4(x) (_POW2_BELOW2(x)|_POW2_BELOW2((x)>>2))
58#define _POW2_BELOW8(x) (_POW2_BELOW4(x)|_POW2_BELOW4((x)>>4))
59#define _POW2_BELOW16(x) (_POW2_BELOW8(x)|_POW2_BELOW8((x)>>8))
60#define _POW2_BELOW32(x) (_POW2_BELOW16(x)|_POW2_BELOW16((x)>>16))
61#define POW2_BELOW32(x) ((_POW2_BELOW32(x)>>1) + 1)
62
63#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
64
65static int
66ipt_limit_match(const struct sk_buff *skb,
67 const struct net_device *in,
68 const struct net_device *out,
69 const void *matchinfo,
70 int offset,
71 int *hotdrop)
72{
73 struct ipt_rateinfo *r = ((struct ipt_rateinfo *)matchinfo)->master;
74 unsigned long now = jiffies;
75
76 spin_lock_bh(&limit_lock);
77 r->credit += (now - xchg(&r->prev, now)) * CREDITS_PER_JIFFY;
78 if (r->credit > r->credit_cap)
79 r->credit = r->credit_cap;
80
81 if (r->credit >= r->cost) {
82 /* We're not limited. */
83 r->credit -= r->cost;
84 spin_unlock_bh(&limit_lock);
85 return 1;
86 }
87
88 spin_unlock_bh(&limit_lock);
89 return 0;
90}
91
92/* Precision saver. */
93static u_int32_t
94user2credits(u_int32_t user)
95{
96 /* If multiplying would overflow... */
97 if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
98 /* Divide first. */
99 return (user / IPT_LIMIT_SCALE) * HZ * CREDITS_PER_JIFFY;
100
101 return (user * HZ * CREDITS_PER_JIFFY) / IPT_LIMIT_SCALE;
102}
103
104static int
105ipt_limit_checkentry(const char *tablename,
106 const struct ipt_ip *ip,
107 void *matchinfo,
108 unsigned int matchsize,
109 unsigned int hook_mask)
110{
111 struct ipt_rateinfo *r = matchinfo;
112
113 if (matchsize != IPT_ALIGN(sizeof(struct ipt_rateinfo)))
114 return 0;
115
116 /* Check for overflow. */
117 if (r->burst == 0
118 || user2credits(r->avg * r->burst) < user2credits(r->avg)) {
119 printk("Overflow in ipt_limit, try lower: %u/%u\n",
120 r->avg, r->burst);
121 return 0;
122 }
123
124 /* User avg in seconds * IPT_LIMIT_SCALE: convert to jiffies *
125 128. */
126 r->prev = jiffies;
127 r->credit = user2credits(r->avg * r->burst); /* Credits full. */
128 r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */
129 r->cost = user2credits(r->avg);
130
131 /* For SMP, we only want to use one set of counters. */
132 r->master = r;
133
134 return 1;
135}
136
137static struct ipt_match ipt_limit_reg = {
138 .name = "limit",
139 .match = ipt_limit_match,
140 .checkentry = ipt_limit_checkentry,
141 .me = THIS_MODULE,
142};
143
144static int __init init(void)
145{
146 if (ipt_register_match(&ipt_limit_reg))
147 return -EINVAL;
148 return 0;
149}
150
151static void __exit fini(void)
152{
153 ipt_unregister_match(&ipt_limit_reg);
154}
155
156module_init(init);
157module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_mac.c b/net/ipv4/netfilter/ipt_mac.c
deleted file mode 100644
index 11a459e33f25..000000000000
--- a/net/ipv4/netfilter/ipt_mac.c
+++ /dev/null
@@ -1,79 +0,0 @@
1/* Kernel module to match MAC address parameters. */
2
3/* (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/skbuff.h>
13#include <linux/if_ether.h>
14
15#include <linux/netfilter_ipv4/ipt_mac.h>
16#include <linux/netfilter_ipv4/ip_tables.h>
17
18MODULE_LICENSE("GPL");
19MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
20MODULE_DESCRIPTION("iptables mac matching module");
21
22static int
23match(const struct sk_buff *skb,
24 const struct net_device *in,
25 const struct net_device *out,
26 const void *matchinfo,
27 int offset,
28 int *hotdrop)
29{
30 const struct ipt_mac_info *info = matchinfo;
31
32 /* Is mac pointer valid? */
33 return (skb->mac.raw >= skb->head
34 && (skb->mac.raw + ETH_HLEN) <= skb->data
35 /* If so, compare... */
36 && ((memcmp(eth_hdr(skb)->h_source, info->srcaddr, ETH_ALEN)
37 == 0) ^ info->invert));
38}
39
40static int
41ipt_mac_checkentry(const char *tablename,
42 const struct ipt_ip *ip,
43 void *matchinfo,
44 unsigned int matchsize,
45 unsigned int hook_mask)
46{
47 /* FORWARD isn't always valid, but it's nice to be able to do --RR */
48 if (hook_mask
49 & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN)
50 | (1 << NF_IP_FORWARD))) {
51 printk("ipt_mac: only valid for PRE_ROUTING, LOCAL_IN or FORWARD.\n");
52 return 0;
53 }
54
55 if (matchsize != IPT_ALIGN(sizeof(struct ipt_mac_info)))
56 return 0;
57
58 return 1;
59}
60
61static struct ipt_match mac_match = {
62 .name = "mac",
63 .match = &match,
64 .checkentry = &ipt_mac_checkentry,
65 .me = THIS_MODULE,
66};
67
68static int __init init(void)
69{
70 return ipt_register_match(&mac_match);
71}
72
73static void __exit fini(void)
74{
75 ipt_unregister_match(&mac_match);
76}
77
78module_init(init);
79module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_mark.c b/net/ipv4/netfilter/ipt_mark.c
deleted file mode 100644
index 00bef6cdd3f8..000000000000
--- a/net/ipv4/netfilter/ipt_mark.c
+++ /dev/null
@@ -1,71 +0,0 @@
1/* Kernel module to match NFMARK values. */
2
3/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/module.h>
11#include <linux/skbuff.h>
12
13#include <linux/netfilter_ipv4/ipt_mark.h>
14#include <linux/netfilter_ipv4/ip_tables.h>
15
16MODULE_LICENSE("GPL");
17MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
18MODULE_DESCRIPTION("iptables mark matching module");
19
20static int
21match(const struct sk_buff *skb,
22 const struct net_device *in,
23 const struct net_device *out,
24 const void *matchinfo,
25 int offset,
26 int *hotdrop)
27{
28 const struct ipt_mark_info *info = matchinfo;
29
30 return ((skb->nfmark & info->mask) == info->mark) ^ info->invert;
31}
32
33static int
34checkentry(const char *tablename,
35 const struct ipt_ip *ip,
36 void *matchinfo,
37 unsigned int matchsize,
38 unsigned int hook_mask)
39{
40 struct ipt_mark_info *minfo = (struct ipt_mark_info *) matchinfo;
41
42 if (matchsize != IPT_ALIGN(sizeof(struct ipt_mark_info)))
43 return 0;
44
45 if (minfo->mark > 0xffffffff || minfo->mask > 0xffffffff) {
46 printk(KERN_WARNING "mark: only supports 32bit mark\n");
47 return 0;
48 }
49
50 return 1;
51}
52
53static struct ipt_match mark_match = {
54 .name = "mark",
55 .match = &match,
56 .checkentry = &checkentry,
57 .me = THIS_MODULE,
58};
59
60static int __init init(void)
61{
62 return ipt_register_match(&mark_match);
63}
64
65static void __exit fini(void)
66{
67 ipt_unregister_match(&mark_match);
68}
69
70module_init(init);
71module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_multiport.c b/net/ipv4/netfilter/ipt_multiport.c
index 99e8188162e2..2d52326553f1 100644
--- a/net/ipv4/netfilter/ipt_multiport.c
+++ b/net/ipv4/netfilter/ipt_multiport.c
@@ -97,6 +97,7 @@ match(const struct sk_buff *skb,
97 const struct net_device *out, 97 const struct net_device *out,
98 const void *matchinfo, 98 const void *matchinfo,
99 int offset, 99 int offset,
100 unsigned int protoff,
100 int *hotdrop) 101 int *hotdrop)
101{ 102{
102 u16 _ports[2], *pptr; 103 u16 _ports[2], *pptr;
@@ -105,7 +106,7 @@ match(const struct sk_buff *skb,
105 if (offset) 106 if (offset)
106 return 0; 107 return 0;
107 108
108 pptr = skb_header_pointer(skb, skb->nh.iph->ihl * 4, 109 pptr = skb_header_pointer(skb, protoff,
109 sizeof(_ports), _ports); 110 sizeof(_ports), _ports);
110 if (pptr == NULL) { 111 if (pptr == NULL) {
111 /* We've been asked to examine this packet, and we 112 /* We've been asked to examine this packet, and we
@@ -128,6 +129,7 @@ match_v1(const struct sk_buff *skb,
128 const struct net_device *out, 129 const struct net_device *out,
129 const void *matchinfo, 130 const void *matchinfo,
130 int offset, 131 int offset,
132 unsigned int protoff,
131 int *hotdrop) 133 int *hotdrop)
132{ 134{
133 u16 _ports[2], *pptr; 135 u16 _ports[2], *pptr;
@@ -136,7 +138,7 @@ match_v1(const struct sk_buff *skb,
136 if (offset) 138 if (offset)
137 return 0; 139 return 0;
138 140
139 pptr = skb_header_pointer(skb, skb->nh.iph->ihl * 4, 141 pptr = skb_header_pointer(skb, protoff,
140 sizeof(_ports), _ports); 142 sizeof(_ports), _ports);
141 if (pptr == NULL) { 143 if (pptr == NULL) {
142 /* We've been asked to examine this packet, and we 144 /* We've been asked to examine this packet, and we
@@ -154,7 +156,7 @@ match_v1(const struct sk_buff *skb,
154/* Called when user tries to insert an entry of this type. */ 156/* Called when user tries to insert an entry of this type. */
155static int 157static int
156checkentry(const char *tablename, 158checkentry(const char *tablename,
157 const struct ipt_ip *ip, 159 const void *ip,
158 void *matchinfo, 160 void *matchinfo,
159 unsigned int matchsize, 161 unsigned int matchsize,
160 unsigned int hook_mask) 162 unsigned int hook_mask)
@@ -164,7 +166,7 @@ checkentry(const char *tablename,
164 166
165static int 167static int
166checkentry_v1(const char *tablename, 168checkentry_v1(const char *tablename,
167 const struct ipt_ip *ip, 169 const void *ip,
168 void *matchinfo, 170 void *matchinfo,
169 unsigned int matchsize, 171 unsigned int matchsize,
170 unsigned int hook_mask) 172 unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
index 0cee2862ed85..4843d0c9734f 100644
--- a/net/ipv4/netfilter/ipt_owner.c
+++ b/net/ipv4/netfilter/ipt_owner.c
@@ -27,6 +27,7 @@ match(const struct sk_buff *skb,
27 const struct net_device *out, 27 const struct net_device *out,
28 const void *matchinfo, 28 const void *matchinfo,
29 int offset, 29 int offset,
30 unsigned int protoff,
30 int *hotdrop) 31 int *hotdrop)
31{ 32{
32 const struct ipt_owner_info *info = matchinfo; 33 const struct ipt_owner_info *info = matchinfo;
@@ -51,7 +52,7 @@ match(const struct sk_buff *skb,
51 52
52static int 53static int
53checkentry(const char *tablename, 54checkentry(const char *tablename,
54 const struct ipt_ip *ip, 55 const void *ip,
55 void *matchinfo, 56 void *matchinfo,
56 unsigned int matchsize, 57 unsigned int matchsize,
57 unsigned int hook_mask) 58 unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_physdev.c b/net/ipv4/netfilter/ipt_physdev.c
deleted file mode 100644
index 1a53924041fc..000000000000
--- a/net/ipv4/netfilter/ipt_physdev.c
+++ /dev/null
@@ -1,134 +0,0 @@
1/* Kernel module to match the bridge port in and
2 * out device for IP packets coming into contact with a bridge. */
3
4/* (C) 2001-2003 Bart De Schuymer <bdschuym@pandora.be>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/skbuff.h>
13#include <linux/netfilter_ipv4/ipt_physdev.h>
14#include <linux/netfilter_ipv4/ip_tables.h>
15#include <linux/netfilter_bridge.h>
16#define MATCH 1
17#define NOMATCH 0
18
19MODULE_LICENSE("GPL");
20MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
21MODULE_DESCRIPTION("iptables bridge physical device match module");
22
23static int
24match(const struct sk_buff *skb,
25 const struct net_device *in,
26 const struct net_device *out,
27 const void *matchinfo,
28 int offset,
29 int *hotdrop)
30{
31 int i;
32 static const char nulldevname[IFNAMSIZ];
33 const struct ipt_physdev_info *info = matchinfo;
34 unsigned int ret;
35 const char *indev, *outdev;
36 struct nf_bridge_info *nf_bridge;
37
38 /* Not a bridged IP packet or no info available yet:
39 * LOCAL_OUT/mangle and LOCAL_OUT/nat don't know if
40 * the destination device will be a bridge. */
41 if (!(nf_bridge = skb->nf_bridge)) {
42 /* Return MATCH if the invert flags of the used options are on */
43 if ((info->bitmask & IPT_PHYSDEV_OP_BRIDGED) &&
44 !(info->invert & IPT_PHYSDEV_OP_BRIDGED))
45 return NOMATCH;
46 if ((info->bitmask & IPT_PHYSDEV_OP_ISIN) &&
47 !(info->invert & IPT_PHYSDEV_OP_ISIN))
48 return NOMATCH;
49 if ((info->bitmask & IPT_PHYSDEV_OP_ISOUT) &&
50 !(info->invert & IPT_PHYSDEV_OP_ISOUT))
51 return NOMATCH;
52 if ((info->bitmask & IPT_PHYSDEV_OP_IN) &&
53 !(info->invert & IPT_PHYSDEV_OP_IN))
54 return NOMATCH;
55 if ((info->bitmask & IPT_PHYSDEV_OP_OUT) &&
56 !(info->invert & IPT_PHYSDEV_OP_OUT))
57 return NOMATCH;
58 return MATCH;
59 }
60
61 /* This only makes sense in the FORWARD and POSTROUTING chains */
62 if ((info->bitmask & IPT_PHYSDEV_OP_BRIDGED) &&
63 (!!(nf_bridge->mask & BRNF_BRIDGED) ^
64 !(info->invert & IPT_PHYSDEV_OP_BRIDGED)))
65 return NOMATCH;
66
67 if ((info->bitmask & IPT_PHYSDEV_OP_ISIN &&
68 (!nf_bridge->physindev ^ !!(info->invert & IPT_PHYSDEV_OP_ISIN))) ||
69 (info->bitmask & IPT_PHYSDEV_OP_ISOUT &&
70 (!nf_bridge->physoutdev ^ !!(info->invert & IPT_PHYSDEV_OP_ISOUT))))
71 return NOMATCH;
72
73 if (!(info->bitmask & IPT_PHYSDEV_OP_IN))
74 goto match_outdev;
75 indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname;
76 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned int); i++) {
77 ret |= (((const unsigned int *)indev)[i]
78 ^ ((const unsigned int *)info->physindev)[i])
79 & ((const unsigned int *)info->in_mask)[i];
80 }
81
82 if ((ret == 0) ^ !(info->invert & IPT_PHYSDEV_OP_IN))
83 return NOMATCH;
84
85match_outdev:
86 if (!(info->bitmask & IPT_PHYSDEV_OP_OUT))
87 return MATCH;
88 outdev = nf_bridge->physoutdev ?
89 nf_bridge->physoutdev->name : nulldevname;
90 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned int); i++) {
91 ret |= (((const unsigned int *)outdev)[i]
92 ^ ((const unsigned int *)info->physoutdev)[i])
93 & ((const unsigned int *)info->out_mask)[i];
94 }
95
96 return (ret != 0) ^ !(info->invert & IPT_PHYSDEV_OP_OUT);
97}
98
99static int
100checkentry(const char *tablename,
101 const struct ipt_ip *ip,
102 void *matchinfo,
103 unsigned int matchsize,
104 unsigned int hook_mask)
105{
106 const struct ipt_physdev_info *info = matchinfo;
107
108 if (matchsize != IPT_ALIGN(sizeof(struct ipt_physdev_info)))
109 return 0;
110 if (!(info->bitmask & IPT_PHYSDEV_OP_MASK) ||
111 info->bitmask & ~IPT_PHYSDEV_OP_MASK)
112 return 0;
113 return 1;
114}
115
116static struct ipt_match physdev_match = {
117 .name = "physdev",
118 .match = &match,
119 .checkentry = &checkentry,
120 .me = THIS_MODULE,
121};
122
123static int __init init(void)
124{
125 return ipt_register_match(&physdev_match);
126}
127
128static void __exit fini(void)
129{
130 ipt_unregister_match(&physdev_match);
131}
132
133module_init(init);
134module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_pkttype.c b/net/ipv4/netfilter/ipt_pkttype.c
deleted file mode 100644
index 8ddb1dc5e5ae..000000000000
--- a/net/ipv4/netfilter/ipt_pkttype.c
+++ /dev/null
@@ -1,70 +0,0 @@
1/* (C) 1999-2001 Michal Ludvig <michal@logix.cz>
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
6 */
7
8#include <linux/module.h>
9#include <linux/skbuff.h>
10#include <linux/if_ether.h>
11#include <linux/if_packet.h>
12
13#include <linux/netfilter_ipv4/ipt_pkttype.h>
14#include <linux/netfilter_ipv4/ip_tables.h>
15
16MODULE_LICENSE("GPL");
17MODULE_AUTHOR("Michal Ludvig <michal@logix.cz>");
18MODULE_DESCRIPTION("IP tables match to match on linklayer packet type");
19
20static int match(const struct sk_buff *skb,
21 const struct net_device *in,
22 const struct net_device *out,
23 const void *matchinfo,
24 int offset,
25 int *hotdrop)
26{
27 const struct ipt_pkttype_info *info = matchinfo;
28
29 return (skb->pkt_type == info->pkttype) ^ info->invert;
30}
31
32static int checkentry(const char *tablename,
33 const struct ipt_ip *ip,
34 void *matchinfo,
35 unsigned int matchsize,
36 unsigned int hook_mask)
37{
38/*
39 if (hook_mask
40 & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN)
41 | (1 << NF_IP_FORWARD))) {
42 printk("ipt_pkttype: only valid for PRE_ROUTING, LOCAL_IN or FORWARD.\n");
43 return 0;
44 }
45*/
46 if (matchsize != IPT_ALIGN(sizeof(struct ipt_pkttype_info)))
47 return 0;
48
49 return 1;
50}
51
52static struct ipt_match pkttype_match = {
53 .name = "pkttype",
54 .match = &match,
55 .checkentry = &checkentry,
56 .me = THIS_MODULE,
57};
58
59static int __init init(void)
60{
61 return ipt_register_match(&pkttype_match);
62}
63
64static void __exit fini(void)
65{
66 ipt_unregister_match(&pkttype_match);
67}
68
69module_init(init);
70module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_policy.c b/net/ipv4/netfilter/ipt_policy.c
new file mode 100644
index 000000000000..18ca8258a1c5
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_policy.c
@@ -0,0 +1,173 @@
1/* IP tables module for matching IPsec policy
2 *
3 * Copyright (c) 2004,2005 Patrick McHardy, <kaber@trash.net>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/kernel.h>
11#include <linux/config.h>
12#include <linux/module.h>
13#include <linux/skbuff.h>
14#include <linux/init.h>
15#include <net/xfrm.h>
16
17#include <linux/netfilter_ipv4.h>
18#include <linux/netfilter_ipv4/ip_tables.h>
19#include <linux/netfilter_ipv4/ipt_policy.h>
20
21MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
22MODULE_DESCRIPTION("IPtables IPsec policy matching module");
23MODULE_LICENSE("GPL");
24
25
26static inline int
27match_xfrm_state(struct xfrm_state *x, const struct ipt_policy_elem *e)
28{
29#define MATCH(x,y) (!e->match.x || ((e->x == (y)) ^ e->invert.x))
30
31 return MATCH(saddr, x->props.saddr.a4 & e->smask) &&
32 MATCH(daddr, x->id.daddr.a4 & e->dmask) &&
33 MATCH(proto, x->id.proto) &&
34 MATCH(mode, x->props.mode) &&
35 MATCH(spi, x->id.spi) &&
36 MATCH(reqid, x->props.reqid);
37}
38
39static int
40match_policy_in(const struct sk_buff *skb, const struct ipt_policy_info *info)
41{
42 const struct ipt_policy_elem *e;
43 struct sec_path *sp = skb->sp;
44 int strict = info->flags & IPT_POLICY_MATCH_STRICT;
45 int i, pos;
46
47 if (sp == NULL)
48 return -1;
49 if (strict && info->len != sp->len)
50 return 0;
51
52 for (i = sp->len - 1; i >= 0; i--) {
53 pos = strict ? i - sp->len + 1 : 0;
54 if (pos >= info->len)
55 return 0;
56 e = &info->pol[pos];
57
58 if (match_xfrm_state(sp->x[i].xvec, e)) {
59 if (!strict)
60 return 1;
61 } else if (strict)
62 return 0;
63 }
64
65 return strict ? 1 : 0;
66}
67
68static int
69match_policy_out(const struct sk_buff *skb, const struct ipt_policy_info *info)
70{
71 const struct ipt_policy_elem *e;
72 struct dst_entry *dst = skb->dst;
73 int strict = info->flags & IPT_POLICY_MATCH_STRICT;
74 int i, pos;
75
76 if (dst->xfrm == NULL)
77 return -1;
78
79 for (i = 0; dst && dst->xfrm; dst = dst->child, i++) {
80 pos = strict ? i : 0;
81 if (pos >= info->len)
82 return 0;
83 e = &info->pol[pos];
84
85 if (match_xfrm_state(dst->xfrm, e)) {
86 if (!strict)
87 return 1;
88 } else if (strict)
89 return 0;
90 }
91
92 return strict ? 1 : 0;
93}
94
95static int match(const struct sk_buff *skb,
96 const struct net_device *in,
97 const struct net_device *out,
98 const void *matchinfo,
99 int offset,
100 unsigned int protoff,
101 int *hotdrop)
102{
103 const struct ipt_policy_info *info = matchinfo;
104 int ret;
105
106 if (info->flags & IPT_POLICY_MATCH_IN)
107 ret = match_policy_in(skb, info);
108 else
109 ret = match_policy_out(skb, info);
110
111 if (ret < 0)
112 ret = info->flags & IPT_POLICY_MATCH_NONE ? 1 : 0;
113 else if (info->flags & IPT_POLICY_MATCH_NONE)
114 ret = 0;
115
116 return ret;
117}
118
119static int checkentry(const char *tablename, const void *ip_void,
120 void *matchinfo, unsigned int matchsize,
121 unsigned int hook_mask)
122{
123 struct ipt_policy_info *info = matchinfo;
124
125 if (matchsize != IPT_ALIGN(sizeof(*info))) {
126 printk(KERN_ERR "ipt_policy: matchsize %u != %zu\n",
127 matchsize, IPT_ALIGN(sizeof(*info)));
128 return 0;
129 }
130 if (!(info->flags & (IPT_POLICY_MATCH_IN|IPT_POLICY_MATCH_OUT))) {
131 printk(KERN_ERR "ipt_policy: neither incoming nor "
132 "outgoing policy selected\n");
133 return 0;
134 }
135 if (hook_mask & (1 << NF_IP_PRE_ROUTING | 1 << NF_IP_LOCAL_IN)
136 && info->flags & IPT_POLICY_MATCH_OUT) {
137 printk(KERN_ERR "ipt_policy: output policy not valid in "
138 "PRE_ROUTING and INPUT\n");
139 return 0;
140 }
141 if (hook_mask & (1 << NF_IP_POST_ROUTING | 1 << NF_IP_LOCAL_OUT)
142 && info->flags & IPT_POLICY_MATCH_IN) {
143 printk(KERN_ERR "ipt_policy: input policy not valid in "
144 "POST_ROUTING and OUTPUT\n");
145 return 0;
146 }
147 if (info->len > IPT_POLICY_MAX_ELEM) {
148 printk(KERN_ERR "ipt_policy: too many policy elements\n");
149 return 0;
150 }
151
152 return 1;
153}
154
155static struct ipt_match policy_match = {
156 .name = "policy",
157 .match = match,
158 .checkentry = checkentry,
159 .me = THIS_MODULE,
160};
161
162static int __init init(void)
163{
164 return ipt_register_match(&policy_match);
165}
166
167static void __exit fini(void)
168{
169 ipt_unregister_match(&policy_match);
170}
171
172module_init(init);
173module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_realm.c b/net/ipv4/netfilter/ipt_realm.c
deleted file mode 100644
index 54a6897ebaa6..000000000000
--- a/net/ipv4/netfilter/ipt_realm.c
+++ /dev/null
@@ -1,76 +0,0 @@
1/* IP tables module for matching the routing realm
2 *
3 * $Id: ipt_realm.c,v 1.3 2004/03/05 13:25:40 laforge Exp $
4 *
5 * (C) 2003 by Sampsa Ranta <sampsa@netsonic.fi>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/module.h>
13#include <linux/skbuff.h>
14#include <linux/netdevice.h>
15#include <net/route.h>
16
17#include <linux/netfilter_ipv4/ipt_realm.h>
18#include <linux/netfilter_ipv4/ip_tables.h>
19
20MODULE_AUTHOR("Sampsa Ranta <sampsa@netsonic.fi>");
21MODULE_LICENSE("GPL");
22MODULE_DESCRIPTION("iptables realm match");
23
24static int
25match(const struct sk_buff *skb,
26 const struct net_device *in,
27 const struct net_device *out,
28 const void *matchinfo,
29 int offset,
30 int *hotdrop)
31{
32 const struct ipt_realm_info *info = matchinfo;
33 struct dst_entry *dst = skb->dst;
34
35 return (info->id == (dst->tclassid & info->mask)) ^ info->invert;
36}
37
38static int check(const char *tablename,
39 const struct ipt_ip *ip,
40 void *matchinfo,
41 unsigned int matchsize,
42 unsigned int hook_mask)
43{
44 if (hook_mask
45 & ~((1 << NF_IP_POST_ROUTING) | (1 << NF_IP_FORWARD) |
46 (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_LOCAL_IN))) {
47 printk("ipt_realm: only valid for POST_ROUTING, LOCAL_OUT, "
48 "LOCAL_IN or FORWARD.\n");
49 return 0;
50 }
51 if (matchsize != IPT_ALIGN(sizeof(struct ipt_realm_info))) {
52 printk("ipt_realm: invalid matchsize.\n");
53 return 0;
54 }
55 return 1;
56}
57
58static struct ipt_match realm_match = {
59 .name = "realm",
60 .match = match,
61 .checkentry = check,
62 .me = THIS_MODULE
63};
64
65static int __init init(void)
66{
67 return ipt_register_match(&realm_match);
68}
69
70static void __exit fini(void)
71{
72 ipt_unregister_match(&realm_match);
73}
74
75module_init(init);
76module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 261cbb4d4c49..44611d6d14f5 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -24,10 +24,10 @@
24#define HASH_LOG 9 24#define HASH_LOG 9
25 25
26/* Defaults, these can be overridden on the module command-line. */ 26/* Defaults, these can be overridden on the module command-line. */
27static int ip_list_tot = 100; 27static unsigned int ip_list_tot = 100;
28static int ip_pkt_list_tot = 20; 28static unsigned int ip_pkt_list_tot = 20;
29static int ip_list_hash_size = 0; 29static unsigned int ip_list_hash_size = 0;
30static int ip_list_perms = 0644; 30static unsigned int ip_list_perms = 0644;
31#ifdef DEBUG 31#ifdef DEBUG
32static int debug = 1; 32static int debug = 1;
33#endif 33#endif
@@ -38,13 +38,13 @@ KERN_INFO RECENT_NAME " " RECENT_VER ": Stephen Frost <sfrost@snowman.net>. htt
38MODULE_AUTHOR("Stephen Frost <sfrost@snowman.net>"); 38MODULE_AUTHOR("Stephen Frost <sfrost@snowman.net>");
39MODULE_DESCRIPTION("IP tables recently seen matching module " RECENT_VER); 39MODULE_DESCRIPTION("IP tables recently seen matching module " RECENT_VER);
40MODULE_LICENSE("GPL"); 40MODULE_LICENSE("GPL");
41module_param(ip_list_tot, int, 0400); 41module_param(ip_list_tot, uint, 0400);
42module_param(ip_pkt_list_tot, int, 0400); 42module_param(ip_pkt_list_tot, uint, 0400);
43module_param(ip_list_hash_size, int, 0400); 43module_param(ip_list_hash_size, uint, 0400);
44module_param(ip_list_perms, int, 0400); 44module_param(ip_list_perms, uint, 0400);
45#ifdef DEBUG 45#ifdef DEBUG
46module_param(debug, int, 0600); 46module_param(debug, bool, 0600);
47MODULE_PARM_DESC(debug,"debugging level, defaults to 1"); 47MODULE_PARM_DESC(debug,"enable debugging output");
48#endif 48#endif
49MODULE_PARM_DESC(ip_list_tot,"number of IPs to remember per list"); 49MODULE_PARM_DESC(ip_list_tot,"number of IPs to remember per list");
50MODULE_PARM_DESC(ip_pkt_list_tot,"number of packets per IP to remember"); 50MODULE_PARM_DESC(ip_pkt_list_tot,"number of packets per IP to remember");
@@ -104,6 +104,7 @@ match(const struct sk_buff *skb,
104 const struct net_device *out, 104 const struct net_device *out,
105 const void *matchinfo, 105 const void *matchinfo,
106 int offset, 106 int offset,
107 unsigned int protoff,
107 int *hotdrop); 108 int *hotdrop);
108 109
109/* Function to hash a given address into the hash table of table_size size */ 110/* Function to hash a given address into the hash table of table_size size */
@@ -317,7 +318,7 @@ static int ip_recent_ctrl(struct file *file, const char __user *input, unsigned
317 skb->nh.iph->daddr = 0; 318 skb->nh.iph->daddr = 0;
318 /* Clear ttl since we have no way of knowing it */ 319 /* Clear ttl since we have no way of knowing it */
319 skb->nh.iph->ttl = 0; 320 skb->nh.iph->ttl = 0;
320 match(skb,NULL,NULL,info,0,NULL); 321 match(skb,NULL,NULL,info,0,0,NULL);
321 322
322 kfree(skb->nh.iph); 323 kfree(skb->nh.iph);
323out_free_skb: 324out_free_skb:
@@ -357,6 +358,7 @@ match(const struct sk_buff *skb,
357 const struct net_device *out, 358 const struct net_device *out,
358 const void *matchinfo, 359 const void *matchinfo,
359 int offset, 360 int offset,
361 unsigned int protoff,
360 int *hotdrop) 362 int *hotdrop)
361{ 363{
362 int pkt_count, hits_found, ans; 364 int pkt_count, hits_found, ans;
@@ -654,7 +656,7 @@ match(const struct sk_buff *skb,
654 */ 656 */
655static int 657static int
656checkentry(const char *tablename, 658checkentry(const char *tablename,
657 const struct ipt_ip *ip, 659 const void *ip,
658 void *matchinfo, 660 void *matchinfo,
659 unsigned int matchsize, 661 unsigned int matchsize,
660 unsigned int hook_mask) 662 unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_sctp.c b/net/ipv4/netfilter/ipt_sctp.c
deleted file mode 100644
index fe2b327bcaa4..000000000000
--- a/net/ipv4/netfilter/ipt_sctp.c
+++ /dev/null
@@ -1,203 +0,0 @@
1#include <linux/module.h>
2#include <linux/skbuff.h>
3#include <net/ip.h>
4#include <linux/sctp.h>
5
6#include <linux/netfilter_ipv4/ip_tables.h>
7#include <linux/netfilter_ipv4/ipt_sctp.h>
8
9#ifdef DEBUG_SCTP
10#define duprintf(format, args...) printk(format , ## args)
11#else
12#define duprintf(format, args...)
13#endif
14
15#define SCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \
16 || (!!((invflag) & (option)) ^ (cond)))
17
18static int
19match_flags(const struct ipt_sctp_flag_info *flag_info,
20 const int flag_count,
21 u_int8_t chunktype,
22 u_int8_t chunkflags)
23{
24 int i;
25
26 for (i = 0; i < flag_count; i++) {
27 if (flag_info[i].chunktype == chunktype) {
28 return (chunkflags & flag_info[i].flag_mask) == flag_info[i].flag;
29 }
30 }
31
32 return 1;
33}
34
35static int
36match_packet(const struct sk_buff *skb,
37 const u_int32_t *chunkmap,
38 int chunk_match_type,
39 const struct ipt_sctp_flag_info *flag_info,
40 const int flag_count,
41 int *hotdrop)
42{
43 int offset;
44 u_int32_t chunkmapcopy[256 / sizeof (u_int32_t)];
45 sctp_chunkhdr_t _sch, *sch;
46
47#ifdef DEBUG_SCTP
48 int i = 0;
49#endif
50
51 if (chunk_match_type == SCTP_CHUNK_MATCH_ALL) {
52 SCTP_CHUNKMAP_COPY(chunkmapcopy, chunkmap);
53 }
54
55 offset = skb->nh.iph->ihl * 4 + sizeof (sctp_sctphdr_t);
56 do {
57 sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch);
58 if (sch == NULL) {
59 duprintf("Dropping invalid SCTP packet.\n");
60 *hotdrop = 1;
61 return 0;
62 }
63
64 duprintf("Chunk num: %d\toffset: %d\ttype: %d\tlength: %d\tflags: %x\n",
65 ++i, offset, sch->type, htons(sch->length), sch->flags);
66
67 offset += (htons(sch->length) + 3) & ~3;
68
69 duprintf("skb->len: %d\toffset: %d\n", skb->len, offset);
70
71 if (SCTP_CHUNKMAP_IS_SET(chunkmap, sch->type)) {
72 switch (chunk_match_type) {
73 case SCTP_CHUNK_MATCH_ANY:
74 if (match_flags(flag_info, flag_count,
75 sch->type, sch->flags)) {
76 return 1;
77 }
78 break;
79
80 case SCTP_CHUNK_MATCH_ALL:
81 if (match_flags(flag_info, flag_count,
82 sch->type, sch->flags)) {
83 SCTP_CHUNKMAP_CLEAR(chunkmapcopy, sch->type);
84 }
85 break;
86
87 case SCTP_CHUNK_MATCH_ONLY:
88 if (!match_flags(flag_info, flag_count,
89 sch->type, sch->flags)) {
90 return 0;
91 }
92 break;
93 }
94 } else {
95 switch (chunk_match_type) {
96 case SCTP_CHUNK_MATCH_ONLY:
97 return 0;
98 }
99 }
100 } while (offset < skb->len);
101
102 switch (chunk_match_type) {
103 case SCTP_CHUNK_MATCH_ALL:
104 return SCTP_CHUNKMAP_IS_CLEAR(chunkmap);
105 case SCTP_CHUNK_MATCH_ANY:
106 return 0;
107 case SCTP_CHUNK_MATCH_ONLY:
108 return 1;
109 }
110
111 /* This will never be reached, but required to stop compiler whine */
112 return 0;
113}
114
115static int
116match(const struct sk_buff *skb,
117 const struct net_device *in,
118 const struct net_device *out,
119 const void *matchinfo,
120 int offset,
121 int *hotdrop)
122{
123 const struct ipt_sctp_info *info;
124 sctp_sctphdr_t _sh, *sh;
125
126 info = (const struct ipt_sctp_info *)matchinfo;
127
128 if (offset) {
129 duprintf("Dropping non-first fragment.. FIXME\n");
130 return 0;
131 }
132
133 sh = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_sh), &_sh);
134 if (sh == NULL) {
135 duprintf("Dropping evil TCP offset=0 tinygram.\n");
136 *hotdrop = 1;
137 return 0;
138 }
139 duprintf("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest));
140
141 return SCCHECK(((ntohs(sh->source) >= info->spts[0])
142 && (ntohs(sh->source) <= info->spts[1])),
143 IPT_SCTP_SRC_PORTS, info->flags, info->invflags)
144 && SCCHECK(((ntohs(sh->dest) >= info->dpts[0])
145 && (ntohs(sh->dest) <= info->dpts[1])),
146 IPT_SCTP_DEST_PORTS, info->flags, info->invflags)
147 && SCCHECK(match_packet(skb, info->chunkmap, info->chunk_match_type,
148 info->flag_info, info->flag_count,
149 hotdrop),
150 IPT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
151}
152
153static int
154checkentry(const char *tablename,
155 const struct ipt_ip *ip,
156 void *matchinfo,
157 unsigned int matchsize,
158 unsigned int hook_mask)
159{
160 const struct ipt_sctp_info *info;
161
162 info = (const struct ipt_sctp_info *)matchinfo;
163
164 return ip->proto == IPPROTO_SCTP
165 && !(ip->invflags & IPT_INV_PROTO)
166 && matchsize == IPT_ALIGN(sizeof(struct ipt_sctp_info))
167 && !(info->flags & ~IPT_SCTP_VALID_FLAGS)
168 && !(info->invflags & ~IPT_SCTP_VALID_FLAGS)
169 && !(info->invflags & ~info->flags)
170 && ((!(info->flags & IPT_SCTP_CHUNK_TYPES)) ||
171 (info->chunk_match_type &
172 (SCTP_CHUNK_MATCH_ALL
173 | SCTP_CHUNK_MATCH_ANY
174 | SCTP_CHUNK_MATCH_ONLY)));
175}
176
177static struct ipt_match sctp_match =
178{
179 .list = { NULL, NULL},
180 .name = "sctp",
181 .match = &match,
182 .checkentry = &checkentry,
183 .destroy = NULL,
184 .me = THIS_MODULE
185};
186
187static int __init init(void)
188{
189 return ipt_register_match(&sctp_match);
190}
191
192static void __exit fini(void)
193{
194 ipt_unregister_match(&sctp_match);
195}
196
197module_init(init);
198module_exit(fini);
199
200MODULE_LICENSE("GPL");
201MODULE_AUTHOR("Kiran Kumar Immidi");
202MODULE_DESCRIPTION("Match for SCTP protocol packets");
203
diff --git a/net/ipv4/netfilter/ipt_state.c b/net/ipv4/netfilter/ipt_state.c
deleted file mode 100644
index 4d7f16b70cec..000000000000
--- a/net/ipv4/netfilter/ipt_state.c
+++ /dev/null
@@ -1,74 +0,0 @@
1/* Kernel module to match connection tracking information. */
2
3/* (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/skbuff.h>
13#include <net/netfilter/nf_conntrack_compat.h>
14#include <linux/netfilter_ipv4/ip_tables.h>
15#include <linux/netfilter_ipv4/ipt_state.h>
16
17MODULE_LICENSE("GPL");
18MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
19MODULE_DESCRIPTION("iptables connection tracking state match module");
20
21static int
22match(const struct sk_buff *skb,
23 const struct net_device *in,
24 const struct net_device *out,
25 const void *matchinfo,
26 int offset,
27 int *hotdrop)
28{
29 const struct ipt_state_info *sinfo = matchinfo;
30 enum ip_conntrack_info ctinfo;
31 unsigned int statebit;
32
33 if (nf_ct_is_untracked(skb))
34 statebit = IPT_STATE_UNTRACKED;
35 else if (!nf_ct_get_ctinfo(skb, &ctinfo))
36 statebit = IPT_STATE_INVALID;
37 else
38 statebit = IPT_STATE_BIT(ctinfo);
39
40 return (sinfo->statemask & statebit);
41}
42
43static int check(const char *tablename,
44 const struct ipt_ip *ip,
45 void *matchinfo,
46 unsigned int matchsize,
47 unsigned int hook_mask)
48{
49 if (matchsize != IPT_ALIGN(sizeof(struct ipt_state_info)))
50 return 0;
51
52 return 1;
53}
54
55static struct ipt_match state_match = {
56 .name = "state",
57 .match = &match,
58 .checkentry = &check,
59 .me = THIS_MODULE,
60};
61
62static int __init init(void)
63{
64 need_ip_conntrack();
65 return ipt_register_match(&state_match);
66}
67
68static void __exit fini(void)
69{
70 ipt_unregister_match(&state_match);
71}
72
73module_init(init);
74module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_string.c b/net/ipv4/netfilter/ipt_string.c
deleted file mode 100644
index b5def204d798..000000000000
--- a/net/ipv4/netfilter/ipt_string.c
+++ /dev/null
@@ -1,91 +0,0 @@
1/* String matching match for iptables
2 *
3 * (C) 2005 Pablo Neira Ayuso <pablo@eurodev.net>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/init.h>
11#include <linux/module.h>
12#include <linux/kernel.h>
13#include <linux/skbuff.h>
14#include <linux/netfilter_ipv4/ip_tables.h>
15#include <linux/netfilter_ipv4/ipt_string.h>
16#include <linux/textsearch.h>
17
18MODULE_AUTHOR("Pablo Neira Ayuso <pablo@eurodev.net>");
19MODULE_DESCRIPTION("IP tables string match module");
20MODULE_LICENSE("GPL");
21
22static int match(const struct sk_buff *skb,
23 const struct net_device *in,
24 const struct net_device *out,
25 const void *matchinfo,
26 int offset,
27 int *hotdrop)
28{
29 struct ts_state state;
30 struct ipt_string_info *conf = (struct ipt_string_info *) matchinfo;
31
32 memset(&state, 0, sizeof(struct ts_state));
33
34 return (skb_find_text((struct sk_buff *)skb, conf->from_offset,
35 conf->to_offset, conf->config, &state)
36 != UINT_MAX) && !conf->invert;
37}
38
39#define STRING_TEXT_PRIV(m) ((struct ipt_string_info *) m)
40
41static int checkentry(const char *tablename,
42 const struct ipt_ip *ip,
43 void *matchinfo,
44 unsigned int matchsize,
45 unsigned int hook_mask)
46{
47 struct ipt_string_info *conf = matchinfo;
48 struct ts_config *ts_conf;
49
50 if (matchsize != IPT_ALIGN(sizeof(struct ipt_string_info)))
51 return 0;
52
53 /* Damn, can't handle this case properly with iptables... */
54 if (conf->from_offset > conf->to_offset)
55 return 0;
56
57 ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen,
58 GFP_KERNEL, TS_AUTOLOAD);
59 if (IS_ERR(ts_conf))
60 return 0;
61
62 conf->config = ts_conf;
63
64 return 1;
65}
66
67static void destroy(void *matchinfo, unsigned int matchsize)
68{
69 textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config);
70}
71
72static struct ipt_match string_match = {
73 .name = "string",
74 .match = match,
75 .checkentry = checkentry,
76 .destroy = destroy,
77 .me = THIS_MODULE
78};
79
80static int __init init(void)
81{
82 return ipt_register_match(&string_match);
83}
84
85static void __exit fini(void)
86{
87 ipt_unregister_match(&string_match);
88}
89
90module_init(init);
91module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_tcpmss.c b/net/ipv4/netfilter/ipt_tcpmss.c
deleted file mode 100644
index 4dc9b16ab4a3..000000000000
--- a/net/ipv4/netfilter/ipt_tcpmss.c
+++ /dev/null
@@ -1,127 +0,0 @@
1/* Kernel module to match TCP MSS values. */
2
3/* Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/module.h>
11#include <linux/skbuff.h>
12#include <net/tcp.h>
13
14#include <linux/netfilter_ipv4/ipt_tcpmss.h>
15#include <linux/netfilter_ipv4/ip_tables.h>
16
17#define TH_SYN 0x02
18
19MODULE_LICENSE("GPL");
20MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
21MODULE_DESCRIPTION("iptables TCP MSS match module");
22
23/* Returns 1 if the mss option is set and matched by the range, 0 otherwise */
24static inline int
25mssoption_match(u_int16_t min, u_int16_t max,
26 const struct sk_buff *skb,
27 int invert,
28 int *hotdrop)
29{
30 struct tcphdr _tcph, *th;
31 /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
32 u8 _opt[15 * 4 - sizeof(_tcph)], *op;
33 unsigned int i, optlen;
34
35 /* If we don't have the whole header, drop packet. */
36 th = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
37 sizeof(_tcph), &_tcph);
38 if (th == NULL)
39 goto dropit;
40
41 /* Malformed. */
42 if (th->doff*4 < sizeof(*th))
43 goto dropit;
44
45 optlen = th->doff*4 - sizeof(*th);
46 if (!optlen)
47 goto out;
48
49 /* Truncated options. */
50 op = skb_header_pointer(skb, skb->nh.iph->ihl * 4 + sizeof(*th),
51 optlen, _opt);
52 if (op == NULL)
53 goto dropit;
54
55 for (i = 0; i < optlen; ) {
56 if (op[i] == TCPOPT_MSS
57 && (optlen - i) >= TCPOLEN_MSS
58 && op[i+1] == TCPOLEN_MSS) {
59 u_int16_t mssval;
60
61 mssval = (op[i+2] << 8) | op[i+3];
62
63 return (mssval >= min && mssval <= max) ^ invert;
64 }
65 if (op[i] < 2) i++;
66 else i += op[i+1]?:1;
67 }
68out:
69 return invert;
70
71 dropit:
72 *hotdrop = 1;
73 return 0;
74}
75
76static int
77match(const struct sk_buff *skb,
78 const struct net_device *in,
79 const struct net_device *out,
80 const void *matchinfo,
81 int offset,
82 int *hotdrop)
83{
84 const struct ipt_tcpmss_match_info *info = matchinfo;
85
86 return mssoption_match(info->mss_min, info->mss_max, skb,
87 info->invert, hotdrop);
88}
89
90static int
91checkentry(const char *tablename,
92 const struct ipt_ip *ip,
93 void *matchinfo,
94 unsigned int matchsize,
95 unsigned int hook_mask)
96{
97 if (matchsize != IPT_ALIGN(sizeof(struct ipt_tcpmss_match_info)))
98 return 0;
99
100 /* Must specify -p tcp */
101 if (ip->proto != IPPROTO_TCP || (ip->invflags & IPT_INV_PROTO)) {
102 printk("tcpmss: Only works on TCP packets\n");
103 return 0;
104 }
105
106 return 1;
107}
108
109static struct ipt_match tcpmss_match = {
110 .name = "tcpmss",
111 .match = &match,
112 .checkentry = &checkentry,
113 .me = THIS_MODULE,
114};
115
116static int __init init(void)
117{
118 return ipt_register_match(&tcpmss_match);
119}
120
121static void __exit fini(void)
122{
123 ipt_unregister_match(&tcpmss_match);
124}
125
126module_init(init);
127module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c
index 086a1bb61e3e..9ab765e126f2 100644
--- a/net/ipv4/netfilter/ipt_tos.c
+++ b/net/ipv4/netfilter/ipt_tos.c
@@ -23,6 +23,7 @@ match(const struct sk_buff *skb,
23 const struct net_device *out, 23 const struct net_device *out,
24 const void *matchinfo, 24 const void *matchinfo,
25 int offset, 25 int offset,
26 unsigned int protoff,
26 int *hotdrop) 27 int *hotdrop)
27{ 28{
28 const struct ipt_tos_info *info = matchinfo; 29 const struct ipt_tos_info *info = matchinfo;
@@ -32,7 +33,7 @@ match(const struct sk_buff *skb,
32 33
33static int 34static int
34checkentry(const char *tablename, 35checkentry(const char *tablename,
35 const struct ipt_ip *ip, 36 const void *ip,
36 void *matchinfo, 37 void *matchinfo,
37 unsigned int matchsize, 38 unsigned int matchsize,
38 unsigned int hook_mask) 39 unsigned int hook_mask)
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c
index 219aa9de88cc..82da53f430ab 100644
--- a/net/ipv4/netfilter/ipt_ttl.c
+++ b/net/ipv4/netfilter/ipt_ttl.c
@@ -21,7 +21,7 @@ MODULE_LICENSE("GPL");
21 21
22static int match(const struct sk_buff *skb, const struct net_device *in, 22static int match(const struct sk_buff *skb, const struct net_device *in,
23 const struct net_device *out, const void *matchinfo, 23 const struct net_device *out, const void *matchinfo,
24 int offset, int *hotdrop) 24 int offset, unsigned int protoff, int *hotdrop)
25{ 25{
26 const struct ipt_ttl_info *info = matchinfo; 26 const struct ipt_ttl_info *info = matchinfo;
27 27
@@ -47,7 +47,7 @@ static int match(const struct sk_buff *skb, const struct net_device *in,
47 return 0; 47 return 0;
48} 48}
49 49
50static int checkentry(const char *tablename, const struct ipt_ip *ip, 50static int checkentry(const char *tablename, const void *ip,
51 void *matchinfo, unsigned int matchsize, 51 void *matchinfo, unsigned int matchsize,
52 unsigned int hook_mask) 52 unsigned int hook_mask)
53{ 53{
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 260a4f0a2a90..212a3079085b 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -78,7 +78,8 @@ static struct ipt_table packet_filter = {
78 .name = "filter", 78 .name = "filter",
79 .valid_hooks = FILTER_VALID_HOOKS, 79 .valid_hooks = FILTER_VALID_HOOKS,
80 .lock = RW_LOCK_UNLOCKED, 80 .lock = RW_LOCK_UNLOCKED,
81 .me = THIS_MODULE 81 .me = THIS_MODULE,
82 .af = AF_INET,
82}; 83};
83 84
84/* The work comes in here from netfilter.c. */ 85/* The work comes in here from netfilter.c. */
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 160eb11b6e2f..3212a5cc4b6b 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -109,6 +109,7 @@ static struct ipt_table packet_mangler = {
109 .valid_hooks = MANGLE_VALID_HOOKS, 109 .valid_hooks = MANGLE_VALID_HOOKS,
110 .lock = RW_LOCK_UNLOCKED, 110 .lock = RW_LOCK_UNLOCKED,
111 .me = THIS_MODULE, 111 .me = THIS_MODULE,
112 .af = AF_INET,
112}; 113};
113 114
114/* The work comes in here from netfilter.c. */ 115/* The work comes in here from netfilter.c. */
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 47449ba83eb9..fdb9e9c81e81 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -83,7 +83,8 @@ static struct ipt_table packet_raw = {
83 .name = "raw", 83 .name = "raw",
84 .valid_hooks = RAW_VALID_HOOKS, 84 .valid_hooks = RAW_VALID_HOOKS,
85 .lock = RW_LOCK_UNLOCKED, 85 .lock = RW_LOCK_UNLOCKED,
86 .me = THIS_MODULE 86 .me = THIS_MODULE,
87 .af = AF_INET,
87}; 88};
88 89
89/* The work comes in here from netfilter.c. */ 90/* The work comes in here from netfilter.c. */
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 8202c1c0afad..167619f638c6 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -22,6 +22,7 @@
22#include <linux/skbuff.h> 22#include <linux/skbuff.h>
23#include <linux/icmp.h> 23#include <linux/icmp.h>
24#include <linux/sysctl.h> 24#include <linux/sysctl.h>
25#include <net/route.h>
25#include <net/ip.h> 26#include <net/ip.h>
26 27
27#include <linux/netfilter_ipv4.h> 28#include <linux/netfilter_ipv4.h>
@@ -180,30 +181,6 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
180 return NF_ACCEPT; 181 return NF_ACCEPT;
181} 182}
182 183
183static unsigned int ipv4_refrag(unsigned int hooknum,
184 struct sk_buff **pskb,
185 const struct net_device *in,
186 const struct net_device *out,
187 int (*okfn)(struct sk_buff *))
188{
189 struct rtable *rt = (struct rtable *)(*pskb)->dst;
190
191 /* We've seen it coming out the other side: confirm */
192 if (ipv4_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT)
193 return NF_DROP;
194
195 /* Local packets are never produced too large for their
196 interface. We degfragment them at LOCAL_OUT, however,
197 so we have to refragment them here. */
198 if ((*pskb)->len > dst_mtu(&rt->u.dst) &&
199 !skb_shinfo(*pskb)->tso_size) {
200 /* No hook can be after us, so this should be OK. */
201 ip_fragment(*pskb, okfn);
202 return NF_STOLEN;
203 }
204 return NF_ACCEPT;
205}
206
207static unsigned int ipv4_conntrack_in(unsigned int hooknum, 184static unsigned int ipv4_conntrack_in(unsigned int hooknum,
208 struct sk_buff **pskb, 185 struct sk_buff **pskb,
209 const struct net_device *in, 186 const struct net_device *in,
@@ -283,7 +260,7 @@ static struct nf_hook_ops ipv4_conntrack_helper_in_ops = {
283 260
284/* Refragmenter; last chance. */ 261/* Refragmenter; last chance. */
285static struct nf_hook_ops ipv4_conntrack_out_ops = { 262static struct nf_hook_ops ipv4_conntrack_out_ops = {
286 .hook = ipv4_refrag, 263 .hook = ipv4_confirm,
287 .owner = THIS_MODULE, 264 .owner = THIS_MODULE,
288 .pf = PF_INET, 265 .pf = PF_INET,
289 .hooknum = NF_IP_POST_ROUTING, 266 .hooknum = NF_IP_POST_ROUTING,
@@ -300,7 +277,7 @@ static struct nf_hook_ops ipv4_conntrack_local_in_ops = {
300 277
301#ifdef CONFIG_SYSCTL 278#ifdef CONFIG_SYSCTL
302/* From nf_conntrack_proto_icmp.c */ 279/* From nf_conntrack_proto_icmp.c */
303extern unsigned long nf_ct_icmp_timeout; 280extern unsigned int nf_ct_icmp_timeout;
304static struct ctl_table_header *nf_ct_ipv4_sysctl_header; 281static struct ctl_table_header *nf_ct_ipv4_sysctl_header;
305 282
306static ctl_table nf_ct_sysctl_table[] = { 283static ctl_table nf_ct_sysctl_table[] = {
@@ -392,6 +369,48 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
392 return -ENOENT; 369 return -ENOENT;
393} 370}
394 371
372#if defined(CONFIG_NF_CT_NETLINK) || \
373 defined(CONFIG_NF_CT_NETLINK_MODULE)
374
375#include <linux/netfilter/nfnetlink.h>
376#include <linux/netfilter/nfnetlink_conntrack.h>
377
378static int ipv4_tuple_to_nfattr(struct sk_buff *skb,
379 const struct nf_conntrack_tuple *tuple)
380{
381 NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t),
382 &tuple->src.u3.ip);
383 NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t),
384 &tuple->dst.u3.ip);
385 return 0;
386
387nfattr_failure:
388 return -1;
389}
390
391static const size_t cta_min_ip[CTA_IP_MAX] = {
392 [CTA_IP_V4_SRC-1] = sizeof(u_int32_t),
393 [CTA_IP_V4_DST-1] = sizeof(u_int32_t),
394};
395
396static int ipv4_nfattr_to_tuple(struct nfattr *tb[],
397 struct nf_conntrack_tuple *t)
398{
399 if (!tb[CTA_IP_V4_SRC-1] || !tb[CTA_IP_V4_DST-1])
400 return -EINVAL;
401
402 if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip))
403 return -EINVAL;
404
405 t->src.u3.ip =
406 *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_SRC-1]);
407 t->dst.u3.ip =
408 *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_DST-1]);
409
410 return 0;
411}
412#endif
413
395static struct nf_sockopt_ops so_getorigdst = { 414static struct nf_sockopt_ops so_getorigdst = {
396 .pf = PF_INET, 415 .pf = PF_INET,
397 .get_optmin = SO_ORIGINAL_DST, 416 .get_optmin = SO_ORIGINAL_DST,
@@ -408,6 +427,11 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
408 .print_conntrack = ipv4_print_conntrack, 427 .print_conntrack = ipv4_print_conntrack,
409 .prepare = ipv4_prepare, 428 .prepare = ipv4_prepare,
410 .get_features = ipv4_get_features, 429 .get_features = ipv4_get_features,
430#if defined(CONFIG_NF_CT_NETLINK) || \
431 defined(CONFIG_NF_CT_NETLINK_MODULE)
432 .tuple_to_nfattr = ipv4_tuple_to_nfattr,
433 .nfattr_to_tuple = ipv4_nfattr_to_tuple,
434#endif
411 .me = THIS_MODULE, 435 .me = THIS_MODULE,
412}; 436};
413 437
@@ -551,7 +575,7 @@ MODULE_LICENSE("GPL");
551 575
552static int __init init(void) 576static int __init init(void)
553{ 577{
554 need_nf_conntrack(); 578 need_conntrack();
555 return init_or_cleanup(1); 579 return init_or_cleanup(1);
556} 580}
557 581
@@ -563,9 +587,4 @@ static void __exit fini(void)
563module_init(init); 587module_init(init);
564module_exit(fini); 588module_exit(fini);
565 589
566void need_ip_conntrack(void)
567{
568}
569
570EXPORT_SYMBOL(need_ip_conntrack);
571EXPORT_SYMBOL(nf_ct_ipv4_gather_frags); 590EXPORT_SYMBOL(nf_ct_ipv4_gather_frags);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 7ddb5c08f7b8..52dc175be39a 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -50,20 +50,21 @@ static int icmp_pkt_to_tuple(const struct sk_buff *skb,
50 return 1; 50 return 1;
51} 51}
52 52
53/* Add 1; spaces filled with 0. */
54static const u_int8_t invmap[] = {
55 [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
56 [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
57 [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
58 [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
59 [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
60 [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
61 [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
62 [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
63};
64
53static int icmp_invert_tuple(struct nf_conntrack_tuple *tuple, 65static int icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
54 const struct nf_conntrack_tuple *orig) 66 const struct nf_conntrack_tuple *orig)
55{ 67{
56 /* Add 1; spaces filled with 0. */
57 static u_int8_t invmap[]
58 = { [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
59 [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
60 [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
61 [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
62 [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
63 [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
64 [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
65 [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1};
66
67 if (orig->dst.u.icmp.type >= sizeof(invmap) 68 if (orig->dst.u.icmp.type >= sizeof(invmap)
68 || !invmap[orig->dst.u.icmp.type]) 69 || !invmap[orig->dst.u.icmp.type])
69 return 0; 70 return 0;
@@ -120,11 +121,12 @@ static int icmp_packet(struct nf_conn *ct,
120static int icmp_new(struct nf_conn *conntrack, 121static int icmp_new(struct nf_conn *conntrack,
121 const struct sk_buff *skb, unsigned int dataoff) 122 const struct sk_buff *skb, unsigned int dataoff)
122{ 123{
123 static u_int8_t valid_new[] 124 static const u_int8_t valid_new[] = {
124 = { [ICMP_ECHO] = 1, 125 [ICMP_ECHO] = 1,
125 [ICMP_TIMESTAMP] = 1, 126 [ICMP_TIMESTAMP] = 1,
126 [ICMP_INFO_REQUEST] = 1, 127 [ICMP_INFO_REQUEST] = 1,
127 [ICMP_ADDRESS] = 1 }; 128 [ICMP_ADDRESS] = 1
129 };
128 130
129 if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) 131 if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
130 || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { 132 || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
@@ -168,7 +170,7 @@ icmp_error_message(struct sk_buff *skb,
168 return -NF_ACCEPT; 170 return -NF_ACCEPT;
169 } 171 }
170 172
171 innerproto = nf_ct_find_proto(PF_INET, inside->ip.protocol); 173 innerproto = __nf_ct_proto_find(PF_INET, inside->ip.protocol);
172 dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp); 174 dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp);
173 /* Are they talking about one of our connections? */ 175 /* Are they talking about one of our connections? */
174 if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET, 176 if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET,
@@ -281,6 +283,60 @@ checksum_skipped:
281 return icmp_error_message(skb, ctinfo, hooknum); 283 return icmp_error_message(skb, ctinfo, hooknum);
282} 284}
283 285
286#if defined(CONFIG_NF_CT_NETLINK) || \
287 defined(CONFIG_NF_CT_NETLINK_MODULE)
288
289#include <linux/netfilter/nfnetlink.h>
290#include <linux/netfilter/nfnetlink_conntrack.h>
291
292static int icmp_tuple_to_nfattr(struct sk_buff *skb,
293 const struct nf_conntrack_tuple *t)
294{
295 NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t),
296 &t->src.u.icmp.id);
297 NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t),
298 &t->dst.u.icmp.type);
299 NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t),
300 &t->dst.u.icmp.code);
301
302 return 0;
303
304nfattr_failure:
305 return -1;
306}
307
308static const size_t cta_min_proto[CTA_PROTO_MAX] = {
309 [CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t),
310 [CTA_PROTO_ICMP_CODE-1] = sizeof(u_int8_t),
311 [CTA_PROTO_ICMP_ID-1] = sizeof(u_int16_t)
312};
313
314static int icmp_nfattr_to_tuple(struct nfattr *tb[],
315 struct nf_conntrack_tuple *tuple)
316{
317 if (!tb[CTA_PROTO_ICMP_TYPE-1]
318 || !tb[CTA_PROTO_ICMP_CODE-1]
319 || !tb[CTA_PROTO_ICMP_ID-1])
320 return -EINVAL;
321
322 if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
323 return -EINVAL;
324
325 tuple->dst.u.icmp.type =
326 *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]);
327 tuple->dst.u.icmp.code =
328 *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]);
329 tuple->src.u.icmp.id =
330 *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
331
332 if (tuple->dst.u.icmp.type >= sizeof(invmap)
333 || !invmap[tuple->dst.u.icmp.type])
334 return -EINVAL;
335
336 return 0;
337}
338#endif
339
284struct nf_conntrack_protocol nf_conntrack_protocol_icmp = 340struct nf_conntrack_protocol nf_conntrack_protocol_icmp =
285{ 341{
286 .list = { NULL, NULL }, 342 .list = { NULL, NULL },
@@ -295,7 +351,12 @@ struct nf_conntrack_protocol nf_conntrack_protocol_icmp =
295 .new = icmp_new, 351 .new = icmp_new,
296 .error = icmp_error, 352 .error = icmp_error,
297 .destroy = NULL, 353 .destroy = NULL,
298 .me = NULL 354 .me = NULL,
355#if defined(CONFIG_NF_CT_NETLINK) || \
356 defined(CONFIG_NF_CT_NETLINK_MODULE)
357 .tuple_to_nfattr = icmp_tuple_to_nfattr,
358 .nfattr_to_tuple = icmp_nfattr_to_tuple,
359#endif
299}; 360};
300 361
301EXPORT_SYMBOL(nf_conntrack_protocol_icmp); 362EXPORT_SYMBOL(nf_conntrack_protocol_icmp);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 0d7dc668db46..39d49dc333a7 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -38,6 +38,7 @@
38#include <net/protocol.h> 38#include <net/protocol.h>
39#include <net/tcp.h> 39#include <net/tcp.h>
40#include <net/udp.h> 40#include <net/udp.h>
41#include <linux/inetdevice.h>
41#include <linux/proc_fs.h> 42#include <linux/proc_fs.h>
42#include <linux/seq_file.h> 43#include <linux/seq_file.h>
43#include <net/sock.h> 44#include <net/sock.h>
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 4b0d7e4d6269..f29a12da5109 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -40,12 +40,12 @@
40 */ 40 */
41 41
42#include <linux/config.h> 42#include <linux/config.h>
43#include <linux/types.h>
43#include <asm/atomic.h> 44#include <asm/atomic.h>
44#include <asm/byteorder.h> 45#include <asm/byteorder.h>
45#include <asm/current.h> 46#include <asm/current.h>
46#include <asm/uaccess.h> 47#include <asm/uaccess.h>
47#include <asm/ioctls.h> 48#include <asm/ioctls.h>
48#include <linux/types.h>
49#include <linux/stddef.h> 49#include <linux/stddef.h>
50#include <linux/slab.h> 50#include <linux/slab.h>
51#include <linux/errno.h> 51#include <linux/errno.h>
@@ -255,6 +255,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
255 kfree_skb(skb); 255 kfree_skb(skb);
256 return NET_RX_DROP; 256 return NET_RX_DROP;
257 } 257 }
258 nf_reset(skb);
258 259
259 skb_push(skb, skb->data - skb->nh.raw); 260 skb_push(skb, skb->data - skb->nh.raw);
260 261
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f701a136a6ae..d82c242ea704 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -240,9 +240,9 @@ static unsigned rt_hash_mask;
240static int rt_hash_log; 240static int rt_hash_log;
241static unsigned int rt_hash_rnd; 241static unsigned int rt_hash_rnd;
242 242
243static struct rt_cache_stat *rt_cache_stat; 243static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
244#define RT_CACHE_STAT_INC(field) \ 244#define RT_CACHE_STAT_INC(field) \
245 (per_cpu_ptr(rt_cache_stat, raw_smp_processor_id())->field++) 245 (per_cpu(rt_cache_stat, raw_smp_processor_id()).field++)
246 246
247static int rt_intern_hash(unsigned hash, struct rtable *rth, 247static int rt_intern_hash(unsigned hash, struct rtable *rth,
248 struct rtable **res); 248 struct rtable **res);
@@ -401,7 +401,7 @@ static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
401 if (!cpu_possible(cpu)) 401 if (!cpu_possible(cpu))
402 continue; 402 continue;
403 *pos = cpu+1; 403 *pos = cpu+1;
404 return per_cpu_ptr(rt_cache_stat, cpu); 404 return &per_cpu(rt_cache_stat, cpu);
405 } 405 }
406 return NULL; 406 return NULL;
407} 407}
@@ -414,7 +414,7 @@ static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
414 if (!cpu_possible(cpu)) 414 if (!cpu_possible(cpu))
415 continue; 415 continue;
416 *pos = cpu+1; 416 *pos = cpu+1;
417 return per_cpu_ptr(rt_cache_stat, cpu); 417 return &per_cpu(rt_cache_stat, cpu);
418 } 418 }
419 return NULL; 419 return NULL;
420 420
@@ -3160,10 +3160,6 @@ int __init ip_rt_init(void)
3160 ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1); 3160 ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1);
3161 ip_rt_max_size = (rt_hash_mask + 1) * 16; 3161 ip_rt_max_size = (rt_hash_mask + 1) * 16;
3162 3162
3163 rt_cache_stat = alloc_percpu(struct rt_cache_stat);
3164 if (!rt_cache_stat)
3165 return -ENOMEM;
3166
3167 devinet_init(); 3163 devinet_init();
3168 ip_fib_init(); 3164 ip_fib_init();
3169 3165
@@ -3191,7 +3187,6 @@ int __init ip_rt_init(void)
3191 if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) || 3187 if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) ||
3192 !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO, 3188 !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO,
3193 proc_net_stat))) { 3189 proc_net_stat))) {
3194 free_percpu(rt_cache_stat);
3195 return -ENOMEM; 3190 return -ENOMEM;
3196 } 3191 }
3197 rtstat_pde->proc_fops = &rt_cpu_seq_fops; 3192 rtstat_pde->proc_fops = &rt_cpu_seq_fops;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index a34e60ea48a1..e20be3331f67 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -173,10 +173,10 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
173 struct request_sock *req, 173 struct request_sock *req,
174 struct dst_entry *dst) 174 struct dst_entry *dst)
175{ 175{
176 struct tcp_sock *tp = tcp_sk(sk); 176 struct inet_connection_sock *icsk = inet_csk(sk);
177 struct sock *child; 177 struct sock *child;
178 178
179 child = tp->af_specific->syn_recv_sock(sk, skb, req, dst); 179 child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
180 if (child) 180 if (child)
181 inet_csk_reqsk_queue_add(sk, req, child); 181 inet_csk_reqsk_queue_add(sk, req, child);
182 else 182 else
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 01444a02b48b..16984d4a8a06 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -12,6 +12,7 @@
12#include <linux/sysctl.h> 12#include <linux/sysctl.h>
13#include <linux/config.h> 13#include <linux/config.h>
14#include <linux/igmp.h> 14#include <linux/igmp.h>
15#include <linux/inetdevice.h>
15#include <net/snmp.h> 16#include <net/snmp.h>
16#include <net/icmp.h> 17#include <net/icmp.h>
17#include <net/ip.h> 18#include <net/ip.h>
@@ -22,6 +23,7 @@
22extern int sysctl_ip_nonlocal_bind; 23extern int sysctl_ip_nonlocal_bind;
23 24
24#ifdef CONFIG_SYSCTL 25#ifdef CONFIG_SYSCTL
26static int zero;
25static int tcp_retr1_max = 255; 27static int tcp_retr1_max = 255;
26static int ip_local_port_range_min[] = { 1, 1 }; 28static int ip_local_port_range_min[] = { 1, 1 };
27static int ip_local_port_range_max[] = { 65535, 65535 }; 29static int ip_local_port_range_max[] = { 65535, 65535 };
@@ -614,6 +616,15 @@ ctl_table ipv4_table[] = {
614 .strategy = &sysctl_jiffies 616 .strategy = &sysctl_jiffies
615 }, 617 },
616 { 618 {
619 .ctl_name = NET_IPV4_IPFRAG_MAX_DIST,
620 .procname = "ipfrag_max_dist",
621 .data = &sysctl_ipfrag_max_dist,
622 .maxlen = sizeof(int),
623 .mode = 0644,
624 .proc_handler = &proc_dointvec_minmax,
625 .extra1 = &zero
626 },
627 {
617 .ctl_name = NET_TCP_NO_METRICS_SAVE, 628 .ctl_name = NET_TCP_NO_METRICS_SAVE,
618 .procname = "tcp_no_metrics_save", 629 .procname = "tcp_no_metrics_save",
619 .data = &sysctl_tcp_nometrics_save, 630 .data = &sysctl_tcp_nometrics_save,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ef98b14ac56d..00aa80e93243 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1696,8 +1696,8 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
1696 int err = 0; 1696 int err = 0;
1697 1697
1698 if (level != SOL_TCP) 1698 if (level != SOL_TCP)
1699 return tp->af_specific->setsockopt(sk, level, optname, 1699 return icsk->icsk_af_ops->setsockopt(sk, level, optname,
1700 optval, optlen); 1700 optval, optlen);
1701 1701
1702 /* This is a string value all the others are int's */ 1702 /* This is a string value all the others are int's */
1703 if (optname == TCP_CONGESTION) { 1703 if (optname == TCP_CONGESTION) {
@@ -1914,7 +1914,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
1914 info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime); 1914 info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime);
1915 info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp); 1915 info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp);
1916 1916
1917 info->tcpi_pmtu = tp->pmtu_cookie; 1917 info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
1918 info->tcpi_rcv_ssthresh = tp->rcv_ssthresh; 1918 info->tcpi_rcv_ssthresh = tp->rcv_ssthresh;
1919 info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3; 1919 info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3;
1920 info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2; 1920 info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2;
@@ -1939,8 +1939,8 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
1939 int val, len; 1939 int val, len;
1940 1940
1941 if (level != SOL_TCP) 1941 if (level != SOL_TCP)
1942 return tp->af_specific->getsockopt(sk, level, optname, 1942 return icsk->icsk_af_ops->getsockopt(sk, level, optname,
1943 optval, optlen); 1943 optval, optlen);
1944 1944
1945 if (get_user(len, optlen)) 1945 if (get_user(len, optlen))
1946 return -EFAULT; 1946 return -EFAULT;
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 1d0cd86621b1..035f2092d73a 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -30,8 +30,6 @@ static int fast_convergence = 1;
30static int max_increment = 16; 30static int max_increment = 16;
31static int low_window = 14; 31static int low_window = 14;
32static int beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */ 32static int beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */
33static int low_utilization_threshold = 153;
34static int low_utilization_period = 2;
35static int initial_ssthresh = 100; 33static int initial_ssthresh = 100;
36static int smooth_part = 20; 34static int smooth_part = 20;
37 35
@@ -43,10 +41,6 @@ module_param(low_window, int, 0644);
43MODULE_PARM_DESC(low_window, "lower bound on congestion window (for TCP friendliness)"); 41MODULE_PARM_DESC(low_window, "lower bound on congestion window (for TCP friendliness)");
44module_param(beta, int, 0644); 42module_param(beta, int, 0644);
45MODULE_PARM_DESC(beta, "beta for multiplicative increase"); 43MODULE_PARM_DESC(beta, "beta for multiplicative increase");
46module_param(low_utilization_threshold, int, 0644);
47MODULE_PARM_DESC(low_utilization_threshold, "percent (scaled by 1024) for low utilization mode");
48module_param(low_utilization_period, int, 0644);
49MODULE_PARM_DESC(low_utilization_period, "if average delay exceeds then goto to low utilization mode (seconds)");
50module_param(initial_ssthresh, int, 0644); 44module_param(initial_ssthresh, int, 0644);
51MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold"); 45MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
52module_param(smooth_part, int, 0644); 46module_param(smooth_part, int, 0644);
@@ -60,11 +54,6 @@ struct bictcp {
60 u32 loss_cwnd; /* congestion window at last loss */ 54 u32 loss_cwnd; /* congestion window at last loss */
61 u32 last_cwnd; /* the last snd_cwnd */ 55 u32 last_cwnd; /* the last snd_cwnd */
62 u32 last_time; /* time when updated last_cwnd */ 56 u32 last_time; /* time when updated last_cwnd */
63 u32 delay_min; /* min delay */
64 u32 delay_max; /* max delay */
65 u32 last_delay;
66 u8 low_utilization;/* 0: high; 1: low */
67 u32 low_utilization_start; /* starting time of low utilization detection*/
68 u32 epoch_start; /* beginning of an epoch */ 57 u32 epoch_start; /* beginning of an epoch */
69#define ACK_RATIO_SHIFT 4 58#define ACK_RATIO_SHIFT 4
70 u32 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ 59 u32 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */
@@ -77,11 +66,6 @@ static inline void bictcp_reset(struct bictcp *ca)
77 ca->loss_cwnd = 0; 66 ca->loss_cwnd = 0;
78 ca->last_cwnd = 0; 67 ca->last_cwnd = 0;
79 ca->last_time = 0; 68 ca->last_time = 0;
80 ca->delay_min = 0;
81 ca->delay_max = 0;
82 ca->last_delay = 0;
83 ca->low_utilization = 0;
84 ca->low_utilization_start = 0;
85 ca->epoch_start = 0; 69 ca->epoch_start = 0;
86 ca->delayed_ack = 2 << ACK_RATIO_SHIFT; 70 ca->delayed_ack = 2 << ACK_RATIO_SHIFT;
87} 71}
@@ -143,8 +127,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
143 } 127 }
144 128
145 /* if in slow start or link utilization is very low */ 129 /* if in slow start or link utilization is very low */
146 if ( ca->loss_cwnd == 0 || 130 if (ca->loss_cwnd == 0) {
147 (cwnd > ca->loss_cwnd && ca->low_utilization)) {
148 if (ca->cnt > 20) /* increase cwnd 5% per RTT */ 131 if (ca->cnt > 20) /* increase cwnd 5% per RTT */
149 ca->cnt = 20; 132 ca->cnt = 20;
150 } 133 }
@@ -154,69 +137,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
154 ca->cnt = 1; 137 ca->cnt = 1;
155} 138}
156 139
157
158/* Detect low utilization in congestion avoidance */
159static inline void bictcp_low_utilization(struct sock *sk, int flag)
160{
161 const struct tcp_sock *tp = tcp_sk(sk);
162 struct bictcp *ca = inet_csk_ca(sk);
163 u32 dist, delay;
164
165 /* No time stamp */
166 if (!(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) ||
167 /* Discard delay samples right after fast recovery */
168 tcp_time_stamp < ca->epoch_start + HZ ||
169 /* this delay samples may not be accurate */
170 flag == 0) {
171 ca->last_delay = 0;
172 goto notlow;
173 }
174
175 delay = ca->last_delay<<3; /* use the same scale as tp->srtt*/
176 ca->last_delay = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
177 if (delay == 0) /* no previous delay sample */
178 goto notlow;
179
180 /* first time call or link delay decreases */
181 if (ca->delay_min == 0 || ca->delay_min > delay) {
182 ca->delay_min = ca->delay_max = delay;
183 goto notlow;
184 }
185
186 if (ca->delay_max < delay)
187 ca->delay_max = delay;
188
189 /* utilization is low, if avg delay < dist*threshold
190 for checking_period time */
191 dist = ca->delay_max - ca->delay_min;
192 if (dist <= ca->delay_min>>6 ||
193 tp->srtt - ca->delay_min >= (dist*low_utilization_threshold)>>10)
194 goto notlow;
195
196 if (ca->low_utilization_start == 0) {
197 ca->low_utilization = 0;
198 ca->low_utilization_start = tcp_time_stamp;
199 } else if ((s32)(tcp_time_stamp - ca->low_utilization_start)
200 > low_utilization_period*HZ) {
201 ca->low_utilization = 1;
202 }
203
204 return;
205
206 notlow:
207 ca->low_utilization = 0;
208 ca->low_utilization_start = 0;
209
210}
211
212static void bictcp_cong_avoid(struct sock *sk, u32 ack, 140static void bictcp_cong_avoid(struct sock *sk, u32 ack,
213 u32 seq_rtt, u32 in_flight, int data_acked) 141 u32 seq_rtt, u32 in_flight, int data_acked)
214{ 142{
215 struct tcp_sock *tp = tcp_sk(sk); 143 struct tcp_sock *tp = tcp_sk(sk);
216 struct bictcp *ca = inet_csk_ca(sk); 144 struct bictcp *ca = inet_csk_ca(sk);
217 145
218 bictcp_low_utilization(sk, data_acked);
219
220 if (!tcp_is_cwnd_limited(sk, in_flight)) 146 if (!tcp_is_cwnd_limited(sk, in_flight))
221 return; 147 return;
222 148
@@ -249,11 +175,6 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk)
249 175
250 ca->epoch_start = 0; /* end of epoch */ 176 ca->epoch_start = 0; /* end of epoch */
251 177
252 /* in case of wrong delay_max*/
253 if (ca->delay_min > 0 && ca->delay_max > ca->delay_min)
254 ca->delay_max = ca->delay_min
255 + ((ca->delay_max - ca->delay_min)* 90) / 100;
256
257 /* Wmax and fast convergence */ 178 /* Wmax and fast convergence */
258 if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence) 179 if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
259 ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta)) 180 ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
@@ -289,14 +210,14 @@ static void bictcp_state(struct sock *sk, u8 new_state)
289 bictcp_reset(inet_csk_ca(sk)); 210 bictcp_reset(inet_csk_ca(sk));
290} 211}
291 212
292/* Track delayed acknowledgement ratio using sliding window 213/* Track delayed acknowledgment ratio using sliding window
293 * ratio = (15*ratio + sample) / 16 214 * ratio = (15*ratio + sample) / 16
294 */ 215 */
295static void bictcp_acked(struct sock *sk, u32 cnt) 216static void bictcp_acked(struct sock *sk, u32 cnt)
296{ 217{
297 const struct inet_connection_sock *icsk = inet_csk(sk); 218 const struct inet_connection_sock *icsk = inet_csk(sk);
298 219
299 if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) { 220 if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) {
300 struct bictcp *ca = inet_csk_ca(sk); 221 struct bictcp *ca = inet_csk_ca(sk);
301 cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; 222 cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT;
302 ca->delayed_ack += cnt; 223 ca->delayed_ack += cnt;
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index c7cc62c8dc12..e688c687d62d 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -174,6 +174,34 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
174 return err; 174 return err;
175} 175}
176 176
177
178/*
179 * Linear increase during slow start
180 */
181void tcp_slow_start(struct tcp_sock *tp)
182{
183 if (sysctl_tcp_abc) {
184 /* RFC3465: Slow Start
185 * TCP sender SHOULD increase cwnd by the number of
186 * previously unacknowledged bytes ACKed by each incoming
187 * acknowledgment, provided the increase is not more than L
188 */
189 if (tp->bytes_acked < tp->mss_cache)
190 return;
191
192 /* We MAY increase by 2 if discovered delayed ack */
193 if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) {
194 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
195 tp->snd_cwnd++;
196 }
197 }
198 tp->bytes_acked = 0;
199
200 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
201 tp->snd_cwnd++;
202}
203EXPORT_SYMBOL_GPL(tcp_slow_start);
204
177/* 205/*
178 * TCP Reno congestion control 206 * TCP Reno congestion control
179 * This is special case used for fallback as well. 207 * This is special case used for fallback as well.
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
new file mode 100644
index 000000000000..31a4986dfbf7
--- /dev/null
+++ b/net/ipv4/tcp_cubic.c
@@ -0,0 +1,411 @@
1/*
2 * TCP CUBIC: Binary Increase Congestion control for TCP v2.0
3 *
4 * This is from the implementation of CUBIC TCP in
5 * Injong Rhee, Lisong Xu.
6 * "CUBIC: A New TCP-Friendly High-Speed TCP Variant"
7 * in PFLDnet 2005
8 * Available from:
9 * http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf
10 *
11 * Unless CUBIC is enabled and congestion window is large
12 * this behaves the same as the original Reno.
13 */
14
15#include <linux/config.h>
16#include <linux/mm.h>
17#include <linux/module.h>
18#include <net/tcp.h>
19#include <asm/div64.h>
20
21#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation
22 * max_cwnd = snd_cwnd * beta
23 */
24#define BICTCP_B 4 /*
25 * In binary search,
26 * go to point (max+min)/N
27 */
28#define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */
29
30static int fast_convergence = 1;
31static int max_increment = 16;
32static int beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */
33static int initial_ssthresh = 100;
34static int bic_scale = 41;
35static int tcp_friendliness = 1;
36
37static u32 cube_rtt_scale;
38static u32 beta_scale;
39static u64 cube_factor;
40
41/* Note parameters that are used for precomputing scale factors are read-only */
42module_param(fast_convergence, int, 0644);
43MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence");
44module_param(max_increment, int, 0644);
45MODULE_PARM_DESC(max_increment, "Limit on increment allowed during binary search");
46module_param(beta, int, 0444);
47MODULE_PARM_DESC(beta, "beta for multiplicative increase");
48module_param(initial_ssthresh, int, 0644);
49MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
50module_param(bic_scale, int, 0444);
51MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)");
52module_param(tcp_friendliness, int, 0644);
53MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
54
55#include <asm/div64.h>
56
57/* BIC TCP Parameters */
58struct bictcp {
59 u32 cnt; /* increase cwnd by 1 after ACKs */
60 u32 last_max_cwnd; /* last maximum snd_cwnd */
61 u32 loss_cwnd; /* congestion window at last loss */
62 u32 last_cwnd; /* the last snd_cwnd */
63 u32 last_time; /* time when updated last_cwnd */
64 u32 bic_origin_point;/* origin point of bic function */
65 u32 bic_K; /* time to origin point from the beginning of the current epoch */
66 u32 delay_min; /* min delay */
67 u32 epoch_start; /* beginning of an epoch */
68 u32 ack_cnt; /* number of acks */
69 u32 tcp_cwnd; /* estimated tcp cwnd */
70#define ACK_RATIO_SHIFT 4
71 u32 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */
72};
73
74static inline void bictcp_reset(struct bictcp *ca)
75{
76 ca->cnt = 0;
77 ca->last_max_cwnd = 0;
78 ca->loss_cwnd = 0;
79 ca->last_cwnd = 0;
80 ca->last_time = 0;
81 ca->bic_origin_point = 0;
82 ca->bic_K = 0;
83 ca->delay_min = 0;
84 ca->epoch_start = 0;
85 ca->delayed_ack = 2 << ACK_RATIO_SHIFT;
86 ca->ack_cnt = 0;
87 ca->tcp_cwnd = 0;
88}
89
90static void bictcp_init(struct sock *sk)
91{
92 bictcp_reset(inet_csk_ca(sk));
93 if (initial_ssthresh)
94 tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
95}
96
97/* 64bit divisor, dividend and result. dynamic precision */
98static inline u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
99{
100 u_int32_t d = divisor;
101
102 if (divisor > 0xffffffffULL) {
103 unsigned int shift = fls(divisor >> 32);
104
105 d = divisor >> shift;
106 dividend >>= shift;
107 }
108
109 /* avoid 64 bit division if possible */
110 if (dividend >> 32)
111 do_div(dividend, d);
112 else
113 dividend = (uint32_t) dividend / d;
114
115 return dividend;
116}
117
118/*
119 * calculate the cubic root of x using Newton-Raphson
120 */
121static u32 cubic_root(u64 a)
122{
123 u32 x, x1;
124
125 /* Initial estimate is based on:
126 * cbrt(x) = exp(log(x) / 3)
127 */
128 x = 1u << (fls64(a)/3);
129
130 /*
131 * Iteration based on:
132 * 2
133 * x = ( 2 * x + a / x ) / 3
134 * k+1 k k
135 */
136 do {
137 x1 = x;
138 x = (2 * x + (uint32_t) div64_64(a, x*x)) / 3;
139 } while (abs(x1 - x) > 1);
140
141 return x;
142}
143
144/*
145 * Compute congestion window to use.
146 */
147static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
148{
149 u64 offs;
150 u32 delta, t, bic_target, min_cnt, max_cnt;
151
152 ca->ack_cnt++; /* count the number of ACKs */
153
154 if (ca->last_cwnd == cwnd &&
155 (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32)
156 return;
157
158 ca->last_cwnd = cwnd;
159 ca->last_time = tcp_time_stamp;
160
161 if (ca->epoch_start == 0) {
162 ca->epoch_start = tcp_time_stamp; /* record the beginning of an epoch */
163 ca->ack_cnt = 1; /* start counting */
164 ca->tcp_cwnd = cwnd; /* syn with cubic */
165
166 if (ca->last_max_cwnd <= cwnd) {
167 ca->bic_K = 0;
168 ca->bic_origin_point = cwnd;
169 } else {
170 /* Compute new K based on
171 * (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ)
172 */
173 ca->bic_K = cubic_root(cube_factor
174 * (ca->last_max_cwnd - cwnd));
175 ca->bic_origin_point = ca->last_max_cwnd;
176 }
177 }
178
179 /* cubic function - calc*/
180 /* calculate c * time^3 / rtt,
181 * while considering overflow in calculation of time^3
182 * (so time^3 is done by using 64 bit)
183 * and without the support of division of 64bit numbers
184 * (so all divisions are done by using 32 bit)
185 * also NOTE the unit of those variables
186 * time = (t - K) / 2^bictcp_HZ
187 * c = bic_scale >> 10
188 * rtt = (srtt >> 3) / HZ
189 * !!! The following code does not have overflow problems,
190 * if the cwnd < 1 million packets !!!
191 */
192
193 /* change the unit from HZ to bictcp_HZ */
194 t = ((tcp_time_stamp + ca->delay_min - ca->epoch_start)
195 << BICTCP_HZ) / HZ;
196
197 if (t < ca->bic_K) /* t - K */
198 offs = ca->bic_K - t;
199 else
200 offs = t - ca->bic_K;
201
202 /* c/rtt * (t-K)^3 */
203 delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ);
204 if (t < ca->bic_K) /* below origin*/
205 bic_target = ca->bic_origin_point - delta;
206 else /* above origin*/
207 bic_target = ca->bic_origin_point + delta;
208
209 /* cubic function - calc bictcp_cnt*/
210 if (bic_target > cwnd) {
211 ca->cnt = cwnd / (bic_target - cwnd);
212 } else {
213 ca->cnt = 100 * cwnd; /* very small increment*/
214 }
215
216 if (ca->delay_min > 0) {
217 /* max increment = Smax * rtt / 0.1 */
218 min_cnt = (cwnd * HZ * 8)/(10 * max_increment * ca->delay_min);
219 if (ca->cnt < min_cnt)
220 ca->cnt = min_cnt;
221 }
222
223 /* slow start and low utilization */
224 if (ca->loss_cwnd == 0) /* could be aggressive in slow start */
225 ca->cnt = 50;
226
227 /* TCP Friendly */
228 if (tcp_friendliness) {
229 u32 scale = beta_scale;
230 delta = (cwnd * scale) >> 3;
231 while (ca->ack_cnt > delta) { /* update tcp cwnd */
232 ca->ack_cnt -= delta;
233 ca->tcp_cwnd++;
234 }
235
236 if (ca->tcp_cwnd > cwnd){ /* if bic is slower than tcp */
237 delta = ca->tcp_cwnd - cwnd;
238 max_cnt = cwnd / delta;
239 if (ca->cnt > max_cnt)
240 ca->cnt = max_cnt;
241 }
242 }
243
244 ca->cnt = (ca->cnt << ACK_RATIO_SHIFT) / ca->delayed_ack;
245 if (ca->cnt == 0) /* cannot be zero */
246 ca->cnt = 1;
247}
248
249
250/* Keep track of minimum rtt */
251static inline void measure_delay(struct sock *sk)
252{
253 const struct tcp_sock *tp = tcp_sk(sk);
254 struct bictcp *ca = inet_csk_ca(sk);
255 u32 delay;
256
257 /* No time stamp */
258 if (!(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) ||
259 /* Discard delay samples right after fast recovery */
260 (s32)(tcp_time_stamp - ca->epoch_start) < HZ)
261 return;
262
263 delay = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
264 if (delay == 0)
265 delay = 1;
266
267 /* first time call or link delay decreases */
268 if (ca->delay_min == 0 || ca->delay_min > delay)
269 ca->delay_min = delay;
270}
271
272static void bictcp_cong_avoid(struct sock *sk, u32 ack,
273 u32 seq_rtt, u32 in_flight, int data_acked)
274{
275 struct tcp_sock *tp = tcp_sk(sk);
276 struct bictcp *ca = inet_csk_ca(sk);
277
278 if (data_acked)
279 measure_delay(sk);
280
281 if (!tcp_is_cwnd_limited(sk, in_flight))
282 return;
283
284 if (tp->snd_cwnd <= tp->snd_ssthresh)
285 tcp_slow_start(tp);
286 else {
287 bictcp_update(ca, tp->snd_cwnd);
288
289 /* In dangerous area, increase slowly.
290 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
291 */
292 if (tp->snd_cwnd_cnt >= ca->cnt) {
293 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
294 tp->snd_cwnd++;
295 tp->snd_cwnd_cnt = 0;
296 } else
297 tp->snd_cwnd_cnt++;
298 }
299
300}
301
302static u32 bictcp_recalc_ssthresh(struct sock *sk)
303{
304 const struct tcp_sock *tp = tcp_sk(sk);
305 struct bictcp *ca = inet_csk_ca(sk);
306
307 ca->epoch_start = 0; /* end of epoch */
308
309 /* Wmax and fast convergence */
310 if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
311 ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
312 / (2 * BICTCP_BETA_SCALE);
313 else
314 ca->last_max_cwnd = tp->snd_cwnd;
315
316 ca->loss_cwnd = tp->snd_cwnd;
317
318 return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
319}
320
321static u32 bictcp_undo_cwnd(struct sock *sk)
322{
323 struct bictcp *ca = inet_csk_ca(sk);
324
325 return max(tcp_sk(sk)->snd_cwnd, ca->last_max_cwnd);
326}
327
328static u32 bictcp_min_cwnd(struct sock *sk)
329{
330 return tcp_sk(sk)->snd_ssthresh;
331}
332
333static void bictcp_state(struct sock *sk, u8 new_state)
334{
335 if (new_state == TCP_CA_Loss)
336 bictcp_reset(inet_csk_ca(sk));
337}
338
339/* Track delayed acknowledgment ratio using sliding window
340 * ratio = (15*ratio + sample) / 16
341 */
342static void bictcp_acked(struct sock *sk, u32 cnt)
343{
344 const struct inet_connection_sock *icsk = inet_csk(sk);
345
346 if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) {
347 struct bictcp *ca = inet_csk_ca(sk);
348 cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT;
349 ca->delayed_ack += cnt;
350 }
351}
352
353
354static struct tcp_congestion_ops cubictcp = {
355 .init = bictcp_init,
356 .ssthresh = bictcp_recalc_ssthresh,
357 .cong_avoid = bictcp_cong_avoid,
358 .set_state = bictcp_state,
359 .undo_cwnd = bictcp_undo_cwnd,
360 .min_cwnd = bictcp_min_cwnd,
361 .pkts_acked = bictcp_acked,
362 .owner = THIS_MODULE,
363 .name = "cubic",
364};
365
366static int __init cubictcp_register(void)
367{
368 BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
369
370 /* Precompute a bunch of the scaling factors that are used per-packet
371 * based on SRTT of 100ms
372 */
373
374 beta_scale = 8*(BICTCP_BETA_SCALE+beta)/ 3 / (BICTCP_BETA_SCALE - beta);
375
376 cube_rtt_scale = (bic_scale << 3) / 10; /* 1024*c/rtt */
377
378 /* calculate the "K" for (wmax-cwnd) = c/rtt * K^3
379 * so K = cubic_root( (wmax-cwnd)*rtt/c )
380 * the unit of K is bictcp_HZ=2^10, not HZ
381 *
382 * c = bic_scale >> 10
383 * rtt = 100ms
384 *
385 * the following code has been designed and tested for
386 * cwnd < 1 million packets
387 * RTT < 100 seconds
388 * HZ < 1,000,00 (corresponding to 10 nano-second)
389 */
390
391 /* 1/c * 2^(3*bictcp_HZ) * srtt */
392 cube_factor = 1ull << (10+3*BICTCP_HZ); /* 2^40 */
393
394 /* divide by bic_scale and by constant Srtt (100ms) */
395 do_div(cube_factor, bic_scale * 10);
396
397 return tcp_register_congestion_control(&cubictcp);
398}
399
400static void __exit cubictcp_unregister(void)
401{
402 tcp_unregister_congestion_control(&cubictcp);
403}
404
405module_init(cubictcp_register);
406module_exit(cubictcp_unregister);
407
408MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger");
409MODULE_LICENSE("GPL");
410MODULE_DESCRIPTION("CUBIC TCP");
411MODULE_VERSION("2.0");
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bf2e23086bce..a97ed5416c28 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -115,8 +115,8 @@ int sysctl_tcp_abc = 1;
115/* Adapt the MSS value used to make delayed ack decision to the 115/* Adapt the MSS value used to make delayed ack decision to the
116 * real world. 116 * real world.
117 */ 117 */
118static inline void tcp_measure_rcv_mss(struct sock *sk, 118static void tcp_measure_rcv_mss(struct sock *sk,
119 const struct sk_buff *skb) 119 const struct sk_buff *skb)
120{ 120{
121 struct inet_connection_sock *icsk = inet_csk(sk); 121 struct inet_connection_sock *icsk = inet_csk(sk);
122 const unsigned int lss = icsk->icsk_ack.last_seg_size; 122 const unsigned int lss = icsk->icsk_ack.last_seg_size;
@@ -246,8 +246,8 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
246 return 0; 246 return 0;
247} 247}
248 248
249static inline void tcp_grow_window(struct sock *sk, struct tcp_sock *tp, 249static void tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
250 struct sk_buff *skb) 250 struct sk_buff *skb)
251{ 251{
252 /* Check #1 */ 252 /* Check #1 */
253 if (tp->rcv_ssthresh < tp->window_clamp && 253 if (tp->rcv_ssthresh < tp->window_clamp &&
@@ -341,6 +341,26 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
341 tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss); 341 tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss);
342} 342}
343 343
344
345/* Initialize RCV_MSS value.
346 * RCV_MSS is our guess about the MSS used by the peer.
347 * We have no direct information about the MSS.
348 * It's better to underestimate the RCV_MSS than to overestimate it.
349 * Overestimation makes us ACK less frequently than needed.
350 * Underestimation is easier to detect and fix in tcp_measure_rcv_mss().
351 */
352void tcp_initialize_rcv_mss(struct sock *sk)
353{
354 struct tcp_sock *tp = tcp_sk(sk);
355 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
356
357 hint = min(hint, tp->rcv_wnd/2);
358 hint = min(hint, TCP_MIN_RCVMSS);
359 hint = max(hint, TCP_MIN_MSS);
360
361 inet_csk(sk)->icsk_ack.rcv_mss = hint;
362}
363
344/* Receiver "autotuning" code. 364/* Receiver "autotuning" code.
345 * 365 *
346 * The algorithm for RTT estimation w/o timestamps is based on 366 * The algorithm for RTT estimation w/o timestamps is based on
@@ -735,6 +755,27 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
735 return min_t(__u32, cwnd, tp->snd_cwnd_clamp); 755 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
736} 756}
737 757
758/* Set slow start threshold and cwnd not falling to slow start */
759void tcp_enter_cwr(struct sock *sk)
760{
761 struct tcp_sock *tp = tcp_sk(sk);
762
763 tp->prior_ssthresh = 0;
764 tp->bytes_acked = 0;
765 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
766 tp->undo_marker = 0;
767 tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
768 tp->snd_cwnd = min(tp->snd_cwnd,
769 tcp_packets_in_flight(tp) + 1U);
770 tp->snd_cwnd_cnt = 0;
771 tp->high_seq = tp->snd_nxt;
772 tp->snd_cwnd_stamp = tcp_time_stamp;
773 TCP_ECN_queue_cwr(tp);
774
775 tcp_set_ca_state(sk, TCP_CA_CWR);
776 }
777}
778
738/* Initialize metrics on socket. */ 779/* Initialize metrics on socket. */
739 780
740static void tcp_init_metrics(struct sock *sk) 781static void tcp_init_metrics(struct sock *sk)
@@ -2070,8 +2111,8 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
2070 tcp_ack_no_tstamp(sk, seq_rtt, flag); 2111 tcp_ack_no_tstamp(sk, seq_rtt, flag);
2071} 2112}
2072 2113
2073static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, 2114static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
2074 u32 in_flight, int good) 2115 u32 in_flight, int good)
2075{ 2116{
2076 const struct inet_connection_sock *icsk = inet_csk(sk); 2117 const struct inet_connection_sock *icsk = inet_csk(sk);
2077 icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good); 2118 icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good);
@@ -2082,7 +2123,7 @@ static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
2082 * RFC2988 recommends to restart timer to now+rto. 2123 * RFC2988 recommends to restart timer to now+rto.
2083 */ 2124 */
2084 2125
2085static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp) 2126static void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp)
2086{ 2127{
2087 if (!tp->packets_out) { 2128 if (!tp->packets_out) {
2088 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); 2129 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
@@ -2147,7 +2188,7 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
2147 return acked; 2188 return acked;
2148} 2189}
2149 2190
2150static inline u32 tcp_usrtt(const struct sk_buff *skb) 2191static u32 tcp_usrtt(const struct sk_buff *skb)
2151{ 2192{
2152 struct timeval tv, now; 2193 struct timeval tv, now;
2153 2194
@@ -2342,7 +2383,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
2342 2383
2343 if (nwin > tp->max_window) { 2384 if (nwin > tp->max_window) {
2344 tp->max_window = nwin; 2385 tp->max_window = nwin;
2345 tcp_sync_mss(sk, tp->pmtu_cookie); 2386 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
2346 } 2387 }
2347 } 2388 }
2348 } 2389 }
@@ -2583,8 +2624,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
2583/* Fast parse options. This hopes to only see timestamps. 2624/* Fast parse options. This hopes to only see timestamps.
2584 * If it is wrong it falls back on tcp_parse_options(). 2625 * If it is wrong it falls back on tcp_parse_options().
2585 */ 2626 */
2586static inline int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, 2627static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
2587 struct tcp_sock *tp) 2628 struct tcp_sock *tp)
2588{ 2629{
2589 if (th->doff == sizeof(struct tcphdr)>>2) { 2630 if (th->doff == sizeof(struct tcphdr)>>2) {
2590 tp->rx_opt.saw_tstamp = 0; 2631 tp->rx_opt.saw_tstamp = 0;
@@ -2804,8 +2845,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
2804 } 2845 }
2805} 2846}
2806 2847
2807static __inline__ int 2848static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq)
2808tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq)
2809{ 2849{
2810 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) { 2850 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
2811 if (before(seq, sp->start_seq)) 2851 if (before(seq, sp->start_seq))
@@ -2817,7 +2857,7 @@ tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq)
2817 return 0; 2857 return 0;
2818} 2858}
2819 2859
2820static inline void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq) 2860static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
2821{ 2861{
2822 if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) { 2862 if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) {
2823 if (before(seq, tp->rcv_nxt)) 2863 if (before(seq, tp->rcv_nxt))
@@ -2832,7 +2872,7 @@ static inline void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
2832 } 2872 }
2833} 2873}
2834 2874
2835static inline void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq) 2875static void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq)
2836{ 2876{
2837 if (!tp->rx_opt.dsack) 2877 if (!tp->rx_opt.dsack)
2838 tcp_dsack_set(tp, seq, end_seq); 2878 tcp_dsack_set(tp, seq, end_seq);
@@ -2890,7 +2930,7 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
2890 } 2930 }
2891} 2931}
2892 2932
2893static __inline__ void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2) 2933static inline void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2)
2894{ 2934{
2895 __u32 tmp; 2935 __u32 tmp;
2896 2936
@@ -3307,7 +3347,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
3307 int offset = start - TCP_SKB_CB(skb)->seq; 3347 int offset = start - TCP_SKB_CB(skb)->seq;
3308 int size = TCP_SKB_CB(skb)->end_seq - start; 3348 int size = TCP_SKB_CB(skb)->end_seq - start;
3309 3349
3310 if (offset < 0) BUG(); 3350 BUG_ON(offset < 0);
3311 if (size > 0) { 3351 if (size > 0) {
3312 size = min(copy, size); 3352 size = min(copy, size);
3313 if (skb_copy_bits(skb, offset, skb_put(nskb, size), size)) 3353 if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
@@ -3455,7 +3495,7 @@ void tcp_cwnd_application_limited(struct sock *sk)
3455 tp->snd_cwnd_stamp = tcp_time_stamp; 3495 tp->snd_cwnd_stamp = tcp_time_stamp;
3456} 3496}
3457 3497
3458static inline int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp) 3498static int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp)
3459{ 3499{
3460 /* If the user specified a specific send buffer setting, do 3500 /* If the user specified a specific send buffer setting, do
3461 * not modify it. 3501 * not modify it.
@@ -3502,7 +3542,7 @@ static void tcp_new_space(struct sock *sk)
3502 sk->sk_write_space(sk); 3542 sk->sk_write_space(sk);
3503} 3543}
3504 3544
3505static inline void tcp_check_space(struct sock *sk) 3545static void tcp_check_space(struct sock *sk)
3506{ 3546{
3507 if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) { 3547 if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
3508 sock_reset_flag(sk, SOCK_QUEUE_SHRUNK); 3548 sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
@@ -3512,7 +3552,7 @@ static inline void tcp_check_space(struct sock *sk)
3512 } 3552 }
3513} 3553}
3514 3554
3515static __inline__ void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp) 3555static inline void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp)
3516{ 3556{
3517 tcp_push_pending_frames(sk, tp); 3557 tcp_push_pending_frames(sk, tp);
3518 tcp_check_space(sk); 3558 tcp_check_space(sk);
@@ -3544,7 +3584,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
3544 } 3584 }
3545} 3585}
3546 3586
3547static __inline__ void tcp_ack_snd_check(struct sock *sk) 3587static inline void tcp_ack_snd_check(struct sock *sk)
3548{ 3588{
3549 if (!inet_csk_ack_scheduled(sk)) { 3589 if (!inet_csk_ack_scheduled(sk)) {
3550 /* We sent a data segment already. */ 3590 /* We sent a data segment already. */
@@ -3692,8 +3732,7 @@ static int __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
3692 return result; 3732 return result;
3693} 3733}
3694 3734
3695static __inline__ int 3735static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
3696tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
3697{ 3736{
3698 return skb->ip_summed != CHECKSUM_UNNECESSARY && 3737 return skb->ip_summed != CHECKSUM_UNNECESSARY &&
3699 __tcp_checksum_complete_user(sk, skb); 3738 __tcp_checksum_complete_user(sk, skb);
@@ -3967,12 +4006,12 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
3967 struct tcphdr *th, unsigned len) 4006 struct tcphdr *th, unsigned len)
3968{ 4007{
3969 struct tcp_sock *tp = tcp_sk(sk); 4008 struct tcp_sock *tp = tcp_sk(sk);
4009 struct inet_connection_sock *icsk = inet_csk(sk);
3970 int saved_clamp = tp->rx_opt.mss_clamp; 4010 int saved_clamp = tp->rx_opt.mss_clamp;
3971 4011
3972 tcp_parse_options(skb, &tp->rx_opt, 0); 4012 tcp_parse_options(skb, &tp->rx_opt, 0);
3973 4013
3974 if (th->ack) { 4014 if (th->ack) {
3975 struct inet_connection_sock *icsk;
3976 /* rfc793: 4015 /* rfc793:
3977 * "If the state is SYN-SENT then 4016 * "If the state is SYN-SENT then
3978 * first check the ACK bit 4017 * first check the ACK bit
@@ -4061,7 +4100,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
4061 if (tp->rx_opt.sack_ok && sysctl_tcp_fack) 4100 if (tp->rx_opt.sack_ok && sysctl_tcp_fack)
4062 tp->rx_opt.sack_ok |= 2; 4101 tp->rx_opt.sack_ok |= 2;
4063 4102
4064 tcp_sync_mss(sk, tp->pmtu_cookie); 4103 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
4065 tcp_initialize_rcv_mss(sk); 4104 tcp_initialize_rcv_mss(sk);
4066 4105
4067 /* Remember, tcp_poll() does not lock socket! 4106 /* Remember, tcp_poll() does not lock socket!
@@ -4072,7 +4111,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
4072 tcp_set_state(sk, TCP_ESTABLISHED); 4111 tcp_set_state(sk, TCP_ESTABLISHED);
4073 4112
4074 /* Make sure socket is routed, for correct metrics. */ 4113 /* Make sure socket is routed, for correct metrics. */
4075 tp->af_specific->rebuild_header(sk); 4114 icsk->icsk_af_ops->rebuild_header(sk);
4076 4115
4077 tcp_init_metrics(sk); 4116 tcp_init_metrics(sk);
4078 4117
@@ -4098,8 +4137,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
4098 sk_wake_async(sk, 0, POLL_OUT); 4137 sk_wake_async(sk, 0, POLL_OUT);
4099 } 4138 }
4100 4139
4101 icsk = inet_csk(sk);
4102
4103 if (sk->sk_write_pending || 4140 if (sk->sk_write_pending ||
4104 icsk->icsk_accept_queue.rskq_defer_accept || 4141 icsk->icsk_accept_queue.rskq_defer_accept ||
4105 icsk->icsk_ack.pingpong) { 4142 icsk->icsk_ack.pingpong) {
@@ -4173,7 +4210,7 @@ discard:
4173 if (tp->ecn_flags&TCP_ECN_OK) 4210 if (tp->ecn_flags&TCP_ECN_OK)
4174 sock_set_flag(sk, SOCK_NO_LARGESEND); 4211 sock_set_flag(sk, SOCK_NO_LARGESEND);
4175 4212
4176 tcp_sync_mss(sk, tp->pmtu_cookie); 4213 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
4177 tcp_initialize_rcv_mss(sk); 4214 tcp_initialize_rcv_mss(sk);
4178 4215
4179 4216
@@ -4220,6 +4257,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
4220 struct tcphdr *th, unsigned len) 4257 struct tcphdr *th, unsigned len)
4221{ 4258{
4222 struct tcp_sock *tp = tcp_sk(sk); 4259 struct tcp_sock *tp = tcp_sk(sk);
4260 struct inet_connection_sock *icsk = inet_csk(sk);
4223 int queued = 0; 4261 int queued = 0;
4224 4262
4225 tp->rx_opt.saw_tstamp = 0; 4263 tp->rx_opt.saw_tstamp = 0;
@@ -4236,7 +4274,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
4236 goto discard; 4274 goto discard;
4237 4275
4238 if(th->syn) { 4276 if(th->syn) {
4239 if(tp->af_specific->conn_request(sk, skb) < 0) 4277 if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
4240 return 1; 4278 return 1;
4241 4279
4242 /* Now we have several options: In theory there is 4280 /* Now we have several options: In theory there is
@@ -4349,7 +4387,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
4349 /* Make sure socket is routed, for 4387 /* Make sure socket is routed, for
4350 * correct metrics. 4388 * correct metrics.
4351 */ 4389 */
4352 tp->af_specific->rebuild_header(sk); 4390 icsk->icsk_af_ops->rebuild_header(sk);
4353 4391
4354 tcp_init_metrics(sk); 4392 tcp_init_metrics(sk);
4355 4393
@@ -4475,3 +4513,4 @@ EXPORT_SYMBOL(sysctl_tcp_abc);
4475EXPORT_SYMBOL(tcp_parse_options); 4513EXPORT_SYMBOL(tcp_parse_options);
4476EXPORT_SYMBOL(tcp_rcv_established); 4514EXPORT_SYMBOL(tcp_rcv_established);
4477EXPORT_SYMBOL(tcp_rcv_state_process); 4515EXPORT_SYMBOL(tcp_rcv_state_process);
4516EXPORT_SYMBOL(tcp_initialize_rcv_mss);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4d5021e1929b..6ea353907af5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -69,6 +69,7 @@
69#include <net/transp_v6.h> 69#include <net/transp_v6.h>
70#include <net/ipv6.h> 70#include <net/ipv6.h>
71#include <net/inet_common.h> 71#include <net/inet_common.h>
72#include <net/timewait_sock.h>
72#include <net/xfrm.h> 73#include <net/xfrm.h>
73 74
74#include <linux/inet.h> 75#include <linux/inet.h>
@@ -86,8 +87,7 @@ int sysctl_tcp_low_latency;
86/* Socket used for sending RSTs */ 87/* Socket used for sending RSTs */
87static struct socket *tcp_socket; 88static struct socket *tcp_socket;
88 89
89void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, 90void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
90 struct sk_buff *skb);
91 91
92struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { 92struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
93 .lhash_lock = RW_LOCK_UNLOCKED, 93 .lhash_lock = RW_LOCK_UNLOCKED,
@@ -97,7 +97,8 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
97 97
98static int tcp_v4_get_port(struct sock *sk, unsigned short snum) 98static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
99{ 99{
100 return inet_csk_get_port(&tcp_hashinfo, sk, snum); 100 return inet_csk_get_port(&tcp_hashinfo, sk, snum,
101 inet_csk_bind_conflict);
101} 102}
102 103
103static void tcp_v4_hash(struct sock *sk) 104static void tcp_v4_hash(struct sock *sk)
@@ -118,202 +119,38 @@ static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
118 skb->h.th->source); 119 skb->h.th->source);
119} 120}
120 121
121/* called with local bh disabled */ 122int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
122static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
123 struct inet_timewait_sock **twp)
124{ 123{
125 struct inet_sock *inet = inet_sk(sk); 124 const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
126 u32 daddr = inet->rcv_saddr; 125 struct tcp_sock *tp = tcp_sk(sk);
127 u32 saddr = inet->daddr;
128 int dif = sk->sk_bound_dev_if;
129 INET_ADDR_COOKIE(acookie, saddr, daddr)
130 const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
131 unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
132 struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
133 struct sock *sk2;
134 const struct hlist_node *node;
135 struct inet_timewait_sock *tw;
136
137 prefetch(head->chain.first);
138 write_lock(&head->lock);
139
140 /* Check TIME-WAIT sockets first. */
141 sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
142 tw = inet_twsk(sk2);
143
144 if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
145 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
146 struct tcp_sock *tp = tcp_sk(sk);
147
148 /* With PAWS, it is safe from the viewpoint
149 of data integrity. Even without PAWS it
150 is safe provided sequence spaces do not
151 overlap i.e. at data rates <= 80Mbit/sec.
152
153 Actually, the idea is close to VJ's one,
154 only timestamp cache is held not per host,
155 but per port pair and TW bucket is used
156 as state holder.
157 126
158 If TW bucket has been already destroyed we 127 /* With PAWS, it is safe from the viewpoint
159 fall back to VJ's scheme and use initial 128 of data integrity. Even without PAWS it is safe provided sequence
160 timestamp retrieved from peer table. 129 spaces do not overlap i.e. at data rates <= 80Mbit/sec.
161 */
162 if (tcptw->tw_ts_recent_stamp &&
163 (!twp || (sysctl_tcp_tw_reuse &&
164 xtime.tv_sec -
165 tcptw->tw_ts_recent_stamp > 1))) {
166 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
167 if (tp->write_seq == 0)
168 tp->write_seq = 1;
169 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
170 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
171 sock_hold(sk2);
172 goto unique;
173 } else
174 goto not_unique;
175 }
176 }
177 tw = NULL;
178 130
179 /* And established part... */ 131 Actually, the idea is close to VJ's one, only timestamp cache is
180 sk_for_each(sk2, node, &head->chain) { 132 held not per host, but per port pair and TW bucket is used as state
181 if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) 133 holder.
182 goto not_unique;
183 }
184 134
185unique: 135 If TW bucket has been already destroyed we fall back to VJ's scheme
186 /* Must record num and sport now. Otherwise we will see 136 and use initial timestamp retrieved from peer table.
187 * in hash table socket with a funny identity. */ 137 */
188 inet->num = lport; 138 if (tcptw->tw_ts_recent_stamp &&
189 inet->sport = htons(lport); 139 (twp == NULL || (sysctl_tcp_tw_reuse &&
190 sk->sk_hash = hash; 140 xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
191 BUG_TRAP(sk_unhashed(sk)); 141 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
192 __sk_add_node(sk, &head->chain); 142 if (tp->write_seq == 0)
193 sock_prot_inc_use(sk->sk_prot); 143 tp->write_seq = 1;
194 write_unlock(&head->lock); 144 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
195 145 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
196 if (twp) { 146 sock_hold(sktw);
197 *twp = tw; 147 return 1;
198 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
199 } else if (tw) {
200 /* Silly. Should hash-dance instead... */
201 inet_twsk_deschedule(tw, &tcp_death_row);
202 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
203
204 inet_twsk_put(tw);
205 } 148 }
206 149
207 return 0; 150 return 0;
208
209not_unique:
210 write_unlock(&head->lock);
211 return -EADDRNOTAVAIL;
212} 151}
213 152
214static inline u32 connect_port_offset(const struct sock *sk) 153EXPORT_SYMBOL_GPL(tcp_twsk_unique);
215{
216 const struct inet_sock *inet = inet_sk(sk);
217
218 return secure_tcp_port_ephemeral(inet->rcv_saddr, inet->daddr,
219 inet->dport);
220}
221
222/*
223 * Bind a port for a connect operation and hash it.
224 */
225static inline int tcp_v4_hash_connect(struct sock *sk)
226{
227 const unsigned short snum = inet_sk(sk)->num;
228 struct inet_bind_hashbucket *head;
229 struct inet_bind_bucket *tb;
230 int ret;
231
232 if (!snum) {
233 int low = sysctl_local_port_range[0];
234 int high = sysctl_local_port_range[1];
235 int range = high - low;
236 int i;
237 int port;
238 static u32 hint;
239 u32 offset = hint + connect_port_offset(sk);
240 struct hlist_node *node;
241 struct inet_timewait_sock *tw = NULL;
242
243 local_bh_disable();
244 for (i = 1; i <= range; i++) {
245 port = low + (i + offset) % range;
246 head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
247 spin_lock(&head->lock);
248
249 /* Does not bother with rcv_saddr checks,
250 * because the established check is already
251 * unique enough.
252 */
253 inet_bind_bucket_for_each(tb, node, &head->chain) {
254 if (tb->port == port) {
255 BUG_TRAP(!hlist_empty(&tb->owners));
256 if (tb->fastreuse >= 0)
257 goto next_port;
258 if (!__tcp_v4_check_established(sk,
259 port,
260 &tw))
261 goto ok;
262 goto next_port;
263 }
264 }
265
266 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
267 if (!tb) {
268 spin_unlock(&head->lock);
269 break;
270 }
271 tb->fastreuse = -1;
272 goto ok;
273
274 next_port:
275 spin_unlock(&head->lock);
276 }
277 local_bh_enable();
278
279 return -EADDRNOTAVAIL;
280
281ok:
282 hint += i;
283
284 /* Head lock still held and bh's disabled */
285 inet_bind_hash(sk, tb, port);
286 if (sk_unhashed(sk)) {
287 inet_sk(sk)->sport = htons(port);
288 __inet_hash(&tcp_hashinfo, sk, 0);
289 }
290 spin_unlock(&head->lock);
291
292 if (tw) {
293 inet_twsk_deschedule(tw, &tcp_death_row);;
294 inet_twsk_put(tw);
295 }
296
297 ret = 0;
298 goto out;
299 }
300
301 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
302 tb = inet_csk(sk)->icsk_bind_hash;
303 spin_lock_bh(&head->lock);
304 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
305 __inet_hash(&tcp_hashinfo, sk, 0);
306 spin_unlock_bh(&head->lock);
307 return 0;
308 } else {
309 spin_unlock(&head->lock);
310 /* No definite answer... Walk to established hash table */
311 ret = __tcp_v4_check_established(sk, snum, NULL);
312out:
313 local_bh_enable();
314 return ret;
315 }
316}
317 154
318/* This will initiate an outgoing connection. */ 155/* This will initiate an outgoing connection. */
319int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 156int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
@@ -383,9 +220,9 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
383 inet->dport = usin->sin_port; 220 inet->dport = usin->sin_port;
384 inet->daddr = daddr; 221 inet->daddr = daddr;
385 222
386 tp->ext_header_len = 0; 223 inet_csk(sk)->icsk_ext_hdr_len = 0;
387 if (inet->opt) 224 if (inet->opt)
388 tp->ext_header_len = inet->opt->optlen; 225 inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
389 226
390 tp->rx_opt.mss_clamp = 536; 227 tp->rx_opt.mss_clamp = 536;
391 228
@@ -395,7 +232,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
395 * complete initialization after this. 232 * complete initialization after this.
396 */ 233 */
397 tcp_set_state(sk, TCP_SYN_SENT); 234 tcp_set_state(sk, TCP_SYN_SENT);
398 err = tcp_v4_hash_connect(sk); 235 err = inet_hash_connect(&tcp_death_row, sk);
399 if (err) 236 if (err)
400 goto failure; 237 goto failure;
401 238
@@ -433,12 +270,10 @@ failure:
433/* 270/*
434 * This routine does path mtu discovery as defined in RFC1191. 271 * This routine does path mtu discovery as defined in RFC1191.
435 */ 272 */
436static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, 273static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
437 u32 mtu)
438{ 274{
439 struct dst_entry *dst; 275 struct dst_entry *dst;
440 struct inet_sock *inet = inet_sk(sk); 276 struct inet_sock *inet = inet_sk(sk);
441 struct tcp_sock *tp = tcp_sk(sk);
442 277
443 /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs 278 /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
444 * send out by Linux are always <576bytes so they should go through 279 * send out by Linux are always <576bytes so they should go through
@@ -467,7 +302,7 @@ static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph,
467 mtu = dst_mtu(dst); 302 mtu = dst_mtu(dst);
468 303
469 if (inet->pmtudisc != IP_PMTUDISC_DONT && 304 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
470 tp->pmtu_cookie > mtu) { 305 inet_csk(sk)->icsk_pmtu_cookie > mtu) {
471 tcp_sync_mss(sk, mtu); 306 tcp_sync_mss(sk, mtu);
472 307
473 /* Resend the TCP packet because it's 308 /* Resend the TCP packet because it's
@@ -644,10 +479,10 @@ out:
644} 479}
645 480
646/* This routine computes an IPv4 TCP checksum. */ 481/* This routine computes an IPv4 TCP checksum. */
647void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, 482void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
648 struct sk_buff *skb)
649{ 483{
650 struct inet_sock *inet = inet_sk(sk); 484 struct inet_sock *inet = inet_sk(sk);
485 struct tcphdr *th = skb->h.th;
651 486
652 if (skb->ip_summed == CHECKSUM_HW) { 487 if (skb->ip_summed == CHECKSUM_HW) {
653 th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0); 488 th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
@@ -826,7 +661,8 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
826 kfree(inet_rsk(req)->opt); 661 kfree(inet_rsk(req)->opt);
827} 662}
828 663
829static inline void syn_flood_warning(struct sk_buff *skb) 664#ifdef CONFIG_SYN_COOKIES
665static void syn_flood_warning(struct sk_buff *skb)
830{ 666{
831 static unsigned long warntime; 667 static unsigned long warntime;
832 668
@@ -837,12 +673,13 @@ static inline void syn_flood_warning(struct sk_buff *skb)
837 ntohs(skb->h.th->dest)); 673 ntohs(skb->h.th->dest));
838 } 674 }
839} 675}
676#endif
840 677
841/* 678/*
842 * Save and compile IPv4 options into the request_sock if needed. 679 * Save and compile IPv4 options into the request_sock if needed.
843 */ 680 */
844static inline struct ip_options *tcp_v4_save_options(struct sock *sk, 681static struct ip_options *tcp_v4_save_options(struct sock *sk,
845 struct sk_buff *skb) 682 struct sk_buff *skb)
846{ 683{
847 struct ip_options *opt = &(IPCB(skb)->opt); 684 struct ip_options *opt = &(IPCB(skb)->opt);
848 struct ip_options *dopt = NULL; 685 struct ip_options *dopt = NULL;
@@ -869,6 +706,11 @@ struct request_sock_ops tcp_request_sock_ops = {
869 .send_reset = tcp_v4_send_reset, 706 .send_reset = tcp_v4_send_reset,
870}; 707};
871 708
709static struct timewait_sock_ops tcp_timewait_sock_ops = {
710 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
711 .twsk_unique = tcp_twsk_unique,
712};
713
872int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 714int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
873{ 715{
874 struct inet_request_sock *ireq; 716 struct inet_request_sock *ireq;
@@ -1053,9 +895,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1053 ireq->opt = NULL; 895 ireq->opt = NULL;
1054 newinet->mc_index = inet_iif(skb); 896 newinet->mc_index = inet_iif(skb);
1055 newinet->mc_ttl = skb->nh.iph->ttl; 897 newinet->mc_ttl = skb->nh.iph->ttl;
1056 newtp->ext_header_len = 0; 898 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1057 if (newinet->opt) 899 if (newinet->opt)
1058 newtp->ext_header_len = newinet->opt->optlen; 900 inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
1059 newinet->id = newtp->write_seq ^ jiffies; 901 newinet->id = newtp->write_seq ^ jiffies;
1060 902
1061 tcp_sync_mss(newsk, dst_mtu(dst)); 903 tcp_sync_mss(newsk, dst_mtu(dst));
@@ -1238,6 +1080,7 @@ process:
1238 1080
1239 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) 1081 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1240 goto discard_and_relse; 1082 goto discard_and_relse;
1083 nf_reset(skb);
1241 1084
1242 if (sk_filter(sk, skb, 0)) 1085 if (sk_filter(sk, skb, 0))
1243 goto discard_and_relse; 1086 goto discard_and_relse;
@@ -1314,16 +1157,6 @@ do_time_wait:
1314 goto discard_it; 1157 goto discard_it;
1315} 1158}
1316 1159
1317static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1318{
1319 struct sockaddr_in *sin = (struct sockaddr_in *) uaddr;
1320 struct inet_sock *inet = inet_sk(sk);
1321
1322 sin->sin_family = AF_INET;
1323 sin->sin_addr.s_addr = inet->daddr;
1324 sin->sin_port = inet->dport;
1325}
1326
1327/* VJ's idea. Save last timestamp seen from this destination 1160/* VJ's idea. Save last timestamp seen from this destination
1328 * and hold it at least for normal timewait interval to use for duplicate 1161 * and hold it at least for normal timewait interval to use for duplicate
1329 * segment detection in subsequent connections, before they enter synchronized 1162 * segment detection in subsequent connections, before they enter synchronized
@@ -1382,7 +1215,7 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1382 return 0; 1215 return 0;
1383} 1216}
1384 1217
1385struct tcp_func ipv4_specific = { 1218struct inet_connection_sock_af_ops ipv4_specific = {
1386 .queue_xmit = ip_queue_xmit, 1219 .queue_xmit = ip_queue_xmit,
1387 .send_check = tcp_v4_send_check, 1220 .send_check = tcp_v4_send_check,
1388 .rebuild_header = inet_sk_rebuild_header, 1221 .rebuild_header = inet_sk_rebuild_header,
@@ -1392,7 +1225,7 @@ struct tcp_func ipv4_specific = {
1392 .net_header_len = sizeof(struct iphdr), 1225 .net_header_len = sizeof(struct iphdr),
1393 .setsockopt = ip_setsockopt, 1226 .setsockopt = ip_setsockopt,
1394 .getsockopt = ip_getsockopt, 1227 .getsockopt = ip_getsockopt,
1395 .addr2sockaddr = v4_addr2sockaddr, 1228 .addr2sockaddr = inet_csk_addr2sockaddr,
1396 .sockaddr_len = sizeof(struct sockaddr_in), 1229 .sockaddr_len = sizeof(struct sockaddr_in),
1397}; 1230};
1398 1231
@@ -1433,7 +1266,8 @@ static int tcp_v4_init_sock(struct sock *sk)
1433 sk->sk_write_space = sk_stream_write_space; 1266 sk->sk_write_space = sk_stream_write_space;
1434 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); 1267 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1435 1268
1436 tp->af_specific = &ipv4_specific; 1269 icsk->icsk_af_ops = &ipv4_specific;
1270 icsk->icsk_sync_mss = tcp_sync_mss;
1437 1271
1438 sk->sk_sndbuf = sysctl_tcp_wmem[1]; 1272 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1439 sk->sk_rcvbuf = sysctl_tcp_rmem[1]; 1273 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
@@ -1989,7 +1823,7 @@ struct proto tcp_prot = {
1989 .sysctl_rmem = sysctl_tcp_rmem, 1823 .sysctl_rmem = sysctl_tcp_rmem,
1990 .max_header = MAX_TCP_HEADER, 1824 .max_header = MAX_TCP_HEADER,
1991 .obj_size = sizeof(struct tcp_sock), 1825 .obj_size = sizeof(struct tcp_sock),
1992 .twsk_obj_size = sizeof(struct tcp_timewait_sock), 1826 .twsk_prot = &tcp_timewait_sock_ops,
1993 .rsk_prot = &tcp_request_sock_ops, 1827 .rsk_prot = &tcp_request_sock_ops,
1994}; 1828};
1995 1829
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 1b66a2ac4321..2b9b7f6c7f7c 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -274,18 +274,18 @@ kill:
274void tcp_time_wait(struct sock *sk, int state, int timeo) 274void tcp_time_wait(struct sock *sk, int state, int timeo)
275{ 275{
276 struct inet_timewait_sock *tw = NULL; 276 struct inet_timewait_sock *tw = NULL;
277 const struct inet_connection_sock *icsk = inet_csk(sk);
277 const struct tcp_sock *tp = tcp_sk(sk); 278 const struct tcp_sock *tp = tcp_sk(sk);
278 int recycle_ok = 0; 279 int recycle_ok = 0;
279 280
280 if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) 281 if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
281 recycle_ok = tp->af_specific->remember_stamp(sk); 282 recycle_ok = icsk->icsk_af_ops->remember_stamp(sk);
282 283
283 if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) 284 if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets)
284 tw = inet_twsk_alloc(sk, state); 285 tw = inet_twsk_alloc(sk, state);
285 286
286 if (tw != NULL) { 287 if (tw != NULL) {
287 struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); 288 struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
288 const struct inet_connection_sock *icsk = inet_csk(sk);
289 const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); 289 const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
290 290
291 tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; 291 tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale;
@@ -298,10 +298,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
298#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 298#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
299 if (tw->tw_family == PF_INET6) { 299 if (tw->tw_family == PF_INET6) {
300 struct ipv6_pinfo *np = inet6_sk(sk); 300 struct ipv6_pinfo *np = inet6_sk(sk);
301 struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw); 301 struct inet6_timewait_sock *tw6;
302 302
303 ipv6_addr_copy(&tcp6tw->tw_v6_daddr, &np->daddr); 303 tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
304 ipv6_addr_copy(&tcp6tw->tw_v6_rcv_saddr, &np->rcv_saddr); 304 tw6 = inet6_twsk((struct sock *)tw);
305 ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr);
306 ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr);
305 tw->tw_ipv6only = np->ipv6only; 307 tw->tw_ipv6only = np->ipv6only;
306 } 308 }
307#endif 309#endif
@@ -456,7 +458,6 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
456 struct request_sock **prev) 458 struct request_sock **prev)
457{ 459{
458 struct tcphdr *th = skb->h.th; 460 struct tcphdr *th = skb->h.th;
459 struct tcp_sock *tp = tcp_sk(sk);
460 u32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); 461 u32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
461 int paws_reject = 0; 462 int paws_reject = 0;
462 struct tcp_options_received tmp_opt; 463 struct tcp_options_received tmp_opt;
@@ -613,7 +614,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
613 * ESTABLISHED STATE. If it will be dropped after 614 * ESTABLISHED STATE. If it will be dropped after
614 * socket is created, wait for troubles. 615 * socket is created, wait for troubles.
615 */ 616 */
616 child = tp->af_specific->syn_recv_sock(sk, skb, req, NULL); 617 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb,
618 req, NULL);
617 if (child == NULL) 619 if (child == NULL)
618 goto listen_overflow; 620 goto listen_overflow;
619 621
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b7325e0b406a..a7623ead39a8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -51,8 +51,8 @@ int sysctl_tcp_retrans_collapse = 1;
51 */ 51 */
52int sysctl_tcp_tso_win_divisor = 3; 52int sysctl_tcp_tso_win_divisor = 3;
53 53
54static inline void update_send_head(struct sock *sk, struct tcp_sock *tp, 54static void update_send_head(struct sock *sk, struct tcp_sock *tp,
55 struct sk_buff *skb) 55 struct sk_buff *skb)
56{ 56{
57 sk->sk_send_head = skb->next; 57 sk->sk_send_head = skb->next;
58 if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue) 58 if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue)
@@ -124,8 +124,8 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst)
124 tp->snd_cwnd_used = 0; 124 tp->snd_cwnd_used = 0;
125} 125}
126 126
127static inline void tcp_event_data_sent(struct tcp_sock *tp, 127static void tcp_event_data_sent(struct tcp_sock *tp,
128 struct sk_buff *skb, struct sock *sk) 128 struct sk_buff *skb, struct sock *sk)
129{ 129{
130 struct inet_connection_sock *icsk = inet_csk(sk); 130 struct inet_connection_sock *icsk = inet_csk(sk);
131 const u32 now = tcp_time_stamp; 131 const u32 now = tcp_time_stamp;
@@ -142,7 +142,7 @@ static inline void tcp_event_data_sent(struct tcp_sock *tp,
142 icsk->icsk_ack.pingpong = 1; 142 icsk->icsk_ack.pingpong = 1;
143} 143}
144 144
145static __inline__ void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) 145static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
146{ 146{
147 tcp_dec_quickack_mode(sk, pkts); 147 tcp_dec_quickack_mode(sk, pkts);
148 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); 148 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
@@ -212,7 +212,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
212 * value can be stuffed directly into th->window for an outgoing 212 * value can be stuffed directly into th->window for an outgoing
213 * frame. 213 * frame.
214 */ 214 */
215static __inline__ u16 tcp_select_window(struct sock *sk) 215static u16 tcp_select_window(struct sock *sk)
216{ 216{
217 struct tcp_sock *tp = tcp_sk(sk); 217 struct tcp_sock *tp = tcp_sk(sk);
218 u32 cur_win = tcp_receive_window(tp); 218 u32 cur_win = tcp_receive_window(tp);
@@ -250,6 +250,75 @@ static __inline__ u16 tcp_select_window(struct sock *sk)
250 return new_win; 250 return new_win;
251} 251}
252 252
253static void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp,
254 __u32 tstamp)
255{
256 if (tp->rx_opt.tstamp_ok) {
257 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
258 (TCPOPT_NOP << 16) |
259 (TCPOPT_TIMESTAMP << 8) |
260 TCPOLEN_TIMESTAMP);
261 *ptr++ = htonl(tstamp);
262 *ptr++ = htonl(tp->rx_opt.ts_recent);
263 }
264 if (tp->rx_opt.eff_sacks) {
265 struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks;
266 int this_sack;
267
268 *ptr++ = htonl((TCPOPT_NOP << 24) |
269 (TCPOPT_NOP << 16) |
270 (TCPOPT_SACK << 8) |
271 (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks *
272 TCPOLEN_SACK_PERBLOCK)));
273 for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
274 *ptr++ = htonl(sp[this_sack].start_seq);
275 *ptr++ = htonl(sp[this_sack].end_seq);
276 }
277 if (tp->rx_opt.dsack) {
278 tp->rx_opt.dsack = 0;
279 tp->rx_opt.eff_sacks--;
280 }
281 }
282}
283
284/* Construct a tcp options header for a SYN or SYN_ACK packet.
285 * If this is every changed make sure to change the definition of
286 * MAX_SYN_SIZE to match the new maximum number of options that you
287 * can generate.
288 */
289static void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack,
290 int offer_wscale, int wscale, __u32 tstamp,
291 __u32 ts_recent)
292{
293 /* We always get an MSS option.
294 * The option bytes which will be seen in normal data
295 * packets should timestamps be used, must be in the MSS
296 * advertised. But we subtract them from tp->mss_cache so
297 * that calculations in tcp_sendmsg are simpler etc.
298 * So account for this fact here if necessary. If we
299 * don't do this correctly, as a receiver we won't
300 * recognize data packets as being full sized when we
301 * should, and thus we won't abide by the delayed ACK
302 * rules correctly.
303 * SACKs don't matter, we never delay an ACK when we
304 * have any of those going out.
305 */
306 *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
307 if (ts) {
308 if(sack)
309 *ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) |
310 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
311 else
312 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
313 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
314 *ptr++ = htonl(tstamp); /* TSVAL */
315 *ptr++ = htonl(ts_recent); /* TSECR */
316 } else if(sack)
317 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
318 (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM);
319 if (offer_wscale)
320 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale));
321}
253 322
254/* This routine actually transmits TCP packets queued in by 323/* This routine actually transmits TCP packets queued in by
255 * tcp_do_sendmsg(). This is used by both the initial 324 * tcp_do_sendmsg(). This is used by both the initial
@@ -371,7 +440,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
371 TCP_ECN_send(sk, tp, skb, tcp_header_size); 440 TCP_ECN_send(sk, tp, skb, tcp_header_size);
372 } 441 }
373 442
374 tp->af_specific->send_check(sk, th, skb->len, skb); 443 icsk->icsk_af_ops->send_check(sk, skb->len, skb);
375 444
376 if (likely(tcb->flags & TCPCB_FLAG_ACK)) 445 if (likely(tcb->flags & TCPCB_FLAG_ACK))
377 tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); 446 tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
@@ -381,7 +450,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
381 450
382 TCP_INC_STATS(TCP_MIB_OUTSEGS); 451 TCP_INC_STATS(TCP_MIB_OUTSEGS);
383 452
384 err = tp->af_specific->queue_xmit(skb, 0); 453 err = icsk->icsk_af_ops->queue_xmit(skb, 0);
385 if (unlikely(err <= 0)) 454 if (unlikely(err <= 0))
386 return err; 455 return err;
387 456
@@ -621,7 +690,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
621 It is minimum of user_mss and mss received with SYN. 690 It is minimum of user_mss and mss received with SYN.
622 It also does not include TCP options. 691 It also does not include TCP options.
623 692
624 tp->pmtu_cookie is last pmtu, seen by this function. 693 inet_csk(sk)->icsk_pmtu_cookie is last pmtu, seen by this function.
625 694
626 tp->mss_cache is current effective sending mss, including 695 tp->mss_cache is current effective sending mss, including
627 all tcp options except for SACKs. It is evaluated, 696 all tcp options except for SACKs. It is evaluated,
@@ -631,26 +700,26 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
631 NOTE1. rfc1122 clearly states that advertised MSS 700 NOTE1. rfc1122 clearly states that advertised MSS
632 DOES NOT include either tcp or ip options. 701 DOES NOT include either tcp or ip options.
633 702
634 NOTE2. tp->pmtu_cookie and tp->mss_cache are READ ONLY outside 703 NOTE2. inet_csk(sk)->icsk_pmtu_cookie and tp->mss_cache
635 this function. --ANK (980731) 704 are READ ONLY outside this function. --ANK (980731)
636 */ 705 */
637 706
638unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) 707unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
639{ 708{
640 struct tcp_sock *tp = tcp_sk(sk); 709 struct tcp_sock *tp = tcp_sk(sk);
641 int mss_now; 710 struct inet_connection_sock *icsk = inet_csk(sk);
642
643 /* Calculate base mss without TCP options: 711 /* Calculate base mss without TCP options:
644 It is MMS_S - sizeof(tcphdr) of rfc1122 712 It is MMS_S - sizeof(tcphdr) of rfc1122
645 */ 713 */
646 mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct tcphdr); 714 int mss_now = (pmtu - icsk->icsk_af_ops->net_header_len -
715 sizeof(struct tcphdr));
647 716
648 /* Clamp it (mss_clamp does not include tcp options) */ 717 /* Clamp it (mss_clamp does not include tcp options) */
649 if (mss_now > tp->rx_opt.mss_clamp) 718 if (mss_now > tp->rx_opt.mss_clamp)
650 mss_now = tp->rx_opt.mss_clamp; 719 mss_now = tp->rx_opt.mss_clamp;
651 720
652 /* Now subtract optional transport overhead */ 721 /* Now subtract optional transport overhead */
653 mss_now -= tp->ext_header_len; 722 mss_now -= icsk->icsk_ext_hdr_len;
654 723
655 /* Then reserve room for full set of TCP options and 8 bytes of data */ 724 /* Then reserve room for full set of TCP options and 8 bytes of data */
656 if (mss_now < 48) 725 if (mss_now < 48)
@@ -664,7 +733,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
664 mss_now = max((tp->max_window>>1), 68U - tp->tcp_header_len); 733 mss_now = max((tp->max_window>>1), 68U - tp->tcp_header_len);
665 734
666 /* And store cached results */ 735 /* And store cached results */
667 tp->pmtu_cookie = pmtu; 736 icsk->icsk_pmtu_cookie = pmtu;
668 tp->mss_cache = mss_now; 737 tp->mss_cache = mss_now;
669 738
670 return mss_now; 739 return mss_now;
@@ -694,7 +763,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
694 763
695 if (dst) { 764 if (dst) {
696 u32 mtu = dst_mtu(dst); 765 u32 mtu = dst_mtu(dst);
697 if (mtu != tp->pmtu_cookie) 766 if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
698 mss_now = tcp_sync_mss(sk, mtu); 767 mss_now = tcp_sync_mss(sk, mtu);
699 } 768 }
700 769
@@ -705,9 +774,10 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
705 xmit_size_goal = mss_now; 774 xmit_size_goal = mss_now;
706 775
707 if (doing_tso) { 776 if (doing_tso) {
708 xmit_size_goal = 65535 - 777 xmit_size_goal = (65535 -
709 tp->af_specific->net_header_len - 778 inet_csk(sk)->icsk_af_ops->net_header_len -
710 tp->ext_header_len - tp->tcp_header_len; 779 inet_csk(sk)->icsk_ext_hdr_len -
780 tp->tcp_header_len);
711 781
712 if (tp->max_window && 782 if (tp->max_window &&
713 (xmit_size_goal > (tp->max_window >> 1))) 783 (xmit_size_goal > (tp->max_window >> 1)))
@@ -723,7 +793,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
723 793
724/* Congestion window validation. (RFC2861) */ 794/* Congestion window validation. (RFC2861) */
725 795
726static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp) 796static void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
727{ 797{
728 __u32 packets_out = tp->packets_out; 798 __u32 packets_out = tp->packets_out;
729 799
@@ -772,7 +842,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk
772/* This must be invoked the first time we consider transmitting 842/* This must be invoked the first time we consider transmitting
773 * SKB onto the wire. 843 * SKB onto the wire.
774 */ 844 */
775static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) 845static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
776{ 846{
777 int tso_segs = tcp_skb_pcount(skb); 847 int tso_segs = tcp_skb_pcount(skb);
778 848
@@ -1422,7 +1492,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1422 (sysctl_tcp_retrans_collapse != 0)) 1492 (sysctl_tcp_retrans_collapse != 0))
1423 tcp_retrans_try_collapse(sk, skb, cur_mss); 1493 tcp_retrans_try_collapse(sk, skb, cur_mss);
1424 1494
1425 if(tp->af_specific->rebuild_header(sk)) 1495 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
1426 return -EHOSTUNREACH; /* Routing failure or similar. */ 1496 return -EHOSTUNREACH; /* Routing failure or similar. */
1427 1497
1428 /* Some Solaris stacks overoptimize and ignore the FIN on a 1498 /* Some Solaris stacks overoptimize and ignore the FIN on a
@@ -1793,7 +1863,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
1793/* 1863/*
1794 * Do all connect socket setups that can be done AF independent. 1864 * Do all connect socket setups that can be done AF independent.
1795 */ 1865 */
1796static inline void tcp_connect_init(struct sock *sk) 1866static void tcp_connect_init(struct sock *sk)
1797{ 1867{
1798 struct dst_entry *dst = __sk_dst_get(sk); 1868 struct dst_entry *dst = __sk_dst_get(sk);
1799 struct tcp_sock *tp = tcp_sk(sk); 1869 struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 13e7e6e8df16..3b7403495052 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -330,6 +330,10 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
330 vegas->cntRTT = 0; 330 vegas->cntRTT = 0;
331 vegas->minRTT = 0x7fffffff; 331 vegas->minRTT = 0x7fffffff;
332 } 332 }
333 /* Use normal slow start */
334 else if (tp->snd_cwnd <= tp->snd_ssthresh)
335 tcp_slow_start(tp);
336
333} 337}
334 338
335/* Extract info for Tcp socket info provided via netlink. */ 339/* Extract info for Tcp socket info provided via netlink. */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2422a5f7195d..00840474a449 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -86,6 +86,7 @@
86#include <linux/module.h> 86#include <linux/module.h>
87#include <linux/socket.h> 87#include <linux/socket.h>
88#include <linux/sockios.h> 88#include <linux/sockios.h>
89#include <linux/igmp.h>
89#include <linux/in.h> 90#include <linux/in.h>
90#include <linux/errno.h> 91#include <linux/errno.h>
91#include <linux/timer.h> 92#include <linux/timer.h>
@@ -846,20 +847,7 @@ out:
846csum_copy_err: 847csum_copy_err:
847 UDP_INC_STATS_BH(UDP_MIB_INERRORS); 848 UDP_INC_STATS_BH(UDP_MIB_INERRORS);
848 849
849 /* Clear queue. */ 850 skb_kill_datagram(sk, skb, flags);
850 if (flags&MSG_PEEK) {
851 int clear = 0;
852 spin_lock_bh(&sk->sk_receive_queue.lock);
853 if (skb == skb_peek(&sk->sk_receive_queue)) {
854 __skb_unlink(skb, &sk->sk_receive_queue);
855 clear = 1;
856 }
857 spin_unlock_bh(&sk->sk_receive_queue.lock);
858 if (clear)
859 kfree_skb(skb);
860 }
861
862 skb_free_datagram(sk, skb);
863 851
864 if (noblock) 852 if (noblock)
865 return -EAGAIN; 853 return -EAGAIN;
@@ -1001,6 +989,7 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
1001 kfree_skb(skb); 989 kfree_skb(skb);
1002 return -1; 990 return -1;
1003 } 991 }
992 nf_reset(skb);
1004 993
1005 if (up->encap_type) { 994 if (up->encap_type) {
1006 /* 995 /*
@@ -1094,7 +1083,7 @@ static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
1094 * Otherwise, csum completion requires checksumming packet body, 1083 * Otherwise, csum completion requires checksumming packet body,
1095 * including udp header and folding it to skb->csum. 1084 * including udp header and folding it to skb->csum.
1096 */ 1085 */
1097static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, 1086static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
1098 unsigned short ulen, u32 saddr, u32 daddr) 1087 unsigned short ulen, u32 saddr, u32 daddr)
1099{ 1088{
1100 if (uh->check == 0) { 1089 if (uh->check == 0) {
@@ -1108,7 +1097,6 @@ static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
1108 /* Probably, we should checksum udp header (it should be in cache 1097 /* Probably, we should checksum udp header (it should be in cache
1109 * in any case) and data in tiny packets (< rx copybreak). 1098 * in any case) and data in tiny packets (< rx copybreak).
1110 */ 1099 */
1111 return 0;
1112} 1100}
1113 1101
1114/* 1102/*
@@ -1141,8 +1129,7 @@ int udp_rcv(struct sk_buff *skb)
1141 if (pskb_trim_rcsum(skb, ulen)) 1129 if (pskb_trim_rcsum(skb, ulen))
1142 goto short_packet; 1130 goto short_packet;
1143 1131
1144 if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0) 1132 udp_checksum_init(skb, uh, ulen, saddr, daddr);
1145 goto csum_error;
1146 1133
1147 if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) 1134 if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
1148 return udp_v4_mcast_deliver(skb, uh, saddr, daddr); 1135 return udp_v4_mcast_deliver(skb, uh, saddr, daddr);
@@ -1163,6 +1150,7 @@ int udp_rcv(struct sk_buff *skb)
1163 1150
1164 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 1151 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1165 goto drop; 1152 goto drop;
1153 nf_reset(skb);
1166 1154
1167 /* No socket. Drop packet silently, if checksum is wrong */ 1155 /* No socket. Drop packet silently, if checksum is wrong */
1168 if (udp_checksum_complete(skb)) 1156 if (udp_checksum_complete(skb))
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 2d3849c38a0f..850d919591d1 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -11,6 +11,8 @@
11 11
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/string.h> 13#include <linux/string.h>
14#include <linux/netfilter.h>
15#include <linux/netfilter_ipv4.h>
14#include <net/inet_ecn.h> 16#include <net/inet_ecn.h>
15#include <net/ip.h> 17#include <net/ip.h>
16#include <net/xfrm.h> 18#include <net/xfrm.h>
@@ -45,6 +47,23 @@ static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq)
45 return xfrm_parse_spi(skb, nexthdr, spi, seq); 47 return xfrm_parse_spi(skb, nexthdr, spi, seq);
46} 48}
47 49
50#ifdef CONFIG_NETFILTER
51static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
52{
53 struct iphdr *iph = skb->nh.iph;
54
55 if (skb->dst == NULL) {
56 if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
57 skb->dev))
58 goto drop;
59 }
60 return dst_input(skb);
61drop:
62 kfree_skb(skb);
63 return NET_RX_DROP;
64}
65#endif
66
48int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) 67int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
49{ 68{
50 int err; 69 int err;
@@ -137,6 +156,8 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
137 memcpy(skb->sp->x+skb->sp->len, xfrm_vec, xfrm_nr*sizeof(struct sec_decap_state)); 156 memcpy(skb->sp->x+skb->sp->len, xfrm_vec, xfrm_nr*sizeof(struct sec_decap_state));
138 skb->sp->len += xfrm_nr; 157 skb->sp->len += xfrm_nr;
139 158
159 nf_reset(skb);
160
140 if (decaps) { 161 if (decaps) {
141 if (!(skb->dev->flags&IFF_LOOPBACK)) { 162 if (!(skb->dev->flags&IFF_LOOPBACK)) {
142 dst_release(skb->dst); 163 dst_release(skb->dst);
@@ -145,7 +166,17 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
145 netif_rx(skb); 166 netif_rx(skb);
146 return 0; 167 return 0;
147 } else { 168 } else {
169#ifdef CONFIG_NETFILTER
170 __skb_push(skb, skb->data - skb->nh.raw);
171 skb->nh.iph->tot_len = htons(skb->len);
172 ip_send_check(skb->nh.iph);
173
174 NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
175 xfrm4_rcv_encap_finish);
176 return 0;
177#else
148 return -skb->nh.iph->protocol; 178 return -skb->nh.iph->protocol;
179#endif
149 } 180 }
150 181
151drop_unlock: 182drop_unlock:
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 66620a95942a..d4df0ddd424b 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -8,8 +8,10 @@
8 * 2 of the License, or (at your option) any later version. 8 * 2 of the License, or (at your option) any later version.
9 */ 9 */
10 10
11#include <linux/compiler.h>
11#include <linux/skbuff.h> 12#include <linux/skbuff.h>
12#include <linux/spinlock.h> 13#include <linux/spinlock.h>
14#include <linux/netfilter_ipv4.h>
13#include <net/inet_ecn.h> 15#include <net/inet_ecn.h>
14#include <net/ip.h> 16#include <net/ip.h>
15#include <net/xfrm.h> 17#include <net/xfrm.h>
@@ -95,7 +97,7 @@ out:
95 return ret; 97 return ret;
96} 98}
97 99
98int xfrm4_output(struct sk_buff *skb) 100static int xfrm4_output_one(struct sk_buff *skb)
99{ 101{
100 struct dst_entry *dst = skb->dst; 102 struct dst_entry *dst = skb->dst;
101 struct xfrm_state *x = dst->xfrm; 103 struct xfrm_state *x = dst->xfrm;
@@ -113,27 +115,33 @@ int xfrm4_output(struct sk_buff *skb)
113 goto error_nolock; 115 goto error_nolock;
114 } 116 }
115 117
116 spin_lock_bh(&x->lock); 118 do {
117 err = xfrm_state_check(x, skb); 119 spin_lock_bh(&x->lock);
118 if (err) 120 err = xfrm_state_check(x, skb);
119 goto error; 121 if (err)
122 goto error;
120 123
121 xfrm4_encap(skb); 124 xfrm4_encap(skb);
122 125
123 err = x->type->output(x, skb); 126 err = x->type->output(x, skb);
124 if (err) 127 if (err)
125 goto error; 128 goto error;
126 129
127 x->curlft.bytes += skb->len; 130 x->curlft.bytes += skb->len;
128 x->curlft.packets++; 131 x->curlft.packets++;
129 132
130 spin_unlock_bh(&x->lock); 133 spin_unlock_bh(&x->lock);
131 134
132 if (!(skb->dst = dst_pop(dst))) { 135 if (!(skb->dst = dst_pop(dst))) {
133 err = -EHOSTUNREACH; 136 err = -EHOSTUNREACH;
134 goto error_nolock; 137 goto error_nolock;
135 } 138 }
136 err = NET_XMIT_BYPASS; 139 dst = skb->dst;
140 x = dst->xfrm;
141 } while (x && !x->props.mode);
142
143 IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
144 err = 0;
137 145
138out_exit: 146out_exit:
139 return err; 147 return err;
@@ -143,3 +151,33 @@ error_nolock:
143 kfree_skb(skb); 151 kfree_skb(skb);
144 goto out_exit; 152 goto out_exit;
145} 153}
154
155int xfrm4_output_finish(struct sk_buff *skb)
156{
157 int err;
158
159 while (likely((err = xfrm4_output_one(skb)) == 0)) {
160 nf_reset(skb);
161
162 err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, &skb, NULL,
163 skb->dst->dev, dst_output);
164 if (unlikely(err != 1))
165 break;
166
167 if (!skb->dst->xfrm)
168 return dst_output(skb);
169
170 err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL,
171 skb->dst->dev, xfrm4_output_finish);
172 if (unlikely(err != 1))
173 break;
174 }
175
176 return err;
177}
178
179int xfrm4_output(struct sk_buff *skb)
180{
181 return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev,
182 xfrm4_output_finish);
183}
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index d23e07fc81fa..dbabf81a9b7b 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -42,6 +42,21 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
42 x->props.saddr = tmpl->saddr; 42 x->props.saddr = tmpl->saddr;
43 if (x->props.saddr.a4 == 0) 43 if (x->props.saddr.a4 == 0)
44 x->props.saddr.a4 = saddr->a4; 44 x->props.saddr.a4 = saddr->a4;
45 if (tmpl->mode && x->props.saddr.a4 == 0) {
46 struct rtable *rt;
47 struct flowi fl_tunnel = {
48 .nl_u = {
49 .ip4_u = {
50 .daddr = x->id.daddr.a4,
51 }
52 }
53 };
54 if (!xfrm_dst_lookup((struct xfrm_dst **)&rt,
55 &fl_tunnel, AF_INET)) {
56 x->props.saddr.a4 = rt->rt_src;
57 dst_release(&rt->u.dst);
58 }
59 }
45 x->props.mode = tmpl->mode; 60 x->props.mode = tmpl->mode;
46 x->props.reqid = tmpl->reqid; 61 x->props.reqid = tmpl->reqid;
47 x->props.family = AF_INET; 62 x->props.family = AF_INET;