aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
author Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> 2006-12-03 01:07:13 -0500
committer David S. Miller <davem@davemloft.net> 2006-12-03 01:07:13 -0500
commit 5b1158e909ecbe1a052203e0d8df15633f829930 (patch)
tree 1d29320fd6184b982b1a8a83e7e1e9f25537d3ff /net/ipv4
parent d2483ddefd38b06053cdce7206382ca61f6282b1 (diff)
[NETFILTER]: Add NAT support for nf_conntrack
Add NAT support for nf_conntrack. Joint work of Jozsef Kadlecsik, Yasuyuki Kozakai, Martin Josefsson and myself.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/netfilter/Kconfig 30
-rw-r--r-- net/ipv4/netfilter/Makefile 7
-rw-r--r-- net/ipv4/netfilter/ip_nat_standalone.c 6
-rw-r--r-- net/ipv4/netfilter/ipt_MASQUERADE.c 29
-rw-r--r-- net/ipv4/netfilter/ipt_NETMAP.c 4
-rw-r--r-- net/ipv4/netfilter/ipt_REDIRECT.c 6
-rw-r--r-- net/ipv4/netfilter/ipt_SAME.c 12
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 7
-rw-r--r-- net/ipv4/netfilter/nf_nat_core.c 647
-rw-r--r-- net/ipv4/netfilter/nf_nat_helper.c 433
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_icmp.c 86
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_tcp.c 148
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_udp.c 138
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_unknown.c 54
-rw-r--r-- net/ipv4/netfilter/nf_nat_rule.c 343
-rw-r--r-- net/ipv4/netfilter/nf_nat_standalone.c 406
16 files changed, 2337 insertions, 19 deletions
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 71485276b81..01789aeaeb5 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -6,7 +6,7 @@ menu "IP: Netfilter Configuration"
6 depends on INET && NETFILTER 6 depends on INET && NETFILTER
7 7
8config NF_CONNTRACK_IPV4 8config NF_CONNTRACK_IPV4
9 tristate "IPv4 support for new connection tracking (EXPERIMENTAL)" 9 tristate "IPv4 support for new connection tracking (required for NAT) (EXPERIMENTAL)"
10 depends on EXPERIMENTAL && NF_CONNTRACK 10 depends on EXPERIMENTAL && NF_CONNTRACK
11 ---help--- 11 ---help---
12 Connection tracking keeps a record of what packets have passed 12 Connection tracking keeps a record of what packets have passed
@@ -387,7 +387,7 @@ config IP_NF_TARGET_TCPMSS
387 387
388 To compile it as a module, choose M here. If unsure, say N. 388 To compile it as a module, choose M here. If unsure, say N.
389 389
390# NAT + specific targets 390# NAT + specific targets: ip_conntrack
391config IP_NF_NAT 391config IP_NF_NAT
392 tristate "Full NAT" 392 tristate "Full NAT"
393 depends on IP_NF_IPTABLES && IP_NF_CONNTRACK 393 depends on IP_NF_IPTABLES && IP_NF_CONNTRACK
@@ -398,14 +398,30 @@ config IP_NF_NAT
398 398
399 To compile it as a module, choose M here. If unsure, say N. 399 To compile it as a module, choose M here. If unsure, say N.
400 400
401# NAT + specific targets: nf_conntrack
402config NF_NAT
403 tristate "Full NAT"
404 depends on IP_NF_IPTABLES && NF_CONNTRACK
405 help
406 The Full NAT option allows masquerading, port forwarding and other
407 forms of full Network Address Port Translation. It is controlled by
408 the `nat' table in iptables: see the man page for iptables(8).
409
410 To compile it as a module, choose M here. If unsure, say N.
411
401config IP_NF_NAT_NEEDED 412config IP_NF_NAT_NEEDED
402 bool 413 bool
403 depends on IP_NF_NAT != n 414 depends on IP_NF_NAT
415 default y
416
417config NF_NAT_NEEDED
418 bool
419 depends on NF_NAT
404 default y 420 default y
405 421
406config IP_NF_TARGET_MASQUERADE 422config IP_NF_TARGET_MASQUERADE
407 tristate "MASQUERADE target support" 423 tristate "MASQUERADE target support"
408 depends on IP_NF_NAT 424 depends on (NF_NAT || IP_NF_NAT)
409 help 425 help
410 Masquerading is a special case of NAT: all outgoing connections are 426 Masquerading is a special case of NAT: all outgoing connections are
411 changed to seem to come from a particular interface's address, and 427 changed to seem to come from a particular interface's address, and
@@ -417,7 +433,7 @@ config IP_NF_TARGET_MASQUERADE
417 433
418config IP_NF_TARGET_REDIRECT 434config IP_NF_TARGET_REDIRECT
419 tristate "REDIRECT target support" 435 tristate "REDIRECT target support"
420 depends on IP_NF_NAT 436 depends on (NF_NAT || IP_NF_NAT)
421 help 437 help
422 REDIRECT is a special case of NAT: all incoming connections are 438 REDIRECT is a special case of NAT: all incoming connections are
423 mapped onto the incoming interface's address, causing the packets to 439 mapped onto the incoming interface's address, causing the packets to
@@ -428,7 +444,7 @@ config IP_NF_TARGET_REDIRECT
428 444
429config IP_NF_TARGET_NETMAP 445config IP_NF_TARGET_NETMAP
430 tristate "NETMAP target support" 446 tristate "NETMAP target support"
431 depends on IP_NF_NAT 447 depends on (NF_NAT || IP_NF_NAT)
432 help 448 help
433 NETMAP is an implementation of static 1:1 NAT mapping of network 449 NETMAP is an implementation of static 1:1 NAT mapping of network
434 addresses. It maps the network address part, while keeping the host 450 addresses. It maps the network address part, while keeping the host
@@ -439,7 +455,7 @@ config IP_NF_TARGET_NETMAP
439 455
440config IP_NF_TARGET_SAME 456config IP_NF_TARGET_SAME
441 tristate "SAME target support" 457 tristate "SAME target support"
442 depends on IP_NF_NAT 458 depends on (NF_NAT || IP_NF_NAT)
443 help 459 help
444 This option adds a `SAME' target, which works like the standard SNAT 460 This option adds a `SAME' target, which works like the standard SNAT
445 target, but attempts to give clients the same IP for all connections. 461 target, but attempts to give clients the same IP for all connections.
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 21359d83f0c..ec31690764a 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -5,7 +5,12 @@
5# objects for the standalone - connection tracking / NAT 5# objects for the standalone - connection tracking / NAT
6ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o 6ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o
7ip_nat-objs := ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o 7ip_nat-objs := ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
8nf_nat-objs := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o
9ifneq ($(CONFIG_NF_NAT),)
10iptable_nat-objs := nf_nat_rule.o nf_nat_standalone.o
11else
8iptable_nat-objs := ip_nat_rule.o ip_nat_standalone.o 12iptable_nat-objs := ip_nat_rule.o ip_nat_standalone.o
13endif
9 14
10ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o 15ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o
11ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o 16ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o
@@ -16,6 +21,7 @@ ip_nat_h323-objs := ip_nat_helper_h323.o
16# connection tracking 21# connection tracking
17obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o 22obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
18obj-$(CONFIG_IP_NF_NAT) += ip_nat.o 23obj-$(CONFIG_IP_NF_NAT) += ip_nat.o
24obj-$(CONFIG_NF_NAT) += nf_nat.o
19 25
20# conntrack netlink interface 26# conntrack netlink interface
21obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o 27obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o
@@ -50,6 +56,7 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
50obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o 56obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
51obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o 57obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
52obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o 58obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
59obj-$(CONFIG_NF_NAT) += iptable_nat.o
53obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o 60obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
54 61
55# matches 62# matches
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index d85d2de5044..ad66328baa5 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -44,12 +44,6 @@
44#define DEBUGP(format, args...) 44#define DEBUGP(format, args...)
45#endif 45#endif
46 46
47#define HOOKNAME(hooknum) ((hooknum) == NF_IP_POST_ROUTING ? "POST_ROUTING" \
48 : ((hooknum) == NF_IP_PRE_ROUTING ? "PRE_ROUTING" \
49 : ((hooknum) == NF_IP_LOCAL_OUT ? "LOCAL_OUT" \
50 : ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" \
51 : "*ERROR*")))
52
53#ifdef CONFIG_XFRM 47#ifdef CONFIG_XFRM
54static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) 48static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
55{ 49{
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 3dbfcfac8a8..28b9233956b 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -2,7 +2,7 @@
2 (depending on route). */ 2 (depending on route). */
3 3
4/* (C) 1999-2001 Paul `Rusty' Russell 4/* (C) 1999-2001 Paul `Rusty' Russell
5 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 5 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
6 * 6 *
7 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
@@ -20,7 +20,11 @@
20#include <net/checksum.h> 20#include <net/checksum.h>
21#include <net/route.h> 21#include <net/route.h>
22#include <linux/netfilter_ipv4.h> 22#include <linux/netfilter_ipv4.h>
23#ifdef CONFIG_NF_NAT_NEEDED
24#include <net/netfilter/nf_nat_rule.h>
25#else
23#include <linux/netfilter_ipv4/ip_nat_rule.h> 26#include <linux/netfilter_ipv4/ip_nat_rule.h>
27#endif
24#include <linux/netfilter_ipv4/ip_tables.h> 28#include <linux/netfilter_ipv4/ip_tables.h>
25 29
26MODULE_LICENSE("GPL"); 30MODULE_LICENSE("GPL");
@@ -65,23 +69,33 @@ masquerade_target(struct sk_buff **pskb,
65 const struct xt_target *target, 69 const struct xt_target *target,
66 const void *targinfo) 70 const void *targinfo)
67{ 71{
72#ifdef CONFIG_NF_NAT_NEEDED
73 struct nf_conn_nat *nat;
74#endif
68 struct ip_conntrack *ct; 75 struct ip_conntrack *ct;
69 enum ip_conntrack_info ctinfo; 76 enum ip_conntrack_info ctinfo;
70 const struct ip_nat_multi_range_compat *mr;
71 struct ip_nat_range newrange; 77 struct ip_nat_range newrange;
78 const struct ip_nat_multi_range_compat *mr;
72 struct rtable *rt; 79 struct rtable *rt;
73 __be32 newsrc; 80 __be32 newsrc;
74 81
75 IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING); 82 IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);
76 83
77 ct = ip_conntrack_get(*pskb, &ctinfo); 84 ct = ip_conntrack_get(*pskb, &ctinfo);
85#ifdef CONFIG_NF_NAT_NEEDED
86 nat = nfct_nat(ct);
87#endif
78 IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED 88 IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
79 || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); 89 || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
80 90
81 /* Source address is 0.0.0.0 - locally generated packet that is 91 /* Source address is 0.0.0.0 - locally generated packet that is
82 * probably not supposed to be masqueraded. 92 * probably not supposed to be masqueraded.
83 */ 93 */
94#ifdef CONFIG_NF_NAT_NEEDED
95 if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
96#else
84 if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip == 0) 97 if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip == 0)
98#endif
85 return NF_ACCEPT; 99 return NF_ACCEPT;
86 100
87 mr = targinfo; 101 mr = targinfo;
@@ -93,7 +107,11 @@ masquerade_target(struct sk_buff **pskb,
93 } 107 }
94 108
95 write_lock_bh(&masq_lock); 109 write_lock_bh(&masq_lock);
110#ifdef CONFIG_NF_NAT_NEEDED
111 nat->masq_index = out->ifindex;
112#else
96 ct->nat.masq_index = out->ifindex; 113 ct->nat.masq_index = out->ifindex;
114#endif
97 write_unlock_bh(&masq_lock); 115 write_unlock_bh(&masq_lock);
98 116
99 /* Transfer from original range. */ 117 /* Transfer from original range. */
@@ -109,10 +127,17 @@ masquerade_target(struct sk_buff **pskb,
109static inline int 127static inline int
110device_cmp(struct ip_conntrack *i, void *ifindex) 128device_cmp(struct ip_conntrack *i, void *ifindex)
111{ 129{
130#ifdef CONFIG_NF_NAT_NEEDED
131 struct nf_conn_nat *nat = nfct_nat(i);
132#endif
112 int ret; 133 int ret;
113 134
114 read_lock_bh(&masq_lock); 135 read_lock_bh(&masq_lock);
136#ifdef CONFIG_NF_NAT_NEEDED
137 ret = (nat->masq_index == (int)(long)ifindex);
138#else
115 ret = (i->nat.masq_index == (int)(long)ifindex); 139 ret = (i->nat.masq_index == (int)(long)ifindex);
140#endif
116 read_unlock_bh(&masq_lock); 141 read_unlock_bh(&masq_lock);
117 142
118 return ret; 143 return ret;
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 58a88f22710..9390e90f2b2 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -15,7 +15,11 @@
15#include <linux/netdevice.h> 15#include <linux/netdevice.h>
16#include <linux/netfilter.h> 16#include <linux/netfilter.h>
17#include <linux/netfilter_ipv4.h> 17#include <linux/netfilter_ipv4.h>
18#ifdef CONFIG_NF_NAT_NEEDED
19#include <net/netfilter/nf_nat_rule.h>
20#else
18#include <linux/netfilter_ipv4/ip_nat_rule.h> 21#include <linux/netfilter_ipv4/ip_nat_rule.h>
22#endif
19 23
20#define MODULENAME "NETMAP" 24#define MODULENAME "NETMAP"
21MODULE_LICENSE("GPL"); 25MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index c0dcfe9d610..462eceb3a1b 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -1,6 +1,6 @@
1/* Redirect. Simple mapping which alters dst to a local IP address. */ 1/* Redirect. Simple mapping which alters dst to a local IP address. */
2/* (C) 1999-2001 Paul `Rusty' Russell 2/* (C) 1999-2001 Paul `Rusty' Russell
3 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 3 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
@@ -18,7 +18,11 @@
18#include <net/protocol.h> 18#include <net/protocol.h>
19#include <net/checksum.h> 19#include <net/checksum.h>
20#include <linux/netfilter_ipv4.h> 20#include <linux/netfilter_ipv4.h>
21#ifdef CONFIG_NF_NAT_NEEDED
22#include <net/netfilter/nf_nat_rule.h>
23#else
21#include <linux/netfilter_ipv4/ip_nat_rule.h> 24#include <linux/netfilter_ipv4/ip_nat_rule.h>
25#endif
22 26
23MODULE_LICENSE("GPL"); 27MODULE_LICENSE("GPL");
24MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 28MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index b38b13328d7..3dcf2941133 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -34,7 +34,11 @@
34#include <net/protocol.h> 34#include <net/protocol.h>
35#include <net/checksum.h> 35#include <net/checksum.h>
36#include <linux/netfilter_ipv4.h> 36#include <linux/netfilter_ipv4.h>
37#ifdef CONFIG_NF_NAT_NEEDED
38#include <net/netfilter/nf_nat_rule.h>
39#else
37#include <linux/netfilter_ipv4/ip_nat_rule.h> 40#include <linux/netfilter_ipv4/ip_nat_rule.h>
41#endif
38#include <linux/netfilter_ipv4/ipt_SAME.h> 42#include <linux/netfilter_ipv4/ipt_SAME.h>
39 43
40MODULE_LICENSE("GPL"); 44MODULE_LICENSE("GPL");
@@ -152,11 +156,17 @@ same_target(struct sk_buff **pskb,
152 Here we calculate the index in same->iparray which 156 Here we calculate the index in same->iparray which
153 holds the ipaddress we should use */ 157 holds the ipaddress we should use */
154 158
159#ifdef CONFIG_NF_NAT_NEEDED
160 tmpip = ntohl(t->src.u3.ip);
161
162 if (!(same->info & IPT_SAME_NODST))
163 tmpip += ntohl(t->dst.u3.ip);
164#else
155 tmpip = ntohl(t->src.ip); 165 tmpip = ntohl(t->src.ip);
156 166
157 if (!(same->info & IPT_SAME_NODST)) 167 if (!(same->info & IPT_SAME_NODST))
158 tmpip += ntohl(t->dst.ip); 168 tmpip += ntohl(t->dst.ip);
159 169#endif
160 aindex = tmpip % same->ipnum; 170 aindex = tmpip % same->ipnum;
161 171
162 new_ip = htonl(same->iparray[aindex]); 172 new_ip = htonl(same->iparray[aindex]);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 653f57b8a10..5655109dcaf 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -111,10 +111,10 @@ ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
111 return NF_ACCEPT; 111 return NF_ACCEPT;
112} 112}
113 113
114int nat_module_is_loaded = 0; 114int nf_nat_module_is_loaded = 0;
115static u_int32_t ipv4_get_features(const struct nf_conntrack_tuple *tuple) 115static u_int32_t ipv4_get_features(const struct nf_conntrack_tuple *tuple)
116{ 116{
117 if (nat_module_is_loaded) 117 if (nf_nat_module_is_loaded)
118 return NF_CT_F_NAT; 118 return NF_CT_F_NAT;
119 119
120 return NF_CT_F_BASIC; 120 return NF_CT_F_BASIC;
@@ -532,3 +532,6 @@ module_init(nf_conntrack_l3proto_ipv4_init);
532module_exit(nf_conntrack_l3proto_ipv4_fini); 532module_exit(nf_conntrack_l3proto_ipv4_fini);
533 533
534EXPORT_SYMBOL(nf_ct_ipv4_gather_frags); 534EXPORT_SYMBOL(nf_ct_ipv4_gather_frags);
535#ifdef CONFIG_NF_NAT_NEEDED
536EXPORT_SYMBOL(nf_nat_module_is_loaded);
537#endif
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
new file mode 100644
index 00000000000..86a92272b05
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -0,0 +1,647 @@
1/* NAT for netfilter; shared with compatibility layer. */
2
3/* (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/types.h>
13#include <linux/timer.h>
14#include <linux/skbuff.h>
15#include <linux/vmalloc.h>
16#include <net/checksum.h>
17#include <net/icmp.h>
18#include <net/ip.h>
19#include <net/tcp.h> /* For tcp_prot in getorigdst */
20#include <linux/icmp.h>
21#include <linux/udp.h>
22#include <linux/jhash.h>
23
24#include <linux/netfilter_ipv4.h>
25#include <net/netfilter/nf_conntrack.h>
26#include <net/netfilter/nf_conntrack_core.h>
27#include <net/netfilter/nf_nat.h>
28#include <net/netfilter/nf_nat_protocol.h>
29#include <net/netfilter/nf_nat_core.h>
30#include <net/netfilter/nf_nat_helper.h>
31#include <net/netfilter/nf_conntrack_helper.h>
32#include <net/netfilter/nf_conntrack_l3proto.h>
33#include <net/netfilter/nf_conntrack_l4proto.h>
34
/* Debug tracing switch: change the "#if 0" to "#if 1" to make DEBUGP()
 * expand to printk; as written, DEBUGP() expands to nothing. */
35#if 0
36#define DEBUGP printk
37#else
38#define DEBUGP(format, args...)
39#endif
40
/* Reader/writer lock taken around every access to the bysource hash
 * chains in the code shown (insert, delete and traversal). */
41static DEFINE_RWLOCK(nf_nat_lock);
42
/* Layer-3 protocol handle handed to nf_ct_get_tuple() when parsing the
 * packet embedded in an ICMP error.  Starts out NULL; NOTE(review): the
 * assignment is not in the portion of the file visible here. */
43static struct nf_conntrack_l3proto *l3proto = NULL;
44
45/* Calculated at init based on memory size */
46static unsigned int nf_nat_htable_size;
47
/* Array of list heads indexed by hash_by_src(); each conntrack's NAT
 * info is linked into the chain for its original-direction source. */
48static struct list_head *bysource;
49
/* One slot per possible 8-bit protocol number; read directly by
 * __nf_nat_proto_find(). */
50#define MAX_IP_NAT_PROTO 256
51static struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO];
52
53static inline struct nf_nat_protocol *
54__nf_nat_proto_find(u_int8_t protonum)
55{
56 return nf_nat_protos[protonum];
57}
58
59struct nf_nat_protocol *
60nf_nat_proto_find_get(u_int8_t protonum)
61{
62 struct nf_nat_protocol *p;
63
64 /* we need to disable preemption to make sure 'p' doesn't get
65 * removed until we've grabbed the reference */
66 preempt_disable();
67 p = __nf_nat_proto_find(protonum);
68 if (!try_module_get(p->me))
69 p = &nf_nat_unknown_protocol;
70 preempt_enable();
71
72 return p;
73}
74EXPORT_SYMBOL_GPL(nf_nat_proto_find_get);
75
76void
77nf_nat_proto_put(struct nf_nat_protocol *p)
78{
79 module_put(p->me);
80}
81EXPORT_SYMBOL_GPL(nf_nat_proto_put);
82
83/* We keep an extra hash for each conntrack, for fast searching. */
84static inline unsigned int
85hash_by_src(const struct nf_conntrack_tuple *tuple)
86{
87 /* Original src, to ensure we map it consistently if poss. */
88 return jhash_3words((__force u32)tuple->src.u3.ip, tuple->src.u.all,
89 tuple->dst.protonum, 0) % nf_nat_htable_size;
90}
91
92/* Noone using conntrack by the time this called. */
93static void nf_nat_cleanup_conntrack(struct nf_conn *conn)
94{
95 struct nf_conn_nat *nat;
96 if (!(conn->status & IPS_NAT_DONE_MASK))
97 return;
98
99 nat = nfct_nat(conn);
100 write_lock_bh(&nf_nat_lock);
101 list_del(&nat->info.bysource);
102 write_unlock_bh(&nf_nat_lock);
103}
104
105/* Is this tuple already taken? (not by us) */
106int
107nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
108 const struct nf_conn *ignored_conntrack)
109{
110 /* Conntrack tracking doesn't keep track of outgoing tuples; only
111 incoming ones. NAT means they don't have a fixed mapping,
112 so we invert the tuple and look for the incoming reply.
113
114 We could keep a separate hash if this proves too slow. */
115 struct nf_conntrack_tuple reply;
116
117 nf_ct_invert_tuplepr(&reply, tuple);
118 return nf_conntrack_tuple_taken(&reply, ignored_conntrack);
119}
120EXPORT_SYMBOL(nf_nat_used_tuple);
121
122/* If we source map this tuple so reply looks like reply_tuple, will
123 * that meet the constraints of range. */
124static int
125in_range(const struct nf_conntrack_tuple *tuple,
126 const struct nf_nat_range *range)
127{
128 struct nf_nat_protocol *proto;
129
130 proto = __nf_nat_proto_find(tuple->dst.protonum);
131 /* If we are supposed to map IPs, then we must be in the
132 range specified, otherwise let this drag us onto a new src IP. */
133 if (range->flags & IP_NAT_RANGE_MAP_IPS) {
134 if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) ||
135 ntohl(tuple->src.u3.ip) > ntohl(range->max_ip))
136 return 0;
137 }
138
139 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
140 proto->in_range(tuple, IP_NAT_MANIP_SRC,
141 &range->min, &range->max))
142 return 1;
143
144 return 0;
145}
146
147static inline int
148same_src(const struct nf_conn *ct,
149 const struct nf_conntrack_tuple *tuple)
150{
151 const struct nf_conntrack_tuple *t;
152
153 t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
154 return (t->dst.protonum == tuple->dst.protonum &&
155 t->src.u3.ip == tuple->src.u3.ip &&
156 t->src.u.all == tuple->src.u.all);
157}
158
159/* Only called for SRC manip */
160static int
161find_appropriate_src(const struct nf_conntrack_tuple *tuple,
162 struct nf_conntrack_tuple *result,
163 const struct nf_nat_range *range)
164{
165 unsigned int h = hash_by_src(tuple);
166 struct nf_conn_nat *nat;
167 struct nf_conn *ct;
168
169 read_lock_bh(&nf_nat_lock);
170 list_for_each_entry(nat, &bysource[h], info.bysource) {
171 ct = (struct nf_conn *)((char *)nat - offsetof(struct nf_conn, data));
172 if (same_src(ct, tuple)) {
173 /* Copy source part from reply tuple. */
174 nf_ct_invert_tuplepr(result,
175 &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
176 result->dst = tuple->dst;
177
178 if (in_range(result, range)) {
179 read_unlock_bh(&nf_nat_lock);
180 return 1;
181 }
182 }
183 }
184 read_unlock_bh(&nf_nat_lock);
185 return 0;
186}
187
188/* For [FUTURE] fragmentation handling, we want the least-used
189 src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus
190 if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
191 1-65535, we don't do pro-rata allocation based on ports; we choose
192 the ip with the lowest src-ip/dst-ip/proto usage.
193*/
194static void
195find_best_ips_proto(struct nf_conntrack_tuple *tuple,
196 const struct nf_nat_range *range,
197 const struct nf_conn *ct,
198 enum nf_nat_manip_type maniptype)
199{
200 __be32 *var_ipp;
201 /* Host order */
202 u_int32_t minip, maxip, j;
203
204 /* No IP mapping? Do nothing. */
205 if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
206 return;
207
208 if (maniptype == IP_NAT_MANIP_SRC)
209 var_ipp = &tuple->src.u3.ip;
210 else
211 var_ipp = &tuple->dst.u3.ip;
212
213 /* Fast path: only one choice. */
214 if (range->min_ip == range->max_ip) {
215 *var_ipp = range->min_ip;
216 return;
217 }
218
219 /* Hashing source and destination IPs gives a fairly even
220 * spread in practice (if there are a small number of IPs
221 * involved, there usually aren't that many connections
222 * anyway). The consistency means that servers see the same
223 * client coming from the same IP (some Internet Banking sites
224 * like this), even across reboots. */
225 minip = ntohl(range->min_ip);
226 maxip = ntohl(range->max_ip);
227 j = jhash_2words((__force u32)tuple->src.u3.ip,
228 (__force u32)tuple->dst.u3.ip, 0);
229 *var_ipp = htonl(minip + j % (maxip - minip + 1));
230}
231
232/* Manipulate the tuple into the range given. For NF_IP_POST_ROUTING,
233 * we change the source to map into the range. For NF_IP_PRE_ROUTING
234 * and NF_IP_LOCAL_OUT, we change the destination to map into the
235 * range. It might not be possible to get a unique tuple, but we try.
236 * At worst (or if we race), we will end up with a final duplicate in
237 * __ip_conntrack_confirm and drop the packet. */
238static void
239get_unique_tuple(struct nf_conntrack_tuple *tuple,
240 const struct nf_conntrack_tuple *orig_tuple,
241 const struct nf_nat_range *range,
242 struct nf_conn *ct,
243 enum nf_nat_manip_type maniptype)
244{
245 struct nf_nat_protocol *proto;
246
247 /* 1) If this srcip/proto/src-proto-part is currently mapped,
248 and that same mapping gives a unique tuple within the given
249 range, use that.
250
251 This is only required for source (ie. NAT/masq) mappings.
252 So far, we don't do local source mappings, so multiple
253 manips not an issue. */
254 if (maniptype == IP_NAT_MANIP_SRC) {
255 if (find_appropriate_src(orig_tuple, tuple, range)) {
256 DEBUGP("get_unique_tuple: Found current src map\n");
257 if (!nf_nat_used_tuple(tuple, ct))
258 return;
259 }
260 }
261
262 /* 2) Select the least-used IP/proto combination in the given
263 range. */
264 *tuple = *orig_tuple;
265 find_best_ips_proto(tuple, range, ct, maniptype);
266
267 /* 3) The per-protocol part of the manip is made to map into
268 the range to make a unique tuple. */
269
270 proto = nf_nat_proto_find_get(orig_tuple->dst.protonum);
271
272 /* Only bother mapping if it's not already in range and unique */
273 if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
274 proto->in_range(tuple, maniptype, &range->min, &range->max)) &&
275 !nf_nat_used_tuple(tuple, ct)) {
276 nf_nat_proto_put(proto);
277 return;
278 }
279
280 /* Last change: get protocol to try to obtain unique tuple. */
281 proto->unique_tuple(tuple, range, maniptype, ct);
282
283 nf_nat_proto_put(proto);
284}
285
/* nf_nat_setup_info - decide the NAT mapping of a conntrack for one hook.
 *
 * Derives the manip type from hooknum, asks get_unique_tuple() for a
 * tuple inside range, and, if that tuple differs from the current one,
 * rewrites the reply the conntrack expects and flags the conntrack as
 * source- or destination-NATed.  The first call for a conntrack also
 * links it into the bysource hash.  Finally the matching *_NAT_DONE bit
 * is set.  Returns NF_ACCEPT on every path shown.
 *
 * BUG()s if the same manip type was already initialised for ct. */
286unsigned int
287nf_nat_setup_info(struct nf_conn *ct,
288 const struct nf_nat_range *range,
289 unsigned int hooknum)
290{
291 struct nf_conntrack_tuple curr_tuple, new_tuple;
292 struct nf_conn_nat *nat = nfct_nat(ct);
293 struct nf_nat_info *info = &nat->info;
 /* Sampled before any DONE bit is set further down: true only while
    neither manip has been set up for this conntrack yet. */
294 int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK);
295 enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
296
297 NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
298 hooknum == NF_IP_POST_ROUTING ||
299 hooknum == NF_IP_LOCAL_IN ||
300 hooknum == NF_IP_LOCAL_OUT);
301 BUG_ON(nf_nat_initialized(ct, maniptype));
302
303 /* What we've got will look like inverse of reply. Normally
304 this is what is in the conntrack, except for prior
305 manipulations (future optimization: if num_manips == 0,
306 orig_tp =
307 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
308 nf_ct_invert_tuplepr(&curr_tuple,
309 &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
310
311 get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);
312
 /* An unchanged tuple means a null mapping: nothing to alter and
    neither IPS_SRC_NAT nor IPS_DST_NAT is raised. */
313 if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) {
314 struct nf_conntrack_tuple reply;
315
316 /* Alter conntrack table so will recognize replies. */
317 nf_ct_invert_tuplepr(&reply, &new_tuple);
318 nf_conntrack_alter_reply(ct, &reply);
319
320 /* Non-atomic: we own this at the moment. */
321 if (maniptype == IP_NAT_MANIP_SRC)
322 ct->status |= IPS_SRC_NAT;
323 else
324 ct->status |= IPS_DST_NAT;
325 }
326
327 /* Place in source hash if this is the first time. */
328 if (have_to_hash) {
329 unsigned int srchash;
330
331 srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
332 write_lock_bh(&nf_nat_lock);
333 list_add(&info->bysource, &bysource[srchash]);
334 write_unlock_bh(&nf_nat_lock);
335 }
336
337 /* It's done. */
338 if (maniptype == IP_NAT_MANIP_DST)
339 set_bit(IPS_DST_NAT_DONE_BIT, &ct->status);
340 else
341 set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
342
343 return NF_ACCEPT;
344}
345EXPORT_SYMBOL(nf_nat_setup_info);
346
347/* Returns true if succeeded. */
348static int
349manip_pkt(u_int16_t proto,
350 struct sk_buff **pskb,
351 unsigned int iphdroff,
352 const struct nf_conntrack_tuple *target,
353 enum nf_nat_manip_type maniptype)
354{
355 struct iphdr *iph;
356 struct nf_nat_protocol *p;
357
358 if (!skb_make_writable(pskb, iphdroff + sizeof(*iph)))
359 return 0;
360
361 iph = (void *)(*pskb)->data + iphdroff;
362
363 /* Manipulate protcol part. */
364 p = nf_nat_proto_find_get(proto);
365 if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) {
366 nf_nat_proto_put(p);
367 return 0;
368 }
369 nf_nat_proto_put(p);
370
371 iph = (void *)(*pskb)->data + iphdroff;
372
373 if (maniptype == IP_NAT_MANIP_SRC) {
374 nf_csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
375 iph->saddr = target->src.u3.ip;
376 } else {
377 nf_csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
378 iph->daddr = target->dst.u3.ip;
379 }
380 return 1;
381}
382
383/* Do packet manipulations according to nf_nat_setup_info. */
384unsigned int nf_nat_packet(struct nf_conn *ct,
385 enum ip_conntrack_info ctinfo,
386 unsigned int hooknum,
387 struct sk_buff **pskb)
388{
389 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
390 unsigned long statusbit;
391 enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);
392
393 if (mtype == IP_NAT_MANIP_SRC)
394 statusbit = IPS_SRC_NAT;
395 else
396 statusbit = IPS_DST_NAT;
397
398 /* Invert if this is reply dir. */
399 if (dir == IP_CT_DIR_REPLY)
400 statusbit ^= IPS_NAT_MASK;
401
402 /* Non-atomic: these bits don't change. */
403 if (ct->status & statusbit) {
404 struct nf_conntrack_tuple target;
405
406 /* We are aiming to look like inverse of other direction. */
407 nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
408
409 if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype))
410 return NF_DROP;
411 }
412 return NF_ACCEPT;
413}
414EXPORT_SYMBOL_GPL(nf_nat_packet);
415
416/* Dir is direction ICMP is coming from (opposite to packet it contains) */
417int nf_nat_icmp_reply_translation(struct nf_conn *ct,
418 enum ip_conntrack_info ctinfo,
419 unsigned int hooknum,
420 struct sk_buff **pskb)
421{
422 struct {
423 struct icmphdr icmp;
424 struct iphdr ip;
425 } *inside;
426 struct nf_conntrack_tuple inner, target;
427 int hdrlen = (*pskb)->nh.iph->ihl * 4;
428 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
429 unsigned long statusbit;
430 enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
431
432 if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
433 return 0;
434
435 inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
436
437 /* We're actually going to mangle it beyond trivial checksum
438 adjustment, so make sure the current checksum is correct. */
439 if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0))
440 return 0;
441
442 /* Must be RELATED */
443 NF_CT_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
444 (*pskb)->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);
445
446 /* Redirects on non-null nats must be dropped, else they'll
447 start talking to each other without our translation, and be
448 confused... --RR */
449 if (inside->icmp.type == ICMP_REDIRECT) {
450 /* If NAT isn't finished, assume it and drop. */
451 if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
452 return 0;
453
454 if (ct->status & IPS_NAT_MASK)
455 return 0;
456 }
457
458 DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n",
459 *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
460
461 if (!nf_ct_get_tuple(*pskb,
462 (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr),
463 (*pskb)->nh.iph->ihl*4 +
464 sizeof(struct icmphdr) + inside->ip.ihl*4,
465 (u_int16_t)AF_INET,
466 inside->ip.protocol,
467 &inner,
468 l3proto,
469 __nf_ct_l4proto_find((u_int16_t)PF_INET,
470 inside->ip.protocol)))
471 return 0;
472
473 /* Change inner back to look like incoming packet. We do the
474 opposite manip on this hook to normal, because it might not
475 pass all hooks (locally-generated ICMP). Consider incoming
476 packet: PREROUTING (DST manip), routing produces ICMP, goes
477 through POSTROUTING (which must correct the DST manip). */
478 if (!manip_pkt(inside->ip.protocol, pskb,
479 (*pskb)->nh.iph->ihl*4 + sizeof(inside->icmp),
480 &ct->tuplehash[!dir].tuple,
481 !manip))
482 return 0;
483
484 if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
485 /* Reloading "inside" here since manip_pkt inner. */
486 inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
487 inside->icmp.checksum = 0;
488 inside->icmp.checksum =
489 csum_fold(skb_checksum(*pskb, hdrlen,
490 (*pskb)->len - hdrlen, 0));
491 }
492
493 /* Change outer to look the reply to an incoming packet
494 * (proto 0 means don't invert per-proto part). */
495 if (manip == IP_NAT_MANIP_SRC)
496 statusbit = IPS_SRC_NAT;
497 else
498 statusbit = IPS_DST_NAT;
499
500 /* Invert if this is reply dir. */
501 if (dir == IP_CT_DIR_REPLY)
502 statusbit ^= IPS_NAT_MASK;
503
504 if (ct->status & statusbit) {
505 nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
506 if (!manip_pkt(0, pskb, 0, &target, manip))
507 return 0;
508 }
509
510 return 1;
511}
512EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
513
514/* Protocol registration. */
515int nf_nat_protocol_register(struct nf_nat_protocol *proto)
516{
517 int ret = 0;
518
519 write_lock_bh(&nf_nat_lock);
520 if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) {
521 ret = -EBUSY;
522 goto out;
523 }
524 nf_nat_protos[proto->protonum] = proto;
525 out:
526 write_unlock_bh(&nf_nat_lock);
527 return ret;
528}
529EXPORT_SYMBOL(nf_nat_protocol_register);
530
531/* Noone stores the protocol anywhere; simply delete it. */
532void nf_nat_protocol_unregister(struct nf_nat_protocol *proto)
533{
534 write_lock_bh(&nf_nat_lock);
535 nf_nat_protos[proto->protonum] = &nf_nat_unknown_protocol;
536 write_unlock_bh(&nf_nat_lock);
537
538 /* Someone could be still looking at the proto in a bh. */
539 synchronize_net();
540}
541EXPORT_SYMBOL(nf_nat_protocol_unregister);
542
#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
/* Dump the min/max port of @range into @skb as netlink attributes.
 * Returns 0 on success, -1 if an attribute could not be added
 * (NFA_PUT jumps to nfattr_failure in that case). */
int
nf_nat_port_range_to_nfattr(struct sk_buff *skb,
			    const struct nf_nat_range *range)
{
	NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16),
		&range->min.tcp.port);
	NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16),
		&range->max.tcp.port);

	return 0;

nfattr_failure:
	return -1;
}
EXPORT_SYMBOL_GPL(nf_nat_port_range_to_nfattr);

/* Fill the port part of @range from the parsed attribute table @tb.
 * Returns 1 if at least one port attribute was present, 0 otherwise.
 * When only the minimum is given, the maximum is set equal to it. */
int
nf_nat_port_nfattr_to_range(struct nfattr *tb[], struct nf_nat_range *range)
{
	int ret = 0;

	/* we have to return whether we actually parsed something or not */

	if (tb[CTA_PROTONAT_PORT_MIN-1]) {
		ret = 1;
		range->min.tcp.port =
			*(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]);
	}

	if (!tb[CTA_PROTONAT_PORT_MAX-1]) {
		if (ret)
			range->max.tcp.port = range->min.tcp.port;
	} else {
		ret = 1;
		range->max.tcp.port =
			*(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_port_nfattr_to_range);
#endif
587
588static int __init nf_nat_init(void)
589{
590 size_t i;
591
592 /* Leave them the same for the moment. */
593 nf_nat_htable_size = nf_conntrack_htable_size;
594
595 /* One vmalloc for both hash tables */
596 bysource = vmalloc(sizeof(struct list_head) * nf_nat_htable_size);
597 if (!bysource)
598 return -ENOMEM;
599
600 /* Sew in builtin protocols. */
601 write_lock_bh(&nf_nat_lock);
602 for (i = 0; i < MAX_IP_NAT_PROTO; i++)
603 nf_nat_protos[i] = &nf_nat_unknown_protocol;
604 nf_nat_protos[IPPROTO_TCP] = &nf_nat_protocol_tcp;
605 nf_nat_protos[IPPROTO_UDP] = &nf_nat_protocol_udp;
606 nf_nat_protos[IPPROTO_ICMP] = &nf_nat_protocol_icmp;
607 write_unlock_bh(&nf_nat_lock);
608
609 for (i = 0; i < nf_nat_htable_size; i++) {
610 INIT_LIST_HEAD(&bysource[i]);
611 }
612
613 /* FIXME: Man, this is a hack. <SIGH> */
614 NF_CT_ASSERT(nf_conntrack_destroyed == NULL);
615 nf_conntrack_destroyed = &nf_nat_cleanup_conntrack;
616
617 /* Initialize fake conntrack so that NAT will skip it */
618 nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
619
620 l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
621 return 0;
622}
623
624/* Clear NAT section of all conntracks, in case we're loaded again. */
625static int clean_nat(struct nf_conn *i, void *data)
626{
627 struct nf_conn_nat *nat = nfct_nat(i);
628
629 if (!nat)
630 return 0;
631 memset(nat, 0, sizeof(nat));
632 i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
633 return 0;
634}
635
636static void __exit nf_nat_cleanup(void)
637{
638 nf_ct_iterate_cleanup(&clean_nat, NULL);
639 nf_conntrack_destroyed = NULL;
640 vfree(bysource);
641 nf_ct_l3proto_put(l3proto);
642}
643
644MODULE_LICENSE("GPL");
645
646module_init(nf_nat_init);
647module_exit(nf_nat_cleanup);
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
new file mode 100644
index 00000000000..98fbfc84d18
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -0,0 +1,433 @@
1/* nf_nat_helper.c - generic support functions for NAT helpers
2 *
3 * (C) 2000-2002 Harald Welte <laforge@netfilter.org>
4 * (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#include <linux/module.h>
11#include <linux/kmod.h>
12#include <linux/types.h>
13#include <linux/timer.h>
14#include <linux/skbuff.h>
15#include <linux/tcp.h>
16#include <linux/udp.h>
17#include <net/checksum.h>
18#include <net/tcp.h>
19
20#include <linux/netfilter_ipv4.h>
21#include <net/netfilter/nf_conntrack.h>
22#include <net/netfilter/nf_conntrack_helper.h>
23#include <net/netfilter/nf_conntrack_expect.h>
24#include <net/netfilter/nf_nat.h>
25#include <net/netfilter/nf_nat_protocol.h>
26#include <net/netfilter/nf_nat_core.h>
27#include <net/netfilter/nf_nat_helper.h>
28
29#if 0
30#define DEBUGP printk
31#define DUMP_OFFSET(x) printk("offset_before=%d, offset_after=%d, correction_pos=%u\n", x->offset_before, x->offset_after, x->correction_pos);
32#else
33#define DEBUGP(format, args...)
34#define DUMP_OFFSET(x)
35#endif
36
37static DEFINE_SPINLOCK(nf_nat_seqofs_lock);
38
39/* Setup TCP sequence correction given this change at this sequence */
40static inline void
41adjust_tcp_sequence(u32 seq,
42 int sizediff,
43 struct nf_conn *ct,
44 enum ip_conntrack_info ctinfo)
45{
46 int dir;
47 struct nf_nat_seq *this_way, *other_way;
48 struct nf_conn_nat *nat = nfct_nat(ct);
49
50 DEBUGP("nf_nat_resize_packet: old_size = %u, new_size = %u\n",
51 (*skb)->len, new_size);
52
53 dir = CTINFO2DIR(ctinfo);
54
55 this_way = &nat->info.seq[dir];
56 other_way = &nat->info.seq[!dir];
57
58 DEBUGP("nf_nat_resize_packet: Seq_offset before: ");
59 DUMP_OFFSET(this_way);
60
61 spin_lock_bh(&nf_nat_seqofs_lock);
62
63 /* SYN adjust. If it's uninitialized, or this is after last
64 * correction, record it: we don't handle more than one
65 * adjustment in the window, but do deal with common case of a
66 * retransmit */
67 if (this_way->offset_before == this_way->offset_after ||
68 before(this_way->correction_pos, seq)) {
69 this_way->correction_pos = seq;
70 this_way->offset_before = this_way->offset_after;
71 this_way->offset_after += sizediff;
72 }
73 spin_unlock_bh(&nf_nat_seqofs_lock);
74
75 DEBUGP("nf_nat_resize_packet: Seq_offset after: ");
76 DUMP_OFFSET(this_way);
77}
78
79/* Frobs data inside this packet, which is linear. */
80static void mangle_contents(struct sk_buff *skb,
81 unsigned int dataoff,
82 unsigned int match_offset,
83 unsigned int match_len,
84 const char *rep_buffer,
85 unsigned int rep_len)
86{
87 unsigned char *data;
88
89 BUG_ON(skb_is_nonlinear(skb));
90 data = (unsigned char *)skb->nh.iph + dataoff;
91
92 /* move post-replacement */
93 memmove(data + match_offset + rep_len,
94 data + match_offset + match_len,
95 skb->tail - (data + match_offset + match_len));
96
97 /* insert data from buffer */
98 memcpy(data + match_offset, rep_buffer, rep_len);
99
100 /* update skb info */
101 if (rep_len > match_len) {
102 DEBUGP("nf_nat_mangle_packet: Extending packet by "
103 "%u from %u bytes\n", rep_len - match_len,
104 skb->len);
105 skb_put(skb, rep_len - match_len);
106 } else {
107 DEBUGP("nf_nat_mangle_packet: Shrinking packet from "
108 "%u from %u bytes\n", match_len - rep_len,
109 skb->len);
110 __skb_trim(skb, skb->len + rep_len - match_len);
111 }
112
113 /* fix IP hdr checksum information */
114 skb->nh.iph->tot_len = htons(skb->len);
115 ip_send_check(skb->nh.iph);
116}
117
118/* Unusual, but possible case. */
119static int enlarge_skb(struct sk_buff **pskb, unsigned int extra)
120{
121 struct sk_buff *nskb;
122
123 if ((*pskb)->len + extra > 65535)
124 return 0;
125
126 nskb = skb_copy_expand(*pskb, skb_headroom(*pskb), extra, GFP_ATOMIC);
127 if (!nskb)
128 return 0;
129
130 /* Transfer socket to new skb. */
131 if ((*pskb)->sk)
132 skb_set_owner_w(nskb, (*pskb)->sk);
133 kfree_skb(*pskb);
134 *pskb = nskb;
135 return 1;
136}
137
138/* Generic function for mangling variable-length address changes inside
139 * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
140 * command in FTP).
141 *
142 * Takes care about all the nasty sequence number changes, checksumming,
143 * skb enlargement, ...
144 *
145 * */
146int
147nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
148 struct nf_conn *ct,
149 enum ip_conntrack_info ctinfo,
150 unsigned int match_offset,
151 unsigned int match_len,
152 const char *rep_buffer,
153 unsigned int rep_len)
154{
155 struct iphdr *iph;
156 struct tcphdr *tcph;
157 int oldlen, datalen;
158
159 if (!skb_make_writable(pskb, (*pskb)->len))
160 return 0;
161
162 if (rep_len > match_len &&
163 rep_len - match_len > skb_tailroom(*pskb) &&
164 !enlarge_skb(pskb, rep_len - match_len))
165 return 0;
166
167 SKB_LINEAR_ASSERT(*pskb);
168
169 iph = (*pskb)->nh.iph;
170 tcph = (void *)iph + iph->ihl*4;
171
172 oldlen = (*pskb)->len - iph->ihl*4;
173 mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4,
174 match_offset, match_len, rep_buffer, rep_len);
175
176 datalen = (*pskb)->len - iph->ihl*4;
177 if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
178 tcph->check = 0;
179 tcph->check = tcp_v4_check(tcph, datalen,
180 iph->saddr, iph->daddr,
181 csum_partial((char *)tcph,
182 datalen, 0));
183 } else
184 nf_proto_csum_replace2(&tcph->check, *pskb,
185 htons(oldlen), htons(datalen), 1);
186
187 if (rep_len != match_len) {
188 set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
189 adjust_tcp_sequence(ntohl(tcph->seq),
190 (int)rep_len - (int)match_len,
191 ct, ctinfo);
192 /* Tell TCP window tracking about seq change */
193 nf_conntrack_tcp_update(*pskb, (*pskb)->nh.iph->ihl*4,
194 ct, CTINFO2DIR(ctinfo));
195 }
196 return 1;
197}
198EXPORT_SYMBOL(nf_nat_mangle_tcp_packet);
199
200/* Generic function for mangling variable-length address changes inside
201 * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
202 * command in the Amanda protocol)
203 *
204 * Takes care about all the nasty sequence number changes, checksumming,
205 * skb enlargement, ...
206 *
207 * XXX - This function could be merged with nf_nat_mangle_tcp_packet which
208 * should be fairly easy to do.
209 */
210int
211nf_nat_mangle_udp_packet(struct sk_buff **pskb,
212 struct nf_conn *ct,
213 enum ip_conntrack_info ctinfo,
214 unsigned int match_offset,
215 unsigned int match_len,
216 const char *rep_buffer,
217 unsigned int rep_len)
218{
219 struct iphdr *iph;
220 struct udphdr *udph;
221 int datalen, oldlen;
222
223 /* UDP helpers might accidentally mangle the wrong packet */
224 iph = (*pskb)->nh.iph;
225 if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) +
226 match_offset + match_len)
227 return 0;
228
229 if (!skb_make_writable(pskb, (*pskb)->len))
230 return 0;
231
232 if (rep_len > match_len &&
233 rep_len - match_len > skb_tailroom(*pskb) &&
234 !enlarge_skb(pskb, rep_len - match_len))
235 return 0;
236
237 iph = (*pskb)->nh.iph;
238 udph = (void *)iph + iph->ihl*4;
239
240 oldlen = (*pskb)->len - iph->ihl*4;
241 mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph),
242 match_offset, match_len, rep_buffer, rep_len);
243
244 /* update the length of the UDP packet */
245 datalen = (*pskb)->len - iph->ihl*4;
246 udph->len = htons(datalen);
247
248 /* fix udp checksum if udp checksum was previously calculated */
249 if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL)
250 return 1;
251
252 if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
253 udph->check = 0;
254 udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
255 datalen, IPPROTO_UDP,
256 csum_partial((char *)udph,
257 datalen, 0));
258 if (!udph->check)
259 udph->check = CSUM_MANGLED_0;
260 } else
261 nf_proto_csum_replace2(&udph->check, *pskb,
262 htons(oldlen), htons(datalen), 1);
263
264 return 1;
265}
266EXPORT_SYMBOL(nf_nat_mangle_udp_packet);
267
268/* Adjust one found SACK option including checksum correction */
269static void
270sack_adjust(struct sk_buff *skb,
271 struct tcphdr *tcph,
272 unsigned int sackoff,
273 unsigned int sackend,
274 struct nf_nat_seq *natseq)
275{
276 while (sackoff < sackend) {
277 struct tcp_sack_block_wire *sack;
278 __be32 new_start_seq, new_end_seq;
279
280 sack = (void *)skb->data + sackoff;
281 if (after(ntohl(sack->start_seq) - natseq->offset_before,
282 natseq->correction_pos))
283 new_start_seq = htonl(ntohl(sack->start_seq)
284 - natseq->offset_after);
285 else
286 new_start_seq = htonl(ntohl(sack->start_seq)
287 - natseq->offset_before);
288
289 if (after(ntohl(sack->end_seq) - natseq->offset_before,
290 natseq->correction_pos))
291 new_end_seq = htonl(ntohl(sack->end_seq)
292 - natseq->offset_after);
293 else
294 new_end_seq = htonl(ntohl(sack->end_seq)
295 - natseq->offset_before);
296
297 DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
298 ntohl(sack->start_seq), new_start_seq,
299 ntohl(sack->end_seq), new_end_seq);
300
301 nf_proto_csum_replace4(&tcph->check, skb,
302 sack->start_seq, new_start_seq, 0);
303 nf_proto_csum_replace4(&tcph->check, skb,
304 sack->end_seq, new_end_seq, 0);
305 sack->start_seq = new_start_seq;
306 sack->end_seq = new_end_seq;
307 sackoff += sizeof(*sack);
308 }
309}
310
311/* TCP SACK sequence number adjustment */
312static inline unsigned int
313nf_nat_sack_adjust(struct sk_buff **pskb,
314 struct tcphdr *tcph,
315 struct nf_conn *ct,
316 enum ip_conntrack_info ctinfo)
317{
318 unsigned int dir, optoff, optend;
319 struct nf_conn_nat *nat = nfct_nat(ct);
320
321 optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr);
322 optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4;
323
324 if (!skb_make_writable(pskb, optend))
325 return 0;
326
327 dir = CTINFO2DIR(ctinfo);
328
329 while (optoff < optend) {
330 /* Usually: option, length. */
331 unsigned char *op = (*pskb)->data + optoff;
332
333 switch (op[0]) {
334 case TCPOPT_EOL:
335 return 1;
336 case TCPOPT_NOP:
337 optoff++;
338 continue;
339 default:
340 /* no partial options */
341 if (optoff + 1 == optend ||
342 optoff + op[1] > optend ||
343 op[1] < 2)
344 return 0;
345 if (op[0] == TCPOPT_SACK &&
346 op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
347 ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
348 sack_adjust(*pskb, tcph, optoff+2,
349 optoff+op[1],
350 &nat->info.seq[!dir]);
351 optoff += op[1];
352 }
353 }
354 return 1;
355}
356
357/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */
358int
359nf_nat_seq_adjust(struct sk_buff **pskb,
360 struct nf_conn *ct,
361 enum ip_conntrack_info ctinfo)
362{
363 struct tcphdr *tcph;
364 int dir;
365 __be32 newseq, newack;
366 struct nf_conn_nat *nat = nfct_nat(ct);
367 struct nf_nat_seq *this_way, *other_way;
368
369 dir = CTINFO2DIR(ctinfo);
370
371 this_way = &nat->info.seq[dir];
372 other_way = &nat->info.seq[!dir];
373
374 if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
375 return 0;
376
377 tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
378 if (after(ntohl(tcph->seq), this_way->correction_pos))
379 newseq = htonl(ntohl(tcph->seq) + this_way->offset_after);
380 else
381 newseq = htonl(ntohl(tcph->seq) + this_way->offset_before);
382
383 if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
384 other_way->correction_pos))
385 newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_after);
386 else
387 newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before);
388
389 nf_proto_csum_replace4(&tcph->check, *pskb, tcph->seq, newseq, 0);
390 nf_proto_csum_replace4(&tcph->check, *pskb, tcph->ack_seq, newack, 0);
391
392 DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n",
393 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
394 ntohl(newack));
395
396 tcph->seq = newseq;
397 tcph->ack_seq = newack;
398
399 if (!nf_nat_sack_adjust(pskb, tcph, ct, ctinfo))
400 return 0;
401
402 nf_conntrack_tcp_update(*pskb, (*pskb)->nh.iph->ihl*4, ct, dir);
403
404 return 1;
405}
406EXPORT_SYMBOL(nf_nat_seq_adjust);
407
408/* Setup NAT on this expected conntrack so it follows master. */
409/* If we fail to get a free NAT slot, we'll get dropped on confirm */
410void nf_nat_follow_master(struct nf_conn *ct,
411 struct nf_conntrack_expect *exp)
412{
413 struct nf_nat_range range;
414
415 /* This must be a fresh one. */
416 BUG_ON(ct->status & IPS_NAT_DONE_MASK);
417
418 /* Change src to where master sends to */
419 range.flags = IP_NAT_RANGE_MAP_IPS;
420 range.min_ip = range.max_ip
421 = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
422 /* hook doesn't matter, but it has to do source manip */
423 nf_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
424
425 /* For DST manip, map port here to where it's expected. */
426 range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
427 range.min = range.max = exp->saved_proto;
428 range.min_ip = range.max_ip
429 = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
430 /* hook doesn't matter, but it has to do destination manip */
431 nf_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
432}
433EXPORT_SYMBOL(nf_nat_follow_master);
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
new file mode 100644
index 00000000000..dcfd772972d
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -0,0 +1,86 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/types.h>
10#include <linux/init.h>
11#include <linux/ip.h>
12#include <linux/icmp.h>
13
14#include <linux/netfilter.h>
15#include <net/netfilter/nf_nat.h>
16#include <net/netfilter/nf_nat_core.h>
17#include <net/netfilter/nf_nat_rule.h>
18#include <net/netfilter/nf_nat_protocol.h>
19
20static int
21icmp_in_range(const struct nf_conntrack_tuple *tuple,
22 enum nf_nat_manip_type maniptype,
23 const union nf_conntrack_man_proto *min,
24 const union nf_conntrack_man_proto *max)
25{
26 return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
27 ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
28}
29
30static int
31icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
32 const struct nf_nat_range *range,
33 enum nf_nat_manip_type maniptype,
34 const struct nf_conn *ct)
35{
36 static u_int16_t id;
37 unsigned int range_size;
38 unsigned int i;
39
40 range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1;
41 /* If no range specified... */
42 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED))
43 range_size = 0xFFFF;
44
45 for (i = 0; i < range_size; i++, id++) {
46 tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) +
47 (id % range_size));
48 if (!nf_nat_used_tuple(tuple, ct))
49 return 1;
50 }
51 return 0;
52}
53
54static int
55icmp_manip_pkt(struct sk_buff **pskb,
56 unsigned int iphdroff,
57 const struct nf_conntrack_tuple *tuple,
58 enum nf_nat_manip_type maniptype)
59{
60 struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
61 struct icmphdr *hdr;
62 unsigned int hdroff = iphdroff + iph->ihl*4;
63
64 if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
65 return 0;
66
67 hdr = (struct icmphdr *)((*pskb)->data + hdroff);
68 nf_proto_csum_replace2(&hdr->checksum, *pskb,
69 hdr->un.echo.id, tuple->src.u.icmp.id, 0);
70 hdr->un.echo.id = tuple->src.u.icmp.id;
71 return 1;
72}
73
74struct nf_nat_protocol nf_nat_protocol_icmp = {
75 .name = "ICMP",
76 .protonum = IPPROTO_ICMP,
77 .me = THIS_MODULE,
78 .manip_pkt = icmp_manip_pkt,
79 .in_range = icmp_in_range,
80 .unique_tuple = icmp_unique_tuple,
81#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
82 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
83 .range_to_nfattr = nf_nat_port_range_to_nfattr,
84 .nfattr_to_range = nf_nat_port_nfattr_to_range,
85#endif
86};
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
new file mode 100644
index 00000000000..7e26a7e9bee
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -0,0 +1,148 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/types.h>
10#include <linux/init.h>
11#include <linux/ip.h>
12#include <linux/tcp.h>
13
14#include <linux/netfilter.h>
15#include <linux/netfilter/nfnetlink_conntrack.h>
16#include <net/netfilter/nf_nat.h>
17#include <net/netfilter/nf_nat_rule.h>
18#include <net/netfilter/nf_nat_protocol.h>
19#include <net/netfilter/nf_nat_core.h>
20
21static int
22tcp_in_range(const struct nf_conntrack_tuple *tuple,
23 enum nf_nat_manip_type maniptype,
24 const union nf_conntrack_man_proto *min,
25 const union nf_conntrack_man_proto *max)
26{
27 __be16 port;
28
29 if (maniptype == IP_NAT_MANIP_SRC)
30 port = tuple->src.u.tcp.port;
31 else
32 port = tuple->dst.u.tcp.port;
33
34 return ntohs(port) >= ntohs(min->tcp.port) &&
35 ntohs(port) <= ntohs(max->tcp.port);
36}
37
38static int
39tcp_unique_tuple(struct nf_conntrack_tuple *tuple,
40 const struct nf_nat_range *range,
41 enum nf_nat_manip_type maniptype,
42 const struct nf_conn *ct)
43{
44 static u_int16_t port;
45 __be16 *portptr;
46 unsigned int range_size, min, i;
47
48 if (maniptype == IP_NAT_MANIP_SRC)
49 portptr = &tuple->src.u.tcp.port;
50 else
51 portptr = &tuple->dst.u.tcp.port;
52
53 /* If no range specified... */
54 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
55 /* If it's dst rewrite, can't change port */
56 if (maniptype == IP_NAT_MANIP_DST)
57 return 0;
58
59 /* Map privileged onto privileged. */
60 if (ntohs(*portptr) < 1024) {
61 /* Loose convention: >> 512 is credential passing */
62 if (ntohs(*portptr)<512) {
63 min = 1;
64 range_size = 511 - min + 1;
65 } else {
66 min = 600;
67 range_size = 1023 - min + 1;
68 }
69 } else {
70 min = 1024;
71 range_size = 65535 - 1024 + 1;
72 }
73 } else {
74 min = ntohs(range->min.tcp.port);
75 range_size = ntohs(range->max.tcp.port) - min + 1;
76 }
77
78 for (i = 0; i < range_size; i++, port++) {
79 *portptr = htons(min + port % range_size);
80 if (!nf_nat_used_tuple(tuple, ct))
81 return 1;
82 }
83 return 0;
84}
85
86static int
87tcp_manip_pkt(struct sk_buff **pskb,
88 unsigned int iphdroff,
89 const struct nf_conntrack_tuple *tuple,
90 enum nf_nat_manip_type maniptype)
91{
92 struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
93 struct tcphdr *hdr;
94 unsigned int hdroff = iphdroff + iph->ihl*4;
95 __be32 oldip, newip;
96 __be16 *portptr, newport, oldport;
97 int hdrsize = 8; /* TCP connection tracking guarantees this much */
98
99 /* this could be a inner header returned in icmp packet; in such
100 cases we cannot update the checksum field since it is outside of
101 the 8 bytes of transport layer headers we are guaranteed */
102 if ((*pskb)->len >= hdroff + sizeof(struct tcphdr))
103 hdrsize = sizeof(struct tcphdr);
104
105 if (!skb_make_writable(pskb, hdroff + hdrsize))
106 return 0;
107
108 iph = (struct iphdr *)((*pskb)->data + iphdroff);
109 hdr = (struct tcphdr *)((*pskb)->data + hdroff);
110
111 if (maniptype == IP_NAT_MANIP_SRC) {
112 /* Get rid of src ip and src pt */
113 oldip = iph->saddr;
114 newip = tuple->src.u3.ip;
115 newport = tuple->src.u.tcp.port;
116 portptr = &hdr->source;
117 } else {
118 /* Get rid of dst ip and dst pt */
119 oldip = iph->daddr;
120 newip = tuple->dst.u3.ip;
121 newport = tuple->dst.u.tcp.port;
122 portptr = &hdr->dest;
123 }
124
125 oldport = *portptr;
126 *portptr = newport;
127
128 if (hdrsize < sizeof(*hdr))
129 return 1;
130
131 nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1);
132 nf_proto_csum_replace2(&hdr->check, *pskb, oldport, newport, 0);
133 return 1;
134}
135
136struct nf_nat_protocol nf_nat_protocol_tcp = {
137 .name = "TCP",
138 .protonum = IPPROTO_TCP,
139 .me = THIS_MODULE,
140 .manip_pkt = tcp_manip_pkt,
141 .in_range = tcp_in_range,
142 .unique_tuple = tcp_unique_tuple,
143#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
144 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
145 .range_to_nfattr = nf_nat_port_range_to_nfattr,
146 .nfattr_to_range = nf_nat_port_nfattr_to_range,
147#endif
148};
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
new file mode 100644
index 00000000000..ab0ce4c8699
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -0,0 +1,138 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/types.h>
10#include <linux/init.h>
11#include <linux/ip.h>
12#include <linux/udp.h>
13
14#include <linux/netfilter.h>
15#include <net/netfilter/nf_nat.h>
16#include <net/netfilter/nf_nat_core.h>
17#include <net/netfilter/nf_nat_rule.h>
18#include <net/netfilter/nf_nat_protocol.h>
19
20static int
21udp_in_range(const struct nf_conntrack_tuple *tuple,
22 enum nf_nat_manip_type maniptype,
23 const union nf_conntrack_man_proto *min,
24 const union nf_conntrack_man_proto *max)
25{
26 __be16 port;
27
28 if (maniptype == IP_NAT_MANIP_SRC)
29 port = tuple->src.u.udp.port;
30 else
31 port = tuple->dst.u.udp.port;
32
33 return ntohs(port) >= ntohs(min->udp.port) &&
34 ntohs(port) <= ntohs(max->udp.port);
35}
36
37static int
38udp_unique_tuple(struct nf_conntrack_tuple *tuple,
39 const struct nf_nat_range *range,
40 enum nf_nat_manip_type maniptype,
41 const struct nf_conn *ct)
42{
43 static u_int16_t port;
44 __be16 *portptr;
45 unsigned int range_size, min, i;
46
47 if (maniptype == IP_NAT_MANIP_SRC)
48 portptr = &tuple->src.u.udp.port;
49 else
50 portptr = &tuple->dst.u.udp.port;
51
52 /* If no range specified... */
53 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
54 /* If it's dst rewrite, can't change port */
55 if (maniptype == IP_NAT_MANIP_DST)
56 return 0;
57
58 if (ntohs(*portptr) < 1024) {
59 /* Loose convention: >> 512 is credential passing */
60 if (ntohs(*portptr)<512) {
61 min = 1;
62 range_size = 511 - min + 1;
63 } else {
64 min = 600;
65 range_size = 1023 - min + 1;
66 }
67 } else {
68 min = 1024;
69 range_size = 65535 - 1024 + 1;
70 }
71 } else {
72 min = ntohs(range->min.udp.port);
73 range_size = ntohs(range->max.udp.port) - min + 1;
74 }
75
76 for (i = 0; i < range_size; i++, port++) {
77 *portptr = htons(min + port % range_size);
78 if (!nf_nat_used_tuple(tuple, ct))
79 return 1;
80 }
81 return 0;
82}
83
84static int
85udp_manip_pkt(struct sk_buff **pskb,
86 unsigned int iphdroff,
87 const struct nf_conntrack_tuple *tuple,
88 enum nf_nat_manip_type maniptype)
89{
90 struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
91 struct udphdr *hdr;
92 unsigned int hdroff = iphdroff + iph->ihl*4;
93 __be32 oldip, newip;
94 __be16 *portptr, newport;
95
96 if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
97 return 0;
98
99 iph = (struct iphdr *)((*pskb)->data + iphdroff);
100 hdr = (struct udphdr *)((*pskb)->data + hdroff);
101
102 if (maniptype == IP_NAT_MANIP_SRC) {
103 /* Get rid of src ip and src pt */
104 oldip = iph->saddr;
105 newip = tuple->src.u3.ip;
106 newport = tuple->src.u.udp.port;
107 portptr = &hdr->source;
108 } else {
109 /* Get rid of dst ip and dst pt */
110 oldip = iph->daddr;
111 newip = tuple->dst.u3.ip;
112 newport = tuple->dst.u.udp.port;
113 portptr = &hdr->dest;
114 }
115 if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) {
116 nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1);
117 nf_proto_csum_replace2(&hdr->check, *pskb, *portptr, newport,
118 0);
119 if (!hdr->check)
120 hdr->check = CSUM_MANGLED_0;
121 }
122 *portptr = newport;
123 return 1;
124}
125
126struct nf_nat_protocol nf_nat_protocol_udp = {
127 .name = "UDP",
128 .protonum = IPPROTO_UDP,
129 .me = THIS_MODULE,
130 .manip_pkt = udp_manip_pkt,
131 .in_range = udp_in_range,
132 .unique_tuple = udp_unique_tuple,
133#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
134 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
135 .range_to_nfattr = nf_nat_port_range_to_nfattr,
136 .nfattr_to_range = nf_nat_port_nfattr_to_range,
137#endif
138};
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c
new file mode 100644
index 00000000000..f50d0203f9c
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c
@@ -0,0 +1,54 @@
1/* The "unknown" protocol. This is what is used for protocols we
2 * don't understand. It's returned by ip_ct_find_proto().
3 */
4
5/* (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/types.h>
14#include <linux/init.h>
15
16#include <linux/netfilter.h>
17#include <net/netfilter/nf_nat.h>
18#include <net/netfilter/nf_nat_rule.h>
19#include <net/netfilter/nf_nat_protocol.h>
20
21static int unknown_in_range(const struct nf_conntrack_tuple *tuple,
22 enum nf_nat_manip_type manip_type,
23 const union nf_conntrack_man_proto *min,
24 const union nf_conntrack_man_proto *max)
25{
26 return 1;
27}
28
29static int unknown_unique_tuple(struct nf_conntrack_tuple *tuple,
30 const struct nf_nat_range *range,
31 enum nf_nat_manip_type maniptype,
32 const struct nf_conn *ct)
33{
34 /* Sorry: we can't help you; if it's not unique, we can't frob
35 anything. */
36 return 0;
37}
38
39static int
40unknown_manip_pkt(struct sk_buff **pskb,
41 unsigned int iphdroff,
42 const struct nf_conntrack_tuple *tuple,
43 enum nf_nat_manip_type maniptype)
44{
45 return 1;
46}
47
48struct nf_nat_protocol nf_nat_unknown_protocol = {
49 .name = "unknown",
50 /* .me isn't set: getting a ref to this cannot fail. */
51 .manip_pkt = unknown_manip_pkt,
52 .in_range = unknown_in_range,
53 .unique_tuple = unknown_unique_tuple,
54};
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
new file mode 100644
index 00000000000..b868ee0195d
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -0,0 +1,343 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9/* Everything about the rules for NAT. */
10#include <linux/types.h>
11#include <linux/ip.h>
12#include <linux/netfilter.h>
13#include <linux/netfilter_ipv4.h>
14#include <linux/module.h>
15#include <linux/kmod.h>
16#include <linux/skbuff.h>
17#include <linux/proc_fs.h>
18#include <net/checksum.h>
19#include <net/route.h>
20#include <linux/bitops.h>
21
22#include <linux/netfilter_ipv4/ip_tables.h>
23#include <net/netfilter/nf_nat.h>
24#include <net/netfilter/nf_nat_core.h>
25#include <net/netfilter/nf_nat_rule.h>
26
27#if 0
28#define DEBUGP printk
29#else
30#define DEBUGP(format, args...)
31#endif
32
/* Hook bitmask used as .valid_hooks for the "nat" table. */
#define NAT_VALID_HOOKS			\
	((1<<NF_IP_PRE_ROUTING) |	\
	 (1<<NF_IP_POST_ROUTING) |	\
	 (1<<NF_IP_LOCAL_OUT))
34
35static struct
36{
37 struct ipt_replace repl;
38 struct ipt_standard entries[3];
39 struct ipt_error term;
40} nat_initial_table __initdata = {
41 .repl = {
42 .name = "nat",
43 .valid_hooks = NAT_VALID_HOOKS,
44 .num_entries = 4,
45 .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
46 .hook_entry = {
47 [NF_IP_PRE_ROUTING] = 0,
48 [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard),
49 [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 },
50 .underflow = {
51 [NF_IP_PRE_ROUTING] = 0,
52 [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard),
53 [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 },
54 },
55 .entries = {
56 /* PRE_ROUTING */
57 {
58 .entry = {
59 .target_offset = sizeof(struct ipt_entry),
60 .next_offset = sizeof(struct ipt_standard),
61 },
62 .target = {
63 .target = {
64 .u = {
65 .target_size = IPT_ALIGN(sizeof(struct ipt_standard_target)),
66 },
67 },
68 .verdict = -NF_ACCEPT - 1,
69 },
70 },
71 /* POST_ROUTING */
72 {
73 .entry = {
74 .target_offset = sizeof(struct ipt_entry),
75 .next_offset = sizeof(struct ipt_standard),
76 },
77 .target = {
78 .target = {
79 .u = {
80 .target_size = IPT_ALIGN(sizeof(struct ipt_standard_target)),
81 },
82 },
83 .verdict = -NF_ACCEPT - 1,
84 },
85 },
86 /* LOCAL_OUT */
87 {
88 .entry = {
89 .target_offset = sizeof(struct ipt_entry),
90 .next_offset = sizeof(struct ipt_standard),
91 },
92 .target = {
93 .target = {
94 .u = {
95 .target_size = IPT_ALIGN(sizeof(struct ipt_standard_target)),
96 },
97 },
98 .verdict = -NF_ACCEPT - 1,
99 },
100 },
101 },
102 /* ERROR */
103 .term = {
104 .entry = {
105 .target_offset = sizeof(struct ipt_entry),
106 .next_offset = sizeof(struct ipt_error),
107 },
108 .target = {
109 .target = {
110 .u = {
111 .user = {
112 .target_size = IPT_ALIGN(sizeof(struct ipt_error_target)),
113 .name = IPT_ERROR_TARGET,
114 },
115 },
116 },
117 .errorname = "ERROR",
118 },
119 }
120};
121
122static struct ipt_table nat_table = {
123 .name = "nat",
124 .valid_hooks = NAT_VALID_HOOKS,
125 .lock = RW_LOCK_UNLOCKED,
126 .me = THIS_MODULE,
127 .af = AF_INET,
128};
129
130/* Source NAT */
131static unsigned int ipt_snat_target(struct sk_buff **pskb,
132 const struct net_device *in,
133 const struct net_device *out,
134 unsigned int hooknum,
135 const struct xt_target *target,
136 const void *targinfo)
137{
138 struct nf_conn *ct;
139 enum ip_conntrack_info ctinfo;
140 const struct nf_nat_multi_range_compat *mr = targinfo;
141
142 NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING);
143
144 ct = nf_ct_get(*pskb, &ctinfo);
145
146 /* Connection must be valid and new. */
147 NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
148 ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
149 NF_CT_ASSERT(out);
150
151 return nf_nat_setup_info(ct, &mr->range[0], hooknum);
152}
153
154/* Before 2.6.11 we did implicit source NAT if required. Warn about change. */
155static void warn_if_extra_mangle(__be32 dstip, __be32 srcip)
156{
157 static int warned = 0;
158 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } };
159 struct rtable *rt;
160
161 if (ip_route_output_key(&rt, &fl) != 0)
162 return;
163
164 if (rt->rt_src != srcip && !warned) {
165 printk("NAT: no longer support implicit source local NAT\n");
166 printk("NAT: packet src %u.%u.%u.%u -> dst %u.%u.%u.%u\n",
167 NIPQUAD(srcip), NIPQUAD(dstip));
168 warned = 1;
169 }
170 ip_rt_put(rt);
171}
172
173static unsigned int ipt_dnat_target(struct sk_buff **pskb,
174 const struct net_device *in,
175 const struct net_device *out,
176 unsigned int hooknum,
177 const struct xt_target *target,
178 const void *targinfo)
179{
180 struct nf_conn *ct;
181 enum ip_conntrack_info ctinfo;
182 const struct nf_nat_multi_range_compat *mr = targinfo;
183
184 NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
185 hooknum == NF_IP_LOCAL_OUT);
186
187 ct = nf_ct_get(*pskb, &ctinfo);
188
189 /* Connection must be valid and new. */
190 NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
191
192 if (hooknum == NF_IP_LOCAL_OUT &&
193 mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
194 warn_if_extra_mangle((*pskb)->nh.iph->daddr,
195 mr->range[0].min_ip);
196
197 return nf_nat_setup_info(ct, &mr->range[0], hooknum);
198}
199
200static int ipt_snat_checkentry(const char *tablename,
201 const void *entry,
202 const struct xt_target *target,
203 void *targinfo,
204 unsigned int hook_mask)
205{
206 struct nf_nat_multi_range_compat *mr = targinfo;
207
208 /* Must be a valid range */
209 if (mr->rangesize != 1) {
210 printk("SNAT: multiple ranges no longer supported\n");
211 return 0;
212 }
213 return 1;
214}
215
216static int ipt_dnat_checkentry(const char *tablename,
217 const void *entry,
218 const struct xt_target *target,
219 void *targinfo,
220 unsigned int hook_mask)
221{
222 struct nf_nat_multi_range_compat *mr = targinfo;
223
224 /* Must be a valid range */
225 if (mr->rangesize != 1) {
226 printk("DNAT: multiple ranges no longer supported\n");
227 return 0;
228 }
229 return 1;
230}
231
232inline unsigned int
233alloc_null_binding(struct nf_conn *ct,
234 struct nf_nat_info *info,
235 unsigned int hooknum)
236{
237 /* Force range to this IP; let proto decide mapping for
238 per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
239 Use reply in case it's already been mangled (eg local packet).
240 */
241 __be32 ip
242 = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
243 ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip
244 : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
245 struct nf_nat_range range
246 = { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } };
247
248 DEBUGP("Allocating NULL binding for %p (%u.%u.%u.%u)\n",
249 ct, NIPQUAD(ip));
250 return nf_nat_setup_info(ct, &range, hooknum);
251}
252
253unsigned int
254alloc_null_binding_confirmed(struct nf_conn *ct,
255 struct nf_nat_info *info,
256 unsigned int hooknum)
257{
258 __be32 ip
259 = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
260 ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip
261 : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
262 u_int16_t all
263 = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
264 ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.all
265 : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.all);
266 struct nf_nat_range range
267 = { IP_NAT_RANGE_MAP_IPS, ip, ip, { all }, { all } };
268
269 DEBUGP("Allocating NULL binding for confirmed %p (%u.%u.%u.%u)\n",
270 ct, NIPQUAD(ip));
271 return nf_nat_setup_info(ct, &range, hooknum);
272}
273
274int nf_nat_rule_find(struct sk_buff **pskb,
275 unsigned int hooknum,
276 const struct net_device *in,
277 const struct net_device *out,
278 struct nf_conn *ct,
279 struct nf_nat_info *info)
280{
281 int ret;
282
283 ret = ipt_do_table(pskb, hooknum, in, out, &nat_table);
284
285 if (ret == NF_ACCEPT) {
286 if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
287 /* NUL mapping */
288 ret = alloc_null_binding(ct, info, hooknum);
289 }
290 return ret;
291}
292
293static struct ipt_target ipt_snat_reg = {
294 .name = "SNAT",
295 .target = ipt_snat_target,
296 .targetsize = sizeof(struct nf_nat_multi_range_compat),
297 .table = "nat",
298 .hooks = 1 << NF_IP_POST_ROUTING,
299 .checkentry = ipt_snat_checkentry,
300 .family = AF_INET,
301};
302
303static struct xt_target ipt_dnat_reg = {
304 .name = "DNAT",
305 .target = ipt_dnat_target,
306 .targetsize = sizeof(struct nf_nat_multi_range_compat),
307 .table = "nat",
308 .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT),
309 .checkentry = ipt_dnat_checkentry,
310 .family = AF_INET,
311};
312
313int __init nf_nat_rule_init(void)
314{
315 int ret;
316
317 ret = ipt_register_table(&nat_table, &nat_initial_table.repl);
318 if (ret != 0)
319 return ret;
320 ret = xt_register_target(&ipt_snat_reg);
321 if (ret != 0)
322 goto unregister_table;
323
324 ret = xt_register_target(&ipt_dnat_reg);
325 if (ret != 0)
326 goto unregister_snat;
327
328 return ret;
329
330 unregister_snat:
331 xt_unregister_target(&ipt_snat_reg);
332 unregister_table:
333 ipt_unregister_table(&nat_table);
334
335 return ret;
336}
337
/* Undo nf_nat_rule_init(): unregister the DNAT and SNAT targets and
 * then the "nat" table, i.e. in the reverse of registration order. */
void nf_nat_rule_cleanup(void)
{
	xt_unregister_target(&ipt_dnat_reg);
	xt_unregister_target(&ipt_snat_reg);
	ipt_unregister_table(&nat_table);
}
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
new file mode 100644
index 00000000000..730a7a44c88
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -0,0 +1,406 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#include <linux/types.h>
9#include <linux/icmp.h>
10#include <linux/ip.h>
11#include <linux/netfilter.h>
12#include <linux/netfilter_ipv4.h>
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/proc_fs.h>
16#include <net/ip.h>
17#include <net/checksum.h>
18#include <linux/spinlock.h>
19
20#include <net/netfilter/nf_conntrack.h>
21#include <net/netfilter/nf_conntrack_core.h>
22#include <net/netfilter/nf_nat.h>
23#include <net/netfilter/nf_nat_rule.h>
24#include <net/netfilter/nf_nat_protocol.h>
25#include <net/netfilter/nf_nat_core.h>
26#include <net/netfilter/nf_nat_helper.h>
27#include <linux/netfilter_ipv4/ip_tables.h>
28
29#if 0
30#define DEBUGP printk
31#else
32#define DEBUGP(format, args...)
33#endif
34
/* Printable name for one of the four hooks used here; any other value
 * yields "*ERROR*". */
#define HOOKNAME(hooknum)					\
	((hooknum) == NF_IP_POST_ROUTING ? "POST_ROUTING" :	\
	 (hooknum) == NF_IP_PRE_ROUTING ? "PRE_ROUTING" :	\
	 (hooknum) == NF_IP_LOCAL_OUT ? "LOCAL_OUT" :		\
	 (hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" :		\
	 "*ERROR*")
40
#ifdef CONFIG_XFRM
/* Rewrite the addresses/ports in flow key "fl" from the packet's
 * conntrack tuple.
 *
 * Nothing is done when the skb has no conntrack.  Otherwise the tuple
 * for the packet's direction is used: if the conntrack status has the
 * NAT bit selected for the destination side (IPS_DST_NAT for the
 * original direction, IPS_SRC_NAT for replies), the flow's destination
 * address is overwritten, and for TCP/UDP its destination port as well.
 * The same is then done for the source side using the other NAT bit. */
static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
{
	enum ip_conntrack_info ctinfo;
	enum ip_conntrack_dir dir;
	struct nf_conntrack_tuple *t;
	unsigned long statusbit;
	struct nf_conn *ct;
	int has_ports;

	ct = nf_ct_get(skb, &ctinfo);
	if (ct == NULL)
		return;

	dir = CTINFO2DIR(ctinfo);
	t = &ct->tuplehash[dir].tuple;
	has_ports = (t->dst.protonum == IPPROTO_TCP ||
		     t->dst.protonum == IPPROTO_UDP);

	statusbit = (dir == IP_CT_DIR_ORIGINAL) ? IPS_DST_NAT : IPS_SRC_NAT;

	if (ct->status & statusbit) {
		fl->fl4_dst = t->dst.u3.ip;
		if (has_ports)
			fl->fl_ip_dport = t->dst.u.tcp.port;
	}

	/* Switch to the other NAT status bit for the source side. */
	statusbit ^= IPS_NAT_MASK;

	if (ct->status & statusbit) {
		fl->fl4_src = t->src.u3.ip;
		if (has_ports)
			fl->fl_ip_sport = t->src.u.tcp.port;
	}
}
#endif
78
79static unsigned int
80nf_nat_fn(unsigned int hooknum,
81 struct sk_buff **pskb,
82 const struct net_device *in,
83 const struct net_device *out,
84 int (*okfn)(struct sk_buff *))
85{
86 struct nf_conn *ct;
87 enum ip_conntrack_info ctinfo;
88 struct nf_conn_nat *nat;
89 struct nf_nat_info *info;
90 /* maniptype == SRC for postrouting. */
91 enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
92
93 /* We never see fragments: conntrack defrags on pre-routing
94 and local-out, and nf_nat_out protects post-routing. */
95 NF_CT_ASSERT(!((*pskb)->nh.iph->frag_off
96 & htons(IP_MF|IP_OFFSET)));
97
98 ct = nf_ct_get(*pskb, &ctinfo);
99 /* Can't track? It's not due to stress, or conntrack would
100 have dropped it. Hence it's the user's responsibilty to
101 packet filter it out, or implement conntrack/NAT for that
102 protocol. 8) --RR */
103 if (!ct) {
104 /* Exception: ICMP redirect to new connection (not in
105 hash table yet). We must not let this through, in
106 case we're doing NAT to the same network. */
107 if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
108 struct icmphdr _hdr, *hp;
109
110 hp = skb_header_pointer(*pskb,
111 (*pskb)->nh.iph->ihl*4,
112 sizeof(_hdr), &_hdr);
113 if (hp != NULL &&
114 hp->type == ICMP_REDIRECT)
115 return NF_DROP;
116 }
117 return NF_ACCEPT;
118 }
119
120 /* Don't try to NAT if this packet is not conntracked */
121 if (ct == &nf_conntrack_untracked)
122 return NF_ACCEPT;
123
124 nat = nfct_nat(ct);
125 if (!nat)
126 return NF_DROP;
127
128 switch (ctinfo) {
129 case IP_CT_RELATED:
130 case IP_CT_RELATED+IP_CT_IS_REPLY:
131 if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
132 if (!nf_nat_icmp_reply_translation(ct, ctinfo,
133 hooknum, pskb))
134 return NF_DROP;
135 else
136 return NF_ACCEPT;
137 }
138 /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
139 case IP_CT_NEW:
140 info = &nat->info;
141
142 /* Seen it before? This can happen for loopback, retrans,
143 or local packets.. */
144 if (!nf_nat_initialized(ct, maniptype)) {
145 unsigned int ret;
146
147 if (unlikely(nf_ct_is_confirmed(ct)))
148 /* NAT module was loaded late */
149 ret = alloc_null_binding_confirmed(ct, info,
150 hooknum);
151 else if (hooknum == NF_IP_LOCAL_IN)
152 /* LOCAL_IN hook doesn't have a chain! */
153 ret = alloc_null_binding(ct, info, hooknum);
154 else
155 ret = nf_nat_rule_find(pskb, hooknum, in, out,
156 ct, info);
157
158 if (ret != NF_ACCEPT) {
159 return ret;
160 }
161 } else
162 DEBUGP("Already setup manip %s for ct %p\n",
163 maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
164 ct);
165 break;
166
167 default:
168 /* ESTABLISHED */
169 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
170 ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
171 info = &nat->info;
172 }
173
174 NF_CT_ASSERT(info);
175 return nf_nat_packet(ct, ctinfo, hooknum, pskb);
176}
177
178static unsigned int
179nf_nat_in(unsigned int hooknum,
180 struct sk_buff **pskb,
181 const struct net_device *in,
182 const struct net_device *out,
183 int (*okfn)(struct sk_buff *))
184{
185 unsigned int ret;
186 __be32 daddr = (*pskb)->nh.iph->daddr;
187
188 ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
189 if (ret != NF_DROP && ret != NF_STOLEN &&
190 daddr != (*pskb)->nh.iph->daddr) {
191 dst_release((*pskb)->dst);
192 (*pskb)->dst = NULL;
193 }
194 return ret;
195}
196
197static unsigned int
198nf_nat_out(unsigned int hooknum,
199 struct sk_buff **pskb,
200 const struct net_device *in,
201 const struct net_device *out,
202 int (*okfn)(struct sk_buff *))
203{
204#ifdef CONFIG_XFRM
205 struct nf_conn *ct;
206 enum ip_conntrack_info ctinfo;
207#endif
208 unsigned int ret;
209
210 /* root is playing with raw sockets. */
211 if ((*pskb)->len < sizeof(struct iphdr) ||
212 (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
213 return NF_ACCEPT;
214
215 ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
216#ifdef CONFIG_XFRM
217 if (ret != NF_DROP && ret != NF_STOLEN &&
218 (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) {
219 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
220
221 if (ct->tuplehash[dir].tuple.src.u3.ip !=
222 ct->tuplehash[!dir].tuple.dst.u3.ip
223 || ct->tuplehash[dir].tuple.src.u.all !=
224 ct->tuplehash[!dir].tuple.dst.u.all
225 )
226 return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP;
227 }
228#endif
229 return ret;
230}
231
232static unsigned int
233nf_nat_local_fn(unsigned int hooknum,
234 struct sk_buff **pskb,
235 const struct net_device *in,
236 const struct net_device *out,
237 int (*okfn)(struct sk_buff *))
238{
239 struct nf_conn *ct;
240 enum ip_conntrack_info ctinfo;
241 unsigned int ret;
242
243 /* root is playing with raw sockets. */
244 if ((*pskb)->len < sizeof(struct iphdr) ||
245 (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
246 return NF_ACCEPT;
247
248 ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
249 if (ret != NF_DROP && ret != NF_STOLEN &&
250 (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) {
251 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
252
253 if (ct->tuplehash[dir].tuple.dst.u3.ip !=
254 ct->tuplehash[!dir].tuple.src.u3.ip
255#ifdef CONFIG_XFRM
256 || ct->tuplehash[dir].tuple.dst.u.all !=
257 ct->tuplehash[!dir].tuple.src.u.all
258#endif
259 )
260 if (ip_route_me_harder(pskb, RTN_UNSPEC))
261 ret = NF_DROP;
262 }
263 return ret;
264}
265
266static unsigned int
267nf_nat_adjust(unsigned int hooknum,
268 struct sk_buff **pskb,
269 const struct net_device *in,
270 const struct net_device *out,
271 int (*okfn)(struct sk_buff *))
272{
273 struct nf_conn *ct;
274 enum ip_conntrack_info ctinfo;
275
276 ct = nf_ct_get(*pskb, &ctinfo);
277 if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
278 DEBUGP("nf_nat_standalone: adjusting sequence number\n");
279 if (!nf_nat_seq_adjust(pskb, ct, ctinfo))
280 return NF_DROP;
281 }
282 return NF_ACCEPT;
283}
284
285/* We must be after connection tracking and before packet filtering. */
286
287static struct nf_hook_ops nf_nat_ops[] = {
288 /* Before packet filtering, change destination */
289 {
290 .hook = nf_nat_in,
291 .owner = THIS_MODULE,
292 .pf = PF_INET,
293 .hooknum = NF_IP_PRE_ROUTING,
294 .priority = NF_IP_PRI_NAT_DST,
295 },
296 /* After packet filtering, change source */
297 {
298 .hook = nf_nat_out,
299 .owner = THIS_MODULE,
300 .pf = PF_INET,
301 .hooknum = NF_IP_POST_ROUTING,
302 .priority = NF_IP_PRI_NAT_SRC,
303 },
304 /* After conntrack, adjust sequence number */
305 {
306 .hook = nf_nat_adjust,
307 .owner = THIS_MODULE,
308 .pf = PF_INET,
309 .hooknum = NF_IP_POST_ROUTING,
310 .priority = NF_IP_PRI_NAT_SEQ_ADJUST,
311 },
312 /* Before packet filtering, change destination */
313 {
314 .hook = nf_nat_local_fn,
315 .owner = THIS_MODULE,
316 .pf = PF_INET,
317 .hooknum = NF_IP_LOCAL_OUT,
318 .priority = NF_IP_PRI_NAT_DST,
319 },
320 /* After packet filtering, change source */
321 {
322 .hook = nf_nat_fn,
323 .owner = THIS_MODULE,
324 .pf = PF_INET,
325 .hooknum = NF_IP_LOCAL_IN,
326 .priority = NF_IP_PRI_NAT_SRC,
327 },
328 /* After conntrack, adjust sequence number */
329 {
330 .hook = nf_nat_adjust,
331 .owner = THIS_MODULE,
332 .pf = PF_INET,
333 .hooknum = NF_IP_LOCAL_IN,
334 .priority = NF_IP_PRI_NAT_SEQ_ADJUST,
335 },
336};
337
338static int __init nf_nat_standalone_init(void)
339{
340 int size, ret = 0;
341
342 need_conntrack();
343
344 size = ALIGN(sizeof(struct nf_conn), __alignof__(struct nf_conn_nat)) +
345 sizeof(struct nf_conn_nat);
346 ret = nf_conntrack_register_cache(NF_CT_F_NAT, "nf_nat:base", size);
347 if (ret < 0) {
348 printk(KERN_ERR "nf_nat_init: Unable to create slab cache\n");
349 return ret;
350 }
351
352 size = ALIGN(size, __alignof__(struct nf_conn_help)) +
353 sizeof(struct nf_conn_help);
354 ret = nf_conntrack_register_cache(NF_CT_F_NAT|NF_CT_F_HELP,
355 "nf_nat:help", size);
356 if (ret < 0) {
357 printk(KERN_ERR "nf_nat_init: Unable to create slab cache\n");
358 goto cleanup_register_cache;
359 }
360#ifdef CONFIG_XFRM
361 BUG_ON(ip_nat_decode_session != NULL);
362 ip_nat_decode_session = nat_decode_session;
363#endif
364 ret = nf_nat_rule_init();
365 if (ret < 0) {
366 printk("nf_nat_init: can't setup rules.\n");
367 goto cleanup_decode_session;
368 }
369 ret = nf_register_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
370 if (ret < 0) {
371 printk("nf_nat_init: can't register hooks.\n");
372 goto cleanup_rule_init;
373 }
374 nf_nat_module_is_loaded = 1;
375 return ret;
376
377 cleanup_rule_init:
378 nf_nat_rule_cleanup();
379 cleanup_decode_session:
380#ifdef CONFIG_XFRM
381 ip_nat_decode_session = NULL;
382 synchronize_net();
383#endif
384 nf_conntrack_unregister_cache(NF_CT_F_NAT|NF_CT_F_HELP);
385 cleanup_register_cache:
386 nf_conntrack_unregister_cache(NF_CT_F_NAT);
387 return ret;
388}
389
/* Module teardown: unregister the netfilter hooks, remove the NAT
 * rules, clear the "module loaded" flag and, with CONFIG_XFRM, detach
 * the session decoder followed by synchronize_net(). */
static void __exit nf_nat_standalone_fini(void)
{
	nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
	nf_nat_rule_cleanup();
	nf_nat_module_is_loaded = 0;
#ifdef CONFIG_XFRM
	ip_nat_decode_session = NULL;
	synchronize_net();
#endif
	/* Conntrack caches are unregistered in nf_conntrack_cleanup */
}
401
402module_init(nf_nat_standalone_init);
403module_exit(nf_nat_standalone_fini);
404
405MODULE_LICENSE("GPL");
406MODULE_ALIAS("ip_nat");