diff options
author | Hannes Eder <heder@google.com> | 2010-07-23 06:48:52 -0400 |
---|---|---|
committer | Patrick McHardy <kaber@trash.net> | 2010-07-23 06:48:52 -0400 |
commit | 7f1c407579519e71a0dcadc05614fd98acec585e (patch) | |
tree | 623a63992113a539a3cd43031e58aad83f04bf34 | |
parent | 7b215ffc3885a38182d3d49ceb41d0a81c3e041a (diff) |
IPVS: make FTP work with full NAT support
Use nf_conntrack/nf_nat code to do the packet mangling and the TCP
sequence adjusting. The function 'ip_vs_skb_replace' is now dead
code, so it is removed.
To SNAT FTP, use something like:
% iptables -t nat -A POSTROUTING -m ipvs --vaddr 192.168.100.30/32 \
--vport 21 -j SNAT --to-source 192.168.10.10
and for the data connections in passive mode:
% iptables -t nat -A POSTROUTING -m ipvs --vaddr 192.168.100.30/32 \
--vportctl 21 -j SNAT --to-source 192.168.10.10
Using '-m state --state RELATED' would also work.
Make sure the kernel modules ip_vs_ftp, nf_conntrack_ftp, and
nf_nat_ftp are loaded.
[ up-port and minor fixes by Simon Horman <horms@verge.net.au> ]
Signed-off-by: Hannes Eder <heder@google.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Patrick McHardy <kaber@trash.net>
-rw-r--r-- | include/net/ip_vs.h | 2 | ||||
-rw-r--r-- | net/netfilter/ipvs/Kconfig | 2 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_app.c | 43 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_core.c | 1 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_ftp.c | 176 |
5 files changed, 165 insertions, 59 deletions
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index fe82b1e10a29..1f9e51180bdb 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h | |||
@@ -736,8 +736,6 @@ extern void ip_vs_app_inc_put(struct ip_vs_app *inc); | |||
736 | 736 | ||
737 | extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb); | 737 | extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb); |
738 | extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb); | 738 | extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb); |
739 | extern int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, | ||
740 | char *o_buf, int o_len, char *n_buf, int n_len); | ||
741 | extern int ip_vs_app_init(void); | 739 | extern int ip_vs_app_init(void); |
742 | extern void ip_vs_app_cleanup(void); | 740 | extern void ip_vs_app_cleanup(void); |
743 | 741 | ||
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 366244492ac7..be10f6526042 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig | |||
@@ -235,7 +235,7 @@ comment 'IPVS application helper' | |||
235 | 235 | ||
236 | config IP_VS_FTP | 236 | config IP_VS_FTP |
237 | tristate "FTP protocol helper" | 237 | tristate "FTP protocol helper" |
238 | depends on IP_VS_PROTO_TCP | 238 | depends on IP_VS_PROTO_TCP && NF_NAT |
239 | ---help--- | 239 | ---help--- |
240 | FTP is a protocol that transfers IP address and/or port number in | 240 | FTP is a protocol that transfers IP address and/or port number in |
241 | the payload. In the virtual server via Network Address Translation, | 241 | the payload. In the virtual server via Network Address Translation, |
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 1cb0e834f8ff..e76f87f4aca8 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c | |||
@@ -569,49 +569,6 @@ static const struct file_operations ip_vs_app_fops = { | |||
569 | }; | 569 | }; |
570 | #endif | 570 | #endif |
571 | 571 | ||
572 | |||
573 | /* | ||
574 | * Replace a segment of data with a new segment | ||
575 | */ | ||
576 | int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, | ||
577 | char *o_buf, int o_len, char *n_buf, int n_len) | ||
578 | { | ||
579 | int diff; | ||
580 | int o_offset; | ||
581 | int o_left; | ||
582 | |||
583 | EnterFunction(9); | ||
584 | |||
585 | diff = n_len - o_len; | ||
586 | o_offset = o_buf - (char *)skb->data; | ||
587 | /* The length of left data after o_buf+o_len in the skb data */ | ||
588 | o_left = skb->len - (o_offset + o_len); | ||
589 | |||
590 | if (diff <= 0) { | ||
591 | memmove(o_buf + n_len, o_buf + o_len, o_left); | ||
592 | memcpy(o_buf, n_buf, n_len); | ||
593 | skb_trim(skb, skb->len + diff); | ||
594 | } else if (diff <= skb_tailroom(skb)) { | ||
595 | skb_put(skb, diff); | ||
596 | memmove(o_buf + n_len, o_buf + o_len, o_left); | ||
597 | memcpy(o_buf, n_buf, n_len); | ||
598 | } else { | ||
599 | if (pskb_expand_head(skb, skb_headroom(skb), diff, pri)) | ||
600 | return -ENOMEM; | ||
601 | skb_put(skb, diff); | ||
602 | memmove(skb->data + o_offset + n_len, | ||
603 | skb->data + o_offset + o_len, o_left); | ||
604 | skb_copy_to_linear_data_offset(skb, o_offset, n_buf, n_len); | ||
605 | } | ||
606 | |||
607 | /* must update the iph total length here */ | ||
608 | ip_hdr(skb)->tot_len = htons(skb->len); | ||
609 | |||
610 | LeaveFunction(9); | ||
611 | return 0; | ||
612 | } | ||
613 | |||
614 | |||
615 | int __init ip_vs_app_init(void) | 572 | int __init ip_vs_app_init(void) |
616 | { | 573 | { |
617 | /* we will replace it with proc_net_ipvs_create() soon */ | 574 | /* we will replace it with proc_net_ipvs_create() soon */ |
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 58f82dfc950a..4f8ddba48011 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c | |||
@@ -54,7 +54,6 @@ | |||
54 | 54 | ||
55 | EXPORT_SYMBOL(register_ip_vs_scheduler); | 55 | EXPORT_SYMBOL(register_ip_vs_scheduler); |
56 | EXPORT_SYMBOL(unregister_ip_vs_scheduler); | 56 | EXPORT_SYMBOL(unregister_ip_vs_scheduler); |
57 | EXPORT_SYMBOL(ip_vs_skb_replace); | ||
58 | EXPORT_SYMBOL(ip_vs_proto_name); | 57 | EXPORT_SYMBOL(ip_vs_proto_name); |
59 | EXPORT_SYMBOL(ip_vs_conn_new); | 58 | EXPORT_SYMBOL(ip_vs_conn_new); |
60 | EXPORT_SYMBOL(ip_vs_conn_in_get); | 59 | EXPORT_SYMBOL(ip_vs_conn_in_get); |
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 2ae747a376a5..f228a17ec649 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c | |||
@@ -20,6 +20,17 @@ | |||
20 | * | 20 | * |
21 | * Author: Wouter Gadeyne | 21 | * Author: Wouter Gadeyne |
22 | * | 22 | * |
23 | * | ||
24 | * Code for ip_vs_expect_related and ip_vs_expect_callback is taken from | ||
25 | * http://www.ssi.bg/~ja/nfct/: | ||
26 | * | ||
27 | * ip_vs_nfct.c: Netfilter connection tracking support for IPVS | ||
28 | * | ||
29 | * Portions Copyright (C) 2001-2002 | ||
30 | * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland. | ||
31 | * | ||
32 | * Portions Copyright (C) 2003-2008 | ||
33 | * Julian Anastasov | ||
23 | */ | 34 | */ |
24 | 35 | ||
25 | #define KMSG_COMPONENT "IPVS" | 36 | #define KMSG_COMPONENT "IPVS" |
@@ -32,6 +43,9 @@ | |||
32 | #include <linux/in.h> | 43 | #include <linux/in.h> |
33 | #include <linux/ip.h> | 44 | #include <linux/ip.h> |
34 | #include <linux/netfilter.h> | 45 | #include <linux/netfilter.h> |
46 | #include <net/netfilter/nf_conntrack.h> | ||
47 | #include <net/netfilter/nf_conntrack_expect.h> | ||
48 | #include <net/netfilter/nf_nat_helper.h> | ||
35 | #include <linux/gfp.h> | 49 | #include <linux/gfp.h> |
36 | #include <net/protocol.h> | 50 | #include <net/protocol.h> |
37 | #include <net/tcp.h> | 51 | #include <net/tcp.h> |
@@ -43,6 +57,16 @@ | |||
43 | #define SERVER_STRING "227 Entering Passive Mode (" | 57 | #define SERVER_STRING "227 Entering Passive Mode (" |
44 | #define CLIENT_STRING "PORT " | 58 | #define CLIENT_STRING "PORT " |
45 | 59 | ||
60 | #define FMT_TUPLE "%pI4:%u->%pI4:%u/%u" | ||
61 | #define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \ | ||
62 | &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \ | ||
63 | (T)->dst.protonum | ||
64 | |||
65 | #define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u" | ||
66 | #define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \ | ||
67 | &((C)->vaddr.ip), ntohs((C)->vport), \ | ||
68 | &((C)->daddr.ip), ntohs((C)->dport), \ | ||
69 | (C)->protocol, (C)->state | ||
46 | 70 | ||
47 | /* | 71 | /* |
48 | * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper | 72 | * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper |
@@ -123,6 +147,119 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, | |||
123 | return 1; | 147 | return 1; |
124 | } | 148 | } |
125 | 149 | ||
150 | /* | ||
151 | * Called from init_conntrack() as expectfn handler. | ||
152 | */ | ||
153 | static void | ||
154 | ip_vs_expect_callback(struct nf_conn *ct, | ||
155 | struct nf_conntrack_expect *exp) | ||
156 | { | ||
157 | struct nf_conntrack_tuple *orig, new_reply; | ||
158 | struct ip_vs_conn *cp; | ||
159 | |||
160 | if (exp->tuple.src.l3num != PF_INET) | ||
161 | return; | ||
162 | |||
163 | /* | ||
164 | * We assume that no NF locks are held before this callback. | ||
165 | * ip_vs_conn_out_get and ip_vs_conn_in_get should match their | ||
166 | * expectations even if they use wildcard values, now we provide the | ||
167 | * actual values from the newly created original conntrack direction. | ||
168 | * The conntrack is confirmed when packet reaches IPVS hooks. | ||
169 | */ | ||
170 | |||
171 | /* RS->CLIENT */ | ||
172 | orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; | ||
173 | cp = ip_vs_conn_out_get(exp->tuple.src.l3num, orig->dst.protonum, | ||
174 | &orig->src.u3, orig->src.u.tcp.port, | ||
175 | &orig->dst.u3, orig->dst.u.tcp.port); | ||
176 | if (cp) { | ||
177 | /* Change reply CLIENT->RS to CLIENT->VS */ | ||
178 | new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | ||
179 | IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " | ||
180 | FMT_TUPLE ", found inout cp=" FMT_CONN "\n", | ||
181 | __func__, ct, ct->status, | ||
182 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | ||
183 | ARG_CONN(cp)); | ||
184 | new_reply.dst.u3 = cp->vaddr; | ||
185 | new_reply.dst.u.tcp.port = cp->vport; | ||
186 | IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE | ||
187 | ", inout cp=" FMT_CONN "\n", | ||
188 | __func__, ct, | ||
189 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | ||
190 | ARG_CONN(cp)); | ||
191 | goto alter; | ||
192 | } | ||
193 | |||
194 | /* CLIENT->VS */ | ||
195 | cp = ip_vs_conn_in_get(exp->tuple.src.l3num, orig->dst.protonum, | ||
196 | &orig->src.u3, orig->src.u.tcp.port, | ||
197 | &orig->dst.u3, orig->dst.u.tcp.port); | ||
198 | if (cp) { | ||
199 | /* Change reply VS->CLIENT to RS->CLIENT */ | ||
200 | new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | ||
201 | IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " | ||
202 | FMT_TUPLE ", found outin cp=" FMT_CONN "\n", | ||
203 | __func__, ct, ct->status, | ||
204 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | ||
205 | ARG_CONN(cp)); | ||
206 | new_reply.src.u3 = cp->daddr; | ||
207 | new_reply.src.u.tcp.port = cp->dport; | ||
208 | IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", " | ||
209 | FMT_TUPLE ", outin cp=" FMT_CONN "\n", | ||
210 | __func__, ct, | ||
211 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | ||
212 | ARG_CONN(cp)); | ||
213 | goto alter; | ||
214 | } | ||
215 | |||
216 | IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuple=" FMT_TUPLE | ||
217 | " - unknown expect\n", | ||
218 | __func__, ct, ct->status, ARG_TUPLE(orig)); | ||
219 | return; | ||
220 | |||
221 | alter: | ||
222 | /* Never alter conntrack for non-NAT conns */ | ||
223 | if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ) | ||
224 | nf_conntrack_alter_reply(ct, &new_reply); | ||
225 | ip_vs_conn_put(cp); | ||
226 | return; | ||
227 | } | ||
228 | |||
229 | /* | ||
230 | * Create NF conntrack expectation with wildcard (optional) source port. | ||
231 | * Then the default callback function will alter the reply and will confirm | ||
232 | * the conntrack entry when the first packet comes. | ||
233 | */ | ||
234 | static void | ||
235 | ip_vs_expect_related(struct sk_buff *skb, struct nf_conn *ct, | ||
236 | struct ip_vs_conn *cp, u_int8_t proto, | ||
237 | const __be16 *port, int from_rs) | ||
238 | { | ||
239 | struct nf_conntrack_expect *exp; | ||
240 | |||
241 | BUG_ON(!ct || ct == &nf_conntrack_untracked); | ||
242 | |||
243 | exp = nf_ct_expect_alloc(ct); | ||
244 | if (!exp) | ||
245 | return; | ||
246 | |||
247 | if (from_rs) | ||
248 | nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, | ||
249 | nf_ct_l3num(ct), &cp->daddr, &cp->caddr, | ||
250 | proto, port, &cp->cport); | ||
251 | else | ||
252 | nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, | ||
253 | nf_ct_l3num(ct), &cp->caddr, &cp->vaddr, | ||
254 | proto, port, &cp->vport); | ||
255 | |||
256 | exp->expectfn = ip_vs_expect_callback; | ||
257 | |||
258 | IP_VS_DBG(7, "%s(): ct=%p, expect tuple=" FMT_TUPLE "\n", | ||
259 | __func__, ct, ARG_TUPLE(&exp->tuple)); | ||
260 | nf_ct_expect_related(exp); | ||
261 | nf_ct_expect_put(exp); | ||
262 | } | ||
126 | 263 | ||
127 | /* | 264 | /* |
128 | * Look at outgoing ftp packets to catch the response to a PASV command | 265 | * Look at outgoing ftp packets to catch the response to a PASV command |
@@ -149,7 +286,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
149 | struct ip_vs_conn *n_cp; | 286 | struct ip_vs_conn *n_cp; |
150 | char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */ | 287 | char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */ |
151 | unsigned buf_len; | 288 | unsigned buf_len; |
152 | int ret; | 289 | int ret = 0; |
290 | enum ip_conntrack_info ctinfo; | ||
291 | struct nf_conn *ct; | ||
153 | 292 | ||
154 | #ifdef CONFIG_IP_VS_IPV6 | 293 | #ifdef CONFIG_IP_VS_IPV6 |
155 | /* This application helper doesn't work with IPv6 yet, | 294 | /* This application helper doesn't work with IPv6 yet, |
@@ -219,19 +358,26 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
219 | 358 | ||
220 | buf_len = strlen(buf); | 359 | buf_len = strlen(buf); |
221 | 360 | ||
361 | ct = nf_ct_get(skb, &ctinfo); | ||
362 | if (ct && !nf_ct_is_untracked(ct)) { | ||
363 | /* If mangling fails this function will return 0 | ||
364 | * which will cause the packet to be dropped. | ||
365 | * Mangling can only fail under memory pressure, | ||
366 | * hopefully it will succeed on the retransmitted | ||
367 | * packet. | ||
368 | */ | ||
369 | ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, | ||
370 | start-data, end-start, | ||
371 | buf, buf_len); | ||
372 | if (ret) | ||
373 | ip_vs_expect_related(skb, ct, n_cp, | ||
374 | IPPROTO_TCP, NULL, 0); | ||
375 | } | ||
376 | |||
222 | /* | 377 | /* |
223 | * Calculate required delta-offset to keep TCP happy | 378 | * Not setting 'diff' is intentional, otherwise the sequence |
379 | * would be adjusted twice. | ||
224 | */ | 380 | */ |
225 | *diff = buf_len - (end-start); | ||
226 | |||
227 | if (*diff == 0) { | ||
228 | /* simply replace it with new passive address */ | ||
229 | memcpy(start, buf, buf_len); | ||
230 | ret = 1; | ||
231 | } else { | ||
232 | ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start, | ||
233 | end-start, buf, buf_len); | ||
234 | } | ||
235 | 381 | ||
236 | cp->app_data = NULL; | 382 | cp->app_data = NULL; |
237 | ip_vs_tcp_conn_listen(n_cp); | 383 | ip_vs_tcp_conn_listen(n_cp); |
@@ -263,6 +409,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
263 | union nf_inet_addr to; | 409 | union nf_inet_addr to; |
264 | __be16 port; | 410 | __be16 port; |
265 | struct ip_vs_conn *n_cp; | 411 | struct ip_vs_conn *n_cp; |
412 | struct nf_conn *ct; | ||
266 | 413 | ||
267 | #ifdef CONFIG_IP_VS_IPV6 | 414 | #ifdef CONFIG_IP_VS_IPV6 |
268 | /* This application helper doesn't work with IPv6 yet, | 415 | /* This application helper doesn't work with IPv6 yet, |
@@ -349,6 +496,11 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
349 | ip_vs_control_add(n_cp, cp); | 496 | ip_vs_control_add(n_cp, cp); |
350 | } | 497 | } |
351 | 498 | ||
499 | ct = (struct nf_conn *)skb->nfct; | ||
500 | if (ct && ct != &nf_conntrack_untracked) | ||
501 | ip_vs_expect_related(skb, ct, n_cp, | ||
502 | IPPROTO_TCP, &n_cp->dport, 1); | ||
503 | |||
352 | /* | 504 | /* |
353 | * Move tunnel to listen state | 505 | * Move tunnel to listen state |
354 | */ | 506 | */ |