diff options
-rw-r--r-- | include/linux/ip_vs.h | 2 | ||||
-rw-r--r-- | include/net/ip_vs.h | 44 | ||||
-rw-r--r-- | net/netfilter/ipvs/Kconfig | 13 | ||||
-rw-r--r-- | net/netfilter/ipvs/Makefile | 5 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_conn.c | 13 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_core.c | 46 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_ctl.c | 12 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_ftp.c | 146 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_nfct.c | 292 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_xmit.c | 98 |
10 files changed, 475 insertions, 196 deletions
diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h index 003d75f6ffe1..df7728613720 100644 --- a/include/linux/ip_vs.h +++ b/include/linux/ip_vs.h | |||
@@ -90,10 +90,12 @@ | |||
90 | #define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */ | 90 | #define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */ |
91 | 91 | ||
92 | /* Flags that are not sent to backup server start from bit 16 */ | 92 | /* Flags that are not sent to backup server start from bit 16 */ |
93 | #define IP_VS_CONN_F_NFCT (1 << 16) /* use netfilter conntrack */ | ||
93 | 94 | ||
94 | /* Connection flags from destination that can be changed by user space */ | 95 | /* Connection flags from destination that can be changed by user space */ |
95 | #define IP_VS_CONN_F_DEST_MASK (IP_VS_CONN_F_FWD_MASK | \ | 96 | #define IP_VS_CONN_F_DEST_MASK (IP_VS_CONN_F_FWD_MASK | \ |
96 | IP_VS_CONN_F_ONE_PACKET | \ | 97 | IP_VS_CONN_F_ONE_PACKET | \ |
98 | IP_VS_CONN_F_NFCT | \ | ||
97 | 0) | 99 | 0) |
98 | 100 | ||
99 | #define IP_VS_SCHEDNAME_MAXLEN 16 | 101 | #define IP_VS_SCHEDNAME_MAXLEN 16 |
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 62698a9c1631..e8ec5231eae9 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h | |||
@@ -25,7 +25,9 @@ | |||
25 | #include <linux/ip.h> | 25 | #include <linux/ip.h> |
26 | #include <linux/ipv6.h> /* for struct ipv6hdr */ | 26 | #include <linux/ipv6.h> /* for struct ipv6hdr */ |
27 | #include <net/ipv6.h> /* for ipv6_addr_copy */ | 27 | #include <net/ipv6.h> /* for ipv6_addr_copy */ |
28 | 28 | #ifdef CONFIG_IP_VS_NFCT | |
29 | #include <net/netfilter/nf_conntrack.h> | ||
30 | #endif | ||
29 | 31 | ||
30 | /* Connections' size value needed by ip_vs_ctl.c */ | 32 | /* Connections' size value needed by ip_vs_ctl.c */ |
31 | extern int ip_vs_conn_tab_size; | 33 | extern int ip_vs_conn_tab_size; |
@@ -798,6 +800,7 @@ extern int sysctl_ip_vs_expire_nodest_conn; | |||
798 | extern int sysctl_ip_vs_expire_quiescent_template; | 800 | extern int sysctl_ip_vs_expire_quiescent_template; |
799 | extern int sysctl_ip_vs_sync_threshold[2]; | 801 | extern int sysctl_ip_vs_sync_threshold[2]; |
800 | extern int sysctl_ip_vs_nat_icmp_send; | 802 | extern int sysctl_ip_vs_nat_icmp_send; |
803 | extern int sysctl_ip_vs_conntrack; | ||
801 | extern struct ip_vs_stats ip_vs_stats; | 804 | extern struct ip_vs_stats ip_vs_stats; |
802 | extern const struct ctl_path net_vs_ctl_path[]; | 805 | extern const struct ctl_path net_vs_ctl_path[]; |
803 | 806 | ||
@@ -955,8 +958,47 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum) | |||
955 | return csum_partial(diff, sizeof(diff), oldsum); | 958 | return csum_partial(diff, sizeof(diff), oldsum); |
956 | } | 959 | } |
957 | 960 | ||
961 | #ifdef CONFIG_IP_VS_NFCT | ||
962 | /* | ||
963 | * Netfilter connection tracking | ||
964 | * (from ip_vs_nfct.c) | ||
965 | */ | ||
966 | static inline int ip_vs_conntrack_enabled(void) | ||
967 | { | ||
968 | return sysctl_ip_vs_conntrack; | ||
969 | } | ||
970 | |||
958 | extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, | 971 | extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, |
959 | int outin); | 972 | int outin); |
973 | extern int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp); | ||
974 | extern void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct, | ||
975 | struct ip_vs_conn *cp, u_int8_t proto, | ||
976 | const __be16 port, int from_rs); | ||
977 | extern void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp); | ||
978 | |||
979 | #else | ||
980 | |||
981 | static inline int ip_vs_conntrack_enabled(void) | ||
982 | { | ||
983 | return 0; | ||
984 | } | ||
985 | |||
986 | static inline void ip_vs_update_conntrack(struct sk_buff *skb, | ||
987 | struct ip_vs_conn *cp, int outin) | ||
988 | { | ||
989 | } | ||
990 | |||
991 | static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, | ||
992 | struct ip_vs_conn *cp) | ||
993 | { | ||
994 | return NF_ACCEPT; | ||
995 | } | ||
996 | |||
997 | static inline void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) | ||
998 | { | ||
999 | } | ||
1000 | /* CONFIG_IP_VS_NFCT */ | ||
1001 | #endif | ||
960 | 1002 | ||
961 | #endif /* __KERNEL__ */ | 1003 | #endif /* __KERNEL__ */ |
962 | 1004 | ||
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 46a77d5c3887..af3c9f48f2d7 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig | |||
@@ -3,7 +3,7 @@ | |||
3 | # | 3 | # |
4 | menuconfig IP_VS | 4 | menuconfig IP_VS |
5 | tristate "IP virtual server support" | 5 | tristate "IP virtual server support" |
6 | depends on NET && INET && NETFILTER && NF_CONNTRACK | 6 | depends on NET && INET && NETFILTER |
7 | ---help--- | 7 | ---help--- |
8 | IP Virtual Server support will let you build a high-performance | 8 | IP Virtual Server support will let you build a high-performance |
9 | virtual server based on cluster of two or more real servers. This | 9 | virtual server based on cluster of two or more real servers. This |
@@ -235,7 +235,8 @@ comment 'IPVS application helper' | |||
235 | 235 | ||
236 | config IP_VS_FTP | 236 | config IP_VS_FTP |
237 | tristate "FTP protocol helper" | 237 | tristate "FTP protocol helper" |
238 | depends on IP_VS_PROTO_TCP && NF_NAT | 238 | depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT |
239 | select IP_VS_NFCT | ||
239 | ---help--- | 240 | ---help--- |
240 | FTP is a protocol that transfers IP address and/or port number in | 241 | FTP is a protocol that transfers IP address and/or port number in |
241 | the payload. In the virtual server via Network Address Translation, | 242 | the payload. In the virtual server via Network Address Translation, |
@@ -247,4 +248,12 @@ config IP_VS_FTP | |||
247 | If you want to compile it in kernel, say Y. To compile it as a | 248 | If you want to compile it in kernel, say Y. To compile it as a |
248 | module, choose M here. If unsure, say N. | 249 | module, choose M here. If unsure, say N. |
249 | 250 | ||
251 | config IP_VS_NFCT | ||
252 | bool "Netfilter connection tracking" | ||
253 | depends on NF_CONNTRACK | ||
254 | ---help--- | ||
255 | The Netfilter connection tracking support allows the IPVS | ||
256 | connection state to be exported to the Netfilter framework | ||
257 | for filtering purposes. | ||
258 | |||
250 | endif # IP_VS | 259 | endif # IP_VS |
diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile index e3baefd7066e..349fe8819b89 100644 --- a/net/netfilter/ipvs/Makefile +++ b/net/netfilter/ipvs/Makefile | |||
@@ -9,10 +9,13 @@ ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o | |||
9 | ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o | 9 | ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o |
10 | ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_SCTP) += ip_vs_proto_sctp.o | 10 | ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_SCTP) += ip_vs_proto_sctp.o |
11 | 11 | ||
12 | ip_vs-extra_objs-y := | ||
13 | ip_vs-extra_objs-$(CONFIG_IP_VS_NFCT) += ip_vs_nfct.o | ||
14 | |||
12 | ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \ | 15 | ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \ |
13 | ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \ | 16 | ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \ |
14 | ip_vs_est.o ip_vs_proto.o \ | 17 | ip_vs_est.o ip_vs_proto.o \ |
15 | $(ip_vs_proto-objs-y) | 18 | $(ip_vs_proto-objs-y) $(ip_vs-extra_objs-y) |
16 | 19 | ||
17 | 20 | ||
18 | # IPVS core | 21 | # IPVS core |
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 9fe1da7bcf16..a970d9691496 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c | |||
@@ -721,6 +721,9 @@ static void ip_vs_conn_expire(unsigned long data) | |||
721 | if (cp->control) | 721 | if (cp->control) |
722 | ip_vs_control_del(cp); | 722 | ip_vs_control_del(cp); |
723 | 723 | ||
724 | if (cp->flags & IP_VS_CONN_F_NFCT) | ||
725 | ip_vs_conn_drop_conntrack(cp); | ||
726 | |||
724 | if (unlikely(cp->app != NULL)) | 727 | if (unlikely(cp->app != NULL)) |
725 | ip_vs_unbind_app(cp); | 728 | ip_vs_unbind_app(cp); |
726 | ip_vs_unbind_dest(cp); | 729 | ip_vs_unbind_dest(cp); |
@@ -816,6 +819,16 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport, | |||
816 | if (unlikely(pp && atomic_read(&pp->appcnt))) | 819 | if (unlikely(pp && atomic_read(&pp->appcnt))) |
817 | ip_vs_bind_app(cp, pp); | 820 | ip_vs_bind_app(cp, pp); |
818 | 821 | ||
822 | /* | ||
823 | * Allow conntrack to be preserved. By default, conntrack | ||
824 | * is created and destroyed for every packet. | ||
825 | * Sometimes keeping conntrack can be useful for | ||
826 | * IP_VS_CONN_F_ONE_PACKET too. | ||
827 | */ | ||
828 | |||
829 | if (ip_vs_conntrack_enabled()) | ||
830 | cp->flags |= IP_VS_CONN_F_NFCT; | ||
831 | |||
819 | /* Hash it in the ip_vs_conn_tab finally */ | 832 | /* Hash it in the ip_vs_conn_tab finally */ |
820 | ip_vs_conn_hash(cp); | 833 | ip_vs_conn_hash(cp); |
821 | 834 | ||
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 319991d4d251..7fbc80d81fe8 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c | |||
@@ -537,6 +537,23 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, | |||
537 | return NF_DROP; | 537 | return NF_DROP; |
538 | } | 538 | } |
539 | 539 | ||
540 | /* | ||
541 | * It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING | ||
542 | * chain and is used to avoid double NAT and confirmation when we do | ||
543 | * not want to keep the conntrack structure | ||
544 | */ | ||
545 | static unsigned int ip_vs_post_routing(unsigned int hooknum, | ||
546 | struct sk_buff *skb, | ||
547 | const struct net_device *in, | ||
548 | const struct net_device *out, | ||
549 | int (*okfn)(struct sk_buff *)) | ||
550 | { | ||
551 | if (!skb->ipvs_property) | ||
552 | return NF_ACCEPT; | ||
553 | /* The packet was sent from IPVS, exit this chain */ | ||
554 | return NF_STOP; | ||
555 | } | ||
556 | |||
540 | __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) | 557 | __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) |
541 | { | 558 | { |
542 | return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); | 559 | return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); |
@@ -695,7 +712,10 @@ static int handle_response_icmp(int af, struct sk_buff *skb, | |||
695 | /* do the statistics and put it back */ | 712 | /* do the statistics and put it back */ |
696 | ip_vs_out_stats(cp, skb); | 713 | ip_vs_out_stats(cp, skb); |
697 | 714 | ||
698 | skb->ipvs_property = 1; | 715 | if (!(cp->flags & IP_VS_CONN_F_NFCT)) |
716 | skb->ipvs_property = 1; | ||
717 | else | ||
718 | ip_vs_update_conntrack(skb, cp, 0); | ||
699 | verdict = NF_ACCEPT; | 719 | verdict = NF_ACCEPT; |
700 | 720 | ||
701 | out: | 721 | out: |
@@ -928,17 +948,19 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
928 | 948 | ||
929 | ip_vs_out_stats(cp, skb); | 949 | ip_vs_out_stats(cp, skb); |
930 | ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); | 950 | ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); |
931 | ip_vs_update_conntrack(skb, cp, 0); | 951 | if (!(cp->flags & IP_VS_CONN_F_NFCT)) |
952 | skb->ipvs_property = 1; | ||
953 | else | ||
954 | ip_vs_update_conntrack(skb, cp, 0); | ||
932 | ip_vs_conn_put(cp); | 955 | ip_vs_conn_put(cp); |
933 | 956 | ||
934 | skb->ipvs_property = 1; | ||
935 | |||
936 | LeaveFunction(11); | 957 | LeaveFunction(11); |
937 | return NF_ACCEPT; | 958 | return NF_ACCEPT; |
938 | 959 | ||
939 | drop: | 960 | drop: |
940 | ip_vs_conn_put(cp); | 961 | ip_vs_conn_put(cp); |
941 | kfree_skb(skb); | 962 | kfree_skb(skb); |
963 | LeaveFunction(11); | ||
942 | return NF_STOLEN; | 964 | return NF_STOLEN; |
943 | } | 965 | } |
944 | 966 | ||
@@ -1483,6 +1505,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { | |||
1483 | .hooknum = NF_INET_FORWARD, | 1505 | .hooknum = NF_INET_FORWARD, |
1484 | .priority = 99, | 1506 | .priority = 99, |
1485 | }, | 1507 | }, |
1508 | /* Before the netfilter connection tracking, exit from POST_ROUTING */ | ||
1509 | { | ||
1510 | .hook = ip_vs_post_routing, | ||
1511 | .owner = THIS_MODULE, | ||
1512 | .pf = PF_INET, | ||
1513 | .hooknum = NF_INET_POST_ROUTING, | ||
1514 | .priority = NF_IP_PRI_NAT_SRC-1, | ||
1515 | }, | ||
1486 | #ifdef CONFIG_IP_VS_IPV6 | 1516 | #ifdef CONFIG_IP_VS_IPV6 |
1487 | /* After packet filtering, forward packet through VS/DR, VS/TUN, | 1517 | /* After packet filtering, forward packet through VS/DR, VS/TUN, |
1488 | * or VS/NAT(change destination), so that filtering rules can be | 1518 | * or VS/NAT(change destination), so that filtering rules can be |
@@ -1511,6 +1541,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { | |||
1511 | .hooknum = NF_INET_FORWARD, | 1541 | .hooknum = NF_INET_FORWARD, |
1512 | .priority = 99, | 1542 | .priority = 99, |
1513 | }, | 1543 | }, |
1544 | /* Before the netfilter connection tracking, exit from POST_ROUTING */ | ||
1545 | { | ||
1546 | .hook = ip_vs_post_routing, | ||
1547 | .owner = THIS_MODULE, | ||
1548 | .pf = PF_INET6, | ||
1549 | .hooknum = NF_INET_POST_ROUTING, | ||
1550 | .priority = NF_IP6_PRI_NAT_SRC-1, | ||
1551 | }, | ||
1514 | #endif | 1552 | #endif |
1515 | }; | 1553 | }; |
1516 | 1554 | ||
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 7bd41d28080c..d2d842f292c6 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c | |||
@@ -88,6 +88,9 @@ int sysctl_ip_vs_expire_nodest_conn = 0; | |||
88 | int sysctl_ip_vs_expire_quiescent_template = 0; | 88 | int sysctl_ip_vs_expire_quiescent_template = 0; |
89 | int sysctl_ip_vs_sync_threshold[2] = { 3, 50 }; | 89 | int sysctl_ip_vs_sync_threshold[2] = { 3, 50 }; |
90 | int sysctl_ip_vs_nat_icmp_send = 0; | 90 | int sysctl_ip_vs_nat_icmp_send = 0; |
91 | #ifdef CONFIG_IP_VS_NFCT | ||
92 | int sysctl_ip_vs_conntrack; | ||
93 | #endif | ||
91 | 94 | ||
92 | 95 | ||
93 | #ifdef CONFIG_IP_VS_DEBUG | 96 | #ifdef CONFIG_IP_VS_DEBUG |
@@ -1580,6 +1583,15 @@ static struct ctl_table vs_vars[] = { | |||
1580 | .mode = 0644, | 1583 | .mode = 0644, |
1581 | .proc_handler = proc_do_defense_mode, | 1584 | .proc_handler = proc_do_defense_mode, |
1582 | }, | 1585 | }, |
1586 | #ifdef CONFIG_IP_VS_NFCT | ||
1587 | { | ||
1588 | .procname = "conntrack", | ||
1589 | .data = &sysctl_ip_vs_conntrack, | ||
1590 | .maxlen = sizeof(int), | ||
1591 | .mode = 0644, | ||
1592 | .proc_handler = &proc_dointvec, | ||
1593 | }, | ||
1594 | #endif | ||
1583 | { | 1595 | { |
1584 | .procname = "secure_tcp", | 1596 | .procname = "secure_tcp", |
1585 | .data = &sysctl_ip_vs_secure_tcp, | 1597 | .data = &sysctl_ip_vs_secure_tcp, |
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 7e9af5b76d9e..9cd375f94d61 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c | |||
@@ -20,17 +20,6 @@ | |||
20 | * | 20 | * |
21 | * Author: Wouter Gadeyne | 21 | * Author: Wouter Gadeyne |
22 | * | 22 | * |
23 | * | ||
24 | * Code for ip_vs_expect_related and ip_vs_expect_callback is taken from | ||
25 | * http://www.ssi.bg/~ja/nfct/: | ||
26 | * | ||
27 | * ip_vs_nfct.c: Netfilter connection tracking support for IPVS | ||
28 | * | ||
29 | * Portions Copyright (C) 2001-2002 | ||
30 | * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland. | ||
31 | * | ||
32 | * Portions Copyright (C) 2003-2008 | ||
33 | * Julian Anastasov | ||
34 | */ | 23 | */ |
35 | 24 | ||
36 | #define KMSG_COMPONENT "IPVS" | 25 | #define KMSG_COMPONENT "IPVS" |
@@ -58,16 +47,6 @@ | |||
58 | #define SERVER_STRING "227 Entering Passive Mode (" | 47 | #define SERVER_STRING "227 Entering Passive Mode (" |
59 | #define CLIENT_STRING "PORT " | 48 | #define CLIENT_STRING "PORT " |
60 | 49 | ||
61 | #define FMT_TUPLE "%pI4:%u->%pI4:%u/%u" | ||
62 | #define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \ | ||
63 | &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \ | ||
64 | (T)->dst.protonum | ||
65 | |||
66 | #define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u" | ||
67 | #define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \ | ||
68 | &((C)->vaddr.ip), ntohs((C)->vport), \ | ||
69 | &((C)->daddr.ip), ntohs((C)->dport), \ | ||
70 | (C)->protocol, (C)->state | ||
71 | 50 | ||
72 | /* | 51 | /* |
73 | * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper | 52 | * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper |
@@ -85,6 +64,8 @@ static int ip_vs_ftp_pasv; | |||
85 | static int | 64 | static int |
86 | ip_vs_ftp_init_conn(struct ip_vs_app *app, struct ip_vs_conn *cp) | 65 | ip_vs_ftp_init_conn(struct ip_vs_app *app, struct ip_vs_conn *cp) |
87 | { | 66 | { |
67 | /* We use connection tracking for the command connection */ | ||
68 | cp->flags |= IP_VS_CONN_F_NFCT; | ||
88 | return 0; | 69 | return 0; |
89 | } | 70 | } |
90 | 71 | ||
@@ -149,120 +130,6 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, | |||
149 | } | 130 | } |
150 | 131 | ||
151 | /* | 132 | /* |
152 | * Called from init_conntrack() as expectfn handler. | ||
153 | */ | ||
154 | static void | ||
155 | ip_vs_expect_callback(struct nf_conn *ct, | ||
156 | struct nf_conntrack_expect *exp) | ||
157 | { | ||
158 | struct nf_conntrack_tuple *orig, new_reply; | ||
159 | struct ip_vs_conn *cp; | ||
160 | |||
161 | if (exp->tuple.src.l3num != PF_INET) | ||
162 | return; | ||
163 | |||
164 | /* | ||
165 | * We assume that no NF locks are held before this callback. | ||
166 | * ip_vs_conn_out_get and ip_vs_conn_in_get should match their | ||
167 | * expectations even if they use wildcard values, now we provide the | ||
168 | * actual values from the newly created original conntrack direction. | ||
169 | * The conntrack is confirmed when packet reaches IPVS hooks. | ||
170 | */ | ||
171 | |||
172 | /* RS->CLIENT */ | ||
173 | orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; | ||
174 | cp = ip_vs_conn_out_get(exp->tuple.src.l3num, orig->dst.protonum, | ||
175 | &orig->src.u3, orig->src.u.tcp.port, | ||
176 | &orig->dst.u3, orig->dst.u.tcp.port); | ||
177 | if (cp) { | ||
178 | /* Change reply CLIENT->RS to CLIENT->VS */ | ||
179 | new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | ||
180 | IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " | ||
181 | FMT_TUPLE ", found inout cp=" FMT_CONN "\n", | ||
182 | __func__, ct, ct->status, | ||
183 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | ||
184 | ARG_CONN(cp)); | ||
185 | new_reply.dst.u3 = cp->vaddr; | ||
186 | new_reply.dst.u.tcp.port = cp->vport; | ||
187 | IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE | ||
188 | ", inout cp=" FMT_CONN "\n", | ||
189 | __func__, ct, | ||
190 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | ||
191 | ARG_CONN(cp)); | ||
192 | goto alter; | ||
193 | } | ||
194 | |||
195 | /* CLIENT->VS */ | ||
196 | cp = ip_vs_conn_in_get(exp->tuple.src.l3num, orig->dst.protonum, | ||
197 | &orig->src.u3, orig->src.u.tcp.port, | ||
198 | &orig->dst.u3, orig->dst.u.tcp.port); | ||
199 | if (cp) { | ||
200 | /* Change reply VS->CLIENT to RS->CLIENT */ | ||
201 | new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | ||
202 | IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " | ||
203 | FMT_TUPLE ", found outin cp=" FMT_CONN "\n", | ||
204 | __func__, ct, ct->status, | ||
205 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | ||
206 | ARG_CONN(cp)); | ||
207 | new_reply.src.u3 = cp->daddr; | ||
208 | new_reply.src.u.tcp.port = cp->dport; | ||
209 | IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", " | ||
210 | FMT_TUPLE ", outin cp=" FMT_CONN "\n", | ||
211 | __func__, ct, | ||
212 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | ||
213 | ARG_CONN(cp)); | ||
214 | goto alter; | ||
215 | } | ||
216 | |||
217 | IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuple=" FMT_TUPLE | ||
218 | " - unknown expect\n", | ||
219 | __func__, ct, ct->status, ARG_TUPLE(orig)); | ||
220 | return; | ||
221 | |||
222 | alter: | ||
223 | /* Never alter conntrack for non-NAT conns */ | ||
224 | if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ) | ||
225 | nf_conntrack_alter_reply(ct, &new_reply); | ||
226 | ip_vs_conn_put(cp); | ||
227 | return; | ||
228 | } | ||
229 | |||
230 | /* | ||
231 | * Create NF conntrack expectation with wildcard (optional) source port. | ||
232 | * Then the default callback function will alter the reply and will confirm | ||
233 | * the conntrack entry when the first packet comes. | ||
234 | */ | ||
235 | static void | ||
236 | ip_vs_expect_related(struct sk_buff *skb, struct nf_conn *ct, | ||
237 | struct ip_vs_conn *cp, u_int8_t proto, | ||
238 | const __be16 *port, int from_rs) | ||
239 | { | ||
240 | struct nf_conntrack_expect *exp; | ||
241 | |||
242 | BUG_ON(!ct || ct == &nf_conntrack_untracked); | ||
243 | |||
244 | exp = nf_ct_expect_alloc(ct); | ||
245 | if (!exp) | ||
246 | return; | ||
247 | |||
248 | if (from_rs) | ||
249 | nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, | ||
250 | nf_ct_l3num(ct), &cp->daddr, &cp->caddr, | ||
251 | proto, port, &cp->cport); | ||
252 | else | ||
253 | nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, | ||
254 | nf_ct_l3num(ct), &cp->caddr, &cp->vaddr, | ||
255 | proto, port, &cp->vport); | ||
256 | |||
257 | exp->expectfn = ip_vs_expect_callback; | ||
258 | |||
259 | IP_VS_DBG(7, "%s(): ct=%p, expect tuple=" FMT_TUPLE "\n", | ||
260 | __func__, ct, ARG_TUPLE(&exp->tuple)); | ||
261 | nf_ct_expect_related(exp); | ||
262 | nf_ct_expect_put(exp); | ||
263 | } | ||
264 | |||
265 | /* | ||
266 | * Look at outgoing ftp packets to catch the response to a PASV command | 133 | * Look at outgoing ftp packets to catch the response to a PASV command |
267 | * from the server (inside-to-outside). | 134 | * from the server (inside-to-outside). |
268 | * When we see one, we build a connection entry with the client address, | 135 | * When we see one, we build a connection entry with the client address, |
@@ -335,7 +202,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
335 | &cp->caddr, 0, | 202 | &cp->caddr, 0, |
336 | &cp->vaddr, port, | 203 | &cp->vaddr, port, |
337 | &from, port, | 204 | &from, port, |
338 | IP_VS_CONN_F_NO_CPORT, | 205 | IP_VS_CONN_F_NO_CPORT | |
206 | IP_VS_CONN_F_NFCT, | ||
339 | cp->dest); | 207 | cp->dest); |
340 | if (!n_cp) | 208 | if (!n_cp) |
341 | return 0; | 209 | return 0; |
@@ -371,8 +239,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
371 | start-data, end-start, | 239 | start-data, end-start, |
372 | buf, buf_len); | 240 | buf, buf_len); |
373 | if (ret) | 241 | if (ret) |
374 | ip_vs_expect_related(skb, ct, n_cp, | 242 | ip_vs_nfct_expect_related(skb, ct, n_cp, |
375 | IPPROTO_TCP, NULL, 0); | 243 | IPPROTO_TCP, 0, 0); |
376 | } | 244 | } |
377 | 245 | ||
378 | /* | 246 | /* |
@@ -487,7 +355,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
487 | &to, port, | 355 | &to, port, |
488 | &cp->vaddr, htons(ntohs(cp->vport)-1), | 356 | &cp->vaddr, htons(ntohs(cp->vport)-1), |
489 | &cp->daddr, htons(ntohs(cp->dport)-1), | 357 | &cp->daddr, htons(ntohs(cp->dport)-1), |
490 | 0, | 358 | IP_VS_CONN_F_NFCT, |
491 | cp->dest); | 359 | cp->dest); |
492 | if (!n_cp) | 360 | if (!n_cp) |
493 | return 0; | 361 | return 0; |
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c new file mode 100644 index 000000000000..c038458d0290 --- /dev/null +++ b/net/netfilter/ipvs/ip_vs_nfct.c | |||
@@ -0,0 +1,292 @@ | |||
1 | /* | ||
2 | * ip_vs_nfct.c: Netfilter connection tracking support for IPVS | ||
3 | * | ||
4 | * Portions Copyright (C) 2001-2002 | ||
5 | * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland. | ||
6 | * | ||
7 | * Portions Copyright (C) 2003-2010 | ||
8 | * Julian Anastasov | ||
9 | * | ||
10 | * | ||
11 | * This code is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License as published by | ||
13 | * the Free Software Foundation; either version 2 of the License, or | ||
14 | * (at your option) any later version. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | * GNU General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public License | ||
22 | * along with this program; if not, write to the Free Software | ||
23 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
24 | * | ||
25 | * | ||
26 | * Authors: | ||
27 | * Ben North <ben@redfrontdoor.org> | ||
28 | * Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels | ||
29 | * Hannes Eder <heder@google.com> Extend NFCT support for FTP, ipvs match | ||
30 | * | ||
31 | * | ||
32 | * Current status: | ||
33 | * | ||
34 | * - provide conntrack confirmation for new and related connections, so | ||
35 | * that we can see their proper conntrack state in all hooks | ||
36 | * - support for all forwarding methods, not only NAT | ||
37 | * - FTP support (NAT), ability to support other NAT apps with expectations | ||
38 | * - to correctly create expectations for related NAT connections the proper | ||
39 | * NF conntrack support must be already installed, eg. ip_vs_ftp requires | ||
40 | * nf_conntrack_ftp ... iptables_nat for the same ports (but no iptables | ||
41 | * NAT rules are needed) | ||
42 | * - alter reply for NAT when forwarding packet in original direction: | ||
43 | * conntrack from client in NEW or RELATED (Passive FTP DATA) state or | ||
44 | * when RELATED conntrack is created from real server (Active FTP DATA) | ||
45 | * - if iptables_nat is not loaded the Passive FTP will not work (the | ||
46 | * PASV response cannot be NAT-ed), but Active FTP should work | ||
47 | * | ||
48 | */ | ||
49 | |||
50 | #define KMSG_COMPONENT "IPVS" | ||
51 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt | ||
52 | |||
53 | #include <linux/module.h> | ||
54 | #include <linux/types.h> | ||
55 | #include <linux/kernel.h> | ||
56 | #include <linux/errno.h> | ||
57 | #include <linux/compiler.h> | ||
58 | #include <linux/vmalloc.h> | ||
59 | #include <linux/skbuff.h> | ||
60 | #include <net/ip.h> | ||
61 | #include <linux/netfilter.h> | ||
62 | #include <linux/netfilter_ipv4.h> | ||
63 | #include <net/ip_vs.h> | ||
64 | #include <net/netfilter/nf_conntrack_core.h> | ||
65 | #include <net/netfilter/nf_conntrack_expect.h> | ||
66 | #include <net/netfilter/nf_conntrack_helper.h> | ||
67 | #include <net/netfilter/nf_conntrack_zones.h> | ||
68 | |||
69 | |||
70 | #define FMT_TUPLE "%pI4:%u->%pI4:%u/%u" | ||
71 | #define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \ | ||
72 | &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \ | ||
73 | (T)->dst.protonum | ||
74 | |||
75 | #define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u" | ||
76 | #define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \ | ||
77 | &((C)->vaddr.ip), ntohs((C)->vport), \ | ||
78 | &((C)->daddr.ip), ntohs((C)->dport), \ | ||
79 | (C)->protocol, (C)->state | ||
80 | |||
81 | void | ||
82 | ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) | ||
83 | { | ||
84 | enum ip_conntrack_info ctinfo; | ||
85 | struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); | ||
86 | struct nf_conntrack_tuple new_tuple; | ||
87 | |||
88 | if (ct == NULL || nf_ct_is_confirmed(ct) || nf_ct_is_untracked(ct) || | ||
89 | nf_ct_is_dying(ct)) | ||
90 | return; | ||
91 | |||
92 | /* Never alter conntrack for non-NAT conns */ | ||
93 | if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) | ||
94 | return; | ||
95 | |||
96 | /* Alter reply only in original direction */ | ||
97 | if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) | ||
98 | return; | ||
99 | |||
100 | /* | ||
101 | * The connection is not yet in the hashtable, so we update it. | ||
102 | * CIP->VIP will remain the same, so leave the tuple in | ||
103 | * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the | ||
104 | * real-server we will see RIP->DIP. | ||
105 | */ | ||
106 | new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | ||
107 | /* | ||
108 | * This will also take care of UDP and other protocols. | ||
109 | */ | ||
110 | if (outin) { | ||
111 | new_tuple.src.u3 = cp->daddr; | ||
112 | if (new_tuple.dst.protonum != IPPROTO_ICMP && | ||
113 | new_tuple.dst.protonum != IPPROTO_ICMPV6) | ||
114 | new_tuple.src.u.tcp.port = cp->dport; | ||
115 | } else { | ||
116 | new_tuple.dst.u3 = cp->vaddr; | ||
117 | if (new_tuple.dst.protonum != IPPROTO_ICMP && | ||
118 | new_tuple.dst.protonum != IPPROTO_ICMPV6) | ||
119 | new_tuple.dst.u.tcp.port = cp->vport; | ||
120 | } | ||
121 | IP_VS_DBG(7, "%s: Updating conntrack ct=%p, status=0x%lX, " | ||
122 | "ctinfo=%d, old reply=" FMT_TUPLE | ||
123 | ", new reply=" FMT_TUPLE ", cp=" FMT_CONN "\n", | ||
124 | __func__, ct, ct->status, ctinfo, | ||
125 | ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple), | ||
126 | ARG_TUPLE(&new_tuple), ARG_CONN(cp)); | ||
127 | nf_conntrack_alter_reply(ct, &new_tuple); | ||
128 | } | ||
129 | |||
130 | int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp) | ||
131 | { | ||
132 | return nf_conntrack_confirm(skb); | ||
133 | } | ||
134 | |||
135 | /* | ||
136 | * Called from init_conntrack() as expectfn handler. | ||
137 | */ | ||
138 | static void ip_vs_nfct_expect_callback(struct nf_conn *ct, | ||
139 | struct nf_conntrack_expect *exp) | ||
140 | { | ||
141 | struct nf_conntrack_tuple *orig, new_reply; | ||
142 | struct ip_vs_conn *cp; | ||
143 | |||
144 | if (exp->tuple.src.l3num != PF_INET) | ||
145 | return; | ||
146 | |||
147 | /* | ||
148 | * We assume that no NF locks are held before this callback. | ||
149 | * ip_vs_conn_out_get and ip_vs_conn_in_get should match their | ||
150 | * expectations even if they use wildcard values; now we provide the | ||
151 | * actual values from the newly created original conntrack direction. | ||
152 | * The conntrack is confirmed when packet reaches IPVS hooks. | ||
153 | */ | ||
154 | |||
155 | /* RS->CLIENT */ | ||
156 | orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; | ||
157 | cp = ip_vs_conn_out_get(exp->tuple.src.l3num, orig->dst.protonum, | ||
158 | &orig->src.u3, orig->src.u.tcp.port, | ||
159 | &orig->dst.u3, orig->dst.u.tcp.port); | ||
160 | if (cp) { | ||
161 | /* Change reply CLIENT->RS to CLIENT->VS */ | ||
162 | new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | ||
163 | IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " | ||
164 | FMT_TUPLE ", found inout cp=" FMT_CONN "\n", | ||
165 | __func__, ct, ct->status, | ||
166 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | ||
167 | ARG_CONN(cp)); | ||
168 | new_reply.dst.u3 = cp->vaddr; | ||
169 | new_reply.dst.u.tcp.port = cp->vport; | ||
170 | IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE | ||
171 | ", inout cp=" FMT_CONN "\n", | ||
172 | __func__, ct, | ||
173 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | ||
174 | ARG_CONN(cp)); | ||
175 | goto alter; | ||
176 | } | ||
177 | |||
178 | /* CLIENT->VS */ | ||
179 | cp = ip_vs_conn_in_get(exp->tuple.src.l3num, orig->dst.protonum, | ||
180 | &orig->src.u3, orig->src.u.tcp.port, | ||
181 | &orig->dst.u3, orig->dst.u.tcp.port); | ||
182 | if (cp) { | ||
183 | /* Change reply VS->CLIENT to RS->CLIENT */ | ||
184 | new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | ||
185 | IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " | ||
186 | FMT_TUPLE ", found outin cp=" FMT_CONN "\n", | ||
187 | __func__, ct, ct->status, | ||
188 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | ||
189 | ARG_CONN(cp)); | ||
190 | new_reply.src.u3 = cp->daddr; | ||
191 | new_reply.src.u.tcp.port = cp->dport; | ||
192 | IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " | ||
193 | FMT_TUPLE ", outin cp=" FMT_CONN "\n", | ||
194 | __func__, ct, | ||
195 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | ||
196 | ARG_CONN(cp)); | ||
197 | goto alter; | ||
198 | } | ||
199 | |||
200 | IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE | ||
201 | " - unknown expect\n", | ||
202 | __func__, ct, ct->status, ARG_TUPLE(orig)); | ||
203 | return; | ||
204 | |||
205 | alter: | ||
206 | /* Never alter conntrack for non-NAT conns */ | ||
207 | if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ) | ||
208 | nf_conntrack_alter_reply(ct, &new_reply); | ||
209 | ip_vs_conn_put(cp); | ||
210 | return; | ||
211 | } | ||
212 | |||
213 | /* | ||
214 | * Create NF conntrack expectation with wildcard (optional) source port. | ||
215 | * Then the default callback function will alter the reply and will confirm | ||
216 | * the conntrack entry when the first packet comes. | ||
217 | * Use port 0 to expect connection from any port. | ||
218 | */ | ||
219 | void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct, | ||
220 | struct ip_vs_conn *cp, u_int8_t proto, | ||
221 | const __be16 port, int from_rs) | ||
222 | { | ||
223 | struct nf_conntrack_expect *exp; | ||
224 | |||
225 | if (ct == NULL || nf_ct_is_untracked(ct)) | ||
226 | return; | ||
227 | |||
228 | exp = nf_ct_expect_alloc(ct); | ||
229 | if (!exp) | ||
230 | return; | ||
231 | |||
232 | nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), | ||
233 | from_rs ? &cp->daddr : &cp->caddr, | ||
234 | from_rs ? &cp->caddr : &cp->vaddr, | ||
235 | proto, port ? &port : NULL, | ||
236 | from_rs ? &cp->cport : &cp->vport); | ||
237 | |||
238 | exp->expectfn = ip_vs_nfct_expect_callback; | ||
239 | |||
240 | IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n", | ||
241 | __func__, ct, ARG_TUPLE(&exp->tuple)); | ||
242 | nf_ct_expect_related(exp); | ||
243 | nf_ct_expect_put(exp); | ||
244 | } | ||
245 | EXPORT_SYMBOL(ip_vs_nfct_expect_related); | ||
246 | |||
247 | /* | ||
248 | * Our connection was terminated, try to drop the conntrack immediately | ||
249 | */ | ||
250 | void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) | ||
251 | { | ||
252 | struct nf_conntrack_tuple_hash *h; | ||
253 | struct nf_conn *ct; | ||
254 | struct nf_conntrack_tuple tuple; | ||
255 | |||
256 | if (!cp->cport) | ||
257 | return; | ||
258 | |||
259 | tuple = (struct nf_conntrack_tuple) { | ||
260 | .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } }; | ||
261 | tuple.src.u3 = cp->caddr; | ||
262 | tuple.src.u.all = cp->cport; | ||
263 | tuple.src.l3num = cp->af; | ||
264 | tuple.dst.u3 = cp->vaddr; | ||
265 | tuple.dst.u.all = cp->vport; | ||
266 | |||
267 | IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE | ||
268 | " for conn " FMT_CONN "\n", | ||
269 | __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); | ||
270 | |||
271 | h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple); | ||
272 | if (h) { | ||
273 | ct = nf_ct_tuplehash_to_ctrack(h); | ||
274 | /* Show what happens instead of calling nf_ct_kill() */ | ||
275 | if (del_timer(&ct->timeout)) { | ||
276 | IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple=" | ||
277 | FMT_TUPLE "\n", | ||
278 | __func__, ct, ARG_TUPLE(&tuple)); | ||
279 | if (ct->timeout.function) | ||
280 | ct->timeout.function(ct->timeout.data); | ||
281 | } else { | ||
282 | IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple=" | ||
283 | FMT_TUPLE "\n", | ||
284 | __func__, ct, ARG_TUPLE(&tuple)); | ||
285 | } | ||
286 | nf_ct_put(ct); | ||
287 | } else { | ||
288 | IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n", | ||
289 | __func__, ARG_TUPLE(&tuple)); | ||
290 | } | ||
291 | } | ||
292 | |||
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 49df6bea6a2d..8817afa34e6a 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c | |||
@@ -28,7 +28,6 @@ | |||
28 | #include <net/ip6_route.h> | 28 | #include <net/ip6_route.h> |
29 | #include <linux/icmpv6.h> | 29 | #include <linux/icmpv6.h> |
30 | #include <linux/netfilter.h> | 30 | #include <linux/netfilter.h> |
31 | #include <net/netfilter/nf_conntrack.h> | ||
32 | #include <linux/netfilter_ipv4.h> | 31 | #include <linux/netfilter_ipv4.h> |
33 | 32 | ||
34 | #include <net/ip_vs.h> | 33 | #include <net/ip_vs.h> |
@@ -194,12 +193,37 @@ ip_vs_dst_reset(struct ip_vs_dest *dest) | |||
194 | dst_release(old_dst); | 193 | dst_release(old_dst); |
195 | } | 194 | } |
196 | 195 | ||
197 | #define IP_VS_XMIT(pf, skb, rt) \ | 196 | #define IP_VS_XMIT_TUNNEL(skb, cp) \ |
197 | ({ \ | ||
198 | int __ret = NF_ACCEPT; \ | ||
199 | \ | ||
200 | if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT)) \ | ||
201 | __ret = ip_vs_confirm_conntrack(skb, cp); \ | ||
202 | if (__ret == NF_ACCEPT) { \ | ||
203 | nf_reset(skb); \ | ||
204 | (skb)->ip_summed = CHECKSUM_NONE; \ | ||
205 | } \ | ||
206 | __ret; \ | ||
207 | }) | ||
208 | |||
209 | #define IP_VS_XMIT_NAT(pf, skb, cp) \ | ||
198 | do { \ | 210 | do { \ |
199 | (skb)->ipvs_property = 1; \ | 211 | if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ |
212 | (skb)->ipvs_property = 1; \ | ||
213 | else \ | ||
214 | ip_vs_update_conntrack(skb, cp, 1); \ | ||
200 | skb_forward_csum(skb); \ | 215 | skb_forward_csum(skb); \ |
201 | NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ | 216 | NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ |
202 | (rt)->dst.dev, dst_output); \ | 217 | skb_dst(skb)->dev, dst_output); \ |
218 | } while (0) | ||
219 | |||
220 | #define IP_VS_XMIT(pf, skb, cp) \ | ||
221 | do { \ | ||
222 | if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ | ||
223 | (skb)->ipvs_property = 1; \ | ||
224 | skb_forward_csum(skb); \ | ||
225 | NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ | ||
226 | skb_dst(skb)->dev, dst_output); \ | ||
203 | } while (0) | 227 | } while (0) |
204 | 228 | ||
205 | 229 | ||
@@ -271,7 +295,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
271 | /* Another hack: avoid icmp_send in ip_fragment */ | 295 | /* Another hack: avoid icmp_send in ip_fragment */ |
272 | skb->local_df = 1; | 296 | skb->local_df = 1; |
273 | 297 | ||
274 | IP_VS_XMIT(NFPROTO_IPV4, skb, rt); | 298 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp); |
275 | 299 | ||
276 | LeaveFunction(10); | 300 | LeaveFunction(10); |
277 | return NF_STOLEN; | 301 | return NF_STOLEN; |
@@ -335,7 +359,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
335 | /* Another hack: avoid icmp_send in ip_fragment */ | 359 | /* Another hack: avoid icmp_send in ip_fragment */ |
336 | skb->local_df = 1; | 360 | skb->local_df = 1; |
337 | 361 | ||
338 | IP_VS_XMIT(NFPROTO_IPV6, skb, rt); | 362 | IP_VS_XMIT(NFPROTO_IPV6, skb, cp); |
339 | 363 | ||
340 | LeaveFunction(10); | 364 | LeaveFunction(10); |
341 | return NF_STOLEN; | 365 | return NF_STOLEN; |
@@ -349,36 +373,6 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
349 | } | 373 | } |
350 | #endif | 374 | #endif |
351 | 375 | ||
352 | void | ||
353 | ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) | ||
354 | { | ||
355 | struct nf_conn *ct = (struct nf_conn *)skb->nfct; | ||
356 | struct nf_conntrack_tuple new_tuple; | ||
357 | |||
358 | if (ct == NULL || nf_ct_is_untracked(ct) || nf_ct_is_confirmed(ct)) | ||
359 | return; | ||
360 | |||
361 | /* | ||
362 | * The connection is not yet in the hashtable, so we update it. | ||
363 | * CIP->VIP will remain the same, so leave the tuple in | ||
364 | * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the | ||
365 | * real-server we will see RIP->DIP. | ||
366 | */ | ||
367 | new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | ||
368 | if (outin) | ||
369 | new_tuple.src.u3 = cp->daddr; | ||
370 | else | ||
371 | new_tuple.dst.u3 = cp->vaddr; | ||
372 | /* | ||
373 | * This will also take care of UDP and other protocols. | ||
374 | */ | ||
375 | if (outin) | ||
376 | new_tuple.src.u.tcp.port = cp->dport; | ||
377 | else | ||
378 | new_tuple.dst.u.tcp.port = cp->vport; | ||
379 | nf_conntrack_alter_reply(ct, &new_tuple); | ||
380 | } | ||
381 | |||
382 | /* | 376 | /* |
383 | * NAT transmitter (only for outside-to-inside nat forwarding) | 377 | * NAT transmitter (only for outside-to-inside nat forwarding) |
384 | * Not used for related ICMP | 378 | * Not used for related ICMP |
@@ -434,8 +428,6 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
434 | 428 | ||
435 | IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); | 429 | IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); |
436 | 430 | ||
437 | ip_vs_update_conntrack(skb, cp, 1); | ||
438 | |||
439 | /* FIXME: when application helper enlarges the packet and the length | 431 | /* FIXME: when application helper enlarges the packet and the length |
440 | is larger than the MTU of outgoing device, there will be still | 432 | is larger than the MTU of outgoing device, there will be still |
441 | MTU problem. */ | 433 | MTU problem. */ |
@@ -443,7 +435,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
443 | /* Another hack: avoid icmp_send in ip_fragment */ | 435 | /* Another hack: avoid icmp_send in ip_fragment */ |
444 | skb->local_df = 1; | 436 | skb->local_df = 1; |
445 | 437 | ||
446 | IP_VS_XMIT(NFPROTO_IPV4, skb, rt); | 438 | IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp); |
447 | 439 | ||
448 | LeaveFunction(10); | 440 | LeaveFunction(10); |
449 | return NF_STOLEN; | 441 | return NF_STOLEN; |
@@ -451,8 +443,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
451 | tx_error_icmp: | 443 | tx_error_icmp: |
452 | dst_link_failure(skb); | 444 | dst_link_failure(skb); |
453 | tx_error: | 445 | tx_error: |
454 | LeaveFunction(10); | ||
455 | kfree_skb(skb); | 446 | kfree_skb(skb); |
447 | LeaveFunction(10); | ||
456 | return NF_STOLEN; | 448 | return NF_STOLEN; |
457 | tx_error_put: | 449 | tx_error_put: |
458 | ip_rt_put(rt); | 450 | ip_rt_put(rt); |
@@ -512,8 +504,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
512 | 504 | ||
513 | IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); | 505 | IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); |
514 | 506 | ||
515 | ip_vs_update_conntrack(skb, cp, 1); | ||
516 | |||
517 | /* FIXME: when application helper enlarges the packet and the length | 507 | /* FIXME: when application helper enlarges the packet and the length |
518 | is larger than the MTU of outgoing device, there will be still | 508 | is larger than the MTU of outgoing device, there will be still |
519 | MTU problem. */ | 509 | MTU problem. */ |
@@ -521,7 +511,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
521 | /* Another hack: avoid icmp_send in ip_fragment */ | 511 | /* Another hack: avoid icmp_send in ip_fragment */ |
522 | skb->local_df = 1; | 512 | skb->local_df = 1; |
523 | 513 | ||
524 | IP_VS_XMIT(NFPROTO_IPV6, skb, rt); | 514 | IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp); |
525 | 515 | ||
526 | LeaveFunction(10); | 516 | LeaveFunction(10); |
527 | return NF_STOLEN; | 517 | return NF_STOLEN; |
@@ -571,6 +561,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
571 | struct iphdr *iph; /* Our new IP header */ | 561 | struct iphdr *iph; /* Our new IP header */ |
572 | unsigned int max_headroom; /* The extra header space needed */ | 562 | unsigned int max_headroom; /* The extra header space needed */ |
573 | int mtu; | 563 | int mtu; |
564 | int ret; | ||
574 | 565 | ||
575 | EnterFunction(10); | 566 | EnterFunction(10); |
576 | 567 | ||
@@ -655,7 +646,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
655 | /* Another hack: avoid icmp_send in ip_fragment */ | 646 | /* Another hack: avoid icmp_send in ip_fragment */ |
656 | skb->local_df = 1; | 647 | skb->local_df = 1; |
657 | 648 | ||
658 | ip_local_out(skb); | 649 | ret = IP_VS_XMIT_TUNNEL(skb, cp); |
650 | if (ret == NF_ACCEPT) | ||
651 | ip_local_out(skb); | ||
652 | else if (ret == NF_DROP) | ||
653 | kfree_skb(skb); | ||
659 | 654 | ||
660 | LeaveFunction(10); | 655 | LeaveFunction(10); |
661 | 656 | ||
@@ -681,6 +676,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
681 | struct ipv6hdr *iph; /* Our new IP header */ | 676 | struct ipv6hdr *iph; /* Our new IP header */ |
682 | unsigned int max_headroom; /* The extra header space needed */ | 677 | unsigned int max_headroom; /* The extra header space needed */ |
683 | int mtu; | 678 | int mtu; |
679 | int ret; | ||
684 | 680 | ||
685 | EnterFunction(10); | 681 | EnterFunction(10); |
686 | 682 | ||
@@ -761,7 +757,11 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
761 | /* Another hack: avoid icmp_send in ip_fragment */ | 757 | /* Another hack: avoid icmp_send in ip_fragment */ |
762 | skb->local_df = 1; | 758 | skb->local_df = 1; |
763 | 759 | ||
764 | ip6_local_out(skb); | 760 | ret = IP_VS_XMIT_TUNNEL(skb, cp); |
761 | if (ret == NF_ACCEPT) | ||
762 | ip6_local_out(skb); | ||
763 | else if (ret == NF_DROP) | ||
764 | kfree_skb(skb); | ||
765 | 765 | ||
766 | LeaveFunction(10); | 766 | LeaveFunction(10); |
767 | 767 | ||
@@ -820,7 +820,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
820 | /* Another hack: avoid icmp_send in ip_fragment */ | 820 | /* Another hack: avoid icmp_send in ip_fragment */ |
821 | skb->local_df = 1; | 821 | skb->local_df = 1; |
822 | 822 | ||
823 | IP_VS_XMIT(NFPROTO_IPV4, skb, rt); | 823 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp); |
824 | 824 | ||
825 | LeaveFunction(10); | 825 | LeaveFunction(10); |
826 | return NF_STOLEN; | 826 | return NF_STOLEN; |
@@ -873,7 +873,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
873 | /* Another hack: avoid icmp_send in ip_fragment */ | 873 | /* Another hack: avoid icmp_send in ip_fragment */ |
874 | skb->local_df = 1; | 874 | skb->local_df = 1; |
875 | 875 | ||
876 | IP_VS_XMIT(NFPROTO_IPV6, skb, rt); | 876 | IP_VS_XMIT(NFPROTO_IPV6, skb, cp); |
877 | 877 | ||
878 | LeaveFunction(10); | 878 | LeaveFunction(10); |
879 | return NF_STOLEN; | 879 | return NF_STOLEN; |
@@ -947,7 +947,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
947 | /* Another hack: avoid icmp_send in ip_fragment */ | 947 | /* Another hack: avoid icmp_send in ip_fragment */ |
948 | skb->local_df = 1; | 948 | skb->local_df = 1; |
949 | 949 | ||
950 | IP_VS_XMIT(NFPROTO_IPV4, skb, rt); | 950 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp); |
951 | 951 | ||
952 | rc = NF_STOLEN; | 952 | rc = NF_STOLEN; |
953 | goto out; | 953 | goto out; |
@@ -1022,7 +1022,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
1022 | /* Another hack: avoid icmp_send in ip_fragment */ | 1022 | /* Another hack: avoid icmp_send in ip_fragment */ |
1023 | skb->local_df = 1; | 1023 | skb->local_df = 1; |
1024 | 1024 | ||
1025 | IP_VS_XMIT(NFPROTO_IPV6, skb, rt); | 1025 | IP_VS_XMIT(NFPROTO_IPV6, skb, cp); |
1026 | 1026 | ||
1027 | rc = NF_STOLEN; | 1027 | rc = NF_STOLEN; |
1028 | goto out; | 1028 | goto out; |