diff options
author | Malcolm Turnbull <malcolm@loadbalancer.org> | 2008-09-04 21:17:13 -0400 |
---|---|---|
committer | Simon Horman <horms@verge.net.au> | 2008-09-04 21:17:13 -0400 |
commit | 4856c84c1358b79852743ac64e50c1e9d5118f05 (patch) | |
tree | 9d36ac3cb5571f85da582f4cd9fd082eaf6e0cab /net/ipv4/ipvs | |
parent | f94fd041402e4e70d2b4ed00008b9bb857e6ae87 (diff) |
ipvs: load balance IPv4 connections from a local process
This allows IPVS to load balance connections made by a local process.
For example a proxy server running locally.
External client --> pound:443 -> Local:443 --> IPVS:80 --> RealServer
Signed-off-by: Siim Põder <siim@p6drad-teel.net>
Signed-off-by: Malcolm Turnbull <malcolm@loadbalancer.org>
Signed-off-by: Simon Horman <horms@verge.net.au>
Diffstat (limited to 'net/ipv4/ipvs')
-rw-r--r-- | net/ipv4/ipvs/ip_vs_core.c | 224 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_proto_tcp.c | 4 |
2 files changed, 134 insertions, 94 deletions
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 7d3de9db5ac5..26e3d99bbeea 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c | |||
@@ -651,12 +651,53 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
651 | } | 651 | } |
652 | #endif | 652 | #endif |
653 | 653 | ||
654 | /* Handle relevant response ICMP messages - forward to the right | ||
655 | * destination host. Used for NAT and local client. | ||
656 | */ | ||
657 | static int handle_response_icmp(struct sk_buff *skb, struct iphdr *iph, | ||
658 | struct iphdr *cih, struct ip_vs_conn *cp, | ||
659 | struct ip_vs_protocol *pp, | ||
660 | unsigned int offset, unsigned int ihl) | ||
661 | { | ||
662 | unsigned int verdict = NF_DROP; | ||
663 | |||
664 | if (IP_VS_FWD_METHOD(cp) != 0) { | ||
665 | IP_VS_ERR("shouldn't reach here, because the box is on the " | ||
666 | "half connection in the tun/dr module.\n"); | ||
667 | } | ||
668 | |||
669 | /* Ensure the checksum is correct */ | ||
670 | if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { | ||
671 | /* Failed checksum! */ | ||
672 | IP_VS_DBG(1, | ||
673 | "Forward ICMP: failed checksum from %d.%d.%d.%d!\n", | ||
674 | NIPQUAD(iph->saddr)); | ||
675 | goto out; | ||
676 | } | ||
677 | |||
678 | if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) | ||
679 | offset += 2 * sizeof(__u16); | ||
680 | if (!skb_make_writable(skb, offset)) | ||
681 | goto out; | ||
682 | |||
683 | ip_vs_nat_icmp(skb, pp, cp, 1); | ||
684 | |||
685 | /* do the statistics and put it back */ | ||
686 | ip_vs_out_stats(cp, skb); | ||
687 | |||
688 | skb->ipvs_property = 1; | ||
689 | verdict = NF_ACCEPT; | ||
690 | |||
691 | out: | ||
692 | __ip_vs_conn_put(cp); | ||
693 | |||
694 | return verdict; | ||
695 | } | ||
696 | |||
654 | /* | 697 | /* |
655 | * Handle ICMP messages in the inside-to-outside direction (outgoing). | 698 | * Handle ICMP messages in the inside-to-outside direction (outgoing). |
656 | * Find any that might be relevant, check against existing connections, | 699 | * Find any that might be relevant, check against existing connections. |
657 | * forward to the right destination host if relevant. | ||
658 | * Currently handles error types - unreachable, quench, ttl exceeded. | 700 | * Currently handles error types - unreachable, quench, ttl exceeded. |
659 | * (Only used in VS/NAT) | ||
660 | */ | 701 | */ |
661 | static int ip_vs_out_icmp(struct sk_buff *skb, int *related) | 702 | static int ip_vs_out_icmp(struct sk_buff *skb, int *related) |
662 | { | 703 | { |
@@ -666,7 +707,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related) | |||
666 | struct ip_vs_iphdr ciph; | 707 | struct ip_vs_iphdr ciph; |
667 | struct ip_vs_conn *cp; | 708 | struct ip_vs_conn *cp; |
668 | struct ip_vs_protocol *pp; | 709 | struct ip_vs_protocol *pp; |
669 | unsigned int offset, ihl, verdict; | 710 | unsigned int offset, ihl; |
670 | 711 | ||
671 | *related = 1; | 712 | *related = 1; |
672 | 713 | ||
@@ -725,38 +766,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related) | |||
725 | if (!cp) | 766 | if (!cp) |
726 | return NF_ACCEPT; | 767 | return NF_ACCEPT; |
727 | 768 | ||
728 | verdict = NF_DROP; | 769 | return handle_response_icmp(skb, iph, cih, cp, pp, offset, ihl); |
729 | |||
730 | if (IP_VS_FWD_METHOD(cp) != 0) { | ||
731 | IP_VS_ERR("shouldn't reach here, because the box is on the " | ||
732 | "half connection in the tun/dr module.\n"); | ||
733 | } | ||
734 | |||
735 | /* Ensure the checksum is correct */ | ||
736 | if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { | ||
737 | /* Failed checksum! */ | ||
738 | IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n", | ||
739 | NIPQUAD(iph->saddr)); | ||
740 | goto out; | ||
741 | } | ||
742 | |||
743 | if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) | ||
744 | offset += 2 * sizeof(__u16); | ||
745 | if (!skb_make_writable(skb, offset)) | ||
746 | goto out; | ||
747 | |||
748 | ip_vs_nat_icmp(skb, pp, cp, 1); | ||
749 | |||
750 | /* do the statistics and put it back */ | ||
751 | ip_vs_out_stats(cp, skb); | ||
752 | |||
753 | skb->ipvs_property = 1; | ||
754 | verdict = NF_ACCEPT; | ||
755 | |||
756 | out: | ||
757 | __ip_vs_conn_put(cp); | ||
758 | |||
759 | return verdict; | ||
760 | } | 770 | } |
761 | 771 | ||
762 | #ifdef CONFIG_IP_VS_IPV6 | 772 | #ifdef CONFIG_IP_VS_IPV6 |
@@ -875,10 +885,76 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len) | |||
875 | return th->rst; | 885 | return th->rst; |
876 | } | 886 | } |
877 | 887 | ||
888 | /* Handle response packets: rewrite addresses and send away... | ||
889 | * Used for NAT and local client. | ||
890 | */ | ||
891 | static unsigned int | ||
892 | handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | ||
893 | struct ip_vs_conn *cp, int ihl) | ||
894 | { | ||
895 | IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet"); | ||
896 | |||
897 | if (!skb_make_writable(skb, ihl)) | ||
898 | goto drop; | ||
899 | |||
900 | /* mangle the packet */ | ||
901 | if (pp->snat_handler && !pp->snat_handler(skb, pp, cp)) | ||
902 | goto drop; | ||
903 | |||
904 | #ifdef CONFIG_IP_VS_IPV6 | ||
905 | if (af == AF_INET6) | ||
906 | ipv6_hdr(skb)->saddr = cp->vaddr.in6; | ||
907 | else | ||
908 | #endif | ||
909 | { | ||
910 | ip_hdr(skb)->saddr = cp->vaddr.ip; | ||
911 | ip_send_check(ip_hdr(skb)); | ||
912 | } | ||
913 | |||
914 | /* For policy routing, packets originating from this | ||
915 | * machine itself may be routed differently to packets | ||
916 | * passing through. We want this packet to be routed as | ||
917 | * if it came from this machine itself. So re-compute | ||
918 | * the routing information. | ||
919 | */ | ||
920 | #ifdef CONFIG_IP_VS_IPV6 | ||
921 | if (af == AF_INET6) { | ||
922 | if (ip6_route_me_harder(skb) != 0) | ||
923 | goto drop; | ||
924 | } else | ||
925 | #endif | ||
926 | if (ip_route_me_harder(skb, RTN_LOCAL) != 0) | ||
927 | goto drop; | ||
928 | |||
929 | /* For policy routing, packets originating from this | ||
930 | * machine itself may be routed differently to packets | ||
931 | * passing through. We want this packet to be routed as | ||
932 | * if it came from this machine itself. So re-compute | ||
933 | * the routing information. | ||
934 | */ | ||
935 | if (ip_route_me_harder(skb, RTN_LOCAL) != 0) | ||
936 | goto drop; | ||
937 | |||
938 | IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); | ||
939 | |||
940 | ip_vs_out_stats(cp, skb); | ||
941 | ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); | ||
942 | ip_vs_conn_put(cp); | ||
943 | |||
944 | skb->ipvs_property = 1; | ||
945 | |||
946 | LeaveFunction(11); | ||
947 | return NF_ACCEPT; | ||
948 | |||
949 | drop: | ||
950 | ip_vs_conn_put(cp); | ||
951 | kfree_skb(skb); | ||
952 | return NF_STOLEN; | ||
953 | } | ||
954 | |||
878 | /* | 955 | /* |
879 | * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT. | 956 | * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT. |
880 | * Check if outgoing packet belongs to the established ip_vs_conn, | 957 | * Check if outgoing packet belongs to the established ip_vs_conn. |
881 | * rewrite addresses of the packet and send it on its way... | ||
882 | */ | 958 | */ |
883 | static unsigned int | 959 | static unsigned int |
884 | ip_vs_out(unsigned int hooknum, struct sk_buff *skb, | 960 | ip_vs_out(unsigned int hooknum, struct sk_buff *skb, |
@@ -987,55 +1063,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, | |||
987 | return NF_ACCEPT; | 1063 | return NF_ACCEPT; |
988 | } | 1064 | } |
989 | 1065 | ||
990 | IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet"); | 1066 | return handle_response(af, skb, pp, cp, iph.len); |
991 | |||
992 | if (!skb_make_writable(skb, iph.len)) | ||
993 | goto drop; | ||
994 | |||
995 | /* mangle the packet */ | ||
996 | if (pp->snat_handler && !pp->snat_handler(skb, pp, cp)) | ||
997 | goto drop; | ||
998 | |||
999 | #ifdef CONFIG_IP_VS_IPV6 | ||
1000 | if (af == AF_INET6) | ||
1001 | ipv6_hdr(skb)->saddr = cp->vaddr.in6; | ||
1002 | else | ||
1003 | #endif | ||
1004 | { | ||
1005 | ip_hdr(skb)->saddr = cp->vaddr.ip; | ||
1006 | ip_send_check(ip_hdr(skb)); | ||
1007 | } | ||
1008 | |||
1009 | /* For policy routing, packets originating from this | ||
1010 | * machine itself may be routed differently to packets | ||
1011 | * passing through. We want this packet to be routed as | ||
1012 | * if it came from this machine itself. So re-compute | ||
1013 | * the routing information. | ||
1014 | */ | ||
1015 | #ifdef CONFIG_IP_VS_IPV6 | ||
1016 | if (af == AF_INET6) { | ||
1017 | if (ip6_route_me_harder(skb) != 0) | ||
1018 | goto drop; | ||
1019 | } else | ||
1020 | #endif | ||
1021 | if (ip_route_me_harder(skb, RTN_LOCAL) != 0) | ||
1022 | goto drop; | ||
1023 | |||
1024 | IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); | ||
1025 | |||
1026 | ip_vs_out_stats(cp, skb); | ||
1027 | ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); | ||
1028 | ip_vs_conn_put(cp); | ||
1029 | |||
1030 | skb->ipvs_property = 1; | ||
1031 | |||
1032 | LeaveFunction(11); | ||
1033 | return NF_ACCEPT; | ||
1034 | |||
1035 | drop: | ||
1036 | ip_vs_conn_put(cp); | ||
1037 | kfree_skb(skb); | ||
1038 | return NF_STOLEN; | ||
1039 | } | 1067 | } |
1040 | 1068 | ||
1041 | 1069 | ||
@@ -1111,8 +1139,14 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) | |||
1111 | ip_vs_fill_iphdr(AF_INET, cih, &ciph); | 1139 | ip_vs_fill_iphdr(AF_INET, cih, &ciph); |
1112 | /* The embedded headers contain source and dest in reverse order */ | 1140 | /* The embedded headers contain source and dest in reverse order */ |
1113 | cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1); | 1141 | cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1); |
1114 | if (!cp) | 1142 | if (!cp) { |
1143 | /* The packet could also belong to a local client */ | ||
1144 | cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1); | ||
1145 | if (cp) | ||
1146 | return handle_response_icmp(skb, iph, cih, cp, pp, | ||
1147 | offset, ihl); | ||
1115 | return NF_ACCEPT; | 1148 | return NF_ACCEPT; |
1149 | } | ||
1116 | 1150 | ||
1117 | verdict = NF_DROP; | 1151 | verdict = NF_DROP; |
1118 | 1152 | ||
@@ -1244,11 +1278,12 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, | |||
1244 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); | 1278 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); |
1245 | 1279 | ||
1246 | /* | 1280 | /* |
1247 | * Big tappo: only PACKET_HOST (neither loopback nor mcasts) | 1281 | * Big tappo: only PACKET_HOST, including loopback for local client |
1248 | * ... don't know why 1st test DOES NOT include 2nd (?) | 1282 | * Don't handle local packets on IPv6 for now |
1249 | */ | 1283 | */ |
1250 | if (unlikely(skb->pkt_type != PACKET_HOST | 1284 | if (unlikely(skb->pkt_type != PACKET_HOST || |
1251 | || skb->dev->flags & IFF_LOOPBACK || skb->sk)) { | 1285 | (af == AF_INET6 || (skb->dev->flags & IFF_LOOPBACK || |
1286 | skb->sk)))) { | ||
1252 | IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n", | 1287 | IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n", |
1253 | skb->pkt_type, | 1288 | skb->pkt_type, |
1254 | iph.protocol, | 1289 | iph.protocol, |
@@ -1277,6 +1312,11 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, | |||
1277 | if (unlikely(!cp)) { | 1312 | if (unlikely(!cp)) { |
1278 | int v; | 1313 | int v; |
1279 | 1314 | ||
1315 | /* For local client packets, it could be a response */ | ||
1316 | cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0); | ||
1317 | if (cp) | ||
1318 | return handle_response(af, skb, pp, cp, iph.len); | ||
1319 | |||
1280 | if (!pp->conn_schedule(af, skb, pp, &v, &cp)) | 1320 | if (!pp->conn_schedule(af, skb, pp, &v, &cp)) |
1281 | return v; | 1321 | return v; |
1282 | } | 1322 | } |
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index de8ed73997c7..808e8be0280a 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c | |||
@@ -166,7 +166,7 @@ tcp_snat_handler(struct sk_buff *skb, | |||
166 | tcph->source = cp->vport; | 166 | tcph->source = cp->vport; |
167 | 167 | ||
168 | /* Adjust TCP checksums */ | 168 | /* Adjust TCP checksums */ |
169 | if (!cp->app) { | 169 | if (!cp->app && (tcph->check != 0)) { |
170 | /* Only port and addr are changed, do fast csum update */ | 170 | /* Only port and addr are changed, do fast csum update */ |
171 | tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, | 171 | tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, |
172 | cp->dport, cp->vport); | 172 | cp->dport, cp->vport); |
@@ -235,7 +235,7 @@ tcp_dnat_handler(struct sk_buff *skb, | |||
235 | /* | 235 | /* |
236 | * Adjust TCP checksums | 236 | * Adjust TCP checksums |
237 | */ | 237 | */ |
238 | if (!cp->app) { | 238 | if (!cp->app && (tcph->check != 0)) { |
239 | /* Only port and addr are changed, do fast csum update */ | 239 | /* Only port and addr are changed, do fast csum update */ |
240 | tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr, | 240 | tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr, |
241 | cp->vport, cp->dport); | 241 | cp->vport, cp->dport); |