aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/ipvs
diff options
context:
space:
mode:
author: Malcolm Turnbull <malcolm@loadbalancer.org> 2008-09-04 21:17:13 -0400
committer: Simon Horman <horms@verge.net.au> 2008-09-04 21:17:13 -0400
commit: 4856c84c1358b79852743ac64e50c1e9d5118f05 (patch)
tree: 9d36ac3cb5571f85da582f4cd9fd082eaf6e0cab /net/ipv4/ipvs
parent: f94fd041402e4e70d2b4ed00008b9bb857e6ae87 (diff)
ipvs: load balance IPv4 connections from a local process
This allows IPVS to load balance connections made by a local process.
For example a proxy server running locally.

External client --> pound:443 -> Local:443 --> IPVS:80 --> RealServer

Signed-off-by: Siim Põder <siim@p6drad-teel.net>
Signed-off-by: Malcolm Turnbull <malcolm@loadbalancer.org>
Signed-off-by: Simon Horman <horms@verge.net.au>
Diffstat (limited to 'net/ipv4/ipvs')
-rw-r--r--  net/ipv4/ipvs/ip_vs_core.c       224
-rw-r--r--  net/ipv4/ipvs/ip_vs_proto_tcp.c    4
2 files changed, 134 insertions, 94 deletions
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 7d3de9db5ac5..26e3d99bbeea 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -651,12 +651,53 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
651} 651}
652#endif 652#endif
653 653
654/* Handle relevant response ICMP messages - forward to the right
655 * destination host. Used for NAT and local client.
656 */
657static int handle_response_icmp(struct sk_buff *skb, struct iphdr *iph,
658 struct iphdr *cih, struct ip_vs_conn *cp,
659 struct ip_vs_protocol *pp,
660 unsigned int offset, unsigned int ihl)
661{
662 unsigned int verdict = NF_DROP;
663
664 if (IP_VS_FWD_METHOD(cp) != 0) {
665 IP_VS_ERR("shouldn't reach here, because the box is on the "
666 "half connection in the tun/dr module.\n");
667 }
668
669 /* Ensure the checksum is correct */
670 if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
671 /* Failed checksum! */
672 IP_VS_DBG(1,
673 "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",
674 NIPQUAD(iph->saddr));
675 goto out;
676 }
677
678 if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
679 offset += 2 * sizeof(__u16);
680 if (!skb_make_writable(skb, offset))
681 goto out;
682
683 ip_vs_nat_icmp(skb, pp, cp, 1);
684
685 /* do the statistics and put it back */
686 ip_vs_out_stats(cp, skb);
687
688 skb->ipvs_property = 1;
689 verdict = NF_ACCEPT;
690
691out:
692 __ip_vs_conn_put(cp);
693
694 return verdict;
695}
696
654/* 697/*
655 * Handle ICMP messages in the inside-to-outside direction (outgoing). 698 * Handle ICMP messages in the inside-to-outside direction (outgoing).
656 * Find any that might be relevant, check against existing connections, 699 * Find any that might be relevant, check against existing connections.
657 * forward to the right destination host if relevant.
658 * Currently handles error types - unreachable, quench, ttl exceeded. 700 * Currently handles error types - unreachable, quench, ttl exceeded.
659 * (Only used in VS/NAT)
660 */ 701 */
661static int ip_vs_out_icmp(struct sk_buff *skb, int *related) 702static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
662{ 703{
@@ -666,7 +707,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
666 struct ip_vs_iphdr ciph; 707 struct ip_vs_iphdr ciph;
667 struct ip_vs_conn *cp; 708 struct ip_vs_conn *cp;
668 struct ip_vs_protocol *pp; 709 struct ip_vs_protocol *pp;
669 unsigned int offset, ihl, verdict; 710 unsigned int offset, ihl;
670 711
671 *related = 1; 712 *related = 1;
672 713
@@ -725,38 +766,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
725 if (!cp) 766 if (!cp)
726 return NF_ACCEPT; 767 return NF_ACCEPT;
727 768
728 verdict = NF_DROP; 769 return handle_response_icmp(skb, iph, cih, cp, pp, offset, ihl);
729
730 if (IP_VS_FWD_METHOD(cp) != 0) {
731 IP_VS_ERR("shouldn't reach here, because the box is on the "
732 "half connection in the tun/dr module.\n");
733 }
734
735 /* Ensure the checksum is correct */
736 if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
737 /* Failed checksum! */
738 IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",
739 NIPQUAD(iph->saddr));
740 goto out;
741 }
742
743 if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
744 offset += 2 * sizeof(__u16);
745 if (!skb_make_writable(skb, offset))
746 goto out;
747
748 ip_vs_nat_icmp(skb, pp, cp, 1);
749
750 /* do the statistics and put it back */
751 ip_vs_out_stats(cp, skb);
752
753 skb->ipvs_property = 1;
754 verdict = NF_ACCEPT;
755
756 out:
757 __ip_vs_conn_put(cp);
758
759 return verdict;
760} 770}
761 771
762#ifdef CONFIG_IP_VS_IPV6 772#ifdef CONFIG_IP_VS_IPV6
@@ -875,10 +885,76 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
875 return th->rst; 885 return th->rst;
876} 886}
877 887
888/* Handle response packets: rewrite addresses and send away...
889 * Used for NAT and local client.
890 */
891static unsigned int
892handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
893 struct ip_vs_conn *cp, int ihl)
894{
895 IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
896
897 if (!skb_make_writable(skb, ihl))
898 goto drop;
899
900 /* mangle the packet */
901 if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
902 goto drop;
903
904#ifdef CONFIG_IP_VS_IPV6
905 if (af == AF_INET6)
906 ipv6_hdr(skb)->saddr = cp->vaddr.in6;
907 else
908#endif
909 {
910 ip_hdr(skb)->saddr = cp->vaddr.ip;
911 ip_send_check(ip_hdr(skb));
912 }
913
914 /* For policy routing, packets originating from this
915 * machine itself may be routed differently to packets
916 * passing through. We want this packet to be routed as
917 * if it came from this machine itself. So re-compute
918 * the routing information.
919 */
920#ifdef CONFIG_IP_VS_IPV6
921 if (af == AF_INET6) {
922 if (ip6_route_me_harder(skb) != 0)
923 goto drop;
924 } else
925#endif
926 if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
927 goto drop;
928
929 /* For policy routing, packets originating from this
930 * machine itself may be routed differently to packets
931 * passing through. We want this packet to be routed as
932 * if it came from this machine itself. So re-compute
933 * the routing information.
934 */
935 if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
936 goto drop;
937
938 IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
939
940 ip_vs_out_stats(cp, skb);
941 ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
942 ip_vs_conn_put(cp);
943
944 skb->ipvs_property = 1;
945
946 LeaveFunction(11);
947 return NF_ACCEPT;
948
949drop:
950 ip_vs_conn_put(cp);
951 kfree_skb(skb);
952 return NF_STOLEN;
953}
954
878/* 955/*
879 * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT. 956 * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
880 * Check if outgoing packet belongs to the established ip_vs_conn, 957 * Check if outgoing packet belongs to the established ip_vs_conn.
881 * rewrite addresses of the packet and send it on its way...
882 */ 958 */
883static unsigned int 959static unsigned int
884ip_vs_out(unsigned int hooknum, struct sk_buff *skb, 960ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
@@ -987,55 +1063,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
987 return NF_ACCEPT; 1063 return NF_ACCEPT;
988 } 1064 }
989 1065
990 IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet"); 1066 return handle_response(af, skb, pp, cp, iph.len);
991
992 if (!skb_make_writable(skb, iph.len))
993 goto drop;
994
995 /* mangle the packet */
996 if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
997 goto drop;
998
999#ifdef CONFIG_IP_VS_IPV6
1000 if (af == AF_INET6)
1001 ipv6_hdr(skb)->saddr = cp->vaddr.in6;
1002 else
1003#endif
1004 {
1005 ip_hdr(skb)->saddr = cp->vaddr.ip;
1006 ip_send_check(ip_hdr(skb));
1007 }
1008
1009 /* For policy routing, packets originating from this
1010 * machine itself may be routed differently to packets
1011 * passing through. We want this packet to be routed as
1012 * if it came from this machine itself. So re-compute
1013 * the routing information.
1014 */
1015#ifdef CONFIG_IP_VS_IPV6
1016 if (af == AF_INET6) {
1017 if (ip6_route_me_harder(skb) != 0)
1018 goto drop;
1019 } else
1020#endif
1021 if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
1022 goto drop;
1023
1024 IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
1025
1026 ip_vs_out_stats(cp, skb);
1027 ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
1028 ip_vs_conn_put(cp);
1029
1030 skb->ipvs_property = 1;
1031
1032 LeaveFunction(11);
1033 return NF_ACCEPT;
1034
1035 drop:
1036 ip_vs_conn_put(cp);
1037 kfree_skb(skb);
1038 return NF_STOLEN;
1039} 1067}
1040 1068
1041 1069
@@ -1111,8 +1139,14 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1111 ip_vs_fill_iphdr(AF_INET, cih, &ciph); 1139 ip_vs_fill_iphdr(AF_INET, cih, &ciph);
1112 /* The embedded headers contain source and dest in reverse order */ 1140 /* The embedded headers contain source and dest in reverse order */
1113 cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1); 1141 cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1);
1114 if (!cp) 1142 if (!cp) {
1143 /* The packet could also belong to a local client */
1144 cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
1145 if (cp)
1146 return handle_response_icmp(skb, iph, cih, cp, pp,
1147 offset, ihl);
1115 return NF_ACCEPT; 1148 return NF_ACCEPT;
1149 }
1116 1150
1117 verdict = NF_DROP; 1151 verdict = NF_DROP;
1118 1152
@@ -1244,11 +1278,12 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
1244 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1278 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1245 1279
1246 /* 1280 /*
1247 * Big tappo: only PACKET_HOST (neither loopback nor mcasts) 1281 * Big tappo: only PACKET_HOST, including loopback for local client
1248 * ... don't know why 1st test DOES NOT include 2nd (?) 1282 * Don't handle local packets on IPv6 for now
1249 */ 1283 */
1250 if (unlikely(skb->pkt_type != PACKET_HOST 1284 if (unlikely(skb->pkt_type != PACKET_HOST ||
1251 || skb->dev->flags & IFF_LOOPBACK || skb->sk)) { 1285 (af == AF_INET6 || (skb->dev->flags & IFF_LOOPBACK ||
1286 skb->sk)))) {
1252 IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n", 1287 IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n",
1253 skb->pkt_type, 1288 skb->pkt_type,
1254 iph.protocol, 1289 iph.protocol,
@@ -1277,6 +1312,11 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
1277 if (unlikely(!cp)) { 1312 if (unlikely(!cp)) {
1278 int v; 1313 int v;
1279 1314
1315 /* For local client packets, it could be a response */
1316 cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
1317 if (cp)
1318 return handle_response(af, skb, pp, cp, iph.len);
1319
1280 if (!pp->conn_schedule(af, skb, pp, &v, &cp)) 1320 if (!pp->conn_schedule(af, skb, pp, &v, &cp))
1281 return v; 1321 return v;
1282 } 1322 }
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index de8ed73997c7..808e8be0280a 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -166,7 +166,7 @@ tcp_snat_handler(struct sk_buff *skb,
166 tcph->source = cp->vport; 166 tcph->source = cp->vport;
167 167
168 /* Adjust TCP checksums */ 168 /* Adjust TCP checksums */
169 if (!cp->app) { 169 if (!cp->app && (tcph->check != 0)) {
170 /* Only port and addr are changed, do fast csum update */ 170 /* Only port and addr are changed, do fast csum update */
171 tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, 171 tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
172 cp->dport, cp->vport); 172 cp->dport, cp->vport);
@@ -235,7 +235,7 @@ tcp_dnat_handler(struct sk_buff *skb,
235 /* 235 /*
236 * Adjust TCP checksums 236 * Adjust TCP checksums
237 */ 237 */
238 if (!cp->app) { 238 if (!cp->app && (tcph->check != 0)) {
239 /* Only port and addr are changed, do fast csum update */ 239 /* Only port and addr are changed, do fast csum update */
240 tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr, 240 tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
241 cp->vport, cp->dport); 241 cp->vport, cp->dport);