diff options
author | Patrick McHardy <kaber@trash.net> | 2010-10-21 10:25:51 -0400 |
---|---|---|
committer | Patrick McHardy <kaber@trash.net> | 2010-10-21 10:25:51 -0400 |
commit | 3b1a1ce6f418cb7ab35eb55c8a6575987a524e30 (patch) | |
tree | a3ebee69d6370631746a348f5852eeb955df5bd3 /net | |
parent | cc6eb433856983e91071469c4ce57accb6947ccb (diff) | |
parent | b0aeef30433ea6854e985c2e9842fa19f51b95cc (diff) |
Merge branch 'for-patrick' of git://git.kernel.org/pub/scm/linux/kernel/git/horms/lvs-test-2.6
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv4/netfilter/nf_nat_core.c | 29 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_conn.c | 2 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_core.c | 586 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_ctl.c | 18 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_ftp.c | 7 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_proto.c | 8 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 52 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_proto_sctp.c | 8 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_proto_tcp.c | 52 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_proto_udp.c | 51 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_xmit.c | 503 |
11 files changed, 918 insertions, 398 deletions
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index e2e00c4da883..0047923c1f22 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c | |||
@@ -462,6 +462,18 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, | |||
462 | return 0; | 462 | return 0; |
463 | } | 463 | } |
464 | 464 | ||
465 | if (manip == IP_NAT_MANIP_SRC) | ||
466 | statusbit = IPS_SRC_NAT; | ||
467 | else | ||
468 | statusbit = IPS_DST_NAT; | ||
469 | |||
470 | /* Invert if this is reply dir. */ | ||
471 | if (dir == IP_CT_DIR_REPLY) | ||
472 | statusbit ^= IPS_NAT_MASK; | ||
473 | |||
474 | if (!(ct->status & statusbit)) | ||
475 | return 1; | ||
476 | |||
465 | pr_debug("icmp_reply_translation: translating error %p manip %u " | 477 | pr_debug("icmp_reply_translation: translating error %p manip %u " |
466 | "dir %s\n", skb, manip, | 478 | "dir %s\n", skb, manip, |
467 | dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); | 479 | dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); |
@@ -496,20 +508,9 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, | |||
496 | 508 | ||
497 | /* Change outer to look the reply to an incoming packet | 509 | /* Change outer to look the reply to an incoming packet |
498 | * (proto 0 means don't invert per-proto part). */ | 510 | * (proto 0 means don't invert per-proto part). */ |
499 | if (manip == IP_NAT_MANIP_SRC) | 511 | nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); |
500 | statusbit = IPS_SRC_NAT; | 512 | if (!manip_pkt(0, skb, 0, &target, manip)) |
501 | else | 513 | return 0; |
502 | statusbit = IPS_DST_NAT; | ||
503 | |||
504 | /* Invert if this is reply dir. */ | ||
505 | if (dir == IP_CT_DIR_REPLY) | ||
506 | statusbit ^= IPS_NAT_MASK; | ||
507 | |||
508 | if (ct->status & statusbit) { | ||
509 | nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); | ||
510 | if (!manip_pkt(0, skb, 0, &target, manip)) | ||
511 | return 0; | ||
512 | } | ||
513 | 514 | ||
514 | return 1; | 515 | return 1; |
515 | } | 516 | } |
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 1d1a529dbe24..e9adecdc8ca4 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c | |||
@@ -563,6 +563,8 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) | |||
563 | */ | 563 | */ |
564 | if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) | 564 | if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) |
565 | conn_flags &= ~IP_VS_CONN_F_INACTIVE; | 565 | conn_flags &= ~IP_VS_CONN_F_INACTIVE; |
566 | /* connections inherit forwarding method from dest */ | ||
567 | cp->flags &= ~IP_VS_CONN_F_FWD_MASK; | ||
566 | } | 568 | } |
567 | cp->flags |= conn_flags; | 569 | cp->flags |= conn_flags; |
568 | cp->dest = dest; | 570 | cp->dest = dest; |
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index e5fef7aef0d4..b4e51e9c5a04 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c | |||
@@ -48,6 +48,7 @@ | |||
48 | #ifdef CONFIG_IP_VS_IPV6 | 48 | #ifdef CONFIG_IP_VS_IPV6 |
49 | #include <net/ipv6.h> | 49 | #include <net/ipv6.h> |
50 | #include <linux/netfilter_ipv6.h> | 50 | #include <linux/netfilter_ipv6.h> |
51 | #include <net/ip6_route.h> | ||
51 | #endif | 52 | #endif |
52 | 53 | ||
53 | #include <net/ip_vs.h> | 54 | #include <net/ip_vs.h> |
@@ -342,7 +343,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
342 | * Protocols supported: TCP, UDP | 343 | * Protocols supported: TCP, UDP |
343 | */ | 344 | */ |
344 | struct ip_vs_conn * | 345 | struct ip_vs_conn * |
345 | ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb) | 346 | ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, |
347 | struct ip_vs_protocol *pp, int *ignored) | ||
346 | { | 348 | { |
347 | struct ip_vs_conn *cp = NULL; | 349 | struct ip_vs_conn *cp = NULL; |
348 | struct ip_vs_iphdr iph; | 350 | struct ip_vs_iphdr iph; |
@@ -350,16 +352,44 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb) | |||
350 | __be16 _ports[2], *pptr; | 352 | __be16 _ports[2], *pptr; |
351 | unsigned int flags; | 353 | unsigned int flags; |
352 | 354 | ||
355 | *ignored = 1; | ||
353 | ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); | 356 | ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); |
354 | pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); | 357 | pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); |
355 | if (pptr == NULL) | 358 | if (pptr == NULL) |
356 | return NULL; | 359 | return NULL; |
357 | 360 | ||
358 | /* | 361 | /* |
362 | * FTPDATA needs this check when using local real server. | ||
363 | * Never schedule Active FTPDATA connections from real server. | ||
364 | * For LVS-NAT they must be already created. For other methods | ||
365 | * with persistence the connection is created on SYN+ACK. | ||
366 | */ | ||
367 | if (pptr[0] == FTPDATA) { | ||
368 | IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, | ||
369 | "Not scheduling FTPDATA"); | ||
370 | return NULL; | ||
371 | } | ||
372 | |||
373 | /* | ||
374 | * Do not schedule replies from local real server. It is risky | ||
375 | * for fwmark services but mostly for persistent services. | ||
376 | */ | ||
377 | if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && | ||
378 | (svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) && | ||
379 | (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) { | ||
380 | IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, | ||
381 | "Not scheduling reply for existing connection"); | ||
382 | __ip_vs_conn_put(cp); | ||
383 | return NULL; | ||
384 | } | ||
385 | |||
386 | /* | ||
359 | * Persistent service | 387 | * Persistent service |
360 | */ | 388 | */ |
361 | if (svc->flags & IP_VS_SVC_F_PERSISTENT) | 389 | if (svc->flags & IP_VS_SVC_F_PERSISTENT) { |
390 | *ignored = 0; | ||
362 | return ip_vs_sched_persist(svc, skb, pptr); | 391 | return ip_vs_sched_persist(svc, skb, pptr); |
392 | } | ||
363 | 393 | ||
364 | /* | 394 | /* |
365 | * Non-persistent service | 395 | * Non-persistent service |
@@ -372,6 +402,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb) | |||
372 | return NULL; | 402 | return NULL; |
373 | } | 403 | } |
374 | 404 | ||
405 | *ignored = 0; | ||
406 | |||
375 | dest = svc->scheduler->schedule(svc, skb); | 407 | dest = svc->scheduler->schedule(svc, skb); |
376 | if (dest == NULL) { | 408 | if (dest == NULL) { |
377 | IP_VS_DBG(1, "Schedule: no dest found.\n"); | 409 | IP_VS_DBG(1, "Schedule: no dest found.\n"); |
@@ -498,35 +530,32 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, | |||
498 | * ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ | 530 | * ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ |
499 | */ | 531 | */ |
500 | #ifdef CONFIG_IP_VS_IPV6 | 532 | #ifdef CONFIG_IP_VS_IPV6 |
501 | if (svc->af == AF_INET6) | 533 | if (svc->af == AF_INET6) { |
534 | if (!skb->dev) { | ||
535 | struct net *net = dev_net(skb_dst(skb)->dev); | ||
536 | |||
537 | skb->dev = net->loopback_dev; | ||
538 | } | ||
502 | icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); | 539 | icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); |
503 | else | 540 | } else |
504 | #endif | 541 | #endif |
505 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); | 542 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); |
506 | 543 | ||
507 | return NF_DROP; | 544 | return NF_DROP; |
508 | } | 545 | } |
509 | 546 | ||
510 | /* | 547 | __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) |
511 | * It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING | ||
512 | * chain and is used to avoid double NAT and confirmation when we do | ||
513 | * not want to keep the conntrack structure | ||
514 | */ | ||
515 | static unsigned int ip_vs_post_routing(unsigned int hooknum, | ||
516 | struct sk_buff *skb, | ||
517 | const struct net_device *in, | ||
518 | const struct net_device *out, | ||
519 | int (*okfn)(struct sk_buff *)) | ||
520 | { | 548 | { |
521 | if (!skb->ipvs_property) | 549 | return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); |
522 | return NF_ACCEPT; | ||
523 | /* The packet was sent from IPVS, exit this chain */ | ||
524 | return NF_STOP; | ||
525 | } | 550 | } |
526 | 551 | ||
527 | __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) | 552 | static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum) |
528 | { | 553 | { |
529 | return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); | 554 | if (NF_INET_LOCAL_IN == hooknum) |
555 | return IP_DEFRAG_VS_IN; | ||
556 | if (NF_INET_FORWARD == hooknum) | ||
557 | return IP_DEFRAG_VS_FWD; | ||
558 | return IP_DEFRAG_VS_OUT; | ||
530 | } | 559 | } |
531 | 560 | ||
532 | static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) | 561 | static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) |
@@ -589,10 +618,10 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
589 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 618 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
590 | 619 | ||
591 | if (inout) | 620 | if (inout) |
592 | IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, | 621 | IP_VS_DBG_PKT(11, AF_INET, pp, skb, (void *)ciph - (void *)iph, |
593 | "Forwarding altered outgoing ICMP"); | 622 | "Forwarding altered outgoing ICMP"); |
594 | else | 623 | else |
595 | IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, | 624 | IP_VS_DBG_PKT(11, AF_INET, pp, skb, (void *)ciph - (void *)iph, |
596 | "Forwarding altered incoming ICMP"); | 625 | "Forwarding altered incoming ICMP"); |
597 | } | 626 | } |
598 | 627 | ||
@@ -634,11 +663,13 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
634 | skb->ip_summed = CHECKSUM_PARTIAL; | 663 | skb->ip_summed = CHECKSUM_PARTIAL; |
635 | 664 | ||
636 | if (inout) | 665 | if (inout) |
637 | IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, | 666 | IP_VS_DBG_PKT(11, AF_INET6, pp, skb, |
638 | "Forwarding altered outgoing ICMPv6"); | 667 | (void *)ciph - (void *)iph, |
668 | "Forwarding altered outgoing ICMPv6"); | ||
639 | else | 669 | else |
640 | IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, | 670 | IP_VS_DBG_PKT(11, AF_INET6, pp, skb, |
641 | "Forwarding altered incoming ICMPv6"); | 671 | (void *)ciph - (void *)iph, |
672 | "Forwarding altered incoming ICMPv6"); | ||
642 | } | 673 | } |
643 | #endif | 674 | #endif |
644 | 675 | ||
@@ -679,11 +710,23 @@ static int handle_response_icmp(int af, struct sk_buff *skb, | |||
679 | #endif | 710 | #endif |
680 | ip_vs_nat_icmp(skb, pp, cp, 1); | 711 | ip_vs_nat_icmp(skb, pp, cp, 1); |
681 | 712 | ||
713 | #ifdef CONFIG_IP_VS_IPV6 | ||
714 | if (af == AF_INET6) { | ||
715 | if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0) | ||
716 | goto out; | ||
717 | } else | ||
718 | #endif | ||
719 | if ((sysctl_ip_vs_snat_reroute || | ||
720 | skb_rtable(skb)->rt_flags & RTCF_LOCAL) && | ||
721 | ip_route_me_harder(skb, RTN_LOCAL) != 0) | ||
722 | goto out; | ||
723 | |||
682 | /* do the statistics and put it back */ | 724 | /* do the statistics and put it back */ |
683 | ip_vs_out_stats(cp, skb); | 725 | ip_vs_out_stats(cp, skb); |
684 | 726 | ||
727 | skb->ipvs_property = 1; | ||
685 | if (!(cp->flags & IP_VS_CONN_F_NFCT)) | 728 | if (!(cp->flags & IP_VS_CONN_F_NFCT)) |
686 | skb->ipvs_property = 1; | 729 | ip_vs_notrack(skb); |
687 | else | 730 | else |
688 | ip_vs_update_conntrack(skb, cp, 0); | 731 | ip_vs_update_conntrack(skb, cp, 0); |
689 | verdict = NF_ACCEPT; | 732 | verdict = NF_ACCEPT; |
@@ -699,7 +742,8 @@ out: | |||
699 | * Find any that might be relevant, check against existing connections. | 742 | * Find any that might be relevant, check against existing connections. |
700 | * Currently handles error types - unreachable, quench, ttl exceeded. | 743 | * Currently handles error types - unreachable, quench, ttl exceeded. |
701 | */ | 744 | */ |
702 | static int ip_vs_out_icmp(struct sk_buff *skb, int *related) | 745 | static int ip_vs_out_icmp(struct sk_buff *skb, int *related, |
746 | unsigned int hooknum) | ||
703 | { | 747 | { |
704 | struct iphdr *iph; | 748 | struct iphdr *iph; |
705 | struct icmphdr _icmph, *ic; | 749 | struct icmphdr _icmph, *ic; |
@@ -714,7 +758,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related) | |||
714 | 758 | ||
715 | /* reassemble IP fragments */ | 759 | /* reassemble IP fragments */ |
716 | if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { | 760 | if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { |
717 | if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) | 761 | if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum))) |
718 | return NF_STOLEN; | 762 | return NF_STOLEN; |
719 | } | 763 | } |
720 | 764 | ||
@@ -757,7 +801,8 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related) | |||
757 | pp->dont_defrag)) | 801 | pp->dont_defrag)) |
758 | return NF_ACCEPT; | 802 | return NF_ACCEPT; |
759 | 803 | ||
760 | IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMP for"); | 804 | IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset, |
805 | "Checking outgoing ICMP for"); | ||
761 | 806 | ||
762 | offset += cih->ihl * 4; | 807 | offset += cih->ihl * 4; |
763 | 808 | ||
@@ -773,7 +818,8 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related) | |||
773 | } | 818 | } |
774 | 819 | ||
775 | #ifdef CONFIG_IP_VS_IPV6 | 820 | #ifdef CONFIG_IP_VS_IPV6 |
776 | static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related) | 821 | static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, |
822 | unsigned int hooknum) | ||
777 | { | 823 | { |
778 | struct ipv6hdr *iph; | 824 | struct ipv6hdr *iph; |
779 | struct icmp6hdr _icmph, *ic; | 825 | struct icmp6hdr _icmph, *ic; |
@@ -789,7 +835,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related) | |||
789 | 835 | ||
790 | /* reassemble IP fragments */ | 836 | /* reassemble IP fragments */ |
791 | if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) { | 837 | if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) { |
792 | if (ip_vs_gather_frags_v6(skb, IP_DEFRAG_VS_OUT)) | 838 | if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum))) |
793 | return NF_STOLEN; | 839 | return NF_STOLEN; |
794 | } | 840 | } |
795 | 841 | ||
@@ -832,7 +878,8 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related) | |||
832 | if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag)) | 878 | if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag)) |
833 | return NF_ACCEPT; | 879 | return NF_ACCEPT; |
834 | 880 | ||
835 | IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMPv6 for"); | 881 | IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset, |
882 | "Checking outgoing ICMPv6 for"); | ||
836 | 883 | ||
837 | offset += sizeof(struct ipv6hdr); | 884 | offset += sizeof(struct ipv6hdr); |
838 | 885 | ||
@@ -880,7 +927,7 @@ static unsigned int | |||
880 | handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | 927 | handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, |
881 | struct ip_vs_conn *cp, int ihl) | 928 | struct ip_vs_conn *cp, int ihl) |
882 | { | 929 | { |
883 | IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet"); | 930 | IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); |
884 | 931 | ||
885 | if (!skb_make_writable(skb, ihl)) | 932 | if (!skb_make_writable(skb, ihl)) |
886 | goto drop; | 933 | goto drop; |
@@ -914,23 +961,24 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
914 | * if it came from this machine itself. So re-compute | 961 | * if it came from this machine itself. So re-compute |
915 | * the routing information. | 962 | * the routing information. |
916 | */ | 963 | */ |
917 | if (sysctl_ip_vs_snat_reroute) { | ||
918 | #ifdef CONFIG_IP_VS_IPV6 | 964 | #ifdef CONFIG_IP_VS_IPV6 |
919 | if (af == AF_INET6) { | 965 | if (af == AF_INET6) { |
920 | if (ip6_route_me_harder(skb) != 0) | 966 | if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0) |
921 | goto drop; | 967 | goto drop; |
922 | } else | 968 | } else |
923 | #endif | 969 | #endif |
924 | if (ip_route_me_harder(skb, RTN_LOCAL) != 0) | 970 | if ((sysctl_ip_vs_snat_reroute || |
925 | goto drop; | 971 | skb_rtable(skb)->rt_flags & RTCF_LOCAL) && |
926 | } | 972 | ip_route_me_harder(skb, RTN_LOCAL) != 0) |
973 | goto drop; | ||
927 | 974 | ||
928 | IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); | 975 | IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); |
929 | 976 | ||
930 | ip_vs_out_stats(cp, skb); | 977 | ip_vs_out_stats(cp, skb); |
931 | ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); | 978 | ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); |
979 | skb->ipvs_property = 1; | ||
932 | if (!(cp->flags & IP_VS_CONN_F_NFCT)) | 980 | if (!(cp->flags & IP_VS_CONN_F_NFCT)) |
933 | skb->ipvs_property = 1; | 981 | ip_vs_notrack(skb); |
934 | else | 982 | else |
935 | ip_vs_update_conntrack(skb, cp, 0); | 983 | ip_vs_update_conntrack(skb, cp, 0); |
936 | ip_vs_conn_put(cp); | 984 | ip_vs_conn_put(cp); |
@@ -946,53 +994,54 @@ drop: | |||
946 | } | 994 | } |
947 | 995 | ||
948 | /* | 996 | /* |
949 | * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT. | ||
950 | * Check if outgoing packet belongs to the established ip_vs_conn. | 997 | * Check if outgoing packet belongs to the established ip_vs_conn. |
951 | */ | 998 | */ |
952 | static unsigned int | 999 | static unsigned int |
953 | ip_vs_out(unsigned int hooknum, struct sk_buff *skb, | 1000 | ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) |
954 | const struct net_device *in, const struct net_device *out, | ||
955 | int (*okfn)(struct sk_buff *)) | ||
956 | { | 1001 | { |
957 | struct ip_vs_iphdr iph; | 1002 | struct ip_vs_iphdr iph; |
958 | struct ip_vs_protocol *pp; | 1003 | struct ip_vs_protocol *pp; |
959 | struct ip_vs_conn *cp; | 1004 | struct ip_vs_conn *cp; |
960 | int af; | ||
961 | 1005 | ||
962 | EnterFunction(11); | 1006 | EnterFunction(11); |
963 | 1007 | ||
964 | af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; | 1008 | /* Already marked as IPVS request or reply? */ |
965 | |||
966 | if (skb->ipvs_property) | 1009 | if (skb->ipvs_property) |
967 | return NF_ACCEPT; | 1010 | return NF_ACCEPT; |
968 | 1011 | ||
1012 | /* Bad... Do not break raw sockets */ | ||
1013 | if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT && | ||
1014 | af == AF_INET)) { | ||
1015 | struct sock *sk = skb->sk; | ||
1016 | struct inet_sock *inet = inet_sk(skb->sk); | ||
1017 | |||
1018 | if (inet && sk->sk_family == PF_INET && inet->nodefrag) | ||
1019 | return NF_ACCEPT; | ||
1020 | } | ||
1021 | |||
1022 | if (unlikely(!skb_dst(skb))) | ||
1023 | return NF_ACCEPT; | ||
1024 | |||
969 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); | 1025 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); |
970 | #ifdef CONFIG_IP_VS_IPV6 | 1026 | #ifdef CONFIG_IP_VS_IPV6 |
971 | if (af == AF_INET6) { | 1027 | if (af == AF_INET6) { |
972 | if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { | 1028 | if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { |
973 | int related, verdict = ip_vs_out_icmp_v6(skb, &related); | 1029 | int related; |
1030 | int verdict = ip_vs_out_icmp_v6(skb, &related, | ||
1031 | hooknum); | ||
974 | 1032 | ||
975 | if (related) { | 1033 | if (related) |
976 | if (sysctl_ip_vs_snat_reroute && | ||
977 | NF_ACCEPT == verdict && | ||
978 | ip6_route_me_harder(skb)) | ||
979 | verdict = NF_DROP; | ||
980 | return verdict; | 1034 | return verdict; |
981 | } | ||
982 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); | 1035 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); |
983 | } | 1036 | } |
984 | } else | 1037 | } else |
985 | #endif | 1038 | #endif |
986 | if (unlikely(iph.protocol == IPPROTO_ICMP)) { | 1039 | if (unlikely(iph.protocol == IPPROTO_ICMP)) { |
987 | int related, verdict = ip_vs_out_icmp(skb, &related); | 1040 | int related; |
1041 | int verdict = ip_vs_out_icmp(skb, &related, hooknum); | ||
988 | 1042 | ||
989 | if (related) { | 1043 | if (related) |
990 | if (sysctl_ip_vs_snat_reroute && | ||
991 | NF_ACCEPT == verdict && | ||
992 | ip_route_me_harder(skb, RTN_LOCAL)) | ||
993 | verdict = NF_DROP; | ||
994 | return verdict; | 1044 | return verdict; |
995 | } | ||
996 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); | 1045 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); |
997 | } | 1046 | } |
998 | 1047 | ||
@@ -1003,19 +1052,19 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, | |||
1003 | /* reassemble IP fragments */ | 1052 | /* reassemble IP fragments */ |
1004 | #ifdef CONFIG_IP_VS_IPV6 | 1053 | #ifdef CONFIG_IP_VS_IPV6 |
1005 | if (af == AF_INET6) { | 1054 | if (af == AF_INET6) { |
1006 | if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { | 1055 | if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) { |
1007 | int related, verdict = ip_vs_out_icmp_v6(skb, &related); | 1056 | if (ip_vs_gather_frags_v6(skb, |
1008 | 1057 | ip_vs_defrag_user(hooknum))) | |
1009 | if (related) | 1058 | return NF_STOLEN; |
1010 | return verdict; | ||
1011 | |||
1012 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); | ||
1013 | } | 1059 | } |
1060 | |||
1061 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); | ||
1014 | } else | 1062 | } else |
1015 | #endif | 1063 | #endif |
1016 | if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) && | 1064 | if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) && |
1017 | !pp->dont_defrag)) { | 1065 | !pp->dont_defrag)) { |
1018 | if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) | 1066 | if (ip_vs_gather_frags(skb, |
1067 | ip_vs_defrag_user(hooknum))) | ||
1019 | return NF_STOLEN; | 1068 | return NF_STOLEN; |
1020 | 1069 | ||
1021 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); | 1070 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); |
@@ -1026,55 +1075,123 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, | |||
1026 | */ | 1075 | */ |
1027 | cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0); | 1076 | cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0); |
1028 | 1077 | ||
1029 | if (unlikely(!cp)) { | 1078 | if (likely(cp)) |
1030 | if (sysctl_ip_vs_nat_icmp_send && | 1079 | return handle_response(af, skb, pp, cp, iph.len); |
1031 | (pp->protocol == IPPROTO_TCP || | 1080 | if (sysctl_ip_vs_nat_icmp_send && |
1032 | pp->protocol == IPPROTO_UDP || | 1081 | (pp->protocol == IPPROTO_TCP || |
1033 | pp->protocol == IPPROTO_SCTP)) { | 1082 | pp->protocol == IPPROTO_UDP || |
1034 | __be16 _ports[2], *pptr; | 1083 | pp->protocol == IPPROTO_SCTP)) { |
1035 | 1084 | __be16 _ports[2], *pptr; | |
1036 | pptr = skb_header_pointer(skb, iph.len, | 1085 | |
1037 | sizeof(_ports), _ports); | 1086 | pptr = skb_header_pointer(skb, iph.len, |
1038 | if (pptr == NULL) | 1087 | sizeof(_ports), _ports); |
1039 | return NF_ACCEPT; /* Not for me */ | 1088 | if (pptr == NULL) |
1040 | if (ip_vs_lookup_real_service(af, iph.protocol, | 1089 | return NF_ACCEPT; /* Not for me */ |
1041 | &iph.saddr, | 1090 | if (ip_vs_lookup_real_service(af, iph.protocol, |
1042 | pptr[0])) { | 1091 | &iph.saddr, |
1043 | /* | 1092 | pptr[0])) { |
1044 | * Notify the real server: there is no | 1093 | /* |
1045 | * existing entry if it is not RST | 1094 | * Notify the real server: there is no |
1046 | * packet or not TCP packet. | 1095 | * existing entry if it is not RST |
1047 | */ | 1096 | * packet or not TCP packet. |
1048 | if ((iph.protocol != IPPROTO_TCP && | 1097 | */ |
1049 | iph.protocol != IPPROTO_SCTP) | 1098 | if ((iph.protocol != IPPROTO_TCP && |
1050 | || ((iph.protocol == IPPROTO_TCP | 1099 | iph.protocol != IPPROTO_SCTP) |
1051 | && !is_tcp_reset(skb, iph.len)) | 1100 | || ((iph.protocol == IPPROTO_TCP |
1052 | || (iph.protocol == IPPROTO_SCTP | 1101 | && !is_tcp_reset(skb, iph.len)) |
1053 | && !is_sctp_abort(skb, | 1102 | || (iph.protocol == IPPROTO_SCTP |
1054 | iph.len)))) { | 1103 | && !is_sctp_abort(skb, |
1104 | iph.len)))) { | ||
1055 | #ifdef CONFIG_IP_VS_IPV6 | 1105 | #ifdef CONFIG_IP_VS_IPV6 |
1056 | if (af == AF_INET6) | 1106 | if (af == AF_INET6) { |
1057 | icmpv6_send(skb, | 1107 | struct net *net = |
1058 | ICMPV6_DEST_UNREACH, | 1108 | dev_net(skb_dst(skb)->dev); |
1059 | ICMPV6_PORT_UNREACH, | 1109 | |
1060 | 0); | 1110 | if (!skb->dev) |
1061 | else | 1111 | skb->dev = net->loopback_dev; |
1112 | icmpv6_send(skb, | ||
1113 | ICMPV6_DEST_UNREACH, | ||
1114 | ICMPV6_PORT_UNREACH, | ||
1115 | 0); | ||
1116 | } else | ||
1062 | #endif | 1117 | #endif |
1063 | icmp_send(skb, | 1118 | icmp_send(skb, |
1064 | ICMP_DEST_UNREACH, | 1119 | ICMP_DEST_UNREACH, |
1065 | ICMP_PORT_UNREACH, 0); | 1120 | ICMP_PORT_UNREACH, 0); |
1066 | return NF_DROP; | 1121 | return NF_DROP; |
1067 | } | ||
1068 | } | 1122 | } |
1069 | } | 1123 | } |
1070 | IP_VS_DBG_PKT(12, pp, skb, 0, | ||
1071 | "packet continues traversal as normal"); | ||
1072 | return NF_ACCEPT; | ||
1073 | } | 1124 | } |
1125 | IP_VS_DBG_PKT(12, af, pp, skb, 0, | ||
1126 | "ip_vs_out: packet continues traversal as normal"); | ||
1127 | return NF_ACCEPT; | ||
1128 | } | ||
1129 | |||
1130 | /* | ||
1131 | * It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chain, | ||
1132 | * used only for VS/NAT. | ||
1133 | * Check if packet is reply for established ip_vs_conn. | ||
1134 | */ | ||
1135 | static unsigned int | ||
1136 | ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb, | ||
1137 | const struct net_device *in, const struct net_device *out, | ||
1138 | int (*okfn)(struct sk_buff *)) | ||
1139 | { | ||
1140 | return ip_vs_out(hooknum, skb, AF_INET); | ||
1141 | } | ||
1142 | |||
1143 | /* | ||
1144 | * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT. | ||
1145 | * Check if packet is reply for established ip_vs_conn. | ||
1146 | */ | ||
1147 | static unsigned int | ||
1148 | ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb, | ||
1149 | const struct net_device *in, const struct net_device *out, | ||
1150 | int (*okfn)(struct sk_buff *)) | ||
1151 | { | ||
1152 | unsigned int verdict; | ||
1074 | 1153 | ||
1075 | return handle_response(af, skb, pp, cp, iph.len); | 1154 | /* Disable BH in LOCAL_OUT until all places are fixed */ |
1155 | local_bh_disable(); | ||
1156 | verdict = ip_vs_out(hooknum, skb, AF_INET); | ||
1157 | local_bh_enable(); | ||
1158 | return verdict; | ||
1076 | } | 1159 | } |
1077 | 1160 | ||
1161 | #ifdef CONFIG_IP_VS_IPV6 | ||
1162 | |||
1163 | /* | ||
1164 | * It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chain, | ||
1165 | * used only for VS/NAT. | ||
1166 | * Check if packet is reply for established ip_vs_conn. | ||
1167 | */ | ||
1168 | static unsigned int | ||
1169 | ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb, | ||
1170 | const struct net_device *in, const struct net_device *out, | ||
1171 | int (*okfn)(struct sk_buff *)) | ||
1172 | { | ||
1173 | return ip_vs_out(hooknum, skb, AF_INET6); | ||
1174 | } | ||
1175 | |||
1176 | /* | ||
1177 | * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT. | ||
1178 | * Check if packet is reply for established ip_vs_conn. | ||
1179 | */ | ||
1180 | static unsigned int | ||
1181 | ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb, | ||
1182 | const struct net_device *in, const struct net_device *out, | ||
1183 | int (*okfn)(struct sk_buff *)) | ||
1184 | { | ||
1185 | unsigned int verdict; | ||
1186 | |||
1187 | /* Disable BH in LOCAL_OUT until all places are fixed */ | ||
1188 | local_bh_disable(); | ||
1189 | verdict = ip_vs_out(hooknum, skb, AF_INET6); | ||
1190 | local_bh_enable(); | ||
1191 | return verdict; | ||
1192 | } | ||
1193 | |||
1194 | #endif | ||
1078 | 1195 | ||
1079 | /* | 1196 | /* |
1080 | * Handle ICMP messages in the outside-to-inside direction (incoming). | 1197 | * Handle ICMP messages in the outside-to-inside direction (incoming). |
@@ -1098,8 +1215,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) | |||
1098 | 1215 | ||
1099 | /* reassemble IP fragments */ | 1216 | /* reassemble IP fragments */ |
1100 | if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { | 1217 | if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { |
1101 | if (ip_vs_gather_frags(skb, hooknum == NF_INET_LOCAL_IN ? | 1218 | if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum))) |
1102 | IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD)) | ||
1103 | return NF_STOLEN; | 1219 | return NF_STOLEN; |
1104 | } | 1220 | } |
1105 | 1221 | ||
@@ -1142,7 +1258,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) | |||
1142 | pp->dont_defrag)) | 1258 | pp->dont_defrag)) |
1143 | return NF_ACCEPT; | 1259 | return NF_ACCEPT; |
1144 | 1260 | ||
1145 | IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMP for"); | 1261 | IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset, |
1262 | "Checking incoming ICMP for"); | ||
1146 | 1263 | ||
1147 | offset += cih->ihl * 4; | 1264 | offset += cih->ihl * 4; |
1148 | 1265 | ||
@@ -1176,7 +1293,14 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) | |||
1176 | if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) | 1293 | if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) |
1177 | offset += 2 * sizeof(__u16); | 1294 | offset += 2 * sizeof(__u16); |
1178 | verdict = ip_vs_icmp_xmit(skb, cp, pp, offset); | 1295 | verdict = ip_vs_icmp_xmit(skb, cp, pp, offset); |
1179 | /* do not touch skb anymore */ | 1296 | /* LOCALNODE from FORWARD hook is not supported */ |
1297 | if (verdict == NF_ACCEPT && hooknum == NF_INET_FORWARD && | ||
1298 | skb_rtable(skb)->rt_flags & RTCF_LOCAL) { | ||
1299 | IP_VS_DBG(1, "%s(): " | ||
1300 | "local delivery to %pI4 but in FORWARD\n", | ||
1301 | __func__, &skb_rtable(skb)->rt_dst); | ||
1302 | verdict = NF_DROP; | ||
1303 | } | ||
1180 | 1304 | ||
1181 | out: | 1305 | out: |
1182 | __ip_vs_conn_put(cp); | 1306 | __ip_vs_conn_put(cp); |
@@ -1197,14 +1321,13 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) | |||
1197 | struct ip_vs_protocol *pp; | 1321 | struct ip_vs_protocol *pp; |
1198 | unsigned int offset, verdict; | 1322 | unsigned int offset, verdict; |
1199 | union nf_inet_addr snet; | 1323 | union nf_inet_addr snet; |
1324 | struct rt6_info *rt; | ||
1200 | 1325 | ||
1201 | *related = 1; | 1326 | *related = 1; |
1202 | 1327 | ||
1203 | /* reassemble IP fragments */ | 1328 | /* reassemble IP fragments */ |
1204 | if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) { | 1329 | if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) { |
1205 | if (ip_vs_gather_frags_v6(skb, hooknum == NF_INET_LOCAL_IN ? | 1330 | if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum))) |
1206 | IP_DEFRAG_VS_IN : | ||
1207 | IP_DEFRAG_VS_FWD)) | ||
1208 | return NF_STOLEN; | 1331 | return NF_STOLEN; |
1209 | } | 1332 | } |
1210 | 1333 | ||
@@ -1247,7 +1370,8 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) | |||
1247 | if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag)) | 1370 | if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag)) |
1248 | return NF_ACCEPT; | 1371 | return NF_ACCEPT; |
1249 | 1372 | ||
1250 | IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMPv6 for"); | 1373 | IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset, |
1374 | "Checking incoming ICMPv6 for"); | ||
1251 | 1375 | ||
1252 | offset += sizeof(struct ipv6hdr); | 1376 | offset += sizeof(struct ipv6hdr); |
1253 | 1377 | ||
@@ -1275,7 +1399,15 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) | |||
1275 | IPPROTO_SCTP == cih->nexthdr) | 1399 | IPPROTO_SCTP == cih->nexthdr) |
1276 | offset += 2 * sizeof(__u16); | 1400 | offset += 2 * sizeof(__u16); |
1277 | verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset); | 1401 | verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset); |
1278 | /* do not touch skb anymore */ | 1402 | /* LOCALNODE from FORWARD hook is not supported */ |
1403 | if (verdict == NF_ACCEPT && hooknum == NF_INET_FORWARD && | ||
1404 | (rt = (struct rt6_info *) skb_dst(skb)) && | ||
1405 | rt->rt6i_dev && rt->rt6i_dev->flags & IFF_LOOPBACK) { | ||
1406 | IP_VS_DBG(1, "%s(): " | ||
1407 | "local delivery to %pI6 but in FORWARD\n", | ||
1408 | __func__, &rt->rt6i_dst); | ||
1409 | verdict = NF_DROP; | ||
1410 | } | ||
1279 | 1411 | ||
1280 | __ip_vs_conn_put(cp); | 1412 | __ip_vs_conn_put(cp); |
1281 | 1413 | ||
@@ -1289,35 +1421,49 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) | |||
1289 | * and send it on its way... | 1421 | * and send it on its way... |
1290 | */ | 1422 | */ |
1291 | static unsigned int | 1423 | static unsigned int |
1292 | ip_vs_in(unsigned int hooknum, struct sk_buff *skb, | 1424 | ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) |
1293 | const struct net_device *in, const struct net_device *out, | ||
1294 | int (*okfn)(struct sk_buff *)) | ||
1295 | { | 1425 | { |
1296 | struct ip_vs_iphdr iph; | 1426 | struct ip_vs_iphdr iph; |
1297 | struct ip_vs_protocol *pp; | 1427 | struct ip_vs_protocol *pp; |
1298 | struct ip_vs_conn *cp; | 1428 | struct ip_vs_conn *cp; |
1299 | int ret, restart, af, pkts; | 1429 | int ret, restart, pkts; |
1300 | |||
1301 | af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; | ||
1302 | 1430 | ||
1303 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); | 1431 | /* Already marked as IPVS request or reply? */ |
1432 | if (skb->ipvs_property) | ||
1433 | return NF_ACCEPT; | ||
1304 | 1434 | ||
1305 | /* | 1435 | /* |
1306 | * Big tappo: only PACKET_HOST, including loopback for local client | 1436 | * Big tappo: |
1307 | * Don't handle local packets on IPv6 for now | 1437 | * - remote client: only PACKET_HOST |
1438 | * - route: used for struct net when skb->dev is unset | ||
1308 | */ | 1439 | */ |
1309 | if (unlikely(skb->pkt_type != PACKET_HOST)) { | 1440 | if (unlikely((skb->pkt_type != PACKET_HOST && |
1310 | IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n", | 1441 | hooknum != NF_INET_LOCAL_OUT) || |
1311 | skb->pkt_type, | 1442 | !skb_dst(skb))) { |
1312 | iph.protocol, | 1443 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); |
1313 | IP_VS_DBG_ADDR(af, &iph.daddr)); | 1444 | IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s" |
1445 | " ignored in hook %u\n", | ||
1446 | skb->pkt_type, iph.protocol, | ||
1447 | IP_VS_DBG_ADDR(af, &iph.daddr), hooknum); | ||
1314 | return NF_ACCEPT; | 1448 | return NF_ACCEPT; |
1315 | } | 1449 | } |
1450 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); | ||
1451 | |||
1452 | /* Bad... Do not break raw sockets */ | ||
1453 | if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT && | ||
1454 | af == AF_INET)) { | ||
1455 | struct sock *sk = skb->sk; | ||
1456 | struct inet_sock *inet = inet_sk(skb->sk); | ||
1457 | |||
1458 | if (inet && sk->sk_family == PF_INET && inet->nodefrag) | ||
1459 | return NF_ACCEPT; | ||
1460 | } | ||
1316 | 1461 | ||
1317 | #ifdef CONFIG_IP_VS_IPV6 | 1462 | #ifdef CONFIG_IP_VS_IPV6 |
1318 | if (af == AF_INET6) { | 1463 | if (af == AF_INET6) { |
1319 | if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { | 1464 | if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { |
1320 | int related, verdict = ip_vs_in_icmp_v6(skb, &related, hooknum); | 1465 | int related; |
1466 | int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum); | ||
1321 | 1467 | ||
1322 | if (related) | 1468 | if (related) |
1323 | return verdict; | 1469 | return verdict; |
@@ -1326,7 +1472,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, | |||
1326 | } else | 1472 | } else |
1327 | #endif | 1473 | #endif |
1328 | if (unlikely(iph.protocol == IPPROTO_ICMP)) { | 1474 | if (unlikely(iph.protocol == IPPROTO_ICMP)) { |
1329 | int related, verdict = ip_vs_in_icmp(skb, &related, hooknum); | 1475 | int related; |
1476 | int verdict = ip_vs_in_icmp(skb, &related, hooknum); | ||
1330 | 1477 | ||
1331 | if (related) | 1478 | if (related) |
1332 | return verdict; | 1479 | return verdict; |
@@ -1346,23 +1493,18 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, | |||
1346 | if (unlikely(!cp)) { | 1493 | if (unlikely(!cp)) { |
1347 | int v; | 1494 | int v; |
1348 | 1495 | ||
1349 | /* For local client packets, it could be a response */ | ||
1350 | cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0); | ||
1351 | if (cp) | ||
1352 | return handle_response(af, skb, pp, cp, iph.len); | ||
1353 | |||
1354 | if (!pp->conn_schedule(af, skb, pp, &v, &cp)) | 1496 | if (!pp->conn_schedule(af, skb, pp, &v, &cp)) |
1355 | return v; | 1497 | return v; |
1356 | } | 1498 | } |
1357 | 1499 | ||
1358 | if (unlikely(!cp)) { | 1500 | if (unlikely(!cp)) { |
1359 | /* sorry, all this trouble for a no-hit :) */ | 1501 | /* sorry, all this trouble for a no-hit :) */ |
1360 | IP_VS_DBG_PKT(12, pp, skb, 0, | 1502 | IP_VS_DBG_PKT(12, af, pp, skb, 0, |
1361 | "packet continues traversal as normal"); | 1503 | "ip_vs_in: packet continues traversal as normal"); |
1362 | return NF_ACCEPT; | 1504 | return NF_ACCEPT; |
1363 | } | 1505 | } |
1364 | 1506 | ||
1365 | IP_VS_DBG_PKT(11, pp, skb, 0, "Incoming packet"); | 1507 | IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); |
1366 | 1508 | ||
1367 | /* Check the server status */ | 1509 | /* Check the server status */ |
1368 | if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { | 1510 | if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { |
@@ -1429,6 +1571,72 @@ out: | |||
1429 | return ret; | 1571 | return ret; |
1430 | } | 1572 | } |
1431 | 1573 | ||
1574 | /* | ||
1575 | * AF_INET handler in NF_INET_LOCAL_IN chain | ||
1576 | * Schedule and forward packets from remote clients | ||
1577 | */ | ||
1578 | static unsigned int | ||
1579 | ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb, | ||
1580 | const struct net_device *in, | ||
1581 | const struct net_device *out, | ||
1582 | int (*okfn)(struct sk_buff *)) | ||
1583 | { | ||
1584 | return ip_vs_in(hooknum, skb, AF_INET); | ||
1585 | } | ||
1586 | |||
1587 | /* | ||
1588 | * AF_INET handler in NF_INET_LOCAL_OUT chain | ||
1589 | * Schedule and forward packets from local clients | ||
1590 | */ | ||
1591 | static unsigned int | ||
1592 | ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb, | ||
1593 | const struct net_device *in, const struct net_device *out, | ||
1594 | int (*okfn)(struct sk_buff *)) | ||
1595 | { | ||
1596 | unsigned int verdict; | ||
1597 | |||
1598 | /* Disable BH in LOCAL_OUT until all places are fixed */ | ||
1599 | local_bh_disable(); | ||
1600 | verdict = ip_vs_in(hooknum, skb, AF_INET); | ||
1601 | local_bh_enable(); | ||
1602 | return verdict; | ||
1603 | } | ||
1604 | |||
1605 | #ifdef CONFIG_IP_VS_IPV6 | ||
1606 | |||
1607 | /* | ||
1608 | * AF_INET6 handler in NF_INET_LOCAL_IN chain | ||
1609 | * Schedule and forward packets from remote clients | ||
1610 | */ | ||
1611 | static unsigned int | ||
1612 | ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb, | ||
1613 | const struct net_device *in, | ||
1614 | const struct net_device *out, | ||
1615 | int (*okfn)(struct sk_buff *)) | ||
1616 | { | ||
1617 | return ip_vs_in(hooknum, skb, AF_INET6); | ||
1618 | } | ||
1619 | |||
1620 | /* | ||
1621 | * AF_INET6 handler in NF_INET_LOCAL_OUT chain | ||
1622 | * Schedule and forward packets from local clients | ||
1623 | */ | ||
1624 | static unsigned int | ||
1625 | ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb, | ||
1626 | const struct net_device *in, const struct net_device *out, | ||
1627 | int (*okfn)(struct sk_buff *)) | ||
1628 | { | ||
1629 | unsigned int verdict; | ||
1630 | |||
1631 | /* Disable BH in LOCAL_OUT until all places are fixed */ | ||
1632 | local_bh_disable(); | ||
1633 | verdict = ip_vs_in(hooknum, skb, AF_INET6); | ||
1634 | local_bh_enable(); | ||
1635 | return verdict; | ||
1636 | } | ||
1637 | |||
1638 | #endif | ||
1639 | |||
1432 | 1640 | ||
1433 | /* | 1641 | /* |
1434 | * It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP | 1642 | * It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP |
@@ -1469,23 +1677,39 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, | |||
1469 | 1677 | ||
1470 | 1678 | ||
1471 | static struct nf_hook_ops ip_vs_ops[] __read_mostly = { | 1679 | static struct nf_hook_ops ip_vs_ops[] __read_mostly = { |
1680 | /* After packet filtering, change source only for VS/NAT */ | ||
1681 | { | ||
1682 | .hook = ip_vs_reply4, | ||
1683 | .owner = THIS_MODULE, | ||
1684 | .pf = PF_INET, | ||
1685 | .hooknum = NF_INET_LOCAL_IN, | ||
1686 | .priority = 99, | ||
1687 | }, | ||
1472 | /* After packet filtering, forward packet through VS/DR, VS/TUN, | 1688 | /* After packet filtering, forward packet through VS/DR, VS/TUN, |
1473 | * or VS/NAT(change destination), so that filtering rules can be | 1689 | * or VS/NAT(change destination), so that filtering rules can be |
1474 | * applied to IPVS. */ | 1690 | * applied to IPVS. */ |
1475 | { | 1691 | { |
1476 | .hook = ip_vs_in, | 1692 | .hook = ip_vs_remote_request4, |
1477 | .owner = THIS_MODULE, | 1693 | .owner = THIS_MODULE, |
1478 | .pf = PF_INET, | 1694 | .pf = PF_INET, |
1479 | .hooknum = NF_INET_LOCAL_IN, | 1695 | .hooknum = NF_INET_LOCAL_IN, |
1480 | .priority = 100, | 1696 | .priority = 101, |
1481 | }, | 1697 | }, |
1482 | /* After packet filtering, change source only for VS/NAT */ | 1698 | /* Before ip_vs_in, change source only for VS/NAT */ |
1699 | { | ||
1700 | .hook = ip_vs_local_reply4, | ||
1701 | .owner = THIS_MODULE, | ||
1702 | .pf = PF_INET, | ||
1703 | .hooknum = NF_INET_LOCAL_OUT, | ||
1704 | .priority = -99, | ||
1705 | }, | ||
1706 | /* After mangle, schedule and forward local requests */ | ||
1483 | { | 1707 | { |
1484 | .hook = ip_vs_out, | 1708 | .hook = ip_vs_local_request4, |
1485 | .owner = THIS_MODULE, | 1709 | .owner = THIS_MODULE, |
1486 | .pf = PF_INET, | 1710 | .pf = PF_INET, |
1487 | .hooknum = NF_INET_FORWARD, | 1711 | .hooknum = NF_INET_LOCAL_OUT, |
1488 | .priority = 100, | 1712 | .priority = -98, |
1489 | }, | 1713 | }, |
1490 | /* After packet filtering (but before ip_vs_out_icmp), catch icmp | 1714 | /* After packet filtering (but before ip_vs_out_icmp), catch icmp |
1491 | * destined for 0.0.0.0/0, which is for incoming IPVS connections */ | 1715 | * destined for 0.0.0.0/0, which is for incoming IPVS connections */ |
@@ -1493,35 +1717,51 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { | |||
1493 | .hook = ip_vs_forward_icmp, | 1717 | .hook = ip_vs_forward_icmp, |
1494 | .owner = THIS_MODULE, | 1718 | .owner = THIS_MODULE, |
1495 | .pf = PF_INET, | 1719 | .pf = PF_INET, |
1496 | .hooknum = NF_INET_FORWARD, | 1720 | .hooknum = NF_INET_FORWARD, |
1497 | .priority = 99, | 1721 | .priority = 99, |
1498 | }, | 1722 | }, |
1499 | /* Before the netfilter connection tracking, exit from POST_ROUTING */ | 1723 | /* After packet filtering, change source only for VS/NAT */ |
1500 | { | 1724 | { |
1501 | .hook = ip_vs_post_routing, | 1725 | .hook = ip_vs_reply4, |
1502 | .owner = THIS_MODULE, | 1726 | .owner = THIS_MODULE, |
1503 | .pf = PF_INET, | 1727 | .pf = PF_INET, |
1504 | .hooknum = NF_INET_POST_ROUTING, | 1728 | .hooknum = NF_INET_FORWARD, |
1505 | .priority = NF_IP_PRI_NAT_SRC-1, | 1729 | .priority = 100, |
1506 | }, | 1730 | }, |
1507 | #ifdef CONFIG_IP_VS_IPV6 | 1731 | #ifdef CONFIG_IP_VS_IPV6 |
1732 | /* After packet filtering, change source only for VS/NAT */ | ||
1733 | { | ||
1734 | .hook = ip_vs_reply6, | ||
1735 | .owner = THIS_MODULE, | ||
1736 | .pf = PF_INET6, | ||
1737 | .hooknum = NF_INET_LOCAL_IN, | ||
1738 | .priority = 99, | ||
1739 | }, | ||
1508 | /* After packet filtering, forward packet through VS/DR, VS/TUN, | 1740 | /* After packet filtering, forward packet through VS/DR, VS/TUN, |
1509 | * or VS/NAT(change destination), so that filtering rules can be | 1741 | * or VS/NAT(change destination), so that filtering rules can be |
1510 | * applied to IPVS. */ | 1742 | * applied to IPVS. */ |
1511 | { | 1743 | { |
1512 | .hook = ip_vs_in, | 1744 | .hook = ip_vs_remote_request6, |
1513 | .owner = THIS_MODULE, | 1745 | .owner = THIS_MODULE, |
1514 | .pf = PF_INET6, | 1746 | .pf = PF_INET6, |
1515 | .hooknum = NF_INET_LOCAL_IN, | 1747 | .hooknum = NF_INET_LOCAL_IN, |
1516 | .priority = 100, | 1748 | .priority = 101, |
1517 | }, | 1749 | }, |
1518 | /* After packet filtering, change source only for VS/NAT */ | 1750 | /* Before ip_vs_in, change source only for VS/NAT */ |
1519 | { | 1751 | { |
1520 | .hook = ip_vs_out, | 1752 | .hook = ip_vs_local_reply6, |
1753 | .owner = THIS_MODULE, | ||
1754 | .pf = PF_INET, | ||
1755 | .hooknum = NF_INET_LOCAL_OUT, | ||
1756 | .priority = -99, | ||
1757 | }, | ||
1758 | /* After mangle, schedule and forward local requests */ | ||
1759 | { | ||
1760 | .hook = ip_vs_local_request6, | ||
1521 | .owner = THIS_MODULE, | 1761 | .owner = THIS_MODULE, |
1522 | .pf = PF_INET6, | 1762 | .pf = PF_INET6, |
1523 | .hooknum = NF_INET_FORWARD, | 1763 | .hooknum = NF_INET_LOCAL_OUT, |
1524 | .priority = 100, | 1764 | .priority = -98, |
1525 | }, | 1765 | }, |
1526 | /* After packet filtering (but before ip_vs_out_icmp), catch icmp | 1766 | /* After packet filtering (but before ip_vs_out_icmp), catch icmp |
1527 | * destined for 0.0.0.0/0, which is for incoming IPVS connections */ | 1767 | * destined for 0.0.0.0/0, which is for incoming IPVS connections */ |
@@ -1529,16 +1769,16 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { | |||
1529 | .hook = ip_vs_forward_icmp_v6, | 1769 | .hook = ip_vs_forward_icmp_v6, |
1530 | .owner = THIS_MODULE, | 1770 | .owner = THIS_MODULE, |
1531 | .pf = PF_INET6, | 1771 | .pf = PF_INET6, |
1532 | .hooknum = NF_INET_FORWARD, | 1772 | .hooknum = NF_INET_FORWARD, |
1533 | .priority = 99, | 1773 | .priority = 99, |
1534 | }, | 1774 | }, |
1535 | /* Before the netfilter connection tracking, exit from POST_ROUTING */ | 1775 | /* After packet filtering, change source only for VS/NAT */ |
1536 | { | 1776 | { |
1537 | .hook = ip_vs_post_routing, | 1777 | .hook = ip_vs_reply6, |
1538 | .owner = THIS_MODULE, | 1778 | .owner = THIS_MODULE, |
1539 | .pf = PF_INET6, | 1779 | .pf = PF_INET6, |
1540 | .hooknum = NF_INET_POST_ROUTING, | 1780 | .hooknum = NF_INET_FORWARD, |
1541 | .priority = NF_IP6_PRI_NAT_SRC-1, | 1781 | .priority = 100, |
1542 | }, | 1782 | }, |
1543 | #endif | 1783 | #endif |
1544 | }; | 1784 | }; |
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 0b884d3e192f..5f5daa30b0af 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c | |||
@@ -777,20 +777,6 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, | |||
777 | conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK; | 777 | conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK; |
778 | conn_flags |= IP_VS_CONN_F_INACTIVE; | 778 | conn_flags |= IP_VS_CONN_F_INACTIVE; |
779 | 779 | ||
780 | /* check if local node and update the flags */ | ||
781 | #ifdef CONFIG_IP_VS_IPV6 | ||
782 | if (svc->af == AF_INET6) { | ||
783 | if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) { | ||
784 | conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) | ||
785 | | IP_VS_CONN_F_LOCALNODE; | ||
786 | } | ||
787 | } else | ||
788 | #endif | ||
789 | if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) { | ||
790 | conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) | ||
791 | | IP_VS_CONN_F_LOCALNODE; | ||
792 | } | ||
793 | |||
794 | /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ | 780 | /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ |
795 | if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) { | 781 | if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) { |
796 | conn_flags |= IP_VS_CONN_F_NOOUTPUT; | 782 | conn_flags |= IP_VS_CONN_F_NOOUTPUT; |
@@ -824,6 +810,10 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, | |||
824 | dest->u_threshold = udest->u_threshold; | 810 | dest->u_threshold = udest->u_threshold; |
825 | dest->l_threshold = udest->l_threshold; | 811 | dest->l_threshold = udest->l_threshold; |
826 | 812 | ||
813 | spin_lock(&dest->dst_lock); | ||
814 | ip_vs_dst_reset(dest); | ||
815 | spin_unlock(&dest->dst_lock); | ||
816 | |||
827 | if (add) | 817 | if (add) |
828 | ip_vs_new_estimator(&dest->stats); | 818 | ip_vs_new_estimator(&dest->stats); |
829 | 819 | ||
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 090889a3b3af..75455000ad1c 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c | |||
@@ -242,9 +242,14 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
242 | ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, | 242 | ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, |
243 | start-data, end-start, | 243 | start-data, end-start, |
244 | buf, buf_len); | 244 | buf, buf_len); |
245 | if (ret) | 245 | if (ret) { |
246 | ip_vs_nfct_expect_related(skb, ct, n_cp, | 246 | ip_vs_nfct_expect_related(skb, ct, n_cp, |
247 | IPPROTO_TCP, 0, 0); | 247 | IPPROTO_TCP, 0, 0); |
248 | if (skb->ip_summed == CHECKSUM_COMPLETE) | ||
249 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
250 | /* csum is updated */ | ||
251 | ret = 1; | ||
252 | } | ||
248 | } | 253 | } |
249 | 254 | ||
250 | /* | 255 | /* |
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 027f654799fe..c53998390877 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c | |||
@@ -172,8 +172,8 @@ ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp, | |||
172 | else if (ih->frag_off & htons(IP_OFFSET)) | 172 | else if (ih->frag_off & htons(IP_OFFSET)) |
173 | sprintf(buf, "%pI4->%pI4 frag", &ih->saddr, &ih->daddr); | 173 | sprintf(buf, "%pI4->%pI4 frag", &ih->saddr, &ih->daddr); |
174 | else { | 174 | else { |
175 | __be16 _ports[2], *pptr | 175 | __be16 _ports[2], *pptr; |
176 | ; | 176 | |
177 | pptr = skb_header_pointer(skb, offset + ih->ihl*4, | 177 | pptr = skb_header_pointer(skb, offset + ih->ihl*4, |
178 | sizeof(_ports), _ports); | 178 | sizeof(_ports), _ports); |
179 | if (pptr == NULL) | 179 | if (pptr == NULL) |
@@ -223,13 +223,13 @@ ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp, | |||
223 | 223 | ||
224 | 224 | ||
225 | void | 225 | void |
226 | ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, | 226 | ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, |
227 | const struct sk_buff *skb, | 227 | const struct sk_buff *skb, |
228 | int offset, | 228 | int offset, |
229 | const char *msg) | 229 | const char *msg) |
230 | { | 230 | { |
231 | #ifdef CONFIG_IP_VS_IPV6 | 231 | #ifdef CONFIG_IP_VS_IPV6 |
232 | if (skb->protocol == htons(ETH_P_IPV6)) | 232 | if (af == AF_INET6) |
233 | ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg); | 233 | ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg); |
234 | else | 234 | else |
235 | #endif | 235 | #endif |
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index 8956ef33ea6c..3a0461117d3f 100644 --- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c | |||
@@ -117,54 +117,6 @@ ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
117 | return 0; | 117 | return 0; |
118 | } | 118 | } |
119 | 119 | ||
120 | |||
121 | static void | ||
122 | ah_esp_debug_packet_v4(struct ip_vs_protocol *pp, const struct sk_buff *skb, | ||
123 | int offset, const char *msg) | ||
124 | { | ||
125 | char buf[256]; | ||
126 | struct iphdr _iph, *ih; | ||
127 | |||
128 | ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); | ||
129 | if (ih == NULL) | ||
130 | sprintf(buf, "TRUNCATED"); | ||
131 | else | ||
132 | sprintf(buf, "%pI4->%pI4", &ih->saddr, &ih->daddr); | ||
133 | |||
134 | pr_debug("%s: %s %s\n", msg, pp->name, buf); | ||
135 | } | ||
136 | |||
137 | #ifdef CONFIG_IP_VS_IPV6 | ||
138 | static void | ||
139 | ah_esp_debug_packet_v6(struct ip_vs_protocol *pp, const struct sk_buff *skb, | ||
140 | int offset, const char *msg) | ||
141 | { | ||
142 | char buf[256]; | ||
143 | struct ipv6hdr _iph, *ih; | ||
144 | |||
145 | ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); | ||
146 | if (ih == NULL) | ||
147 | sprintf(buf, "TRUNCATED"); | ||
148 | else | ||
149 | sprintf(buf, "%pI6->%pI6", &ih->saddr, &ih->daddr); | ||
150 | |||
151 | pr_debug("%s: %s %s\n", msg, pp->name, buf); | ||
152 | } | ||
153 | #endif | ||
154 | |||
155 | static void | ||
156 | ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, | ||
157 | int offset, const char *msg) | ||
158 | { | ||
159 | #ifdef CONFIG_IP_VS_IPV6 | ||
160 | if (skb->protocol == htons(ETH_P_IPV6)) | ||
161 | ah_esp_debug_packet_v6(pp, skb, offset, msg); | ||
162 | else | ||
163 | #endif | ||
164 | ah_esp_debug_packet_v4(pp, skb, offset, msg); | ||
165 | } | ||
166 | |||
167 | |||
168 | static void ah_esp_init(struct ip_vs_protocol *pp) | 120 | static void ah_esp_init(struct ip_vs_protocol *pp) |
169 | { | 121 | { |
170 | /* nothing to do now */ | 122 | /* nothing to do now */ |
@@ -195,7 +147,7 @@ struct ip_vs_protocol ip_vs_protocol_ah = { | |||
195 | .register_app = NULL, | 147 | .register_app = NULL, |
196 | .unregister_app = NULL, | 148 | .unregister_app = NULL, |
197 | .app_conn_bind = NULL, | 149 | .app_conn_bind = NULL, |
198 | .debug_packet = ah_esp_debug_packet, | 150 | .debug_packet = ip_vs_tcpudp_debug_packet, |
199 | .timeout_change = NULL, /* ISAKMP */ | 151 | .timeout_change = NULL, /* ISAKMP */ |
200 | .set_state_timeout = NULL, | 152 | .set_state_timeout = NULL, |
201 | }; | 153 | }; |
@@ -219,7 +171,7 @@ struct ip_vs_protocol ip_vs_protocol_esp = { | |||
219 | .register_app = NULL, | 171 | .register_app = NULL, |
220 | .unregister_app = NULL, | 172 | .unregister_app = NULL, |
221 | .app_conn_bind = NULL, | 173 | .app_conn_bind = NULL, |
222 | .debug_packet = ah_esp_debug_packet, | 174 | .debug_packet = ip_vs_tcpudp_debug_packet, |
223 | .timeout_change = NULL, /* ISAKMP */ | 175 | .timeout_change = NULL, /* ISAKMP */ |
224 | }; | 176 | }; |
225 | #endif | 177 | #endif |
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 4c0855cb006e..d254345bfda7 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c | |||
@@ -31,6 +31,8 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
31 | if ((sch->type == SCTP_CID_INIT) && | 31 | if ((sch->type == SCTP_CID_INIT) && |
32 | (svc = ip_vs_service_get(af, skb->mark, iph.protocol, | 32 | (svc = ip_vs_service_get(af, skb->mark, iph.protocol, |
33 | &iph.daddr, sh->dest))) { | 33 | &iph.daddr, sh->dest))) { |
34 | int ignored; | ||
35 | |||
34 | if (ip_vs_todrop()) { | 36 | if (ip_vs_todrop()) { |
35 | /* | 37 | /* |
36 | * It seems that we are very loaded. | 38 | * It seems that we are very loaded. |
@@ -44,8 +46,8 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
44 | * Let the virtual server select a real server for the | 46 | * Let the virtual server select a real server for the |
45 | * incoming connection, and create a connection entry. | 47 | * incoming connection, and create a connection entry. |
46 | */ | 48 | */ |
47 | *cpp = ip_vs_schedule(svc, skb); | 49 | *cpp = ip_vs_schedule(svc, skb, pp, &ignored); |
48 | if (!*cpp) { | 50 | if (!*cpp && !ignored) { |
49 | *verdict = ip_vs_leave(svc, skb, pp); | 51 | *verdict = ip_vs_leave(svc, skb, pp); |
50 | return 0; | 52 | return 0; |
51 | } | 53 | } |
@@ -174,7 +176,7 @@ sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) | |||
174 | 176 | ||
175 | if (val != cmp) { | 177 | if (val != cmp) { |
176 | /* CRC failure, dump it. */ | 178 | /* CRC failure, dump it. */ |
177 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, | 179 | IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, |
178 | "Failed checksum for"); | 180 | "Failed checksum for"); |
179 | return 0; | 181 | return 0; |
180 | } | 182 | } |
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 282d24de8592..f6c5200e2146 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c | |||
@@ -43,9 +43,12 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
43 | return 0; | 43 | return 0; |
44 | } | 44 | } |
45 | 45 | ||
46 | /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ | ||
46 | if (th->syn && | 47 | if (th->syn && |
47 | (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr, | 48 | (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr, |
48 | th->dest))) { | 49 | th->dest))) { |
50 | int ignored; | ||
51 | |||
49 | if (ip_vs_todrop()) { | 52 | if (ip_vs_todrop()) { |
50 | /* | 53 | /* |
51 | * It seems that we are very loaded. | 54 | * It seems that we are very loaded. |
@@ -60,8 +63,8 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
60 | * Let the virtual server select a real server for the | 63 | * Let the virtual server select a real server for the |
61 | * incoming connection, and create a connection entry. | 64 | * incoming connection, and create a connection entry. |
62 | */ | 65 | */ |
63 | *cpp = ip_vs_schedule(svc, skb); | 66 | *cpp = ip_vs_schedule(svc, skb, pp, &ignored); |
64 | if (!*cpp) { | 67 | if (!*cpp && !ignored) { |
65 | *verdict = ip_vs_leave(svc, skb, pp); | 68 | *verdict = ip_vs_leave(svc, skb, pp); |
66 | return 0; | 69 | return 0; |
67 | } | 70 | } |
@@ -101,15 +104,15 @@ tcp_partial_csum_update(int af, struct tcphdr *tcph, | |||
101 | #ifdef CONFIG_IP_VS_IPV6 | 104 | #ifdef CONFIG_IP_VS_IPV6 |
102 | if (af == AF_INET6) | 105 | if (af == AF_INET6) |
103 | tcph->check = | 106 | tcph->check = |
104 | csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, | 107 | ~csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, |
105 | ip_vs_check_diff2(oldlen, newlen, | 108 | ip_vs_check_diff2(oldlen, newlen, |
106 | ~csum_unfold(tcph->check)))); | 109 | csum_unfold(tcph->check)))); |
107 | else | 110 | else |
108 | #endif | 111 | #endif |
109 | tcph->check = | 112 | tcph->check = |
110 | csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, | 113 | ~csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, |
111 | ip_vs_check_diff2(oldlen, newlen, | 114 | ip_vs_check_diff2(oldlen, newlen, |
112 | ~csum_unfold(tcph->check)))); | 115 | csum_unfold(tcph->check)))); |
113 | } | 116 | } |
114 | 117 | ||
115 | 118 | ||
@@ -120,6 +123,7 @@ tcp_snat_handler(struct sk_buff *skb, | |||
120 | struct tcphdr *tcph; | 123 | struct tcphdr *tcph; |
121 | unsigned int tcphoff; | 124 | unsigned int tcphoff; |
122 | int oldlen; | 125 | int oldlen; |
126 | int payload_csum = 0; | ||
123 | 127 | ||
124 | #ifdef CONFIG_IP_VS_IPV6 | 128 | #ifdef CONFIG_IP_VS_IPV6 |
125 | if (cp->af == AF_INET6) | 129 | if (cp->af == AF_INET6) |
@@ -134,13 +138,20 @@ tcp_snat_handler(struct sk_buff *skb, | |||
134 | return 0; | 138 | return 0; |
135 | 139 | ||
136 | if (unlikely(cp->app != NULL)) { | 140 | if (unlikely(cp->app != NULL)) { |
141 | int ret; | ||
142 | |||
137 | /* Some checks before mangling */ | 143 | /* Some checks before mangling */ |
138 | if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) | 144 | if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) |
139 | return 0; | 145 | return 0; |
140 | 146 | ||
141 | /* Call application helper if needed */ | 147 | /* Call application helper if needed */ |
142 | if (!ip_vs_app_pkt_out(cp, skb)) | 148 | if (!(ret = ip_vs_app_pkt_out(cp, skb))) |
143 | return 0; | 149 | return 0; |
150 | /* ret=2: csum update is needed after payload mangling */ | ||
151 | if (ret == 1) | ||
152 | oldlen = skb->len - tcphoff; | ||
153 | else | ||
154 | payload_csum = 1; | ||
144 | } | 155 | } |
145 | 156 | ||
146 | tcph = (void *)skb_network_header(skb) + tcphoff; | 157 | tcph = (void *)skb_network_header(skb) + tcphoff; |
@@ -151,12 +162,13 @@ tcp_snat_handler(struct sk_buff *skb, | |||
151 | tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, | 162 | tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, |
152 | htons(oldlen), | 163 | htons(oldlen), |
153 | htons(skb->len - tcphoff)); | 164 | htons(skb->len - tcphoff)); |
154 | } else if (!cp->app) { | 165 | } else if (!payload_csum) { |
155 | /* Only port and addr are changed, do fast csum update */ | 166 | /* Only port and addr are changed, do fast csum update */ |
156 | tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, | 167 | tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, |
157 | cp->dport, cp->vport); | 168 | cp->dport, cp->vport); |
158 | if (skb->ip_summed == CHECKSUM_COMPLETE) | 169 | if (skb->ip_summed == CHECKSUM_COMPLETE) |
159 | skb->ip_summed = CHECKSUM_NONE; | 170 | skb->ip_summed = (cp->app && pp->csum_check) ? |
171 | CHECKSUM_UNNECESSARY : CHECKSUM_NONE; | ||
160 | } else { | 172 | } else { |
161 | /* full checksum calculation */ | 173 | /* full checksum calculation */ |
162 | tcph->check = 0; | 174 | tcph->check = 0; |
@@ -174,6 +186,7 @@ tcp_snat_handler(struct sk_buff *skb, | |||
174 | skb->len - tcphoff, | 186 | skb->len - tcphoff, |
175 | cp->protocol, | 187 | cp->protocol, |
176 | skb->csum); | 188 | skb->csum); |
189 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
177 | 190 | ||
178 | IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", | 191 | IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", |
179 | pp->name, tcph->check, | 192 | pp->name, tcph->check, |
@@ -190,6 +203,7 @@ tcp_dnat_handler(struct sk_buff *skb, | |||
190 | struct tcphdr *tcph; | 203 | struct tcphdr *tcph; |
191 | unsigned int tcphoff; | 204 | unsigned int tcphoff; |
192 | int oldlen; | 205 | int oldlen; |
206 | int payload_csum = 0; | ||
193 | 207 | ||
194 | #ifdef CONFIG_IP_VS_IPV6 | 208 | #ifdef CONFIG_IP_VS_IPV6 |
195 | if (cp->af == AF_INET6) | 209 | if (cp->af == AF_INET6) |
@@ -204,6 +218,8 @@ tcp_dnat_handler(struct sk_buff *skb, | |||
204 | return 0; | 218 | return 0; |
205 | 219 | ||
206 | if (unlikely(cp->app != NULL)) { | 220 | if (unlikely(cp->app != NULL)) { |
221 | int ret; | ||
222 | |||
207 | /* Some checks before mangling */ | 223 | /* Some checks before mangling */ |
208 | if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) | 224 | if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) |
209 | return 0; | 225 | return 0; |
@@ -212,8 +228,13 @@ tcp_dnat_handler(struct sk_buff *skb, | |||
212 | * Attempt ip_vs_app call. | 228 | * Attempt ip_vs_app call. |
213 | * It will fix ip_vs_conn and iph ack_seq stuff | 229 | * It will fix ip_vs_conn and iph ack_seq stuff |
214 | */ | 230 | */ |
215 | if (!ip_vs_app_pkt_in(cp, skb)) | 231 | if (!(ret = ip_vs_app_pkt_in(cp, skb))) |
216 | return 0; | 232 | return 0; |
233 | /* ret=2: csum update is needed after payload mangling */ | ||
234 | if (ret == 1) | ||
235 | oldlen = skb->len - tcphoff; | ||
236 | else | ||
237 | payload_csum = 1; | ||
217 | } | 238 | } |
218 | 239 | ||
219 | tcph = (void *)skb_network_header(skb) + tcphoff; | 240 | tcph = (void *)skb_network_header(skb) + tcphoff; |
@@ -223,15 +244,16 @@ tcp_dnat_handler(struct sk_buff *skb, | |||
223 | * Adjust TCP checksums | 244 | * Adjust TCP checksums |
224 | */ | 245 | */ |
225 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | 246 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
226 | tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, | 247 | tcp_partial_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr, |
227 | htons(oldlen), | 248 | htons(oldlen), |
228 | htons(skb->len - tcphoff)); | 249 | htons(skb->len - tcphoff)); |
229 | } else if (!cp->app) { | 250 | } else if (!payload_csum) { |
230 | /* Only port and addr are changed, do fast csum update */ | 251 | /* Only port and addr are changed, do fast csum update */ |
231 | tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr, | 252 | tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr, |
232 | cp->vport, cp->dport); | 253 | cp->vport, cp->dport); |
233 | if (skb->ip_summed == CHECKSUM_COMPLETE) | 254 | if (skb->ip_summed == CHECKSUM_COMPLETE) |
234 | skb->ip_summed = CHECKSUM_NONE; | 255 | skb->ip_summed = (cp->app && pp->csum_check) ? |
256 | CHECKSUM_UNNECESSARY : CHECKSUM_NONE; | ||
235 | } else { | 257 | } else { |
236 | /* full checksum calculation */ | 258 | /* full checksum calculation */ |
237 | tcph->check = 0; | 259 | tcph->check = 0; |
@@ -278,7 +300,7 @@ tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) | |||
278 | skb->len - tcphoff, | 300 | skb->len - tcphoff, |
279 | ipv6_hdr(skb)->nexthdr, | 301 | ipv6_hdr(skb)->nexthdr, |
280 | skb->csum)) { | 302 | skb->csum)) { |
281 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, | 303 | IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, |
282 | "Failed checksum for"); | 304 | "Failed checksum for"); |
283 | return 0; | 305 | return 0; |
284 | } | 306 | } |
@@ -289,7 +311,7 @@ tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) | |||
289 | skb->len - tcphoff, | 311 | skb->len - tcphoff, |
290 | ip_hdr(skb)->protocol, | 312 | ip_hdr(skb)->protocol, |
291 | skb->csum)) { | 313 | skb->csum)) { |
292 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, | 314 | IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, |
293 | "Failed checksum for"); | 315 | "Failed checksum for"); |
294 | return 0; | 316 | return 0; |
295 | } | 317 | } |
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 8553231b5d41..9d106a06bb0a 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c | |||
@@ -46,6 +46,8 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
46 | svc = ip_vs_service_get(af, skb->mark, iph.protocol, | 46 | svc = ip_vs_service_get(af, skb->mark, iph.protocol, |
47 | &iph.daddr, uh->dest); | 47 | &iph.daddr, uh->dest); |
48 | if (svc) { | 48 | if (svc) { |
49 | int ignored; | ||
50 | |||
49 | if (ip_vs_todrop()) { | 51 | if (ip_vs_todrop()) { |
50 | /* | 52 | /* |
51 | * It seems that we are very loaded. | 53 | * It seems that we are very loaded. |
@@ -60,8 +62,8 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
60 | * Let the virtual server select a real server for the | 62 | * Let the virtual server select a real server for the |
61 | * incoming connection, and create a connection entry. | 63 | * incoming connection, and create a connection entry. |
62 | */ | 64 | */ |
63 | *cpp = ip_vs_schedule(svc, skb); | 65 | *cpp = ip_vs_schedule(svc, skb, pp, &ignored); |
64 | if (!*cpp) { | 66 | if (!*cpp && !ignored) { |
65 | *verdict = ip_vs_leave(svc, skb, pp); | 67 | *verdict = ip_vs_leave(svc, skb, pp); |
66 | return 0; | 68 | return 0; |
67 | } | 69 | } |
@@ -102,15 +104,15 @@ udp_partial_csum_update(int af, struct udphdr *uhdr, | |||
102 | #ifdef CONFIG_IP_VS_IPV6 | 104 | #ifdef CONFIG_IP_VS_IPV6 |
103 | if (af == AF_INET6) | 105 | if (af == AF_INET6) |
104 | uhdr->check = | 106 | uhdr->check = |
105 | csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, | 107 | ~csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, |
106 | ip_vs_check_diff2(oldlen, newlen, | 108 | ip_vs_check_diff2(oldlen, newlen, |
107 | ~csum_unfold(uhdr->check)))); | 109 | csum_unfold(uhdr->check)))); |
108 | else | 110 | else |
109 | #endif | 111 | #endif |
110 | uhdr->check = | 112 | uhdr->check = |
111 | csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, | 113 | ~csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, |
112 | ip_vs_check_diff2(oldlen, newlen, | 114 | ip_vs_check_diff2(oldlen, newlen, |
113 | ~csum_unfold(uhdr->check)))); | 115 | csum_unfold(uhdr->check)))); |
114 | } | 116 | } |
115 | 117 | ||
116 | 118 | ||
@@ -121,6 +123,7 @@ udp_snat_handler(struct sk_buff *skb, | |||
121 | struct udphdr *udph; | 123 | struct udphdr *udph; |
122 | unsigned int udphoff; | 124 | unsigned int udphoff; |
123 | int oldlen; | 125 | int oldlen; |
126 | int payload_csum = 0; | ||
124 | 127 | ||
125 | #ifdef CONFIG_IP_VS_IPV6 | 128 | #ifdef CONFIG_IP_VS_IPV6 |
126 | if (cp->af == AF_INET6) | 129 | if (cp->af == AF_INET6) |
@@ -135,6 +138,8 @@ udp_snat_handler(struct sk_buff *skb, | |||
135 | return 0; | 138 | return 0; |
136 | 139 | ||
137 | if (unlikely(cp->app != NULL)) { | 140 | if (unlikely(cp->app != NULL)) { |
141 | int ret; | ||
142 | |||
138 | /* Some checks before mangling */ | 143 | /* Some checks before mangling */ |
139 | if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) | 144 | if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) |
140 | return 0; | 145 | return 0; |
@@ -142,8 +147,13 @@ udp_snat_handler(struct sk_buff *skb, | |||
142 | /* | 147 | /* |
143 | * Call application helper if needed | 148 | * Call application helper if needed |
144 | */ | 149 | */ |
145 | if (!ip_vs_app_pkt_out(cp, skb)) | 150 | if (!(ret = ip_vs_app_pkt_out(cp, skb))) |
146 | return 0; | 151 | return 0; |
152 | /* ret=2: csum update is needed after payload mangling */ | ||
153 | if (ret == 1) | ||
154 | oldlen = skb->len - udphoff; | ||
155 | else | ||
156 | payload_csum = 1; | ||
147 | } | 157 | } |
148 | 158 | ||
149 | udph = (void *)skb_network_header(skb) + udphoff; | 159 | udph = (void *)skb_network_header(skb) + udphoff; |
@@ -156,12 +166,13 @@ udp_snat_handler(struct sk_buff *skb, | |||
156 | udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, | 166 | udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, |
157 | htons(oldlen), | 167 | htons(oldlen), |
158 | htons(skb->len - udphoff)); | 168 | htons(skb->len - udphoff)); |
159 | } else if (!cp->app && (udph->check != 0)) { | 169 | } else if (!payload_csum && (udph->check != 0)) { |
160 | /* Only port and addr are changed, do fast csum update */ | 170 | /* Only port and addr are changed, do fast csum update */ |
161 | udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, | 171 | udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, |
162 | cp->dport, cp->vport); | 172 | cp->dport, cp->vport); |
163 | if (skb->ip_summed == CHECKSUM_COMPLETE) | 173 | if (skb->ip_summed == CHECKSUM_COMPLETE) |
164 | skb->ip_summed = CHECKSUM_NONE; | 174 | skb->ip_summed = (cp->app && pp->csum_check) ? |
175 | CHECKSUM_UNNECESSARY : CHECKSUM_NONE; | ||
165 | } else { | 176 | } else { |
166 | /* full checksum calculation */ | 177 | /* full checksum calculation */ |
167 | udph->check = 0; | 178 | udph->check = 0; |
@@ -181,6 +192,7 @@ udp_snat_handler(struct sk_buff *skb, | |||
181 | skb->csum); | 192 | skb->csum); |
182 | if (udph->check == 0) | 193 | if (udph->check == 0) |
183 | udph->check = CSUM_MANGLED_0; | 194 | udph->check = CSUM_MANGLED_0; |
195 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
184 | IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", | 196 | IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", |
185 | pp->name, udph->check, | 197 | pp->name, udph->check, |
186 | (char*)&(udph->check) - (char*)udph); | 198 | (char*)&(udph->check) - (char*)udph); |
@@ -196,6 +208,7 @@ udp_dnat_handler(struct sk_buff *skb, | |||
196 | struct udphdr *udph; | 208 | struct udphdr *udph; |
197 | unsigned int udphoff; | 209 | unsigned int udphoff; |
198 | int oldlen; | 210 | int oldlen; |
211 | int payload_csum = 0; | ||
199 | 212 | ||
200 | #ifdef CONFIG_IP_VS_IPV6 | 213 | #ifdef CONFIG_IP_VS_IPV6 |
201 | if (cp->af == AF_INET6) | 214 | if (cp->af == AF_INET6) |
@@ -210,6 +223,8 @@ udp_dnat_handler(struct sk_buff *skb, | |||
210 | return 0; | 223 | return 0; |
211 | 224 | ||
212 | if (unlikely(cp->app != NULL)) { | 225 | if (unlikely(cp->app != NULL)) { |
226 | int ret; | ||
227 | |||
213 | /* Some checks before mangling */ | 228 | /* Some checks before mangling */ |
214 | if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) | 229 | if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) |
215 | return 0; | 230 | return 0; |
@@ -218,8 +233,13 @@ udp_dnat_handler(struct sk_buff *skb, | |||
218 | * Attempt ip_vs_app call. | 233 | * Attempt ip_vs_app call. |
219 | * It will fix ip_vs_conn | 234 | * It will fix ip_vs_conn |
220 | */ | 235 | */ |
221 | if (!ip_vs_app_pkt_in(cp, skb)) | 236 | if (!(ret = ip_vs_app_pkt_in(cp, skb))) |
222 | return 0; | 237 | return 0; |
238 | /* ret=2: csum update is needed after payload mangling */ | ||
239 | if (ret == 1) | ||
240 | oldlen = skb->len - udphoff; | ||
241 | else | ||
242 | payload_csum = 1; | ||
223 | } | 243 | } |
224 | 244 | ||
225 | udph = (void *)skb_network_header(skb) + udphoff; | 245 | udph = (void *)skb_network_header(skb) + udphoff; |
@@ -229,15 +249,16 @@ udp_dnat_handler(struct sk_buff *skb, | |||
229 | * Adjust UDP checksums | 249 | * Adjust UDP checksums |
230 | */ | 250 | */ |
231 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | 251 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
232 | udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, | 252 | udp_partial_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr, |
233 | htons(oldlen), | 253 | htons(oldlen), |
234 | htons(skb->len - udphoff)); | 254 | htons(skb->len - udphoff)); |
235 | } else if (!cp->app && (udph->check != 0)) { | 255 | } else if (!payload_csum && (udph->check != 0)) { |
236 | /* Only port and addr are changed, do fast csum update */ | 256 | /* Only port and addr are changed, do fast csum update */ |
237 | udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr, | 257 | udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr, |
238 | cp->vport, cp->dport); | 258 | cp->vport, cp->dport); |
239 | if (skb->ip_summed == CHECKSUM_COMPLETE) | 259 | if (skb->ip_summed == CHECKSUM_COMPLETE) |
240 | skb->ip_summed = CHECKSUM_NONE; | 260 | skb->ip_summed = (cp->app && pp->csum_check) ? |
261 | CHECKSUM_UNNECESSARY : CHECKSUM_NONE; | ||
241 | } else { | 262 | } else { |
242 | /* full checksum calculation */ | 263 | /* full checksum calculation */ |
243 | udph->check = 0; | 264 | udph->check = 0; |
@@ -293,7 +314,7 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) | |||
293 | skb->len - udphoff, | 314 | skb->len - udphoff, |
294 | ipv6_hdr(skb)->nexthdr, | 315 | ipv6_hdr(skb)->nexthdr, |
295 | skb->csum)) { | 316 | skb->csum)) { |
296 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, | 317 | IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, |
297 | "Failed checksum for"); | 318 | "Failed checksum for"); |
298 | return 0; | 319 | return 0; |
299 | } | 320 | } |
@@ -304,7 +325,7 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) | |||
304 | skb->len - udphoff, | 325 | skb->len - udphoff, |
305 | ip_hdr(skb)->protocol, | 326 | ip_hdr(skb)->protocol, |
306 | skb->csum)) { | 327 | skb->csum)) { |
307 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, | 328 | IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, |
308 | "Failed checksum for"); | 329 | "Failed checksum for"); |
309 | return 0; | 330 | return 0; |
310 | } | 331 | } |
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index b0bd8afbf368..de04ea39cde8 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c | |||
@@ -11,6 +11,16 @@ | |||
11 | * | 11 | * |
12 | * Changes: | 12 | * Changes: |
13 | * | 13 | * |
14 | * Description of forwarding methods: | ||
15 | * - all transmitters are called from LOCAL_IN (remote clients) and | ||
16 | * LOCAL_OUT (local clients) but for ICMP can be called from FORWARD | ||
17 | * - not all connections have destination server, for example, | ||
18 | * connections in backup server when fwmark is used | ||
19 | * - bypass connections use daddr from packet | ||
20 | * LOCAL_OUT rules: | ||
21 | * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING) | ||
22 | * - skb->pkt_type is not set yet | ||
23 | * - the only place where we can see skb->sk != NULL | ||
14 | */ | 24 | */ |
15 | 25 | ||
16 | #define KMSG_COMPONENT "IPVS" | 26 | #define KMSG_COMPONENT "IPVS" |
@@ -67,12 +77,19 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos) | |||
67 | return dst; | 77 | return dst; |
68 | } | 78 | } |
69 | 79 | ||
80 | /* | ||
81 | * Get route to destination or remote server | ||
82 | * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest, | ||
83 | * &4=Allow redirect from remote daddr to local | ||
84 | */ | ||
70 | static struct rtable * | 85 | static struct rtable * |
71 | __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_conn *cp, u32 rtos) | 86 | __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, |
87 | __be32 daddr, u32 rtos, int rt_mode) | ||
72 | { | 88 | { |
73 | struct net *net = dev_net(skb->dev); | 89 | struct net *net = dev_net(skb_dst(skb)->dev); |
74 | struct rtable *rt; /* Route to the other host */ | 90 | struct rtable *rt; /* Route to the other host */ |
75 | struct ip_vs_dest *dest = cp->dest; | 91 | struct rtable *ort; /* Original route */ |
92 | int local; | ||
76 | 93 | ||
77 | if (dest) { | 94 | if (dest) { |
78 | spin_lock(&dest->dst_lock); | 95 | spin_lock(&dest->dst_lock); |
@@ -104,23 +121,95 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_conn *cp, u32 rtos) | |||
104 | .oif = 0, | 121 | .oif = 0, |
105 | .nl_u = { | 122 | .nl_u = { |
106 | .ip4_u = { | 123 | .ip4_u = { |
107 | .daddr = cp->daddr.ip, | 124 | .daddr = daddr, |
108 | .saddr = 0, | 125 | .saddr = 0, |
109 | .tos = rtos, } }, | 126 | .tos = rtos, } }, |
110 | }; | 127 | }; |
111 | 128 | ||
112 | if (ip_route_output_key(net, &rt, &fl)) { | 129 | if (ip_route_output_key(net, &rt, &fl)) { |
113 | IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", | 130 | IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", |
114 | &cp->daddr.ip); | 131 | &daddr); |
115 | return NULL; | 132 | return NULL; |
116 | } | 133 | } |
117 | } | 134 | } |
118 | 135 | ||
136 | local = rt->rt_flags & RTCF_LOCAL; | ||
137 | if (!((local ? 1 : 2) & rt_mode)) { | ||
138 | IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n", | ||
139 | (rt->rt_flags & RTCF_LOCAL) ? | ||
140 | "local":"non-local", &rt->rt_dst); | ||
141 | ip_rt_put(rt); | ||
142 | return NULL; | ||
143 | } | ||
144 | if (local && !(rt_mode & 4) && !((ort = skb_rtable(skb)) && | ||
145 | ort->rt_flags & RTCF_LOCAL)) { | ||
146 | IP_VS_DBG_RL("Redirect from non-local address %pI4 to local " | ||
147 | "requires NAT method, dest: %pI4\n", | ||
148 | &ip_hdr(skb)->daddr, &rt->rt_dst); | ||
149 | ip_rt_put(rt); | ||
150 | return NULL; | ||
151 | } | ||
152 | if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) { | ||
153 | IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 " | ||
154 | "to non-local address, dest: %pI4\n", | ||
155 | &ip_hdr(skb)->saddr, &rt->rt_dst); | ||
156 | ip_rt_put(rt); | ||
157 | return NULL; | ||
158 | } | ||
159 | |||
119 | return rt; | 160 | return rt; |
120 | } | 161 | } |
121 | 162 | ||
163 | /* Reroute packet to local IPv4 stack after DNAT */ | ||
164 | static int | ||
165 | __ip_vs_reroute_locally(struct sk_buff *skb) | ||
166 | { | ||
167 | struct rtable *rt = skb_rtable(skb); | ||
168 | struct net_device *dev = rt->dst.dev; | ||
169 | struct net *net = dev_net(dev); | ||
170 | struct iphdr *iph = ip_hdr(skb); | ||
171 | |||
172 | if (rt->fl.iif) { | ||
173 | unsigned long orefdst = skb->_skb_refdst; | ||
174 | |||
175 | if (ip_route_input(skb, iph->daddr, iph->saddr, | ||
176 | iph->tos, skb->dev)) | ||
177 | return 0; | ||
178 | refdst_drop(orefdst); | ||
179 | } else { | ||
180 | struct flowi fl = { | ||
181 | .oif = 0, | ||
182 | .nl_u = { | ||
183 | .ip4_u = { | ||
184 | .daddr = iph->daddr, | ||
185 | .saddr = iph->saddr, | ||
186 | .tos = RT_TOS(iph->tos), | ||
187 | } | ||
188 | }, | ||
189 | .mark = skb->mark, | ||
190 | }; | ||
191 | struct rtable *rt; | ||
192 | |||
193 | if (ip_route_output_key(net, &rt, &fl)) | ||
194 | return 0; | ||
195 | if (!(rt->rt_flags & RTCF_LOCAL)) { | ||
196 | ip_rt_put(rt); | ||
197 | return 0; | ||
198 | } | ||
199 | /* Drop old route. */ | ||
200 | skb_dst_drop(skb); | ||
201 | skb_dst_set(skb, &rt->dst); | ||
202 | } | ||
203 | return 1; | ||
204 | } | ||
205 | |||
122 | #ifdef CONFIG_IP_VS_IPV6 | 206 | #ifdef CONFIG_IP_VS_IPV6 |
123 | 207 | ||
208 | static inline int __ip_vs_is_local_route6(struct rt6_info *rt) | ||
209 | { | ||
210 | return rt->rt6i_dev && rt->rt6i_dev->flags & IFF_LOOPBACK; | ||
211 | } | ||
212 | |||
124 | static struct dst_entry * | 213 | static struct dst_entry * |
125 | __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, | 214 | __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, |
126 | struct in6_addr *ret_saddr, int do_xfrm) | 215 | struct in6_addr *ret_saddr, int do_xfrm) |
@@ -155,14 +244,21 @@ out_err: | |||
155 | return NULL; | 244 | return NULL; |
156 | } | 245 | } |
157 | 246 | ||
247 | /* | ||
248 | * Get route to destination or remote server | ||
249 | * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest, | ||
250 | * &4=Allow redirect from remote daddr to local | ||
251 | */ | ||
158 | static struct rt6_info * | 252 | static struct rt6_info * |
159 | __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | 253 | __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, |
160 | struct in6_addr *ret_saddr, int do_xfrm) | 254 | struct in6_addr *daddr, struct in6_addr *ret_saddr, |
255 | int do_xfrm, int rt_mode) | ||
161 | { | 256 | { |
162 | struct net *net = dev_net(skb->dev); | 257 | struct net *net = dev_net(skb_dst(skb)->dev); |
163 | struct rt6_info *rt; /* Route to the other host */ | 258 | struct rt6_info *rt; /* Route to the other host */ |
164 | struct ip_vs_dest *dest = cp->dest; | 259 | struct rt6_info *ort; /* Original route */ |
165 | struct dst_entry *dst; | 260 | struct dst_entry *dst; |
261 | int local; | ||
166 | 262 | ||
167 | if (dest) { | 263 | if (dest) { |
168 | spin_lock(&dest->dst_lock); | 264 | spin_lock(&dest->dst_lock); |
@@ -188,13 +284,38 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
188 | ipv6_addr_copy(ret_saddr, &dest->dst_saddr); | 284 | ipv6_addr_copy(ret_saddr, &dest->dst_saddr); |
189 | spin_unlock(&dest->dst_lock); | 285 | spin_unlock(&dest->dst_lock); |
190 | } else { | 286 | } else { |
191 | dst = __ip_vs_route_output_v6(net, &cp->daddr.in6, ret_saddr, | 287 | dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm); |
192 | do_xfrm); | ||
193 | if (!dst) | 288 | if (!dst) |
194 | return NULL; | 289 | return NULL; |
195 | rt = (struct rt6_info *) dst; | 290 | rt = (struct rt6_info *) dst; |
196 | } | 291 | } |
197 | 292 | ||
293 | local = __ip_vs_is_local_route6(rt); | ||
294 | if (!((local ? 1 : 2) & rt_mode)) { | ||
295 | IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n", | ||
296 | local ? "local":"non-local", daddr); | ||
297 | dst_release(&rt->dst); | ||
298 | return NULL; | ||
299 | } | ||
300 | if (local && !(rt_mode & 4) && | ||
301 | !((ort = (struct rt6_info *) skb_dst(skb)) && | ||
302 | __ip_vs_is_local_route6(ort))) { | ||
303 | IP_VS_DBG_RL("Redirect from non-local address %pI6 to local " | ||
304 | "requires NAT method, dest: %pI6\n", | ||
305 | &ipv6_hdr(skb)->daddr, daddr); | ||
306 | dst_release(&rt->dst); | ||
307 | return NULL; | ||
308 | } | ||
309 | if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) && | ||
310 | ipv6_addr_type(&ipv6_hdr(skb)->saddr) & | ||
311 | IPV6_ADDR_LOOPBACK)) { | ||
312 | IP_VS_DBG_RL("Stopping traffic from loopback address %pI6 " | ||
313 | "to non-local address, dest: %pI6\n", | ||
314 | &ipv6_hdr(skb)->saddr, daddr); | ||
315 | dst_release(&rt->dst); | ||
316 | return NULL; | ||
317 | } | ||
318 | |||
198 | return rt; | 319 | return rt; |
199 | } | 320 | } |
200 | #endif | 321 | #endif |
@@ -217,30 +338,37 @@ ip_vs_dst_reset(struct ip_vs_dest *dest) | |||
217 | ({ \ | 338 | ({ \ |
218 | int __ret = NF_ACCEPT; \ | 339 | int __ret = NF_ACCEPT; \ |
219 | \ | 340 | \ |
341 | (skb)->ipvs_property = 1; \ | ||
220 | if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT)) \ | 342 | if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT)) \ |
221 | __ret = ip_vs_confirm_conntrack(skb, cp); \ | 343 | __ret = ip_vs_confirm_conntrack(skb, cp); \ |
222 | if (__ret == NF_ACCEPT) { \ | 344 | if (__ret == NF_ACCEPT) { \ |
223 | nf_reset(skb); \ | 345 | nf_reset(skb); \ |
224 | (skb)->ip_summed = CHECKSUM_NONE; \ | 346 | skb_forward_csum(skb); \ |
225 | } \ | 347 | } \ |
226 | __ret; \ | 348 | __ret; \ |
227 | }) | 349 | }) |
228 | 350 | ||
229 | #define IP_VS_XMIT_NAT(pf, skb, cp) \ | 351 | #define IP_VS_XMIT_NAT(pf, skb, cp, local) \ |
230 | do { \ | 352 | do { \ |
353 | (skb)->ipvs_property = 1; \ | ||
231 | if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ | 354 | if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ |
232 | (skb)->ipvs_property = 1; \ | 355 | ip_vs_notrack(skb); \ |
233 | else \ | 356 | else \ |
234 | ip_vs_update_conntrack(skb, cp, 1); \ | 357 | ip_vs_update_conntrack(skb, cp, 1); \ |
358 | if (local) \ | ||
359 | return NF_ACCEPT; \ | ||
235 | skb_forward_csum(skb); \ | 360 | skb_forward_csum(skb); \ |
236 | NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ | 361 | NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ |
237 | skb_dst(skb)->dev, dst_output); \ | 362 | skb_dst(skb)->dev, dst_output); \ |
238 | } while (0) | 363 | } while (0) |
239 | 364 | ||
240 | #define IP_VS_XMIT(pf, skb, cp) \ | 365 | #define IP_VS_XMIT(pf, skb, cp, local) \ |
241 | do { \ | 366 | do { \ |
367 | (skb)->ipvs_property = 1; \ | ||
242 | if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ | 368 | if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ |
243 | (skb)->ipvs_property = 1; \ | 369 | ip_vs_notrack(skb); \ |
370 | if (local) \ | ||
371 | return NF_ACCEPT; \ | ||
244 | skb_forward_csum(skb); \ | 372 | skb_forward_csum(skb); \ |
245 | NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ | 373 | NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ |
246 | skb_dst(skb)->dev, dst_output); \ | 374 | skb_dst(skb)->dev, dst_output); \ |
@@ -255,7 +383,7 @@ ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
255 | struct ip_vs_protocol *pp) | 383 | struct ip_vs_protocol *pp) |
256 | { | 384 | { |
257 | /* we do not touch skb and do not need pskb ptr */ | 385 | /* we do not touch skb and do not need pskb ptr */ |
258 | return NF_ACCEPT; | 386 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); |
259 | } | 387 | } |
260 | 388 | ||
261 | 389 | ||
@@ -268,27 +396,15 @@ int | |||
268 | ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | 396 | ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, |
269 | struct ip_vs_protocol *pp) | 397 | struct ip_vs_protocol *pp) |
270 | { | 398 | { |
271 | struct net *net = dev_net(skb->dev); | ||
272 | struct rtable *rt; /* Route to the other host */ | 399 | struct rtable *rt; /* Route to the other host */ |
273 | struct iphdr *iph = ip_hdr(skb); | 400 | struct iphdr *iph = ip_hdr(skb); |
274 | u8 tos = iph->tos; | ||
275 | int mtu; | 401 | int mtu; |
276 | struct flowi fl = { | ||
277 | .oif = 0, | ||
278 | .nl_u = { | ||
279 | .ip4_u = { | ||
280 | .daddr = iph->daddr, | ||
281 | .saddr = 0, | ||
282 | .tos = RT_TOS(tos), } }, | ||
283 | }; | ||
284 | 402 | ||
285 | EnterFunction(10); | 403 | EnterFunction(10); |
286 | 404 | ||
287 | if (ip_route_output_key(net, &rt, &fl)) { | 405 | if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, |
288 | IP_VS_DBG_RL("%s(): ip_route_output error, dest: %pI4\n", | 406 | RT_TOS(iph->tos), 2))) |
289 | __func__, &iph->daddr); | ||
290 | goto tx_error_icmp; | 407 | goto tx_error_icmp; |
291 | } | ||
292 | 408 | ||
293 | /* MTU checking */ | 409 | /* MTU checking */ |
294 | mtu = dst_mtu(&rt->dst); | 410 | mtu = dst_mtu(&rt->dst); |
@@ -316,7 +432,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
316 | /* Another hack: avoid icmp_send in ip_fragment */ | 432 | /* Another hack: avoid icmp_send in ip_fragment */ |
317 | skb->local_df = 1; | 433 | skb->local_df = 1; |
318 | 434 | ||
319 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp); | 435 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0); |
320 | 436 | ||
321 | LeaveFunction(10); | 437 | LeaveFunction(10); |
322 | return NF_STOLEN; | 438 | return NF_STOLEN; |
@@ -334,24 +450,25 @@ int | |||
334 | ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | 450 | ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, |
335 | struct ip_vs_protocol *pp) | 451 | struct ip_vs_protocol *pp) |
336 | { | 452 | { |
337 | struct net *net = dev_net(skb->dev); | ||
338 | struct dst_entry *dst; | ||
339 | struct rt6_info *rt; /* Route to the other host */ | 453 | struct rt6_info *rt; /* Route to the other host */ |
340 | struct ipv6hdr *iph = ipv6_hdr(skb); | 454 | struct ipv6hdr *iph = ipv6_hdr(skb); |
341 | int mtu; | 455 | int mtu; |
342 | 456 | ||
343 | EnterFunction(10); | 457 | EnterFunction(10); |
344 | 458 | ||
345 | dst = __ip_vs_route_output_v6(net, &iph->daddr, NULL, 0); | 459 | if (!(rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr, NULL, 0, 2))) |
346 | if (!dst) | ||
347 | goto tx_error_icmp; | 460 | goto tx_error_icmp; |
348 | rt = (struct rt6_info *) dst; | ||
349 | 461 | ||
350 | /* MTU checking */ | 462 | /* MTU checking */ |
351 | mtu = dst_mtu(&rt->dst); | 463 | mtu = dst_mtu(&rt->dst); |
352 | if (skb->len > mtu) { | 464 | if (skb->len > mtu) { |
353 | dst_release(&rt->dst); | 465 | if (!skb->dev) { |
466 | struct net *net = dev_net(skb_dst(skb)->dev); | ||
467 | |||
468 | skb->dev = net->loopback_dev; | ||
469 | } | ||
354 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); | 470 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
471 | dst_release(&rt->dst); | ||
355 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); | 472 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); |
356 | goto tx_error; | 473 | goto tx_error; |
357 | } | 474 | } |
@@ -373,7 +490,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
373 | /* Another hack: avoid icmp_send in ip_fragment */ | 490 | /* Another hack: avoid icmp_send in ip_fragment */ |
374 | skb->local_df = 1; | 491 | skb->local_df = 1; |
375 | 492 | ||
376 | IP_VS_XMIT(NFPROTO_IPV6, skb, cp); | 493 | IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0); |
377 | 494 | ||
378 | LeaveFunction(10); | 495 | LeaveFunction(10); |
379 | return NF_STOLEN; | 496 | return NF_STOLEN; |
@@ -398,6 +515,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
398 | struct rtable *rt; /* Route to the other host */ | 515 | struct rtable *rt; /* Route to the other host */ |
399 | int mtu; | 516 | int mtu; |
400 | struct iphdr *iph = ip_hdr(skb); | 517 | struct iphdr *iph = ip_hdr(skb); |
518 | int local; | ||
401 | 519 | ||
402 | EnterFunction(10); | 520 | EnterFunction(10); |
403 | 521 | ||
@@ -411,16 +529,42 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
411 | IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); | 529 | IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); |
412 | } | 530 | } |
413 | 531 | ||
414 | if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos)))) | 532 | if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, |
533 | RT_TOS(iph->tos), 1|2|4))) | ||
415 | goto tx_error_icmp; | 534 | goto tx_error_icmp; |
535 | local = rt->rt_flags & RTCF_LOCAL; | ||
536 | /* | ||
537 | * Avoid duplicate tuple in reply direction for NAT traffic | ||
538 | * to local address when connection is sync-ed | ||
539 | */ | ||
540 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | ||
541 | if (cp->flags & IP_VS_CONN_F_SYNC && local) { | ||
542 | enum ip_conntrack_info ctinfo; | ||
543 | struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); | ||
544 | |||
545 | if (ct && !nf_ct_is_untracked(ct)) { | ||
546 | IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0, | ||
547 | "ip_vs_nat_xmit(): " | ||
548 | "stopping DNAT to local address"); | ||
549 | goto tx_error_put; | ||
550 | } | ||
551 | } | ||
552 | #endif | ||
553 | |||
554 | /* From world but DNAT to loopback address? */ | ||
555 | if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) { | ||
556 | IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " | ||
557 | "stopping DNAT to loopback address"); | ||
558 | goto tx_error_put; | ||
559 | } | ||
416 | 560 | ||
417 | /* MTU checking */ | 561 | /* MTU checking */ |
418 | mtu = dst_mtu(&rt->dst); | 562 | mtu = dst_mtu(&rt->dst); |
419 | if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { | 563 | if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { |
420 | ip_rt_put(rt); | ||
421 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | 564 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); |
422 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for"); | 565 | IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0, |
423 | goto tx_error; | 566 | "ip_vs_nat_xmit(): frag needed for"); |
567 | goto tx_error_put; | ||
424 | } | 568 | } |
425 | 569 | ||
426 | /* copy-on-write the packet before mangling it */ | 570 | /* copy-on-write the packet before mangling it */ |
@@ -430,17 +574,28 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
430 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) | 574 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) |
431 | goto tx_error_put; | 575 | goto tx_error_put; |
432 | 576 | ||
433 | /* drop old route */ | ||
434 | skb_dst_drop(skb); | ||
435 | skb_dst_set(skb, &rt->dst); | ||
436 | |||
437 | /* mangle the packet */ | 577 | /* mangle the packet */ |
438 | if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) | 578 | if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) |
439 | goto tx_error; | 579 | goto tx_error_put; |
440 | ip_hdr(skb)->daddr = cp->daddr.ip; | 580 | ip_hdr(skb)->daddr = cp->daddr.ip; |
441 | ip_send_check(ip_hdr(skb)); | 581 | ip_send_check(ip_hdr(skb)); |
442 | 582 | ||
443 | IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); | 583 | if (!local) { |
584 | /* drop old route */ | ||
585 | skb_dst_drop(skb); | ||
586 | skb_dst_set(skb, &rt->dst); | ||
587 | } else { | ||
588 | ip_rt_put(rt); | ||
589 | /* | ||
590 | * Some IPv4 replies get local address from routes, | ||
591 | * not from iph, so while we DNAT after routing | ||
592 | * we need this second input/output route. | ||
593 | */ | ||
594 | if (!__ip_vs_reroute_locally(skb)) | ||
595 | goto tx_error; | ||
596 | } | ||
597 | |||
598 | IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT"); | ||
444 | 599 | ||
445 | /* FIXME: when application helper enlarges the packet and the length | 600 | /* FIXME: when application helper enlarges the packet and the length |
446 | is larger than the MTU of outgoing device, there will be still | 601 | is larger than the MTU of outgoing device, there will be still |
@@ -449,7 +604,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
449 | /* Another hack: avoid icmp_send in ip_fragment */ | 604 | /* Another hack: avoid icmp_send in ip_fragment */ |
450 | skb->local_df = 1; | 605 | skb->local_df = 1; |
451 | 606 | ||
452 | IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp); | 607 | IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); |
453 | 608 | ||
454 | LeaveFunction(10); | 609 | LeaveFunction(10); |
455 | return NF_STOLEN; | 610 | return NF_STOLEN; |
@@ -472,6 +627,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
472 | { | 627 | { |
473 | struct rt6_info *rt; /* Route to the other host */ | 628 | struct rt6_info *rt; /* Route to the other host */ |
474 | int mtu; | 629 | int mtu; |
630 | int local; | ||
475 | 631 | ||
476 | EnterFunction(10); | 632 | EnterFunction(10); |
477 | 633 | ||
@@ -486,18 +642,49 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
486 | IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); | 642 | IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); |
487 | } | 643 | } |
488 | 644 | ||
489 | rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0); | 645 | if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, |
490 | if (!rt) | 646 | 0, 1|2|4))) |
491 | goto tx_error_icmp; | 647 | goto tx_error_icmp; |
648 | local = __ip_vs_is_local_route6(rt); | ||
649 | /* | ||
650 | * Avoid duplicate tuple in reply direction for NAT traffic | ||
651 | * to local address when connection is sync-ed | ||
652 | */ | ||
653 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | ||
654 | if (cp->flags & IP_VS_CONN_F_SYNC && local) { | ||
655 | enum ip_conntrack_info ctinfo; | ||
656 | struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); | ||
657 | |||
658 | if (ct && !nf_ct_is_untracked(ct)) { | ||
659 | IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0, | ||
660 | "ip_vs_nat_xmit_v6(): " | ||
661 | "stopping DNAT to local address"); | ||
662 | goto tx_error_put; | ||
663 | } | ||
664 | } | ||
665 | #endif | ||
666 | |||
667 | /* From world but DNAT to loopback address? */ | ||
668 | if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && | ||
669 | ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { | ||
670 | IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0, | ||
671 | "ip_vs_nat_xmit_v6(): " | ||
672 | "stopping DNAT to loopback address"); | ||
673 | goto tx_error_put; | ||
674 | } | ||
492 | 675 | ||
493 | /* MTU checking */ | 676 | /* MTU checking */ |
494 | mtu = dst_mtu(&rt->dst); | 677 | mtu = dst_mtu(&rt->dst); |
495 | if (skb->len > mtu) { | 678 | if (skb->len > mtu) { |
496 | dst_release(&rt->dst); | 679 | if (!skb->dev) { |
680 | struct net *net = dev_net(skb_dst(skb)->dev); | ||
681 | |||
682 | skb->dev = net->loopback_dev; | ||
683 | } | ||
497 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); | 684 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
498 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, | 685 | IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0, |
499 | "ip_vs_nat_xmit_v6(): frag needed for"); | 686 | "ip_vs_nat_xmit_v6(): frag needed for"); |
500 | goto tx_error; | 687 | goto tx_error_put; |
501 | } | 688 | } |
502 | 689 | ||
503 | /* copy-on-write the packet before mangling it */ | 690 | /* copy-on-write the packet before mangling it */ |
@@ -507,16 +694,21 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
507 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) | 694 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) |
508 | goto tx_error_put; | 695 | goto tx_error_put; |
509 | 696 | ||
510 | /* drop old route */ | ||
511 | skb_dst_drop(skb); | ||
512 | skb_dst_set(skb, &rt->dst); | ||
513 | |||
514 | /* mangle the packet */ | 697 | /* mangle the packet */ |
515 | if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) | 698 | if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) |
516 | goto tx_error; | 699 | goto tx_error; |
517 | ipv6_hdr(skb)->daddr = cp->daddr.in6; | 700 | ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &cp->daddr.in6); |
518 | 701 | ||
519 | IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); | 702 | if (!local || !skb->dev) { |
703 | /* drop the old route when skb is not shared */ | ||
704 | skb_dst_drop(skb); | ||
705 | skb_dst_set(skb, &rt->dst); | ||
706 | } else { | ||
707 | /* destined to loopback, do we need to change route? */ | ||
708 | dst_release(&rt->dst); | ||
709 | } | ||
710 | |||
711 | IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT"); | ||
520 | 712 | ||
521 | /* FIXME: when application helper enlarges the packet and the length | 713 | /* FIXME: when application helper enlarges the packet and the length |
522 | is larger than the MTU of outgoing device, there will be still | 714 | is larger than the MTU of outgoing device, there will be still |
@@ -525,7 +717,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
525 | /* Another hack: avoid icmp_send in ip_fragment */ | 717 | /* Another hack: avoid icmp_send in ip_fragment */ |
526 | skb->local_df = 1; | 718 | skb->local_df = 1; |
527 | 719 | ||
528 | IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp); | 720 | IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local); |
529 | 721 | ||
530 | LeaveFunction(10); | 722 | LeaveFunction(10); |
531 | return NF_STOLEN; | 723 | return NF_STOLEN; |
@@ -578,23 +770,20 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
578 | 770 | ||
579 | EnterFunction(10); | 771 | EnterFunction(10); |
580 | 772 | ||
581 | if (skb->protocol != htons(ETH_P_IP)) { | 773 | if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, |
582 | IP_VS_DBG_RL("%s(): protocol error, " | 774 | RT_TOS(tos), 1|2))) |
583 | "ETH_P_IP: %d, skb protocol: %d\n", | ||
584 | __func__, htons(ETH_P_IP), skb->protocol); | ||
585 | goto tx_error; | ||
586 | } | ||
587 | |||
588 | if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(tos)))) | ||
589 | goto tx_error_icmp; | 775 | goto tx_error_icmp; |
776 | if (rt->rt_flags & RTCF_LOCAL) { | ||
777 | ip_rt_put(rt); | ||
778 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); | ||
779 | } | ||
590 | 780 | ||
591 | tdev = rt->dst.dev; | 781 | tdev = rt->dst.dev; |
592 | 782 | ||
593 | mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); | 783 | mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); |
594 | if (mtu < 68) { | 784 | if (mtu < 68) { |
595 | ip_rt_put(rt); | ||
596 | IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); | 785 | IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); |
597 | goto tx_error; | 786 | goto tx_error_put; |
598 | } | 787 | } |
599 | if (skb_dst(skb)) | 788 | if (skb_dst(skb)) |
600 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); | 789 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); |
@@ -604,9 +793,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
604 | if ((old_iph->frag_off & htons(IP_DF)) | 793 | if ((old_iph->frag_off & htons(IP_DF)) |
605 | && mtu < ntohs(old_iph->tot_len)) { | 794 | && mtu < ntohs(old_iph->tot_len)) { |
606 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | 795 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); |
607 | ip_rt_put(rt); | ||
608 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); | 796 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); |
609 | goto tx_error; | 797 | goto tx_error_put; |
610 | } | 798 | } |
611 | 799 | ||
612 | /* | 800 | /* |
@@ -675,6 +863,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
675 | kfree_skb(skb); | 863 | kfree_skb(skb); |
676 | LeaveFunction(10); | 864 | LeaveFunction(10); |
677 | return NF_STOLEN; | 865 | return NF_STOLEN; |
866 | tx_error_put: | ||
867 | ip_rt_put(rt); | ||
868 | goto tx_error; | ||
678 | } | 869 | } |
679 | 870 | ||
680 | #ifdef CONFIG_IP_VS_IPV6 | 871 | #ifdef CONFIG_IP_VS_IPV6 |
@@ -693,34 +884,34 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
693 | 884 | ||
694 | EnterFunction(10); | 885 | EnterFunction(10); |
695 | 886 | ||
696 | if (skb->protocol != htons(ETH_P_IPV6)) { | 887 | if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, |
697 | IP_VS_DBG_RL("%s(): protocol error, " | 888 | &saddr, 1, 1|2))) |
698 | "ETH_P_IPV6: %d, skb protocol: %d\n", | ||
699 | __func__, htons(ETH_P_IPV6), skb->protocol); | ||
700 | goto tx_error; | ||
701 | } | ||
702 | |||
703 | rt = __ip_vs_get_out_rt_v6(skb, cp, &saddr, 1); | ||
704 | if (!rt) | ||
705 | goto tx_error_icmp; | 889 | goto tx_error_icmp; |
890 | if (__ip_vs_is_local_route6(rt)) { | ||
891 | dst_release(&rt->dst); | ||
892 | IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1); | ||
893 | } | ||
706 | 894 | ||
707 | tdev = rt->dst.dev; | 895 | tdev = rt->dst.dev; |
708 | 896 | ||
709 | mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); | 897 | mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); |
710 | if (mtu < IPV6_MIN_MTU) { | 898 | if (mtu < IPV6_MIN_MTU) { |
711 | dst_release(&rt->dst); | ||
712 | IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, | 899 | IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, |
713 | IPV6_MIN_MTU); | 900 | IPV6_MIN_MTU); |
714 | goto tx_error; | 901 | goto tx_error_put; |
715 | } | 902 | } |
716 | if (skb_dst(skb)) | 903 | if (skb_dst(skb)) |
717 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); | 904 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); |
718 | 905 | ||
719 | if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { | 906 | if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { |
907 | if (!skb->dev) { | ||
908 | struct net *net = dev_net(skb_dst(skb)->dev); | ||
909 | |||
910 | skb->dev = net->loopback_dev; | ||
911 | } | ||
720 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); | 912 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
721 | dst_release(&rt->dst); | ||
722 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); | 913 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); |
723 | goto tx_error; | 914 | goto tx_error_put; |
724 | } | 915 | } |
725 | 916 | ||
726 | /* | 917 | /* |
@@ -786,6 +977,9 @@ tx_error: | |||
786 | kfree_skb(skb); | 977 | kfree_skb(skb); |
787 | LeaveFunction(10); | 978 | LeaveFunction(10); |
788 | return NF_STOLEN; | 979 | return NF_STOLEN; |
980 | tx_error_put: | ||
981 | dst_release(&rt->dst); | ||
982 | goto tx_error; | ||
789 | } | 983 | } |
790 | #endif | 984 | #endif |
791 | 985 | ||
@@ -804,8 +998,13 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
804 | 998 | ||
805 | EnterFunction(10); | 999 | EnterFunction(10); |
806 | 1000 | ||
807 | if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos)))) | 1001 | if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, |
1002 | RT_TOS(iph->tos), 1|2))) | ||
808 | goto tx_error_icmp; | 1003 | goto tx_error_icmp; |
1004 | if (rt->rt_flags & RTCF_LOCAL) { | ||
1005 | ip_rt_put(rt); | ||
1006 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); | ||
1007 | } | ||
809 | 1008 | ||
810 | /* MTU checking */ | 1009 | /* MTU checking */ |
811 | mtu = dst_mtu(&rt->dst); | 1010 | mtu = dst_mtu(&rt->dst); |
@@ -833,7 +1032,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
833 | /* Another hack: avoid icmp_send in ip_fragment */ | 1032 | /* Another hack: avoid icmp_send in ip_fragment */ |
834 | skb->local_df = 1; | 1033 | skb->local_df = 1; |
835 | 1034 | ||
836 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp); | 1035 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0); |
837 | 1036 | ||
838 | LeaveFunction(10); | 1037 | LeaveFunction(10); |
839 | return NF_STOLEN; | 1038 | return NF_STOLEN; |
@@ -856,13 +1055,22 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
856 | 1055 | ||
857 | EnterFunction(10); | 1056 | EnterFunction(10); |
858 | 1057 | ||
859 | rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0); | 1058 | if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, |
860 | if (!rt) | 1059 | 0, 1|2))) |
861 | goto tx_error_icmp; | 1060 | goto tx_error_icmp; |
1061 | if (__ip_vs_is_local_route6(rt)) { | ||
1062 | dst_release(&rt->dst); | ||
1063 | IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1); | ||
1064 | } | ||
862 | 1065 | ||
863 | /* MTU checking */ | 1066 | /* MTU checking */ |
864 | mtu = dst_mtu(&rt->dst); | 1067 | mtu = dst_mtu(&rt->dst); |
865 | if (skb->len > mtu) { | 1068 | if (skb->len > mtu) { |
1069 | if (!skb->dev) { | ||
1070 | struct net *net = dev_net(skb_dst(skb)->dev); | ||
1071 | |||
1072 | skb->dev = net->loopback_dev; | ||
1073 | } | ||
866 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); | 1074 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
867 | dst_release(&rt->dst); | 1075 | dst_release(&rt->dst); |
868 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); | 1076 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); |
@@ -886,7 +1094,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
886 | /* Another hack: avoid icmp_send in ip_fragment */ | 1094 | /* Another hack: avoid icmp_send in ip_fragment */ |
887 | skb->local_df = 1; | 1095 | skb->local_df = 1; |
888 | 1096 | ||
889 | IP_VS_XMIT(NFPROTO_IPV6, skb, cp); | 1097 | IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0); |
890 | 1098 | ||
891 | LeaveFunction(10); | 1099 | LeaveFunction(10); |
892 | return NF_STOLEN; | 1100 | return NF_STOLEN; |
@@ -912,6 +1120,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
912 | struct rtable *rt; /* Route to the other host */ | 1120 | struct rtable *rt; /* Route to the other host */ |
913 | int mtu; | 1121 | int mtu; |
914 | int rc; | 1122 | int rc; |
1123 | int local; | ||
915 | 1124 | ||
916 | EnterFunction(10); | 1125 | EnterFunction(10); |
917 | 1126 | ||
@@ -932,16 +1141,43 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
932 | * mangle and send the packet here (only for VS/NAT) | 1141 | * mangle and send the packet here (only for VS/NAT) |
933 | */ | 1142 | */ |
934 | 1143 | ||
935 | if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(ip_hdr(skb)->tos)))) | 1144 | if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, |
1145 | RT_TOS(ip_hdr(skb)->tos), 1|2|4))) | ||
936 | goto tx_error_icmp; | 1146 | goto tx_error_icmp; |
1147 | local = rt->rt_flags & RTCF_LOCAL; | ||
1148 | |||
1149 | /* | ||
1150 | * Avoid duplicate tuple in reply direction for NAT traffic | ||
1151 | * to local address when connection is sync-ed | ||
1152 | */ | ||
1153 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | ||
1154 | if (cp->flags & IP_VS_CONN_F_SYNC && local) { | ||
1155 | enum ip_conntrack_info ctinfo; | ||
1156 | struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); | ||
1157 | |||
1158 | if (ct && !nf_ct_is_untracked(ct)) { | ||
1159 | IP_VS_DBG(10, "%s(): " | ||
1160 | "stopping DNAT to local address %pI4\n", | ||
1161 | __func__, &cp->daddr.ip); | ||
1162 | goto tx_error_put; | ||
1163 | } | ||
1164 | } | ||
1165 | #endif | ||
1166 | |||
1167 | /* From world but DNAT to loopback address? */ | ||
1168 | if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) { | ||
1169 | IP_VS_DBG(1, "%s(): " | ||
1170 | "stopping DNAT to loopback %pI4\n", | ||
1171 | __func__, &cp->daddr.ip); | ||
1172 | goto tx_error_put; | ||
1173 | } | ||
937 | 1174 | ||
938 | /* MTU checking */ | 1175 | /* MTU checking */ |
939 | mtu = dst_mtu(&rt->dst); | 1176 | mtu = dst_mtu(&rt->dst); |
940 | if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { | 1177 | if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { |
941 | ip_rt_put(rt); | ||
942 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); | 1178 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); |
943 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); | 1179 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); |
944 | goto tx_error; | 1180 | goto tx_error_put; |
945 | } | 1181 | } |
946 | 1182 | ||
947 | /* copy-on-write the packet before mangling it */ | 1183 | /* copy-on-write the packet before mangling it */ |
@@ -951,16 +1187,27 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
951 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) | 1187 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) |
952 | goto tx_error_put; | 1188 | goto tx_error_put; |
953 | 1189 | ||
954 | /* drop the old route when skb is not shared */ | ||
955 | skb_dst_drop(skb); | ||
956 | skb_dst_set(skb, &rt->dst); | ||
957 | |||
958 | ip_vs_nat_icmp(skb, pp, cp, 0); | 1190 | ip_vs_nat_icmp(skb, pp, cp, 0); |
959 | 1191 | ||
1192 | if (!local) { | ||
1193 | /* drop the old route when skb is not shared */ | ||
1194 | skb_dst_drop(skb); | ||
1195 | skb_dst_set(skb, &rt->dst); | ||
1196 | } else { | ||
1197 | ip_rt_put(rt); | ||
1198 | /* | ||
1199 | * Some IPv4 replies get local address from routes, | ||
1200 | * not from iph, so while we DNAT after routing | ||
1201 | * we need this second input/output route. | ||
1202 | */ | ||
1203 | if (!__ip_vs_reroute_locally(skb)) | ||
1204 | goto tx_error; | ||
1205 | } | ||
1206 | |||
960 | /* Another hack: avoid icmp_send in ip_fragment */ | 1207 | /* Another hack: avoid icmp_send in ip_fragment */ |
961 | skb->local_df = 1; | 1208 | skb->local_df = 1; |
962 | 1209 | ||
963 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp); | 1210 | IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); |
964 | 1211 | ||
965 | rc = NF_STOLEN; | 1212 | rc = NF_STOLEN; |
966 | goto out; | 1213 | goto out; |
@@ -986,6 +1233,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
986 | struct rt6_info *rt; /* Route to the other host */ | 1233 | struct rt6_info *rt; /* Route to the other host */ |
987 | int mtu; | 1234 | int mtu; |
988 | int rc; | 1235 | int rc; |
1236 | int local; | ||
989 | 1237 | ||
990 | EnterFunction(10); | 1238 | EnterFunction(10); |
991 | 1239 | ||
@@ -1006,17 +1254,49 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
1006 | * mangle and send the packet here (only for VS/NAT) | 1254 | * mangle and send the packet here (only for VS/NAT) |
1007 | */ | 1255 | */ |
1008 | 1256 | ||
1009 | rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0); | 1257 | if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, |
1010 | if (!rt) | 1258 | 0, 1|2|4))) |
1011 | goto tx_error_icmp; | 1259 | goto tx_error_icmp; |
1012 | 1260 | ||
1261 | local = __ip_vs_is_local_route6(rt); | ||
1262 | /* | ||
1263 | * Avoid duplicate tuple in reply direction for NAT traffic | ||
1264 | * to local address when connection is sync-ed | ||
1265 | */ | ||
1266 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | ||
1267 | if (cp->flags & IP_VS_CONN_F_SYNC && local) { | ||
1268 | enum ip_conntrack_info ctinfo; | ||
1269 | struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); | ||
1270 | |||
1271 | if (ct && !nf_ct_is_untracked(ct)) { | ||
1272 | IP_VS_DBG(10, "%s(): " | ||
1273 | "stopping DNAT to local address %pI6\n", | ||
1274 | __func__, &cp->daddr.in6); | ||
1275 | goto tx_error_put; | ||
1276 | } | ||
1277 | } | ||
1278 | #endif | ||
1279 | |||
1280 | /* From world but DNAT to loopback address? */ | ||
1281 | if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && | ||
1282 | ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { | ||
1283 | IP_VS_DBG(1, "%s(): " | ||
1284 | "stopping DNAT to loopback %pI6\n", | ||
1285 | __func__, &cp->daddr.in6); | ||
1286 | goto tx_error_put; | ||
1287 | } | ||
1288 | |||
1013 | /* MTU checking */ | 1289 | /* MTU checking */ |
1014 | mtu = dst_mtu(&rt->dst); | 1290 | mtu = dst_mtu(&rt->dst); |
1015 | if (skb->len > mtu) { | 1291 | if (skb->len > mtu) { |
1016 | dst_release(&rt->dst); | 1292 | if (!skb->dev) { |
1293 | struct net *net = dev_net(skb_dst(skb)->dev); | ||
1294 | |||
1295 | skb->dev = net->loopback_dev; | ||
1296 | } | ||
1017 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); | 1297 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
1018 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); | 1298 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); |
1019 | goto tx_error; | 1299 | goto tx_error_put; |
1020 | } | 1300 | } |
1021 | 1301 | ||
1022 | /* copy-on-write the packet before mangling it */ | 1302 | /* copy-on-write the packet before mangling it */ |
@@ -1026,16 +1306,21 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
1026 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) | 1306 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) |
1027 | goto tx_error_put; | 1307 | goto tx_error_put; |
1028 | 1308 | ||
1029 | /* drop the old route when skb is not shared */ | ||
1030 | skb_dst_drop(skb); | ||
1031 | skb_dst_set(skb, &rt->dst); | ||
1032 | |||
1033 | ip_vs_nat_icmp_v6(skb, pp, cp, 0); | 1309 | ip_vs_nat_icmp_v6(skb, pp, cp, 0); |
1034 | 1310 | ||
1311 | if (!local || !skb->dev) { | ||
1312 | /* drop the old route when skb is not shared */ | ||
1313 | skb_dst_drop(skb); | ||
1314 | skb_dst_set(skb, &rt->dst); | ||
1315 | } else { | ||
1316 | /* destined to loopback, do we need to change route? */ | ||
1317 | dst_release(&rt->dst); | ||
1318 | } | ||
1319 | |||
1035 | /* Another hack: avoid icmp_send in ip_fragment */ | 1320 | /* Another hack: avoid icmp_send in ip_fragment */ |
1036 | skb->local_df = 1; | 1321 | skb->local_df = 1; |
1037 | 1322 | ||
1038 | IP_VS_XMIT(NFPROTO_IPV6, skb, cp); | 1323 | IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local); |
1039 | 1324 | ||
1040 | rc = NF_STOLEN; | 1325 | rc = NF_STOLEN; |
1041 | goto out; | 1326 | goto out; |