diff options
-rw-r--r-- | include/net/ip_vs.h | 1 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_core.c | 123 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_ctl.c | 18 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_xmit.c | 433 |
4 files changed, 458 insertions, 117 deletions
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 9d5c1b96530..2f88d594233 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h | |||
@@ -409,6 +409,7 @@ struct ip_vs_conn { | |||
409 | /* packet transmitter for different forwarding methods. If it | 409 | /* packet transmitter for different forwarding methods. If it |
410 | mangles the packet, it must return NF_DROP or better NF_STOLEN, | 410 | mangles the packet, it must return NF_DROP or better NF_STOLEN, |
411 | otherwise this must be changed to a sk_buff **. | 411 | otherwise this must be changed to a sk_buff **. |
412 | NF_ACCEPT can be returned when destination is local. | ||
412 | */ | 413 | */ |
413 | int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp, | 414 | int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp, |
414 | struct ip_vs_protocol *pp); | 415 | struct ip_vs_protocol *pp); |
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index c4f091d5a62..a6c8aff1b47 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c | |||
@@ -984,26 +984,34 @@ drop: | |||
984 | } | 984 | } |
985 | 985 | ||
986 | /* | 986 | /* |
987 | * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT. | ||
988 | * Check if outgoing packet belongs to the established ip_vs_conn. | 987 | * Check if outgoing packet belongs to the established ip_vs_conn. |
989 | */ | 988 | */ |
990 | static unsigned int | 989 | static unsigned int |
991 | ip_vs_out(unsigned int hooknum, struct sk_buff *skb, | 990 | ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) |
992 | const struct net_device *in, const struct net_device *out, | ||
993 | int (*okfn)(struct sk_buff *)) | ||
994 | { | 991 | { |
995 | struct ip_vs_iphdr iph; | 992 | struct ip_vs_iphdr iph; |
996 | struct ip_vs_protocol *pp; | 993 | struct ip_vs_protocol *pp; |
997 | struct ip_vs_conn *cp; | 994 | struct ip_vs_conn *cp; |
998 | int af; | ||
999 | 995 | ||
1000 | EnterFunction(11); | 996 | EnterFunction(11); |
1001 | 997 | ||
1002 | af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; | 998 | /* Already marked as IPVS request or reply? */ |
1003 | |||
1004 | if (skb->ipvs_property) | 999 | if (skb->ipvs_property) |
1005 | return NF_ACCEPT; | 1000 | return NF_ACCEPT; |
1006 | 1001 | ||
1002 | /* Bad... Do not break raw sockets */ | ||
1003 | if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT && | ||
1004 | af == AF_INET)) { | ||
1005 | struct sock *sk = skb->sk; | ||
1006 | struct inet_sock *inet = inet_sk(skb->sk); | ||
1007 | |||
1008 | if (inet && sk->sk_family == PF_INET && inet->nodefrag) | ||
1009 | return NF_ACCEPT; | ||
1010 | } | ||
1011 | |||
1012 | if (unlikely(!skb_dst(skb))) | ||
1013 | return NF_ACCEPT; | ||
1014 | |||
1007 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); | 1015 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); |
1008 | #ifdef CONFIG_IP_VS_IPV6 | 1016 | #ifdef CONFIG_IP_VS_IPV6 |
1009 | if (af == AF_INET6) { | 1017 | if (af == AF_INET6) { |
@@ -1106,6 +1114,69 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, | |||
1106 | return handle_response(af, skb, pp, cp, iph.len); | 1114 | return handle_response(af, skb, pp, cp, iph.len); |
1107 | } | 1115 | } |
1108 | 1116 | ||
1117 | /* | ||
1118 | * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT. | ||
1119 | * Check if packet is reply for established ip_vs_conn. | ||
1120 | */ | ||
1121 | static unsigned int | ||
1122 | ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb, | ||
1123 | const struct net_device *in, const struct net_device *out, | ||
1124 | int (*okfn)(struct sk_buff *)) | ||
1125 | { | ||
1126 | return ip_vs_out(hooknum, skb, AF_INET); | ||
1127 | } | ||
1128 | |||
1129 | /* | ||
1130 | * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT. | ||
1131 | * Check if packet is reply for established ip_vs_conn. | ||
1132 | */ | ||
1133 | static unsigned int | ||
1134 | ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb, | ||
1135 | const struct net_device *in, const struct net_device *out, | ||
1136 | int (*okfn)(struct sk_buff *)) | ||
1137 | { | ||
1138 | unsigned int verdict; | ||
1139 | |||
1140 | /* Disable BH in LOCAL_OUT until all places are fixed */ | ||
1141 | local_bh_disable(); | ||
1142 | verdict = ip_vs_out(hooknum, skb, AF_INET); | ||
1143 | local_bh_enable(); | ||
1144 | return verdict; | ||
1145 | } | ||
1146 | |||
1147 | #ifdef CONFIG_IP_VS_IPV6 | ||
1148 | |||
1149 | /* | ||
1150 | * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT. | ||
1151 | * Check if packet is reply for established ip_vs_conn. | ||
1152 | */ | ||
1153 | static unsigned int | ||
1154 | ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb, | ||
1155 | const struct net_device *in, const struct net_device *out, | ||
1156 | int (*okfn)(struct sk_buff *)) | ||
1157 | { | ||
1158 | return ip_vs_out(hooknum, skb, AF_INET6); | ||
1159 | } | ||
1160 | |||
1161 | /* | ||
1162 | * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT. | ||
1163 | * Check if packet is reply for established ip_vs_conn. | ||
1164 | */ | ||
1165 | static unsigned int | ||
1166 | ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb, | ||
1167 | const struct net_device *in, const struct net_device *out, | ||
1168 | int (*okfn)(struct sk_buff *)) | ||
1169 | { | ||
1170 | unsigned int verdict; | ||
1171 | |||
1172 | /* Disable BH in LOCAL_OUT until all places are fixed */ | ||
1173 | local_bh_disable(); | ||
1174 | verdict = ip_vs_out(hooknum, skb, AF_INET6); | ||
1175 | local_bh_enable(); | ||
1176 | return verdict; | ||
1177 | } | ||
1178 | |||
1179 | #endif | ||
1109 | 1180 | ||
1110 | /* | 1181 | /* |
1111 | * Handle ICMP messages in the outside-to-inside direction (incoming). | 1182 | * Handle ICMP messages in the outside-to-inside direction (incoming). |
@@ -1342,6 +1413,10 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, | |||
1342 | struct ip_vs_conn *cp; | 1413 | struct ip_vs_conn *cp; |
1343 | int ret, restart, af, pkts; | 1414 | int ret, restart, af, pkts; |
1344 | 1415 | ||
1416 | /* Already marked as IPVS request or reply? */ | ||
1417 | if (skb->ipvs_property) | ||
1418 | return NF_ACCEPT; | ||
1419 | |||
1345 | af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; | 1420 | af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; |
1346 | 1421 | ||
1347 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); | 1422 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); |
@@ -1525,13 +1600,13 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { | |||
1525 | .hooknum = NF_INET_LOCAL_IN, | 1600 | .hooknum = NF_INET_LOCAL_IN, |
1526 | .priority = 100, | 1601 | .priority = 100, |
1527 | }, | 1602 | }, |
1528 | /* After packet filtering, change source only for VS/NAT */ | 1603 | /* Before ip_vs_in, change source only for VS/NAT */ |
1529 | { | 1604 | { |
1530 | .hook = ip_vs_out, | 1605 | .hook = ip_vs_local_reply4, |
1531 | .owner = THIS_MODULE, | 1606 | .owner = THIS_MODULE, |
1532 | .pf = PF_INET, | 1607 | .pf = PF_INET, |
1533 | .hooknum = NF_INET_FORWARD, | 1608 | .hooknum = NF_INET_LOCAL_OUT, |
1534 | .priority = 100, | 1609 | .priority = -99, |
1535 | }, | 1610 | }, |
1536 | /* After packet filtering (but before ip_vs_out_icmp), catch icmp | 1611 | /* After packet filtering (but before ip_vs_out_icmp), catch icmp |
1537 | * destined for 0.0.0.0/0, which is for incoming IPVS connections */ | 1612 | * destined for 0.0.0.0/0, which is for incoming IPVS connections */ |
@@ -1542,6 +1617,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { | |||
1542 | .hooknum = NF_INET_FORWARD, | 1617 | .hooknum = NF_INET_FORWARD, |
1543 | .priority = 99, | 1618 | .priority = 99, |
1544 | }, | 1619 | }, |
1620 | /* After packet filtering, change source only for VS/NAT */ | ||
1621 | { | ||
1622 | .hook = ip_vs_reply4, | ||
1623 | .owner = THIS_MODULE, | ||
1624 | .pf = PF_INET, | ||
1625 | .hooknum = NF_INET_FORWARD, | ||
1626 | .priority = 100, | ||
1627 | }, | ||
1545 | #ifdef CONFIG_IP_VS_IPV6 | 1628 | #ifdef CONFIG_IP_VS_IPV6 |
1546 | /* After packet filtering, forward packet through VS/DR, VS/TUN, | 1629 | /* After packet filtering, forward packet through VS/DR, VS/TUN, |
1547 | * or VS/NAT(change destination), so that filtering rules can be | 1630 | * or VS/NAT(change destination), so that filtering rules can be |
@@ -1553,13 +1636,13 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { | |||
1553 | .hooknum = NF_INET_LOCAL_IN, | 1636 | .hooknum = NF_INET_LOCAL_IN, |
1554 | .priority = 100, | 1637 | .priority = 100, |
1555 | }, | 1638 | }, |
1556 | /* After packet filtering, change source only for VS/NAT */ | 1639 | /* Before ip_vs_in, change source only for VS/NAT */ |
1557 | { | 1640 | { |
1558 | .hook = ip_vs_out, | 1641 | .hook = ip_vs_local_reply6, |
1559 | .owner = THIS_MODULE, | 1642 | .owner = THIS_MODULE, |
1560 | .pf = PF_INET6, | 1643 | .pf = PF_INET6, |
1561 | .hooknum = NF_INET_FORWARD, | 1644 | .hooknum = NF_INET_LOCAL_OUT, |
1562 | .priority = 100, | 1645 | .priority = -99, |
1563 | }, | 1646 | }, |
1564 | /* After packet filtering (but before ip_vs_out_icmp), catch icmp | 1647 | /* After packet filtering (but before ip_vs_out_icmp), catch icmp |
1565 | * destined for 0.0.0.0/0, which is for incoming IPVS connections */ | 1648 | * destined for 0.0.0.0/0, which is for incoming IPVS connections */ |
@@ -1570,6 +1653,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { | |||
1570 | .hooknum = NF_INET_FORWARD, | 1653 | .hooknum = NF_INET_FORWARD, |
1571 | .priority = 99, | 1654 | .priority = 99, |
1572 | }, | 1655 | }, |
1656 | /* After packet filtering, change source only for VS/NAT */ | ||
1657 | { | ||
1658 | .hook = ip_vs_reply6, | ||
1659 | .owner = THIS_MODULE, | ||
1660 | .pf = PF_INET6, | ||
1661 | .hooknum = NF_INET_FORWARD, | ||
1662 | .priority = 100, | ||
1663 | }, | ||
1573 | #endif | 1664 | #endif |
1574 | }; | 1665 | }; |
1575 | 1666 | ||
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 0b884d3e192..5f5daa30b0a 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c | |||
@@ -777,20 +777,6 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, | |||
777 | conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK; | 777 | conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK; |
778 | conn_flags |= IP_VS_CONN_F_INACTIVE; | 778 | conn_flags |= IP_VS_CONN_F_INACTIVE; |
779 | 779 | ||
780 | /* check if local node and update the flags */ | ||
781 | #ifdef CONFIG_IP_VS_IPV6 | ||
782 | if (svc->af == AF_INET6) { | ||
783 | if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) { | ||
784 | conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) | ||
785 | | IP_VS_CONN_F_LOCALNODE; | ||
786 | } | ||
787 | } else | ||
788 | #endif | ||
789 | if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) { | ||
790 | conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) | ||
791 | | IP_VS_CONN_F_LOCALNODE; | ||
792 | } | ||
793 | |||
794 | /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ | 780 | /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ |
795 | if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) { | 781 | if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) { |
796 | conn_flags |= IP_VS_CONN_F_NOOUTPUT; | 782 | conn_flags |= IP_VS_CONN_F_NOOUTPUT; |
@@ -824,6 +810,10 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, | |||
824 | dest->u_threshold = udest->u_threshold; | 810 | dest->u_threshold = udest->u_threshold; |
825 | dest->l_threshold = udest->l_threshold; | 811 | dest->l_threshold = udest->l_threshold; |
826 | 812 | ||
813 | spin_lock(&dest->dst_lock); | ||
814 | ip_vs_dst_reset(dest); | ||
815 | spin_unlock(&dest->dst_lock); | ||
816 | |||
827 | if (add) | 817 | if (add) |
828 | ip_vs_new_estimator(&dest->stats); | 818 | ip_vs_new_estimator(&dest->stats); |
829 | 819 | ||
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 63cc0feaaef..8608882f89e 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c | |||
@@ -67,12 +67,19 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos) | |||
67 | return dst; | 67 | return dst; |
68 | } | 68 | } |
69 | 69 | ||
70 | /* | ||
71 | * Get route to destination or remote server | ||
72 | * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest, | ||
73 | * &4=Allow redirect from remote daddr to local | ||
74 | */ | ||
70 | static struct rtable * | 75 | static struct rtable * |
71 | __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_conn *cp, u32 rtos) | 76 | __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, |
77 | __be32 daddr, u32 rtos, int rt_mode) | ||
72 | { | 78 | { |
73 | struct net *net = dev_net(skb->dev); | 79 | struct net *net = dev_net(skb_dst(skb)->dev); |
74 | struct rtable *rt; /* Route to the other host */ | 80 | struct rtable *rt; /* Route to the other host */ |
75 | struct ip_vs_dest *dest = cp->dest; | 81 | struct rtable *ort; /* Original route */ |
82 | int local; | ||
76 | 83 | ||
77 | if (dest) { | 84 | if (dest) { |
78 | spin_lock(&dest->dst_lock); | 85 | spin_lock(&dest->dst_lock); |
@@ -104,23 +111,95 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_conn *cp, u32 rtos) | |||
104 | .oif = 0, | 111 | .oif = 0, |
105 | .nl_u = { | 112 | .nl_u = { |
106 | .ip4_u = { | 113 | .ip4_u = { |
107 | .daddr = cp->daddr.ip, | 114 | .daddr = daddr, |
108 | .saddr = 0, | 115 | .saddr = 0, |
109 | .tos = rtos, } }, | 116 | .tos = rtos, } }, |
110 | }; | 117 | }; |
111 | 118 | ||
112 | if (ip_route_output_key(net, &rt, &fl)) { | 119 | if (ip_route_output_key(net, &rt, &fl)) { |
113 | IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", | 120 | IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", |
114 | &cp->daddr.ip); | 121 | &daddr); |
115 | return NULL; | 122 | return NULL; |
116 | } | 123 | } |
117 | } | 124 | } |
118 | 125 | ||
126 | local = rt->rt_flags & RTCF_LOCAL; | ||
127 | if (!((local ? 1 : 2) & rt_mode)) { | ||
128 | IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n", | ||
129 | (rt->rt_flags & RTCF_LOCAL) ? | ||
130 | "local":"non-local", &rt->rt_dst); | ||
131 | ip_rt_put(rt); | ||
132 | return NULL; | ||
133 | } | ||
134 | if (local && !(rt_mode & 4) && !((ort = skb_rtable(skb)) && | ||
135 | ort->rt_flags & RTCF_LOCAL)) { | ||
136 | IP_VS_DBG_RL("Redirect from non-local address %pI4 to local " | ||
137 | "requires NAT method, dest: %pI4\n", | ||
138 | &ip_hdr(skb)->daddr, &rt->rt_dst); | ||
139 | ip_rt_put(rt); | ||
140 | return NULL; | ||
141 | } | ||
142 | if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) { | ||
143 | IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 " | ||
144 | "to non-local address, dest: %pI4\n", | ||
145 | &ip_hdr(skb)->saddr, &rt->rt_dst); | ||
146 | ip_rt_put(rt); | ||
147 | return NULL; | ||
148 | } | ||
149 | |||
119 | return rt; | 150 | return rt; |
120 | } | 151 | } |
121 | 152 | ||
153 | /* Reroute packet to local IPv4 stack after DNAT */ | ||
154 | static int | ||
155 | __ip_vs_reroute_locally(struct sk_buff *skb) | ||
156 | { | ||
157 | struct rtable *rt = skb_rtable(skb); | ||
158 | struct net_device *dev = rt->dst.dev; | ||
159 | struct net *net = dev_net(dev); | ||
160 | struct iphdr *iph = ip_hdr(skb); | ||
161 | |||
162 | if (rt->fl.iif) { | ||
163 | unsigned long orefdst = skb->_skb_refdst; | ||
164 | |||
165 | if (ip_route_input(skb, iph->daddr, iph->saddr, | ||
166 | iph->tos, skb->dev)) | ||
167 | return 0; | ||
168 | refdst_drop(orefdst); | ||
169 | } else { | ||
170 | struct flowi fl = { | ||
171 | .oif = 0, | ||
172 | .nl_u = { | ||
173 | .ip4_u = { | ||
174 | .daddr = iph->daddr, | ||
175 | .saddr = iph->saddr, | ||
176 | .tos = RT_TOS(iph->tos), | ||
177 | } | ||
178 | }, | ||
179 | .mark = skb->mark, | ||
180 | }; | ||
181 | struct rtable *rt; | ||
182 | |||
183 | if (ip_route_output_key(net, &rt, &fl)) | ||
184 | return 0; | ||
185 | if (!(rt->rt_flags & RTCF_LOCAL)) { | ||
186 | ip_rt_put(rt); | ||
187 | return 0; | ||
188 | } | ||
189 | /* Drop old route. */ | ||
190 | skb_dst_drop(skb); | ||
191 | skb_dst_set(skb, &rt->dst); | ||
192 | } | ||
193 | return 1; | ||
194 | } | ||
195 | |||
122 | #ifdef CONFIG_IP_VS_IPV6 | 196 | #ifdef CONFIG_IP_VS_IPV6 |
123 | 197 | ||
198 | static inline int __ip_vs_is_local_route6(struct rt6_info *rt) | ||
199 | { | ||
200 | return rt->rt6i_dev && rt->rt6i_dev->flags & IFF_LOOPBACK; | ||
201 | } | ||
202 | |||
124 | static struct dst_entry * | 203 | static struct dst_entry * |
125 | __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, | 204 | __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, |
126 | struct in6_addr *ret_saddr, int do_xfrm) | 205 | struct in6_addr *ret_saddr, int do_xfrm) |
@@ -155,14 +234,21 @@ out_err: | |||
155 | return NULL; | 234 | return NULL; |
156 | } | 235 | } |
157 | 236 | ||
237 | /* | ||
238 | * Get route to destination or remote server | ||
239 | * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest, | ||
240 | * &4=Allow redirect from remote daddr to local | ||
241 | */ | ||
158 | static struct rt6_info * | 242 | static struct rt6_info * |
159 | __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | 243 | __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, |
160 | struct in6_addr *ret_saddr, int do_xfrm) | 244 | struct in6_addr *daddr, struct in6_addr *ret_saddr, |
245 | int do_xfrm, int rt_mode) | ||
161 | { | 246 | { |
162 | struct net *net = dev_net(skb->dev); | 247 | struct net *net = dev_net(skb_dst(skb)->dev); |
163 | struct rt6_info *rt; /* Route to the other host */ | 248 | struct rt6_info *rt; /* Route to the other host */ |
164 | struct ip_vs_dest *dest = cp->dest; | 249 | struct rt6_info *ort; /* Original route */ |
165 | struct dst_entry *dst; | 250 | struct dst_entry *dst; |
251 | int local; | ||
166 | 252 | ||
167 | if (dest) { | 253 | if (dest) { |
168 | spin_lock(&dest->dst_lock); | 254 | spin_lock(&dest->dst_lock); |
@@ -188,13 +274,38 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
188 | ipv6_addr_copy(ret_saddr, &dest->dst_saddr); | 274 | ipv6_addr_copy(ret_saddr, &dest->dst_saddr); |
189 | spin_unlock(&dest->dst_lock); | 275 | spin_unlock(&dest->dst_lock); |
190 | } else { | 276 | } else { |
191 | dst = __ip_vs_route_output_v6(net, &cp->daddr.in6, ret_saddr, | 277 | dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm); |
192 | do_xfrm); | ||
193 | if (!dst) | 278 | if (!dst) |
194 | return NULL; | 279 | return NULL; |
195 | rt = (struct rt6_info *) dst; | 280 | rt = (struct rt6_info *) dst; |
196 | } | 281 | } |
197 | 282 | ||
283 | local = __ip_vs_is_local_route6(rt); | ||
284 | if (!((local ? 1 : 2) & rt_mode)) { | ||
285 | IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n", | ||
286 | local ? "local":"non-local", daddr); | ||
287 | dst_release(&rt->dst); | ||
288 | return NULL; | ||
289 | } | ||
290 | if (local && !(rt_mode & 4) && | ||
291 | !((ort = (struct rt6_info *) skb_dst(skb)) && | ||
292 | __ip_vs_is_local_route6(ort))) { | ||
293 | IP_VS_DBG_RL("Redirect from non-local address %pI6 to local " | ||
294 | "requires NAT method, dest: %pI6\n", | ||
295 | &ipv6_hdr(skb)->daddr, daddr); | ||
296 | dst_release(&rt->dst); | ||
297 | return NULL; | ||
298 | } | ||
299 | if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) && | ||
300 | ipv6_addr_type(&ipv6_hdr(skb)->saddr) & | ||
301 | IPV6_ADDR_LOOPBACK)) { | ||
302 | IP_VS_DBG_RL("Stopping traffic from loopback address %pI6 " | ||
303 | "to non-local address, dest: %pI6\n", | ||
304 | &ipv6_hdr(skb)->saddr, daddr); | ||
305 | dst_release(&rt->dst); | ||
306 | return NULL; | ||
307 | } | ||
308 | |||
198 | return rt; | 309 | return rt; |
199 | } | 310 | } |
200 | #endif | 311 | #endif |
@@ -227,23 +338,27 @@ ip_vs_dst_reset(struct ip_vs_dest *dest) | |||
227 | __ret; \ | 338 | __ret; \ |
228 | }) | 339 | }) |
229 | 340 | ||
230 | #define IP_VS_XMIT_NAT(pf, skb, cp) \ | 341 | #define IP_VS_XMIT_NAT(pf, skb, cp, local) \ |
231 | do { \ | 342 | do { \ |
232 | (skb)->ipvs_property = 1; \ | 343 | (skb)->ipvs_property = 1; \ |
233 | if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ | 344 | if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ |
234 | ip_vs_notrack(skb); \ | 345 | ip_vs_notrack(skb); \ |
235 | else \ | 346 | else \ |
236 | ip_vs_update_conntrack(skb, cp, 1); \ | 347 | ip_vs_update_conntrack(skb, cp, 1); \ |
348 | if (local) \ | ||
349 | return NF_ACCEPT; \ | ||
237 | skb_forward_csum(skb); \ | 350 | skb_forward_csum(skb); \ |
238 | NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ | 351 | NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ |
239 | skb_dst(skb)->dev, dst_output); \ | 352 | skb_dst(skb)->dev, dst_output); \ |
240 | } while (0) | 353 | } while (0) |
241 | 354 | ||
242 | #define IP_VS_XMIT(pf, skb, cp) \ | 355 | #define IP_VS_XMIT(pf, skb, cp, local) \ |
243 | do { \ | 356 | do { \ |
244 | (skb)->ipvs_property = 1; \ | 357 | (skb)->ipvs_property = 1; \ |
245 | if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ | 358 | if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ |
246 | ip_vs_notrack(skb); \ | 359 | ip_vs_notrack(skb); \ |
360 | if (local) \ | ||
361 | return NF_ACCEPT; \ | ||
247 | skb_forward_csum(skb); \ | 362 | skb_forward_csum(skb); \ |
248 | NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ | 363 | NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ |
249 | skb_dst(skb)->dev, dst_output); \ | 364 | skb_dst(skb)->dev, dst_output); \ |
@@ -258,7 +373,7 @@ ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
258 | struct ip_vs_protocol *pp) | 373 | struct ip_vs_protocol *pp) |
259 | { | 374 | { |
260 | /* we do not touch skb and do not need pskb ptr */ | 375 | /* we do not touch skb and do not need pskb ptr */ |
261 | return NF_ACCEPT; | 376 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); |
262 | } | 377 | } |
263 | 378 | ||
264 | 379 | ||
@@ -271,27 +386,15 @@ int | |||
271 | ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | 386 | ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, |
272 | struct ip_vs_protocol *pp) | 387 | struct ip_vs_protocol *pp) |
273 | { | 388 | { |
274 | struct net *net = dev_net(skb->dev); | ||
275 | struct rtable *rt; /* Route to the other host */ | 389 | struct rtable *rt; /* Route to the other host */ |
276 | struct iphdr *iph = ip_hdr(skb); | 390 | struct iphdr *iph = ip_hdr(skb); |
277 | u8 tos = iph->tos; | ||
278 | int mtu; | 391 | int mtu; |
279 | struct flowi fl = { | ||
280 | .oif = 0, | ||
281 | .nl_u = { | ||
282 | .ip4_u = { | ||
283 | .daddr = iph->daddr, | ||
284 | .saddr = 0, | ||
285 | .tos = RT_TOS(tos), } }, | ||
286 | }; | ||
287 | 392 | ||
288 | EnterFunction(10); | 393 | EnterFunction(10); |
289 | 394 | ||
290 | if (ip_route_output_key(net, &rt, &fl)) { | 395 | if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, |
291 | IP_VS_DBG_RL("%s(): ip_route_output error, dest: %pI4\n", | 396 | RT_TOS(iph->tos), 2))) |
292 | __func__, &iph->daddr); | ||
293 | goto tx_error_icmp; | 397 | goto tx_error_icmp; |
294 | } | ||
295 | 398 | ||
296 | /* MTU checking */ | 399 | /* MTU checking */ |
297 | mtu = dst_mtu(&rt->dst); | 400 | mtu = dst_mtu(&rt->dst); |
@@ -319,7 +422,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
319 | /* Another hack: avoid icmp_send in ip_fragment */ | 422 | /* Another hack: avoid icmp_send in ip_fragment */ |
320 | skb->local_df = 1; | 423 | skb->local_df = 1; |
321 | 424 | ||
322 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp); | 425 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0); |
323 | 426 | ||
324 | LeaveFunction(10); | 427 | LeaveFunction(10); |
325 | return NF_STOLEN; | 428 | return NF_STOLEN; |
@@ -337,18 +440,14 @@ int | |||
337 | ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | 440 | ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, |
338 | struct ip_vs_protocol *pp) | 441 | struct ip_vs_protocol *pp) |
339 | { | 442 | { |
340 | struct net *net = dev_net(skb->dev); | ||
341 | struct dst_entry *dst; | ||
342 | struct rt6_info *rt; /* Route to the other host */ | 443 | struct rt6_info *rt; /* Route to the other host */ |
343 | struct ipv6hdr *iph = ipv6_hdr(skb); | 444 | struct ipv6hdr *iph = ipv6_hdr(skb); |
344 | int mtu; | 445 | int mtu; |
345 | 446 | ||
346 | EnterFunction(10); | 447 | EnterFunction(10); |
347 | 448 | ||
348 | dst = __ip_vs_route_output_v6(net, &iph->daddr, NULL, 0); | 449 | if (!(rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr, NULL, 0, 2))) |
349 | if (!dst) | ||
350 | goto tx_error_icmp; | 450 | goto tx_error_icmp; |
351 | rt = (struct rt6_info *) dst; | ||
352 | 451 | ||
353 | /* MTU checking */ | 452 | /* MTU checking */ |
354 | mtu = dst_mtu(&rt->dst); | 453 | mtu = dst_mtu(&rt->dst); |
@@ -376,7 +475,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
376 | /* Another hack: avoid icmp_send in ip_fragment */ | 475 | /* Another hack: avoid icmp_send in ip_fragment */ |
377 | skb->local_df = 1; | 476 | skb->local_df = 1; |
378 | 477 | ||
379 | IP_VS_XMIT(NFPROTO_IPV6, skb, cp); | 478 | IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0); |
380 | 479 | ||
381 | LeaveFunction(10); | 480 | LeaveFunction(10); |
382 | return NF_STOLEN; | 481 | return NF_STOLEN; |
@@ -401,6 +500,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
401 | struct rtable *rt; /* Route to the other host */ | 500 | struct rtable *rt; /* Route to the other host */ |
402 | int mtu; | 501 | int mtu; |
403 | struct iphdr *iph = ip_hdr(skb); | 502 | struct iphdr *iph = ip_hdr(skb); |
503 | int local; | ||
404 | 504 | ||
405 | EnterFunction(10); | 505 | EnterFunction(10); |
406 | 506 | ||
@@ -414,16 +514,40 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
414 | IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); | 514 | IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); |
415 | } | 515 | } |
416 | 516 | ||
417 | if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos)))) | 517 | if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, |
518 | RT_TOS(iph->tos), 1|2|4))) | ||
418 | goto tx_error_icmp; | 519 | goto tx_error_icmp; |
520 | local = rt->rt_flags & RTCF_LOCAL; | ||
521 | /* | ||
522 | * Avoid duplicate tuple in reply direction for NAT traffic | ||
523 | * to local address when connection is sync-ed | ||
524 | */ | ||
525 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | ||
526 | if (cp->flags & IP_VS_CONN_F_SYNC && local) { | ||
527 | enum ip_conntrack_info ctinfo; | ||
528 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); | ||
529 | |||
530 | if (ct && !nf_ct_is_untracked(ct)) { | ||
531 | IP_VS_DBG_RL_PKT(10, pp, skb, 0, "ip_vs_nat_xmit(): " | ||
532 | "stopping DNAT to local address"); | ||
533 | goto tx_error_put; | ||
534 | } | ||
535 | } | ||
536 | #endif | ||
537 | |||
538 | /* From world but DNAT to loopback address? */ | ||
539 | if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) { | ||
540 | IP_VS_DBG_RL_PKT(1, pp, skb, 0, "ip_vs_nat_xmit(): " | ||
541 | "stopping DNAT to loopback address"); | ||
542 | goto tx_error_put; | ||
543 | } | ||
419 | 544 | ||
420 | /* MTU checking */ | 545 | /* MTU checking */ |
421 | mtu = dst_mtu(&rt->dst); | 546 | mtu = dst_mtu(&rt->dst); |
422 | if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { | 547 | if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { |
423 | ip_rt_put(rt); | ||
424 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | 548 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); |
425 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for"); | 549 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for"); |
426 | goto tx_error; | 550 | goto tx_error_put; |
427 | } | 551 | } |
428 | 552 | ||
429 | /* copy-on-write the packet before mangling it */ | 553 | /* copy-on-write the packet before mangling it */ |
@@ -433,16 +557,27 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
433 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) | 557 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) |
434 | goto tx_error_put; | 558 | goto tx_error_put; |
435 | 559 | ||
436 | /* drop old route */ | ||
437 | skb_dst_drop(skb); | ||
438 | skb_dst_set(skb, &rt->dst); | ||
439 | |||
440 | /* mangle the packet */ | 560 | /* mangle the packet */ |
441 | if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) | 561 | if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) |
442 | goto tx_error; | 562 | goto tx_error_put; |
443 | ip_hdr(skb)->daddr = cp->daddr.ip; | 563 | ip_hdr(skb)->daddr = cp->daddr.ip; |
444 | ip_send_check(ip_hdr(skb)); | 564 | ip_send_check(ip_hdr(skb)); |
445 | 565 | ||
566 | if (!local) { | ||
567 | /* drop old route */ | ||
568 | skb_dst_drop(skb); | ||
569 | skb_dst_set(skb, &rt->dst); | ||
570 | } else { | ||
571 | ip_rt_put(rt); | ||
572 | /* | ||
573 | * Some IPv4 replies get local address from routes, | ||
574 | * not from iph, so while we DNAT after routing | ||
575 | * we need this second input/output route. | ||
576 | */ | ||
577 | if (!__ip_vs_reroute_locally(skb)) | ||
578 | goto tx_error; | ||
579 | } | ||
580 | |||
446 | IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); | 581 | IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); |
447 | 582 | ||
448 | /* FIXME: when application helper enlarges the packet and the length | 583 | /* FIXME: when application helper enlarges the packet and the length |
@@ -452,7 +587,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
452 | /* Another hack: avoid icmp_send in ip_fragment */ | 587 | /* Another hack: avoid icmp_send in ip_fragment */ |
453 | skb->local_df = 1; | 588 | skb->local_df = 1; |
454 | 589 | ||
455 | IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp); | 590 | IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); |
456 | 591 | ||
457 | LeaveFunction(10); | 592 | LeaveFunction(10); |
458 | return NF_STOLEN; | 593 | return NF_STOLEN; |
@@ -475,6 +610,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
475 | { | 610 | { |
476 | struct rt6_info *rt; /* Route to the other host */ | 611 | struct rt6_info *rt; /* Route to the other host */ |
477 | int mtu; | 612 | int mtu; |
613 | int local; | ||
478 | 614 | ||
479 | EnterFunction(10); | 615 | EnterFunction(10); |
480 | 616 | ||
@@ -489,18 +625,44 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
489 | IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); | 625 | IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); |
490 | } | 626 | } |
491 | 627 | ||
492 | rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0); | 628 | if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, |
493 | if (!rt) | 629 | 0, 1|2|4))) |
494 | goto tx_error_icmp; | 630 | goto tx_error_icmp; |
631 | local = __ip_vs_is_local_route6(rt); | ||
632 | /* | ||
633 | * Avoid duplicate tuple in reply direction for NAT traffic | ||
634 | * to local address when connection is sync-ed | ||
635 | */ | ||
636 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | ||
637 | if (cp->flags & IP_VS_CONN_F_SYNC && local) { | ||
638 | enum ip_conntrack_info ctinfo; | ||
639 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); | ||
640 | |||
641 | if (ct && !nf_ct_is_untracked(ct)) { | ||
642 | IP_VS_DBG_RL_PKT(10, pp, skb, 0, | ||
643 | "ip_vs_nat_xmit_v6(): " | ||
644 | "stopping DNAT to local address"); | ||
645 | goto tx_error_put; | ||
646 | } | ||
647 | } | ||
648 | #endif | ||
649 | |||
650 | /* From world but DNAT to loopback address? */ | ||
651 | if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && | ||
652 | ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { | ||
653 | IP_VS_DBG_RL_PKT(1, pp, skb, 0, | ||
654 | "ip_vs_nat_xmit_v6(): " | ||
655 | "stopping DNAT to loopback address"); | ||
656 | goto tx_error_put; | ||
657 | } | ||
495 | 658 | ||
496 | /* MTU checking */ | 659 | /* MTU checking */ |
497 | mtu = dst_mtu(&rt->dst); | 660 | mtu = dst_mtu(&rt->dst); |
498 | if (skb->len > mtu) { | 661 | if (skb->len > mtu) { |
499 | dst_release(&rt->dst); | ||
500 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); | 662 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
501 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, | 663 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, |
502 | "ip_vs_nat_xmit_v6(): frag needed for"); | 664 | "ip_vs_nat_xmit_v6(): frag needed for"); |
503 | goto tx_error; | 665 | goto tx_error_put; |
504 | } | 666 | } |
505 | 667 | ||
506 | /* copy-on-write the packet before mangling it */ | 668 | /* copy-on-write the packet before mangling it */ |
@@ -510,14 +672,19 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
510 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) | 672 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) |
511 | goto tx_error_put; | 673 | goto tx_error_put; |
512 | 674 | ||
513 | /* drop old route */ | ||
514 | skb_dst_drop(skb); | ||
515 | skb_dst_set(skb, &rt->dst); | ||
516 | |||
517 | /* mangle the packet */ | 675 | /* mangle the packet */ |
518 | if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) | 676 | if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) |
519 | goto tx_error; | 677 | goto tx_error; |
520 | ipv6_hdr(skb)->daddr = cp->daddr.in6; | 678 | ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &cp->daddr.in6); |
679 | |||
680 | if (!local || !skb->dev) { | ||
681 | /* drop the old route when skb is not shared */ | ||
682 | skb_dst_drop(skb); | ||
683 | skb_dst_set(skb, &rt->dst); | ||
684 | } else { | ||
685 | /* destined to loopback, do we need to change route? */ | ||
686 | dst_release(&rt->dst); | ||
687 | } | ||
521 | 688 | ||
522 | IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); | 689 | IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); |
523 | 690 | ||
@@ -528,7 +695,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
528 | /* Another hack: avoid icmp_send in ip_fragment */ | 695 | /* Another hack: avoid icmp_send in ip_fragment */ |
529 | skb->local_df = 1; | 696 | skb->local_df = 1; |
530 | 697 | ||
531 | IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp); | 698 | IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local); |
532 | 699 | ||
533 | LeaveFunction(10); | 700 | LeaveFunction(10); |
534 | return NF_STOLEN; | 701 | return NF_STOLEN; |
@@ -588,16 +755,20 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
588 | goto tx_error; | 755 | goto tx_error; |
589 | } | 756 | } |
590 | 757 | ||
591 | if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(tos)))) | 758 | if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, |
759 | RT_TOS(tos), 1|2))) | ||
592 | goto tx_error_icmp; | 760 | goto tx_error_icmp; |
761 | if (rt->rt_flags & RTCF_LOCAL) { | ||
762 | ip_rt_put(rt); | ||
763 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); | ||
764 | } | ||
593 | 765 | ||
594 | tdev = rt->dst.dev; | 766 | tdev = rt->dst.dev; |
595 | 767 | ||
596 | mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); | 768 | mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); |
597 | if (mtu < 68) { | 769 | if (mtu < 68) { |
598 | ip_rt_put(rt); | ||
599 | IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); | 770 | IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); |
600 | goto tx_error; | 771 | goto tx_error_put; |
601 | } | 772 | } |
602 | if (skb_dst(skb)) | 773 | if (skb_dst(skb)) |
603 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); | 774 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); |
@@ -607,9 +778,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
607 | if ((old_iph->frag_off & htons(IP_DF)) | 778 | if ((old_iph->frag_off & htons(IP_DF)) |
608 | && mtu < ntohs(old_iph->tot_len)) { | 779 | && mtu < ntohs(old_iph->tot_len)) { |
609 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | 780 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); |
610 | ip_rt_put(rt); | ||
611 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); | 781 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); |
612 | goto tx_error; | 782 | goto tx_error_put; |
613 | } | 783 | } |
614 | 784 | ||
615 | /* | 785 | /* |
@@ -678,6 +848,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
678 | kfree_skb(skb); | 848 | kfree_skb(skb); |
679 | LeaveFunction(10); | 849 | LeaveFunction(10); |
680 | return NF_STOLEN; | 850 | return NF_STOLEN; |
851 | tx_error_put: | ||
852 | ip_rt_put(rt); | ||
853 | goto tx_error; | ||
681 | } | 854 | } |
682 | 855 | ||
683 | #ifdef CONFIG_IP_VS_IPV6 | 856 | #ifdef CONFIG_IP_VS_IPV6 |
@@ -703,27 +876,29 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
703 | goto tx_error; | 876 | goto tx_error; |
704 | } | 877 | } |
705 | 878 | ||
706 | rt = __ip_vs_get_out_rt_v6(skb, cp, &saddr, 1); | 879 | if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, |
707 | if (!rt) | 880 | &saddr, 1, 1|2))) |
708 | goto tx_error_icmp; | 881 | goto tx_error_icmp; |
882 | if (__ip_vs_is_local_route6(rt)) { | ||
883 | dst_release(&rt->dst); | ||
884 | IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1); | ||
885 | } | ||
709 | 886 | ||
710 | tdev = rt->dst.dev; | 887 | tdev = rt->dst.dev; |
711 | 888 | ||
712 | mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); | 889 | mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); |
713 | if (mtu < IPV6_MIN_MTU) { | 890 | if (mtu < IPV6_MIN_MTU) { |
714 | dst_release(&rt->dst); | ||
715 | IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, | 891 | IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, |
716 | IPV6_MIN_MTU); | 892 | IPV6_MIN_MTU); |
717 | goto tx_error; | 893 | goto tx_error_put; |
718 | } | 894 | } |
719 | if (skb_dst(skb)) | 895 | if (skb_dst(skb)) |
720 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); | 896 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); |
721 | 897 | ||
722 | if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { | 898 | if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { |
723 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); | 899 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
724 | dst_release(&rt->dst); | ||
725 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); | 900 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); |
726 | goto tx_error; | 901 | goto tx_error_put; |
727 | } | 902 | } |
728 | 903 | ||
729 | /* | 904 | /* |
@@ -789,6 +964,9 @@ tx_error: | |||
789 | kfree_skb(skb); | 964 | kfree_skb(skb); |
790 | LeaveFunction(10); | 965 | LeaveFunction(10); |
791 | return NF_STOLEN; | 966 | return NF_STOLEN; |
967 | tx_error_put: | ||
968 | dst_release(&rt->dst); | ||
969 | goto tx_error; | ||
792 | } | 970 | } |
793 | #endif | 971 | #endif |
794 | 972 | ||
@@ -807,8 +985,13 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
807 | 985 | ||
808 | EnterFunction(10); | 986 | EnterFunction(10); |
809 | 987 | ||
810 | if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos)))) | 988 | if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, |
989 | RT_TOS(iph->tos), 1|2))) | ||
811 | goto tx_error_icmp; | 990 | goto tx_error_icmp; |
991 | if (rt->rt_flags & RTCF_LOCAL) { | ||
992 | ip_rt_put(rt); | ||
993 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); | ||
994 | } | ||
812 | 995 | ||
813 | /* MTU checking */ | 996 | /* MTU checking */ |
814 | mtu = dst_mtu(&rt->dst); | 997 | mtu = dst_mtu(&rt->dst); |
@@ -836,7 +1019,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
836 | /* Another hack: avoid icmp_send in ip_fragment */ | 1019 | /* Another hack: avoid icmp_send in ip_fragment */ |
837 | skb->local_df = 1; | 1020 | skb->local_df = 1; |
838 | 1021 | ||
839 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp); | 1022 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0); |
840 | 1023 | ||
841 | LeaveFunction(10); | 1024 | LeaveFunction(10); |
842 | return NF_STOLEN; | 1025 | return NF_STOLEN; |
@@ -859,9 +1042,13 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
859 | 1042 | ||
860 | EnterFunction(10); | 1043 | EnterFunction(10); |
861 | 1044 | ||
862 | rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0); | 1045 | if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, |
863 | if (!rt) | 1046 | 0, 1|2))) |
864 | goto tx_error_icmp; | 1047 | goto tx_error_icmp; |
1048 | if (__ip_vs_is_local_route6(rt)) { | ||
1049 | dst_release(&rt->dst); | ||
1050 | IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1); | ||
1051 | } | ||
865 | 1052 | ||
866 | /* MTU checking */ | 1053 | /* MTU checking */ |
867 | mtu = dst_mtu(&rt->dst); | 1054 | mtu = dst_mtu(&rt->dst); |
@@ -889,7 +1076,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
889 | /* Another hack: avoid icmp_send in ip_fragment */ | 1076 | /* Another hack: avoid icmp_send in ip_fragment */ |
890 | skb->local_df = 1; | 1077 | skb->local_df = 1; |
891 | 1078 | ||
892 | IP_VS_XMIT(NFPROTO_IPV6, skb, cp); | 1079 | IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0); |
893 | 1080 | ||
894 | LeaveFunction(10); | 1081 | LeaveFunction(10); |
895 | return NF_STOLEN; | 1082 | return NF_STOLEN; |
@@ -915,6 +1102,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
915 | struct rtable *rt; /* Route to the other host */ | 1102 | struct rtable *rt; /* Route to the other host */ |
916 | int mtu; | 1103 | int mtu; |
917 | int rc; | 1104 | int rc; |
1105 | int local; | ||
918 | 1106 | ||
919 | EnterFunction(10); | 1107 | EnterFunction(10); |
920 | 1108 | ||
@@ -935,16 +1123,43 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
935 | * mangle and send the packet here (only for VS/NAT) | 1123 | * mangle and send the packet here (only for VS/NAT) |
936 | */ | 1124 | */ |
937 | 1125 | ||
938 | if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(ip_hdr(skb)->tos)))) | 1126 | if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, |
1127 | RT_TOS(ip_hdr(skb)->tos), 1|2|4))) | ||
939 | goto tx_error_icmp; | 1128 | goto tx_error_icmp; |
1129 | local = rt->rt_flags & RTCF_LOCAL; | ||
1130 | |||
1131 | /* | ||
1132 | * Avoid duplicate tuple in reply direction for NAT traffic | ||
1133 | * to local address when connection is sync-ed | ||
1134 | */ | ||
1135 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | ||
1136 | if (cp->flags & IP_VS_CONN_F_SYNC && local) { | ||
1137 | enum ip_conntrack_info ctinfo; | ||
1138 | struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); | ||
1139 | |||
1140 | if (ct && !nf_ct_is_untracked(ct)) { | ||
1141 | IP_VS_DBG(10, "%s(): " | ||
1142 | "stopping DNAT to local address %pI4\n", | ||
1143 | __func__, &cp->daddr.ip); | ||
1144 | goto tx_error_put; | ||
1145 | } | ||
1146 | } | ||
1147 | #endif | ||
1148 | |||
1149 | /* From world but DNAT to loopback address? */ | ||
1150 | if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) { | ||
1151 | IP_VS_DBG(1, "%s(): " | ||
1152 | "stopping DNAT to loopback %pI4\n", | ||
1153 | __func__, &cp->daddr.ip); | ||
1154 | goto tx_error_put; | ||
1155 | } | ||
940 | 1156 | ||
941 | /* MTU checking */ | 1157 | /* MTU checking */ |
942 | mtu = dst_mtu(&rt->dst); | 1158 | mtu = dst_mtu(&rt->dst); |
943 | if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { | 1159 | if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { |
944 | ip_rt_put(rt); | ||
945 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); | 1160 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); |
946 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); | 1161 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); |
947 | goto tx_error; | 1162 | goto tx_error_put; |
948 | } | 1163 | } |
949 | 1164 | ||
950 | /* copy-on-write the packet before mangling it */ | 1165 | /* copy-on-write the packet before mangling it */ |
@@ -954,16 +1169,27 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
954 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) | 1169 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) |
955 | goto tx_error_put; | 1170 | goto tx_error_put; |
956 | 1171 | ||
957 | /* drop the old route when skb is not shared */ | ||
958 | skb_dst_drop(skb); | ||
959 | skb_dst_set(skb, &rt->dst); | ||
960 | |||
961 | ip_vs_nat_icmp(skb, pp, cp, 0); | 1172 | ip_vs_nat_icmp(skb, pp, cp, 0); |
962 | 1173 | ||
1174 | if (!local) { | ||
1175 | /* drop the old route when skb is not shared */ | ||
1176 | skb_dst_drop(skb); | ||
1177 | skb_dst_set(skb, &rt->dst); | ||
1178 | } else { | ||
1179 | ip_rt_put(rt); | ||
1180 | /* | ||
1181 | * Some IPv4 replies get local address from routes, | ||
1182 | * not from iph, so while we DNAT after routing | ||
1183 | * we need this second input/output route. | ||
1184 | */ | ||
1185 | if (!__ip_vs_reroute_locally(skb)) | ||
1186 | goto tx_error; | ||
1187 | } | ||
1188 | |||
963 | /* Another hack: avoid icmp_send in ip_fragment */ | 1189 | /* Another hack: avoid icmp_send in ip_fragment */ |
964 | skb->local_df = 1; | 1190 | skb->local_df = 1; |
965 | 1191 | ||
966 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp); | 1192 | IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); |
967 | 1193 | ||
968 | rc = NF_STOLEN; | 1194 | rc = NF_STOLEN; |
969 | goto out; | 1195 | goto out; |
@@ -989,6 +1215,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
989 | struct rt6_info *rt; /* Route to the other host */ | 1215 | struct rt6_info *rt; /* Route to the other host */ |
990 | int mtu; | 1216 | int mtu; |
991 | int rc; | 1217 | int rc; |
1218 | int local; | ||
992 | 1219 | ||
993 | EnterFunction(10); | 1220 | EnterFunction(10); |
994 | 1221 | ||
@@ -1009,17 +1236,44 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
1009 | * mangle and send the packet here (only for VS/NAT) | 1236 | * mangle and send the packet here (only for VS/NAT) |
1010 | */ | 1237 | */ |
1011 | 1238 | ||
1012 | rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0); | 1239 | if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, |
1013 | if (!rt) | 1240 | 0, 1|2|4))) |
1014 | goto tx_error_icmp; | 1241 | goto tx_error_icmp; |
1015 | 1242 | ||
1243 | local = __ip_vs_is_local_route6(rt); | ||
1244 | /* | ||
1245 | * Avoid duplicate tuple in reply direction for NAT traffic | ||
1246 | * to local address when connection is sync-ed | ||
1247 | */ | ||
1248 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | ||
1249 | if (cp->flags & IP_VS_CONN_F_SYNC && local) { | ||
1250 | enum ip_conntrack_info ctinfo; | ||
1251 | struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); | ||
1252 | |||
1253 | if (ct && !nf_ct_is_untracked(ct)) { | ||
1254 | IP_VS_DBG(10, "%s(): " | ||
1255 | "stopping DNAT to local address %pI6\n", | ||
1256 | __func__, &cp->daddr.in6); | ||
1257 | goto tx_error_put; | ||
1258 | } | ||
1259 | } | ||
1260 | #endif | ||
1261 | |||
1262 | /* From world but DNAT to loopback address? */ | ||
1263 | if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && | ||
1264 | ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { | ||
1265 | IP_VS_DBG(1, "%s(): " | ||
1266 | "stopping DNAT to loopback %pI6\n", | ||
1267 | __func__, &cp->daddr.in6); | ||
1268 | goto tx_error_put; | ||
1269 | } | ||
1270 | |||
1016 | /* MTU checking */ | 1271 | /* MTU checking */ |
1017 | mtu = dst_mtu(&rt->dst); | 1272 | mtu = dst_mtu(&rt->dst); |
1018 | if (skb->len > mtu) { | 1273 | if (skb->len > mtu) { |
1019 | dst_release(&rt->dst); | ||
1020 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); | 1274 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
1021 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); | 1275 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); |
1022 | goto tx_error; | 1276 | goto tx_error_put; |
1023 | } | 1277 | } |
1024 | 1278 | ||
1025 | /* copy-on-write the packet before mangling it */ | 1279 | /* copy-on-write the packet before mangling it */ |
@@ -1029,16 +1283,21 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
1029 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) | 1283 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) |
1030 | goto tx_error_put; | 1284 | goto tx_error_put; |
1031 | 1285 | ||
1032 | /* drop the old route when skb is not shared */ | ||
1033 | skb_dst_drop(skb); | ||
1034 | skb_dst_set(skb, &rt->dst); | ||
1035 | |||
1036 | ip_vs_nat_icmp_v6(skb, pp, cp, 0); | 1286 | ip_vs_nat_icmp_v6(skb, pp, cp, 0); |
1037 | 1287 | ||
1288 | if (!local || !skb->dev) { | ||
1289 | /* drop the old route when skb is not shared */ | ||
1290 | skb_dst_drop(skb); | ||
1291 | skb_dst_set(skb, &rt->dst); | ||
1292 | } else { | ||
1293 | /* destined to loopback, do we need to change route? */ | ||
1294 | dst_release(&rt->dst); | ||
1295 | } | ||
1296 | |||
1038 | /* Another hack: avoid icmp_send in ip_fragment */ | 1297 | /* Another hack: avoid icmp_send in ip_fragment */ |
1039 | skb->local_df = 1; | 1298 | skb->local_df = 1; |
1040 | 1299 | ||
1041 | IP_VS_XMIT(NFPROTO_IPV6, skb, cp); | 1300 | IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local); |
1042 | 1301 | ||
1043 | rc = NF_STOLEN; | 1302 | rc = NF_STOLEN; |
1044 | goto out; | 1303 | goto out; |