about | summary | refs | log | tree | commit | diff | stats
path: root/net/netfilter
diff options
context:
space:
mode:
author: Julian Anastasov <ja@ssi.bg> 2010-10-17 09:38:15 -0400
committer: Simon Horman <horms@verge.net.au> 2010-10-21 05:03:46 -0400
commit: fc604767613b6d2036cdc35b660bc39451040a47 (patch)
tree: 50177ff8f66e0f153b2f172f98290eae5fcaee24 /net/netfilter
parent: f5a41847acc535e2e2018e397b1876ba7577d9d9 (diff)
ipvs: changes for local real server
This patch deals with local real servers:

- Add support for DNAT to local address (different real server port). It needs the ip_vs_out hook in LOCAL_OUT for both families because skb->protocol is not set for locally generated packets and cannot be used to set 'af'.
- Skip packets in ip_vs_in marked with skb->ipvs_property because ip_vs_out processing can be executed in LOCAL_OUT but we still have the conn_out_get check in ip_vs_in.
- Ignore packets with inet->nodefrag from the local stack.
- Require skb_dst(skb) != NULL because we use it to get struct net.
- Add support for changing the route to the local IPv4 stack after DNAT depending on the source address type. A local client sets the output route and a remote client sets the input route. It looks like IPv6 does not need such rerouting because the replies use addresses from the initial incoming header, not from the skb route.
- All transmitters now have strict checks for the destination address type: a redirect from a non-local address to a local real server requires the NAT method, and a local address cannot be used as source address when talking to a remote real server.
- LOCALNODE is no longer set explicitly as the forwarding method in the real server, to allow the connections to provide the correct forwarding method to the backup server. Not sure if this breaks tools that expect to see the 'Local' real server type. If needed, this can be supported with a new flag IP_VS_DEST_F_LOCAL. It should now be possible for connections in the backup that lost their fwmark information during sync to be forwarded properly to their daddr, even if it is a local address in the backup server. This way the backup could be used as a real server for DR or TUN; for NAT there are some restrictions because tuple collisions in conntracks can create problems for the traffic.
- Call ip_vs_dst_reset when the destination is updated, in case some real server IP type is changed between local and remote.
[ horms@verge.net.au: removed trailing whitespace ]
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
Diffstat (limited to 'net/netfilter')
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c | 123
-rw-r--r-- net/netfilter/ipvs/ip_vs_ctl.c | 18
-rw-r--r-- net/netfilter/ipvs/ip_vs_xmit.c | 433
3 files changed, 457 insertions, 117 deletions
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index c4f091d5a62..a6c8aff1b47 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -984,26 +984,34 @@ drop:
984} 984}
985 985
986/* 986/*
987 * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
988 * Check if outgoing packet belongs to the established ip_vs_conn. 987 * Check if outgoing packet belongs to the established ip_vs_conn.
989 */ 988 */
990static unsigned int 989static unsigned int
991ip_vs_out(unsigned int hooknum, struct sk_buff *skb, 990ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
992 const struct net_device *in, const struct net_device *out,
993 int (*okfn)(struct sk_buff *))
994{ 991{
995 struct ip_vs_iphdr iph; 992 struct ip_vs_iphdr iph;
996 struct ip_vs_protocol *pp; 993 struct ip_vs_protocol *pp;
997 struct ip_vs_conn *cp; 994 struct ip_vs_conn *cp;
998 int af;
999 995
1000 EnterFunction(11); 996 EnterFunction(11);
1001 997
1002 af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; 998 /* Already marked as IPVS request or reply? */
1003
1004 if (skb->ipvs_property) 999 if (skb->ipvs_property)
1005 return NF_ACCEPT; 1000 return NF_ACCEPT;
1006 1001
1002 /* Bad... Do not break raw sockets */
1003 if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
1004 af == AF_INET)) {
1005 struct sock *sk = skb->sk;
1006 struct inet_sock *inet = inet_sk(skb->sk);
1007
1008 if (inet && sk->sk_family == PF_INET && inet->nodefrag)
1009 return NF_ACCEPT;
1010 }
1011
1012 if (unlikely(!skb_dst(skb)))
1013 return NF_ACCEPT;
1014
1007 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1015 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1008#ifdef CONFIG_IP_VS_IPV6 1016#ifdef CONFIG_IP_VS_IPV6
1009 if (af == AF_INET6) { 1017 if (af == AF_INET6) {
@@ -1106,6 +1114,69 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
1106 return handle_response(af, skb, pp, cp, iph.len); 1114 return handle_response(af, skb, pp, cp, iph.len);
1107} 1115}
1108 1116
1117/*
1118 * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
1119 * Check if packet is reply for established ip_vs_conn.
1120 */
1121static unsigned int
1122ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb,
1123 const struct net_device *in, const struct net_device *out,
1124 int (*okfn)(struct sk_buff *))
1125{
1126 return ip_vs_out(hooknum, skb, AF_INET);
1127}
1128
1129/*
1130 * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT.
1131 * Check if packet is reply for established ip_vs_conn.
1132 */
1133static unsigned int
1134ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,
1135 const struct net_device *in, const struct net_device *out,
1136 int (*okfn)(struct sk_buff *))
1137{
1138 unsigned int verdict;
1139
1140 /* Disable BH in LOCAL_OUT until all places are fixed */
1141 local_bh_disable();
1142 verdict = ip_vs_out(hooknum, skb, AF_INET);
1143 local_bh_enable();
1144 return verdict;
1145}
1146
1147#ifdef CONFIG_IP_VS_IPV6
1148
1149/*
1150 * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
1151 * Check if packet is reply for established ip_vs_conn.
1152 */
1153static unsigned int
1154ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb,
1155 const struct net_device *in, const struct net_device *out,
1156 int (*okfn)(struct sk_buff *))
1157{
1158 return ip_vs_out(hooknum, skb, AF_INET6);
1159}
1160
1161/*
1162 * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT.
1163 * Check if packet is reply for established ip_vs_conn.
1164 */
1165static unsigned int
1166ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
1167 const struct net_device *in, const struct net_device *out,
1168 int (*okfn)(struct sk_buff *))
1169{
1170 unsigned int verdict;
1171
1172 /* Disable BH in LOCAL_OUT until all places are fixed */
1173 local_bh_disable();
1174 verdict = ip_vs_out(hooknum, skb, AF_INET6);
1175 local_bh_enable();
1176 return verdict;
1177}
1178
1179#endif
1109 1180
1110/* 1181/*
1111 * Handle ICMP messages in the outside-to-inside direction (incoming). 1182 * Handle ICMP messages in the outside-to-inside direction (incoming).
@@ -1342,6 +1413,10 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
1342 struct ip_vs_conn *cp; 1413 struct ip_vs_conn *cp;
1343 int ret, restart, af, pkts; 1414 int ret, restart, af, pkts;
1344 1415
1416 /* Already marked as IPVS request or reply? */
1417 if (skb->ipvs_property)
1418 return NF_ACCEPT;
1419
1345 af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; 1420 af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6;
1346 1421
1347 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1422 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
@@ -1525,13 +1600,13 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1525 .hooknum = NF_INET_LOCAL_IN, 1600 .hooknum = NF_INET_LOCAL_IN,
1526 .priority = 100, 1601 .priority = 100,
1527 }, 1602 },
1528 /* After packet filtering, change source only for VS/NAT */ 1603 /* Before ip_vs_in, change source only for VS/NAT */
1529 { 1604 {
1530 .hook = ip_vs_out, 1605 .hook = ip_vs_local_reply4,
1531 .owner = THIS_MODULE, 1606 .owner = THIS_MODULE,
1532 .pf = PF_INET, 1607 .pf = PF_INET,
1533 .hooknum = NF_INET_FORWARD, 1608 .hooknum = NF_INET_LOCAL_OUT,
1534 .priority = 100, 1609 .priority = -99,
1535 }, 1610 },
1536 /* After packet filtering (but before ip_vs_out_icmp), catch icmp 1611 /* After packet filtering (but before ip_vs_out_icmp), catch icmp
1537 * destined for 0.0.0.0/0, which is for incoming IPVS connections */ 1612 * destined for 0.0.0.0/0, which is for incoming IPVS connections */
@@ -1542,6 +1617,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1542 .hooknum = NF_INET_FORWARD, 1617 .hooknum = NF_INET_FORWARD,
1543 .priority = 99, 1618 .priority = 99,
1544 }, 1619 },
1620 /* After packet filtering, change source only for VS/NAT */
1621 {
1622 .hook = ip_vs_reply4,
1623 .owner = THIS_MODULE,
1624 .pf = PF_INET,
1625 .hooknum = NF_INET_FORWARD,
1626 .priority = 100,
1627 },
1545#ifdef CONFIG_IP_VS_IPV6 1628#ifdef CONFIG_IP_VS_IPV6
1546 /* After packet filtering, forward packet through VS/DR, VS/TUN, 1629 /* After packet filtering, forward packet through VS/DR, VS/TUN,
1547 * or VS/NAT(change destination), so that filtering rules can be 1630 * or VS/NAT(change destination), so that filtering rules can be
@@ -1553,13 +1636,13 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1553 .hooknum = NF_INET_LOCAL_IN, 1636 .hooknum = NF_INET_LOCAL_IN,
1554 .priority = 100, 1637 .priority = 100,
1555 }, 1638 },
1556 /* After packet filtering, change source only for VS/NAT */ 1639 /* Before ip_vs_in, change source only for VS/NAT */
1557 { 1640 {
1558 .hook = ip_vs_out, 1641 .hook = ip_vs_local_reply6,
1559 .owner = THIS_MODULE, 1642 .owner = THIS_MODULE,
1560 .pf = PF_INET6, 1643 .pf = PF_INET6,
1561 .hooknum = NF_INET_FORWARD, 1644 .hooknum = NF_INET_LOCAL_OUT,
1562 .priority = 100, 1645 .priority = -99,
1563 }, 1646 },
1564 /* After packet filtering (but before ip_vs_out_icmp), catch icmp 1647 /* After packet filtering (but before ip_vs_out_icmp), catch icmp
1565 * destined for 0.0.0.0/0, which is for incoming IPVS connections */ 1648 * destined for 0.0.0.0/0, which is for incoming IPVS connections */
@@ -1570,6 +1653,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1570 .hooknum = NF_INET_FORWARD, 1653 .hooknum = NF_INET_FORWARD,
1571 .priority = 99, 1654 .priority = 99,
1572 }, 1655 },
1656 /* After packet filtering, change source only for VS/NAT */
1657 {
1658 .hook = ip_vs_reply6,
1659 .owner = THIS_MODULE,
1660 .pf = PF_INET6,
1661 .hooknum = NF_INET_FORWARD,
1662 .priority = 100,
1663 },
1573#endif 1664#endif
1574}; 1665};
1575 1666
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 0b884d3e192..5f5daa30b0a 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -777,20 +777,6 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
777 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK; 777 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
778 conn_flags |= IP_VS_CONN_F_INACTIVE; 778 conn_flags |= IP_VS_CONN_F_INACTIVE;
779 779
780 /* check if local node and update the flags */
781#ifdef CONFIG_IP_VS_IPV6
782 if (svc->af == AF_INET6) {
783 if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
784 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
785 | IP_VS_CONN_F_LOCALNODE;
786 }
787 } else
788#endif
789 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
790 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
791 | IP_VS_CONN_F_LOCALNODE;
792 }
793
794 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ 780 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
795 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) { 781 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
796 conn_flags |= IP_VS_CONN_F_NOOUTPUT; 782 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
@@ -824,6 +810,10 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
824 dest->u_threshold = udest->u_threshold; 810 dest->u_threshold = udest->u_threshold;
825 dest->l_threshold = udest->l_threshold; 811 dest->l_threshold = udest->l_threshold;
826 812
813 spin_lock(&dest->dst_lock);
814 ip_vs_dst_reset(dest);
815 spin_unlock(&dest->dst_lock);
816
827 if (add) 817 if (add)
828 ip_vs_new_estimator(&dest->stats); 818 ip_vs_new_estimator(&dest->stats);
829 819
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 63cc0feaaef..8608882f89e 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -67,12 +67,19 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
67 return dst; 67 return dst;
68} 68}
69 69
70/*
71 * Get route to destination or remote server
72 * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest,
73 * &4=Allow redirect from remote daddr to local
74 */
70static struct rtable * 75static struct rtable *
71__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_conn *cp, u32 rtos) 76__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
77 __be32 daddr, u32 rtos, int rt_mode)
72{ 78{
73 struct net *net = dev_net(skb->dev); 79 struct net *net = dev_net(skb_dst(skb)->dev);
74 struct rtable *rt; /* Route to the other host */ 80 struct rtable *rt; /* Route to the other host */
75 struct ip_vs_dest *dest = cp->dest; 81 struct rtable *ort; /* Original route */
82 int local;
76 83
77 if (dest) { 84 if (dest) {
78 spin_lock(&dest->dst_lock); 85 spin_lock(&dest->dst_lock);
@@ -104,23 +111,95 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_conn *cp, u32 rtos)
104 .oif = 0, 111 .oif = 0,
105 .nl_u = { 112 .nl_u = {
106 .ip4_u = { 113 .ip4_u = {
107 .daddr = cp->daddr.ip, 114 .daddr = daddr,
108 .saddr = 0, 115 .saddr = 0,
109 .tos = rtos, } }, 116 .tos = rtos, } },
110 }; 117 };
111 118
112 if (ip_route_output_key(net, &rt, &fl)) { 119 if (ip_route_output_key(net, &rt, &fl)) {
113 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", 120 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
114 &cp->daddr.ip); 121 &daddr);
115 return NULL; 122 return NULL;
116 } 123 }
117 } 124 }
118 125
126 local = rt->rt_flags & RTCF_LOCAL;
127 if (!((local ? 1 : 2) & rt_mode)) {
128 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
129 (rt->rt_flags & RTCF_LOCAL) ?
130 "local":"non-local", &rt->rt_dst);
131 ip_rt_put(rt);
132 return NULL;
133 }
134 if (local && !(rt_mode & 4) && !((ort = skb_rtable(skb)) &&
135 ort->rt_flags & RTCF_LOCAL)) {
136 IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
137 "requires NAT method, dest: %pI4\n",
138 &ip_hdr(skb)->daddr, &rt->rt_dst);
139 ip_rt_put(rt);
140 return NULL;
141 }
142 if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) {
143 IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 "
144 "to non-local address, dest: %pI4\n",
145 &ip_hdr(skb)->saddr, &rt->rt_dst);
146 ip_rt_put(rt);
147 return NULL;
148 }
149
119 return rt; 150 return rt;
120} 151}
121 152
153/* Reroute packet to local IPv4 stack after DNAT */
154static int
155__ip_vs_reroute_locally(struct sk_buff *skb)
156{
157 struct rtable *rt = skb_rtable(skb);
158 struct net_device *dev = rt->dst.dev;
159 struct net *net = dev_net(dev);
160 struct iphdr *iph = ip_hdr(skb);
161
162 if (rt->fl.iif) {
163 unsigned long orefdst = skb->_skb_refdst;
164
165 if (ip_route_input(skb, iph->daddr, iph->saddr,
166 iph->tos, skb->dev))
167 return 0;
168 refdst_drop(orefdst);
169 } else {
170 struct flowi fl = {
171 .oif = 0,
172 .nl_u = {
173 .ip4_u = {
174 .daddr = iph->daddr,
175 .saddr = iph->saddr,
176 .tos = RT_TOS(iph->tos),
177 }
178 },
179 .mark = skb->mark,
180 };
181 struct rtable *rt;
182
183 if (ip_route_output_key(net, &rt, &fl))
184 return 0;
185 if (!(rt->rt_flags & RTCF_LOCAL)) {
186 ip_rt_put(rt);
187 return 0;
188 }
189 /* Drop old route. */
190 skb_dst_drop(skb);
191 skb_dst_set(skb, &rt->dst);
192 }
193 return 1;
194}
195
122#ifdef CONFIG_IP_VS_IPV6 196#ifdef CONFIG_IP_VS_IPV6
123 197
198static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
199{
200 return rt->rt6i_dev && rt->rt6i_dev->flags & IFF_LOOPBACK;
201}
202
124static struct dst_entry * 203static struct dst_entry *
125__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, 204__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
126 struct in6_addr *ret_saddr, int do_xfrm) 205 struct in6_addr *ret_saddr, int do_xfrm)
@@ -155,14 +234,21 @@ out_err:
155 return NULL; 234 return NULL;
156} 235}
157 236
237/*
238 * Get route to destination or remote server
239 * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest,
240 * &4=Allow redirect from remote daddr to local
241 */
158static struct rt6_info * 242static struct rt6_info *
159__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 243__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
160 struct in6_addr *ret_saddr, int do_xfrm) 244 struct in6_addr *daddr, struct in6_addr *ret_saddr,
245 int do_xfrm, int rt_mode)
161{ 246{
162 struct net *net = dev_net(skb->dev); 247 struct net *net = dev_net(skb_dst(skb)->dev);
163 struct rt6_info *rt; /* Route to the other host */ 248 struct rt6_info *rt; /* Route to the other host */
164 struct ip_vs_dest *dest = cp->dest; 249 struct rt6_info *ort; /* Original route */
165 struct dst_entry *dst; 250 struct dst_entry *dst;
251 int local;
166 252
167 if (dest) { 253 if (dest) {
168 spin_lock(&dest->dst_lock); 254 spin_lock(&dest->dst_lock);
@@ -188,13 +274,38 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
188 ipv6_addr_copy(ret_saddr, &dest->dst_saddr); 274 ipv6_addr_copy(ret_saddr, &dest->dst_saddr);
189 spin_unlock(&dest->dst_lock); 275 spin_unlock(&dest->dst_lock);
190 } else { 276 } else {
191 dst = __ip_vs_route_output_v6(net, &cp->daddr.in6, ret_saddr, 277 dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
192 do_xfrm);
193 if (!dst) 278 if (!dst)
194 return NULL; 279 return NULL;
195 rt = (struct rt6_info *) dst; 280 rt = (struct rt6_info *) dst;
196 } 281 }
197 282
283 local = __ip_vs_is_local_route6(rt);
284 if (!((local ? 1 : 2) & rt_mode)) {
285 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n",
286 local ? "local":"non-local", daddr);
287 dst_release(&rt->dst);
288 return NULL;
289 }
290 if (local && !(rt_mode & 4) &&
291 !((ort = (struct rt6_info *) skb_dst(skb)) &&
292 __ip_vs_is_local_route6(ort))) {
293 IP_VS_DBG_RL("Redirect from non-local address %pI6 to local "
294 "requires NAT method, dest: %pI6\n",
295 &ipv6_hdr(skb)->daddr, daddr);
296 dst_release(&rt->dst);
297 return NULL;
298 }
299 if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
300 ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
301 IPV6_ADDR_LOOPBACK)) {
302 IP_VS_DBG_RL("Stopping traffic from loopback address %pI6 "
303 "to non-local address, dest: %pI6\n",
304 &ipv6_hdr(skb)->saddr, daddr);
305 dst_release(&rt->dst);
306 return NULL;
307 }
308
198 return rt; 309 return rt;
199} 310}
200#endif 311#endif
@@ -227,23 +338,27 @@ ip_vs_dst_reset(struct ip_vs_dest *dest)
227 __ret; \ 338 __ret; \
228}) 339})
229 340
230#define IP_VS_XMIT_NAT(pf, skb, cp) \ 341#define IP_VS_XMIT_NAT(pf, skb, cp, local) \
231do { \ 342do { \
232 (skb)->ipvs_property = 1; \ 343 (skb)->ipvs_property = 1; \
233 if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ 344 if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \
234 ip_vs_notrack(skb); \ 345 ip_vs_notrack(skb); \
235 else \ 346 else \
236 ip_vs_update_conntrack(skb, cp, 1); \ 347 ip_vs_update_conntrack(skb, cp, 1); \
348 if (local) \
349 return NF_ACCEPT; \
237 skb_forward_csum(skb); \ 350 skb_forward_csum(skb); \
238 NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ 351 NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \
239 skb_dst(skb)->dev, dst_output); \ 352 skb_dst(skb)->dev, dst_output); \
240} while (0) 353} while (0)
241 354
242#define IP_VS_XMIT(pf, skb, cp) \ 355#define IP_VS_XMIT(pf, skb, cp, local) \
243do { \ 356do { \
244 (skb)->ipvs_property = 1; \ 357 (skb)->ipvs_property = 1; \
245 if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ 358 if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \
246 ip_vs_notrack(skb); \ 359 ip_vs_notrack(skb); \
360 if (local) \
361 return NF_ACCEPT; \
247 skb_forward_csum(skb); \ 362 skb_forward_csum(skb); \
248 NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ 363 NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \
249 skb_dst(skb)->dev, dst_output); \ 364 skb_dst(skb)->dev, dst_output); \
@@ -258,7 +373,7 @@ ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
258 struct ip_vs_protocol *pp) 373 struct ip_vs_protocol *pp)
259{ 374{
260 /* we do not touch skb and do not need pskb ptr */ 375 /* we do not touch skb and do not need pskb ptr */
261 return NF_ACCEPT; 376 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
262} 377}
263 378
264 379
@@ -271,27 +386,15 @@ int
271ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 386ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
272 struct ip_vs_protocol *pp) 387 struct ip_vs_protocol *pp)
273{ 388{
274 struct net *net = dev_net(skb->dev);
275 struct rtable *rt; /* Route to the other host */ 389 struct rtable *rt; /* Route to the other host */
276 struct iphdr *iph = ip_hdr(skb); 390 struct iphdr *iph = ip_hdr(skb);
277 u8 tos = iph->tos;
278 int mtu; 391 int mtu;
279 struct flowi fl = {
280 .oif = 0,
281 .nl_u = {
282 .ip4_u = {
283 .daddr = iph->daddr,
284 .saddr = 0,
285 .tos = RT_TOS(tos), } },
286 };
287 392
288 EnterFunction(10); 393 EnterFunction(10);
289 394
290 if (ip_route_output_key(net, &rt, &fl)) { 395 if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr,
291 IP_VS_DBG_RL("%s(): ip_route_output error, dest: %pI4\n", 396 RT_TOS(iph->tos), 2)))
292 __func__, &iph->daddr);
293 goto tx_error_icmp; 397 goto tx_error_icmp;
294 }
295 398
296 /* MTU checking */ 399 /* MTU checking */
297 mtu = dst_mtu(&rt->dst); 400 mtu = dst_mtu(&rt->dst);
@@ -319,7 +422,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
319 /* Another hack: avoid icmp_send in ip_fragment */ 422 /* Another hack: avoid icmp_send in ip_fragment */
320 skb->local_df = 1; 423 skb->local_df = 1;
321 424
322 IP_VS_XMIT(NFPROTO_IPV4, skb, cp); 425 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
323 426
324 LeaveFunction(10); 427 LeaveFunction(10);
325 return NF_STOLEN; 428 return NF_STOLEN;
@@ -337,18 +440,14 @@ int
337ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 440ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
338 struct ip_vs_protocol *pp) 441 struct ip_vs_protocol *pp)
339{ 442{
340 struct net *net = dev_net(skb->dev);
341 struct dst_entry *dst;
342 struct rt6_info *rt; /* Route to the other host */ 443 struct rt6_info *rt; /* Route to the other host */
343 struct ipv6hdr *iph = ipv6_hdr(skb); 444 struct ipv6hdr *iph = ipv6_hdr(skb);
344 int mtu; 445 int mtu;
345 446
346 EnterFunction(10); 447 EnterFunction(10);
347 448
348 dst = __ip_vs_route_output_v6(net, &iph->daddr, NULL, 0); 449 if (!(rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr, NULL, 0, 2)))
349 if (!dst)
350 goto tx_error_icmp; 450 goto tx_error_icmp;
351 rt = (struct rt6_info *) dst;
352 451
353 /* MTU checking */ 452 /* MTU checking */
354 mtu = dst_mtu(&rt->dst); 453 mtu = dst_mtu(&rt->dst);
@@ -376,7 +475,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
376 /* Another hack: avoid icmp_send in ip_fragment */ 475 /* Another hack: avoid icmp_send in ip_fragment */
377 skb->local_df = 1; 476 skb->local_df = 1;
378 477
379 IP_VS_XMIT(NFPROTO_IPV6, skb, cp); 478 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
380 479
381 LeaveFunction(10); 480 LeaveFunction(10);
382 return NF_STOLEN; 481 return NF_STOLEN;
@@ -401,6 +500,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
401 struct rtable *rt; /* Route to the other host */ 500 struct rtable *rt; /* Route to the other host */
402 int mtu; 501 int mtu;
403 struct iphdr *iph = ip_hdr(skb); 502 struct iphdr *iph = ip_hdr(skb);
503 int local;
404 504
405 EnterFunction(10); 505 EnterFunction(10);
406 506
@@ -414,16 +514,40 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
414 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); 514 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
415 } 515 }
416 516
417 if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos)))) 517 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
518 RT_TOS(iph->tos), 1|2|4)))
418 goto tx_error_icmp; 519 goto tx_error_icmp;
520 local = rt->rt_flags & RTCF_LOCAL;
521 /*
522 * Avoid duplicate tuple in reply direction for NAT traffic
523 * to local address when connection is sync-ed
524 */
525#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
526 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
527 enum ip_conntrack_info ctinfo;
528 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
529
530 if (ct && !nf_ct_is_untracked(ct)) {
531 IP_VS_DBG_RL_PKT(10, pp, skb, 0, "ip_vs_nat_xmit(): "
532 "stopping DNAT to local address");
533 goto tx_error_put;
534 }
535 }
536#endif
537
538 /* From world but DNAT to loopback address? */
539 if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) {
540 IP_VS_DBG_RL_PKT(1, pp, skb, 0, "ip_vs_nat_xmit(): "
541 "stopping DNAT to loopback address");
542 goto tx_error_put;
543 }
419 544
420 /* MTU checking */ 545 /* MTU checking */
421 mtu = dst_mtu(&rt->dst); 546 mtu = dst_mtu(&rt->dst);
422 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { 547 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
423 ip_rt_put(rt);
424 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 548 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
425 IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for"); 549 IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
426 goto tx_error; 550 goto tx_error_put;
427 } 551 }
428 552
429 /* copy-on-write the packet before mangling it */ 553 /* copy-on-write the packet before mangling it */
@@ -433,16 +557,27 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
433 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 557 if (skb_cow(skb, rt->dst.dev->hard_header_len))
434 goto tx_error_put; 558 goto tx_error_put;
435 559
436 /* drop old route */
437 skb_dst_drop(skb);
438 skb_dst_set(skb, &rt->dst);
439
440 /* mangle the packet */ 560 /* mangle the packet */
441 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) 561 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
442 goto tx_error; 562 goto tx_error_put;
443 ip_hdr(skb)->daddr = cp->daddr.ip; 563 ip_hdr(skb)->daddr = cp->daddr.ip;
444 ip_send_check(ip_hdr(skb)); 564 ip_send_check(ip_hdr(skb));
445 565
566 if (!local) {
567 /* drop old route */
568 skb_dst_drop(skb);
569 skb_dst_set(skb, &rt->dst);
570 } else {
571 ip_rt_put(rt);
572 /*
573 * Some IPv4 replies get local address from routes,
574 * not from iph, so while we DNAT after routing
575 * we need this second input/output route.
576 */
577 if (!__ip_vs_reroute_locally(skb))
578 goto tx_error;
579 }
580
446 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); 581 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
447 582
448 /* FIXME: when application helper enlarges the packet and the length 583 /* FIXME: when application helper enlarges the packet and the length
@@ -452,7 +587,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
452 /* Another hack: avoid icmp_send in ip_fragment */ 587 /* Another hack: avoid icmp_send in ip_fragment */
453 skb->local_df = 1; 588 skb->local_df = 1;
454 589
455 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp); 590 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
456 591
457 LeaveFunction(10); 592 LeaveFunction(10);
458 return NF_STOLEN; 593 return NF_STOLEN;
@@ -475,6 +610,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
475{ 610{
476 struct rt6_info *rt; /* Route to the other host */ 611 struct rt6_info *rt; /* Route to the other host */
477 int mtu; 612 int mtu;
613 int local;
478 614
479 EnterFunction(10); 615 EnterFunction(10);
480 616
@@ -489,18 +625,44 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
489 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); 625 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
490 } 626 }
491 627
492 rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0); 628 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
493 if (!rt) 629 0, 1|2|4)))
494 goto tx_error_icmp; 630 goto tx_error_icmp;
631 local = __ip_vs_is_local_route6(rt);
632 /*
633 * Avoid duplicate tuple in reply direction for NAT traffic
634 * to local address when connection is sync-ed
635 */
636#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
637 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
638 enum ip_conntrack_info ctinfo;
639 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
640
641 if (ct && !nf_ct_is_untracked(ct)) {
642 IP_VS_DBG_RL_PKT(10, pp, skb, 0,
643 "ip_vs_nat_xmit_v6(): "
644 "stopping DNAT to local address");
645 goto tx_error_put;
646 }
647 }
648#endif
649
650 /* From world but DNAT to loopback address? */
651 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
652 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
653 IP_VS_DBG_RL_PKT(1, pp, skb, 0,
654 "ip_vs_nat_xmit_v6(): "
655 "stopping DNAT to loopback address");
656 goto tx_error_put;
657 }
495 658
496 /* MTU checking */ 659 /* MTU checking */
497 mtu = dst_mtu(&rt->dst); 660 mtu = dst_mtu(&rt->dst);
498 if (skb->len > mtu) { 661 if (skb->len > mtu) {
499 dst_release(&rt->dst);
500 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 662 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
501 IP_VS_DBG_RL_PKT(0, pp, skb, 0, 663 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
502 "ip_vs_nat_xmit_v6(): frag needed for"); 664 "ip_vs_nat_xmit_v6(): frag needed for");
503 goto tx_error; 665 goto tx_error_put;
504 } 666 }
505 667
506 /* copy-on-write the packet before mangling it */ 668 /* copy-on-write the packet before mangling it */
@@ -510,14 +672,19 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
510 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 672 if (skb_cow(skb, rt->dst.dev->hard_header_len))
511 goto tx_error_put; 673 goto tx_error_put;
512 674
513 /* drop old route */
514 skb_dst_drop(skb);
515 skb_dst_set(skb, &rt->dst);
516
517 /* mangle the packet */ 675 /* mangle the packet */
518 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) 676 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
519 goto tx_error; 677 goto tx_error;
520 ipv6_hdr(skb)->daddr = cp->daddr.in6; 678 ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &cp->daddr.in6);
679
680 if (!local || !skb->dev) {
681 /* drop the old route when skb is not shared */
682 skb_dst_drop(skb);
683 skb_dst_set(skb, &rt->dst);
684 } else {
685 /* destined to loopback, do we need to change route? */
686 dst_release(&rt->dst);
687 }
521 688
522 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); 689 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
523 690
@@ -528,7 +695,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
528 /* Another hack: avoid icmp_send in ip_fragment */ 695 /* Another hack: avoid icmp_send in ip_fragment */
529 skb->local_df = 1; 696 skb->local_df = 1;
530 697
531 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp); 698 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
532 699
533 LeaveFunction(10); 700 LeaveFunction(10);
534 return NF_STOLEN; 701 return NF_STOLEN;
@@ -588,16 +755,20 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
588 goto tx_error; 755 goto tx_error;
589 } 756 }
590 757
591 if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(tos)))) 758 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
759 RT_TOS(tos), 1|2)))
592 goto tx_error_icmp; 760 goto tx_error_icmp;
761 if (rt->rt_flags & RTCF_LOCAL) {
762 ip_rt_put(rt);
763 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
764 }
593 765
594 tdev = rt->dst.dev; 766 tdev = rt->dst.dev;
595 767
596 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); 768 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
597 if (mtu < 68) { 769 if (mtu < 68) {
598 ip_rt_put(rt);
599 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); 770 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
600 goto tx_error; 771 goto tx_error_put;
601 } 772 }
602 if (skb_dst(skb)) 773 if (skb_dst(skb))
603 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); 774 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
@@ -607,9 +778,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
607 if ((old_iph->frag_off & htons(IP_DF)) 778 if ((old_iph->frag_off & htons(IP_DF))
608 && mtu < ntohs(old_iph->tot_len)) { 779 && mtu < ntohs(old_iph->tot_len)) {
609 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 780 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
610 ip_rt_put(rt);
611 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 781 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
612 goto tx_error; 782 goto tx_error_put;
613 } 783 }
614 784
615 /* 785 /*
@@ -678,6 +848,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
678 kfree_skb(skb); 848 kfree_skb(skb);
679 LeaveFunction(10); 849 LeaveFunction(10);
680 return NF_STOLEN; 850 return NF_STOLEN;
851tx_error_put:
852 ip_rt_put(rt);
853 goto tx_error;
681} 854}
682 855
683#ifdef CONFIG_IP_VS_IPV6 856#ifdef CONFIG_IP_VS_IPV6
@@ -703,27 +876,29 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
703 goto tx_error; 876 goto tx_error;
704 } 877 }
705 878
706 rt = __ip_vs_get_out_rt_v6(skb, cp, &saddr, 1); 879 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
707 if (!rt) 880 &saddr, 1, 1|2)))
708 goto tx_error_icmp; 881 goto tx_error_icmp;
882 if (__ip_vs_is_local_route6(rt)) {
883 dst_release(&rt->dst);
884 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
885 }
709 886
710 tdev = rt->dst.dev; 887 tdev = rt->dst.dev;
711 888
712 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); 889 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
713 if (mtu < IPV6_MIN_MTU) { 890 if (mtu < IPV6_MIN_MTU) {
714 dst_release(&rt->dst);
715 IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, 891 IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
716 IPV6_MIN_MTU); 892 IPV6_MIN_MTU);
717 goto tx_error; 893 goto tx_error_put;
718 } 894 }
719 if (skb_dst(skb)) 895 if (skb_dst(skb))
720 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); 896 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
721 897
722 if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { 898 if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
723 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 899 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
724 dst_release(&rt->dst);
725 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 900 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
726 goto tx_error; 901 goto tx_error_put;
727 } 902 }
728 903
729 /* 904 /*
@@ -789,6 +964,9 @@ tx_error:
789 kfree_skb(skb); 964 kfree_skb(skb);
790 LeaveFunction(10); 965 LeaveFunction(10);
791 return NF_STOLEN; 966 return NF_STOLEN;
967tx_error_put:
968 dst_release(&rt->dst);
969 goto tx_error;
792} 970}
793#endif 971#endif
794 972
@@ -807,8 +985,13 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
807 985
808 EnterFunction(10); 986 EnterFunction(10);
809 987
810 if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos)))) 988 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
989 RT_TOS(iph->tos), 1|2)))
811 goto tx_error_icmp; 990 goto tx_error_icmp;
991 if (rt->rt_flags & RTCF_LOCAL) {
992 ip_rt_put(rt);
993 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
994 }
812 995
813 /* MTU checking */ 996 /* MTU checking */
814 mtu = dst_mtu(&rt->dst); 997 mtu = dst_mtu(&rt->dst);
@@ -836,7 +1019,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
836 /* Another hack: avoid icmp_send in ip_fragment */ 1019 /* Another hack: avoid icmp_send in ip_fragment */
837 skb->local_df = 1; 1020 skb->local_df = 1;
838 1021
839 IP_VS_XMIT(NFPROTO_IPV4, skb, cp); 1022 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
840 1023
841 LeaveFunction(10); 1024 LeaveFunction(10);
842 return NF_STOLEN; 1025 return NF_STOLEN;
@@ -859,9 +1042,13 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
859 1042
860 EnterFunction(10); 1043 EnterFunction(10);
861 1044
862 rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0); 1045 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
863 if (!rt) 1046 0, 1|2)))
864 goto tx_error_icmp; 1047 goto tx_error_icmp;
1048 if (__ip_vs_is_local_route6(rt)) {
1049 dst_release(&rt->dst);
1050 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
1051 }
865 1052
866 /* MTU checking */ 1053 /* MTU checking */
867 mtu = dst_mtu(&rt->dst); 1054 mtu = dst_mtu(&rt->dst);
@@ -889,7 +1076,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
889 /* Another hack: avoid icmp_send in ip_fragment */ 1076 /* Another hack: avoid icmp_send in ip_fragment */
890 skb->local_df = 1; 1077 skb->local_df = 1;
891 1078
892 IP_VS_XMIT(NFPROTO_IPV6, skb, cp); 1079 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
893 1080
894 LeaveFunction(10); 1081 LeaveFunction(10);
895 return NF_STOLEN; 1082 return NF_STOLEN;
@@ -915,6 +1102,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
915 struct rtable *rt; /* Route to the other host */ 1102 struct rtable *rt; /* Route to the other host */
916 int mtu; 1103 int mtu;
917 int rc; 1104 int rc;
1105 int local;
918 1106
919 EnterFunction(10); 1107 EnterFunction(10);
920 1108
@@ -935,16 +1123,43 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
935 * mangle and send the packet here (only for VS/NAT) 1123 * mangle and send the packet here (only for VS/NAT)
936 */ 1124 */
937 1125
938 if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(ip_hdr(skb)->tos)))) 1126 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
1127 RT_TOS(ip_hdr(skb)->tos), 1|2|4)))
939 goto tx_error_icmp; 1128 goto tx_error_icmp;
1129 local = rt->rt_flags & RTCF_LOCAL;
1130
1131 /*
1132 * Avoid duplicate tuple in reply direction for NAT traffic
1133 * to local address when connection is sync-ed
1134 */
1135#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
1136 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1137 enum ip_conntrack_info ctinfo;
1138 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
1139
1140 if (ct && !nf_ct_is_untracked(ct)) {
1141 IP_VS_DBG(10, "%s(): "
1142 "stopping DNAT to local address %pI4\n",
1143 __func__, &cp->daddr.ip);
1144 goto tx_error_put;
1145 }
1146 }
1147#endif
1148
1149 /* From world but DNAT to loopback address? */
1150 if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) {
1151 IP_VS_DBG(1, "%s(): "
1152 "stopping DNAT to loopback %pI4\n",
1153 __func__, &cp->daddr.ip);
1154 goto tx_error_put;
1155 }
940 1156
941 /* MTU checking */ 1157 /* MTU checking */
942 mtu = dst_mtu(&rt->dst); 1158 mtu = dst_mtu(&rt->dst);
943 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { 1159 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
944 ip_rt_put(rt);
945 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 1160 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
946 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1161 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
947 goto tx_error; 1162 goto tx_error_put;
948 } 1163 }
949 1164
950 /* copy-on-write the packet before mangling it */ 1165 /* copy-on-write the packet before mangling it */
@@ -954,16 +1169,27 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
954 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 1169 if (skb_cow(skb, rt->dst.dev->hard_header_len))
955 goto tx_error_put; 1170 goto tx_error_put;
956 1171
957 /* drop the old route when skb is not shared */
958 skb_dst_drop(skb);
959 skb_dst_set(skb, &rt->dst);
960
961 ip_vs_nat_icmp(skb, pp, cp, 0); 1172 ip_vs_nat_icmp(skb, pp, cp, 0);
962 1173
1174 if (!local) {
1175 /* drop the old route when skb is not shared */
1176 skb_dst_drop(skb);
1177 skb_dst_set(skb, &rt->dst);
1178 } else {
1179 ip_rt_put(rt);
1180 /*
1181 * Some IPv4 replies get local address from routes,
1182 * not from iph, so while we DNAT after routing
1183 * we need this second input/output route.
1184 */
1185 if (!__ip_vs_reroute_locally(skb))
1186 goto tx_error;
1187 }
1188
963 /* Another hack: avoid icmp_send in ip_fragment */ 1189 /* Another hack: avoid icmp_send in ip_fragment */
964 skb->local_df = 1; 1190 skb->local_df = 1;
965 1191
966 IP_VS_XMIT(NFPROTO_IPV4, skb, cp); 1192 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
967 1193
968 rc = NF_STOLEN; 1194 rc = NF_STOLEN;
969 goto out; 1195 goto out;
@@ -989,6 +1215,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
989 struct rt6_info *rt; /* Route to the other host */ 1215 struct rt6_info *rt; /* Route to the other host */
990 int mtu; 1216 int mtu;
991 int rc; 1217 int rc;
1218 int local;
992 1219
993 EnterFunction(10); 1220 EnterFunction(10);
994 1221
@@ -1009,17 +1236,44 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1009 * mangle and send the packet here (only for VS/NAT) 1236 * mangle and send the packet here (only for VS/NAT)
1010 */ 1237 */
1011 1238
1012 rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0); 1239 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
1013 if (!rt) 1240 0, 1|2|4)))
1014 goto tx_error_icmp; 1241 goto tx_error_icmp;
1015 1242
1243 local = __ip_vs_is_local_route6(rt);
1244 /*
1245 * Avoid duplicate tuple in reply direction for NAT traffic
1246 * to local address when connection is sync-ed
1247 */
1248#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
1249 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1250 enum ip_conntrack_info ctinfo;
1251 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
1252
1253 if (ct && !nf_ct_is_untracked(ct)) {
1254 IP_VS_DBG(10, "%s(): "
1255 "stopping DNAT to local address %pI6\n",
1256 __func__, &cp->daddr.in6);
1257 goto tx_error_put;
1258 }
1259 }
1260#endif
1261
1262 /* From world but DNAT to loopback address? */
1263 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
1264 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
1265 IP_VS_DBG(1, "%s(): "
1266 "stopping DNAT to loopback %pI6\n",
1267 __func__, &cp->daddr.in6);
1268 goto tx_error_put;
1269 }
1270
1016 /* MTU checking */ 1271 /* MTU checking */
1017 mtu = dst_mtu(&rt->dst); 1272 mtu = dst_mtu(&rt->dst);
1018 if (skb->len > mtu) { 1273 if (skb->len > mtu) {
1019 dst_release(&rt->dst);
1020 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1274 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1021 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1275 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1022 goto tx_error; 1276 goto tx_error_put;
1023 } 1277 }
1024 1278
1025 /* copy-on-write the packet before mangling it */ 1279 /* copy-on-write the packet before mangling it */
@@ -1029,16 +1283,21 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1029 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 1283 if (skb_cow(skb, rt->dst.dev->hard_header_len))
1030 goto tx_error_put; 1284 goto tx_error_put;
1031 1285
1032 /* drop the old route when skb is not shared */
1033 skb_dst_drop(skb);
1034 skb_dst_set(skb, &rt->dst);
1035
1036 ip_vs_nat_icmp_v6(skb, pp, cp, 0); 1286 ip_vs_nat_icmp_v6(skb, pp, cp, 0);
1037 1287
1288 if (!local || !skb->dev) {
1289 /* drop the old route when skb is not shared */
1290 skb_dst_drop(skb);
1291 skb_dst_set(skb, &rt->dst);
1292 } else {
1293 /* destined to loopback, do we need to change route? */
1294 dst_release(&rt->dst);
1295 }
1296
1038 /* Another hack: avoid icmp_send in ip_fragment */ 1297 /* Another hack: avoid icmp_send in ip_fragment */
1039 skb->local_df = 1; 1298 skb->local_df = 1;
1040 1299
1041 IP_VS_XMIT(NFPROTO_IPV6, skb, cp); 1300 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
1042 1301
1043 rc = NF_STOLEN; 1302 rc = NF_STOLEN;
1044 goto out; 1303 goto out;