aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- include/net/ip_vs.h 1
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c 123
-rw-r--r-- net/netfilter/ipvs/ip_vs_ctl.c 18
-rw-r--r-- net/netfilter/ipvs/ip_vs_xmit.c 433
4 files changed, 458 insertions, 117 deletions
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 9d5c1b96530..2f88d594233 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -409,6 +409,7 @@ struct ip_vs_conn {
409 /* packet transmitter for different forwarding methods. If it 409 /* packet transmitter for different forwarding methods. If it
410 mangles the packet, it must return NF_DROP or better NF_STOLEN, 410 mangles the packet, it must return NF_DROP or better NF_STOLEN,
411 otherwise this must be changed to a sk_buff **. 411 otherwise this must be changed to a sk_buff **.
412 NF_ACCEPT can be returned when destination is local.
412 */ 413 */
413 int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp, 414 int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp,
414 struct ip_vs_protocol *pp); 415 struct ip_vs_protocol *pp);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index c4f091d5a62..a6c8aff1b47 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -984,26 +984,34 @@ drop:
984} 984}
985 985
986/* 986/*
987 * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
988 * Check if outgoing packet belongs to the established ip_vs_conn. 987 * Check if outgoing packet belongs to the established ip_vs_conn.
989 */ 988 */
990static unsigned int 989static unsigned int
991ip_vs_out(unsigned int hooknum, struct sk_buff *skb, 990ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
992 const struct net_device *in, const struct net_device *out,
993 int (*okfn)(struct sk_buff *))
994{ 991{
995 struct ip_vs_iphdr iph; 992 struct ip_vs_iphdr iph;
996 struct ip_vs_protocol *pp; 993 struct ip_vs_protocol *pp;
997 struct ip_vs_conn *cp; 994 struct ip_vs_conn *cp;
998 int af;
999 995
1000 EnterFunction(11); 996 EnterFunction(11);
1001 997
1002 af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; 998 /* Already marked as IPVS request or reply? */
1003
1004 if (skb->ipvs_property) 999 if (skb->ipvs_property)
1005 return NF_ACCEPT; 1000 return NF_ACCEPT;
1006 1001
1002 /* Bad... Do not break raw sockets */
1003 if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
1004 af == AF_INET)) {
1005 struct sock *sk = skb->sk;
1006 struct inet_sock *inet = inet_sk(skb->sk);
1007
1008 if (inet && sk->sk_family == PF_INET && inet->nodefrag)
1009 return NF_ACCEPT;
1010 }
1011
1012 if (unlikely(!skb_dst(skb)))
1013 return NF_ACCEPT;
1014
1007 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1015 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1008#ifdef CONFIG_IP_VS_IPV6 1016#ifdef CONFIG_IP_VS_IPV6
1009 if (af == AF_INET6) { 1017 if (af == AF_INET6) {
@@ -1106,6 +1114,69 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
1106 return handle_response(af, skb, pp, cp, iph.len); 1114 return handle_response(af, skb, pp, cp, iph.len);
1107} 1115}
1108 1116
1117/*
1118 * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
1119 * Check if packet is reply for established ip_vs_conn.
1120 */
1121static unsigned int
1122ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb,
1123 const struct net_device *in, const struct net_device *out,
1124 int (*okfn)(struct sk_buff *))
1125{
1126 return ip_vs_out(hooknum, skb, AF_INET);
1127}
1128
1129/*
1130 * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT.
1131 * Check if packet is reply for established ip_vs_conn.
1132 */
1133static unsigned int
1134ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,
1135 const struct net_device *in, const struct net_device *out,
1136 int (*okfn)(struct sk_buff *))
1137{
1138 unsigned int verdict;
1139
1140 /* Disable BH in LOCAL_OUT until all places are fixed */
1141 local_bh_disable();
1142 verdict = ip_vs_out(hooknum, skb, AF_INET);
1143 local_bh_enable();
1144 return verdict;
1145}
1146
1147#ifdef CONFIG_IP_VS_IPV6
1148
1149/*
1150 * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
1151 * Check if packet is reply for established ip_vs_conn.
1152 */
1153static unsigned int
1154ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb,
1155 const struct net_device *in, const struct net_device *out,
1156 int (*okfn)(struct sk_buff *))
1157{
1158 return ip_vs_out(hooknum, skb, AF_INET6);
1159}
1160
1161/*
1162 * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT.
1163 * Check if packet is reply for established ip_vs_conn.
1164 */
1165static unsigned int
1166ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
1167 const struct net_device *in, const struct net_device *out,
1168 int (*okfn)(struct sk_buff *))
1169{
1170 unsigned int verdict;
1171
1172 /* Disable BH in LOCAL_OUT until all places are fixed */
1173 local_bh_disable();
1174 verdict = ip_vs_out(hooknum, skb, AF_INET6);
1175 local_bh_enable();
1176 return verdict;
1177}
1178
1179#endif
1109 1180
1110/* 1181/*
1111 * Handle ICMP messages in the outside-to-inside direction (incoming). 1182 * Handle ICMP messages in the outside-to-inside direction (incoming).
@@ -1342,6 +1413,10 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
1342 struct ip_vs_conn *cp; 1413 struct ip_vs_conn *cp;
1343 int ret, restart, af, pkts; 1414 int ret, restart, af, pkts;
1344 1415
1416 /* Already marked as IPVS request or reply? */
1417 if (skb->ipvs_property)
1418 return NF_ACCEPT;
1419
1345 af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; 1420 af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6;
1346 1421
1347 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1422 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
@@ -1525,13 +1600,13 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1525 .hooknum = NF_INET_LOCAL_IN, 1600 .hooknum = NF_INET_LOCAL_IN,
1526 .priority = 100, 1601 .priority = 100,
1527 }, 1602 },
1528 /* After packet filtering, change source only for VS/NAT */ 1603 /* Before ip_vs_in, change source only for VS/NAT */
1529 { 1604 {
1530 .hook = ip_vs_out, 1605 .hook = ip_vs_local_reply4,
1531 .owner = THIS_MODULE, 1606 .owner = THIS_MODULE,
1532 .pf = PF_INET, 1607 .pf = PF_INET,
1533 .hooknum = NF_INET_FORWARD, 1608 .hooknum = NF_INET_LOCAL_OUT,
1534 .priority = 100, 1609 .priority = -99,
1535 }, 1610 },
1536 /* After packet filtering (but before ip_vs_out_icmp), catch icmp 1611 /* After packet filtering (but before ip_vs_out_icmp), catch icmp
1537 * destined for 0.0.0.0/0, which is for incoming IPVS connections */ 1612 * destined for 0.0.0.0/0, which is for incoming IPVS connections */
@@ -1542,6 +1617,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1542 .hooknum = NF_INET_FORWARD, 1617 .hooknum = NF_INET_FORWARD,
1543 .priority = 99, 1618 .priority = 99,
1544 }, 1619 },
1620 /* After packet filtering, change source only for VS/NAT */
1621 {
1622 .hook = ip_vs_reply4,
1623 .owner = THIS_MODULE,
1624 .pf = PF_INET,
1625 .hooknum = NF_INET_FORWARD,
1626 .priority = 100,
1627 },
1545#ifdef CONFIG_IP_VS_IPV6 1628#ifdef CONFIG_IP_VS_IPV6
1546 /* After packet filtering, forward packet through VS/DR, VS/TUN, 1629 /* After packet filtering, forward packet through VS/DR, VS/TUN,
1547 * or VS/NAT(change destination), so that filtering rules can be 1630 * or VS/NAT(change destination), so that filtering rules can be
@@ -1553,13 +1636,13 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1553 .hooknum = NF_INET_LOCAL_IN, 1636 .hooknum = NF_INET_LOCAL_IN,
1554 .priority = 100, 1637 .priority = 100,
1555 }, 1638 },
1556 /* After packet filtering, change source only for VS/NAT */ 1639 /* Before ip_vs_in, change source only for VS/NAT */
1557 { 1640 {
1558 .hook = ip_vs_out, 1641 .hook = ip_vs_local_reply6,
1559 .owner = THIS_MODULE, 1642 .owner = THIS_MODULE,
1560 .pf = PF_INET6, 1643 .pf = PF_INET,
1561 .hooknum = NF_INET_FORWARD, 1644 .hooknum = NF_INET_LOCAL_OUT,
1562 .priority = 100, 1645 .priority = -99,
1563 }, 1646 },
1564 /* After packet filtering (but before ip_vs_out_icmp), catch icmp 1647 /* After packet filtering (but before ip_vs_out_icmp), catch icmp
1565 * destined for 0.0.0.0/0, which is for incoming IPVS connections */ 1648 * destined for 0.0.0.0/0, which is for incoming IPVS connections */
@@ -1570,6 +1653,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1570 .hooknum = NF_INET_FORWARD, 1653 .hooknum = NF_INET_FORWARD,
1571 .priority = 99, 1654 .priority = 99,
1572 }, 1655 },
1656 /* After packet filtering, change source only for VS/NAT */
1657 {
1658 .hook = ip_vs_reply6,
1659 .owner = THIS_MODULE,
1660 .pf = PF_INET6,
1661 .hooknum = NF_INET_FORWARD,
1662 .priority = 100,
1663 },
1573#endif 1664#endif
1574}; 1665};
1575 1666
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 0b884d3e192..5f5daa30b0a 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -777,20 +777,6 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
777 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK; 777 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
778 conn_flags |= IP_VS_CONN_F_INACTIVE; 778 conn_flags |= IP_VS_CONN_F_INACTIVE;
779 779
780 /* check if local node and update the flags */
781#ifdef CONFIG_IP_VS_IPV6
782 if (svc->af == AF_INET6) {
783 if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
784 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
785 | IP_VS_CONN_F_LOCALNODE;
786 }
787 } else
788#endif
789 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
790 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
791 | IP_VS_CONN_F_LOCALNODE;
792 }
793
794 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ 780 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
795 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) { 781 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
796 conn_flags |= IP_VS_CONN_F_NOOUTPUT; 782 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
@@ -824,6 +810,10 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
824 dest->u_threshold = udest->u_threshold; 810 dest->u_threshold = udest->u_threshold;
825 dest->l_threshold = udest->l_threshold; 811 dest->l_threshold = udest->l_threshold;
826 812
813 spin_lock(&dest->dst_lock);
814 ip_vs_dst_reset(dest);
815 spin_unlock(&dest->dst_lock);
816
827 if (add) 817 if (add)
828 ip_vs_new_estimator(&dest->stats); 818 ip_vs_new_estimator(&dest->stats);
829 819
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 63cc0feaaef..8608882f89e 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -67,12 +67,19 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
67 return dst; 67 return dst;
68} 68}
69 69
70/*
71 * Get route to destination or remote server
72 * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest,
73 * &4=Allow redirect from remote daddr to local
74 */
70static struct rtable * 75static struct rtable *
71__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_conn *cp, u32 rtos) 76__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
77 __be32 daddr, u32 rtos, int rt_mode)
72{ 78{
73 struct net *net = dev_net(skb->dev); 79 struct net *net = dev_net(skb_dst(skb)->dev);
74 struct rtable *rt; /* Route to the other host */ 80 struct rtable *rt; /* Route to the other host */
75 struct ip_vs_dest *dest = cp->dest; 81 struct rtable *ort; /* Original route */
82 int local;
76 83
77 if (dest) { 84 if (dest) {
78 spin_lock(&dest->dst_lock); 85 spin_lock(&dest->dst_lock);
@@ -104,23 +111,95 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_conn *cp, u32 rtos)
104 .oif = 0, 111 .oif = 0,
105 .nl_u = { 112 .nl_u = {
106 .ip4_u = { 113 .ip4_u = {
107 .daddr = cp->daddr.ip, 114 .daddr = daddr,
108 .saddr = 0, 115 .saddr = 0,
109 .tos = rtos, } }, 116 .tos = rtos, } },
110 }; 117 };
111 118
112 if (ip_route_output_key(net, &rt, &fl)) { 119 if (ip_route_output_key(net, &rt, &fl)) {
113 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", 120 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
114 &cp->daddr.ip); 121 &daddr);
115 return NULL; 122 return NULL;
116 } 123 }
117 } 124 }
118 125
126 local = rt->rt_flags & RTCF_LOCAL;
127 if (!((local ? 1 : 2) & rt_mode)) {
128 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
129 (rt->rt_flags & RTCF_LOCAL) ?
130 "local":"non-local", &rt->rt_dst);
131 ip_rt_put(rt);
132 return NULL;
133 }
134 if (local && !(rt_mode & 4) && !((ort = skb_rtable(skb)) &&
135 ort->rt_flags & RTCF_LOCAL)) {
136 IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
137 "requires NAT method, dest: %pI4\n",
138 &ip_hdr(skb)->daddr, &rt->rt_dst);
139 ip_rt_put(rt);
140 return NULL;
141 }
142 if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) {
143 IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 "
144 "to non-local address, dest: %pI4\n",
145 &ip_hdr(skb)->saddr, &rt->rt_dst);
146 ip_rt_put(rt);
147 return NULL;
148 }
149
119 return rt; 150 return rt;
120} 151}
121 152
153/* Reroute packet to local IPv4 stack after DNAT */
154static int
155__ip_vs_reroute_locally(struct sk_buff *skb)
156{
157 struct rtable *rt = skb_rtable(skb);
158 struct net_device *dev = rt->dst.dev;
159 struct net *net = dev_net(dev);
160 struct iphdr *iph = ip_hdr(skb);
161
162 if (rt->fl.iif) {
163 unsigned long orefdst = skb->_skb_refdst;
164
165 if (ip_route_input(skb, iph->daddr, iph->saddr,
166 iph->tos, skb->dev))
167 return 0;
168 refdst_drop(orefdst);
169 } else {
170 struct flowi fl = {
171 .oif = 0,
172 .nl_u = {
173 .ip4_u = {
174 .daddr = iph->daddr,
175 .saddr = iph->saddr,
176 .tos = RT_TOS(iph->tos),
177 }
178 },
179 .mark = skb->mark,
180 };
181 struct rtable *rt;
182
183 if (ip_route_output_key(net, &rt, &fl))
184 return 0;
185 if (!(rt->rt_flags & RTCF_LOCAL)) {
186 ip_rt_put(rt);
187 return 0;
188 }
189 /* Drop old route. */
190 skb_dst_drop(skb);
191 skb_dst_set(skb, &rt->dst);
192 }
193 return 1;
194}
195
122#ifdef CONFIG_IP_VS_IPV6 196#ifdef CONFIG_IP_VS_IPV6
123 197
198static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
199{
200 return rt->rt6i_dev && rt->rt6i_dev->flags & IFF_LOOPBACK;
201}
202
124static struct dst_entry * 203static struct dst_entry *
125__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, 204__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
126 struct in6_addr *ret_saddr, int do_xfrm) 205 struct in6_addr *ret_saddr, int do_xfrm)
@@ -155,14 +234,21 @@ out_err:
155 return NULL; 234 return NULL;
156} 235}
157 236
237/*
238 * Get route to destination or remote server
239 * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest,
240 * &4=Allow redirect from remote daddr to local
241 */
158static struct rt6_info * 242static struct rt6_info *
159__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 243__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
160 struct in6_addr *ret_saddr, int do_xfrm) 244 struct in6_addr *daddr, struct in6_addr *ret_saddr,
245 int do_xfrm, int rt_mode)
161{ 246{
162 struct net *net = dev_net(skb->dev); 247 struct net *net = dev_net(skb_dst(skb)->dev);
163 struct rt6_info *rt; /* Route to the other host */ 248 struct rt6_info *rt; /* Route to the other host */
164 struct ip_vs_dest *dest = cp->dest; 249 struct rt6_info *ort; /* Original route */
165 struct dst_entry *dst; 250 struct dst_entry *dst;
251 int local;
166 252
167 if (dest) { 253 if (dest) {
168 spin_lock(&dest->dst_lock); 254 spin_lock(&dest->dst_lock);
@@ -188,13 +274,38 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
188 ipv6_addr_copy(ret_saddr, &dest->dst_saddr); 274 ipv6_addr_copy(ret_saddr, &dest->dst_saddr);
189 spin_unlock(&dest->dst_lock); 275 spin_unlock(&dest->dst_lock);
190 } else { 276 } else {
191 dst = __ip_vs_route_output_v6(net, &cp->daddr.in6, ret_saddr, 277 dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
192 do_xfrm);
193 if (!dst) 278 if (!dst)
194 return NULL; 279 return NULL;
195 rt = (struct rt6_info *) dst; 280 rt = (struct rt6_info *) dst;
196 } 281 }
197 282
283 local = __ip_vs_is_local_route6(rt);
284 if (!((local ? 1 : 2) & rt_mode)) {
285 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n",
286 local ? "local":"non-local", daddr);
287 dst_release(&rt->dst);
288 return NULL;
289 }
290 if (local && !(rt_mode & 4) &&
291 !((ort = (struct rt6_info *) skb_dst(skb)) &&
292 __ip_vs_is_local_route6(ort))) {
293 IP_VS_DBG_RL("Redirect from non-local address %pI6 to local "
294 "requires NAT method, dest: %pI6\n",
295 &ipv6_hdr(skb)->daddr, daddr);
296 dst_release(&rt->dst);
297 return NULL;
298 }
299 if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
300 ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
301 IPV6_ADDR_LOOPBACK)) {
302 IP_VS_DBG_RL("Stopping traffic from loopback address %pI6 "
303 "to non-local address, dest: %pI6\n",
304 &ipv6_hdr(skb)->saddr, daddr);
305 dst_release(&rt->dst);
306 return NULL;
307 }
308
198 return rt; 309 return rt;
199} 310}
200#endif 311#endif
@@ -227,23 +338,27 @@ ip_vs_dst_reset(struct ip_vs_dest *dest)
227 __ret; \ 338 __ret; \
228}) 339})
229 340
/*
 * Review note on the updated macro (right-hand column, with the added
 * "local" argument):
 *  - sets (skb)->ipvs_property to 1;
 *  - calls ip_vs_notrack(skb) unless the connection has IP_VS_CONN_F_NFCT
 *    set, in which case it calls ip_vs_update_conntrack(skb, cp, 1);
 *  - when "local" is non-zero it executes "return NF_ACCEPT", i.e. it
 *    returns from the function that expands the macro, so nothing placed
 *    after the macro runs in that case;
 *  - otherwise it calls skb_forward_csum(skb) and passes the skb to
 *    NF_HOOK() at NF_INET_LOCAL_OUT with dst_output as the okfn.
 */
230#define IP_VS_XMIT_NAT(pf, skb, cp) \ 341#define IP_VS_XMIT_NAT(pf, skb, cp, local) \
231do { \ 342do { \
232 (skb)->ipvs_property = 1; \ 343 (skb)->ipvs_property = 1; \
233 if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ 344 if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \
234 ip_vs_notrack(skb); \ 345 ip_vs_notrack(skb); \
235 else \ 346 else \
236 ip_vs_update_conntrack(skb, cp, 1); \ 347 ip_vs_update_conntrack(skb, cp, 1); \
348 if (local) \
349 return NF_ACCEPT; \
237 skb_forward_csum(skb); \ 350 skb_forward_csum(skb); \
238 NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ 351 NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \
239 skb_dst(skb)->dev, dst_output); \ 352 skb_dst(skb)->dev, dst_output); \
240} while (0) 353} while (0)
241 354
242#define IP_VS_XMIT(pf, skb, cp) \ 355#define IP_VS_XMIT(pf, skb, cp, local) \
243do { \ 356do { \
244 (skb)->ipvs_property = 1; \ 357 (skb)->ipvs_property = 1; \
245 if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ 358 if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \
246 ip_vs_notrack(skb); \ 359 ip_vs_notrack(skb); \
360 if (local) \
361 return NF_ACCEPT; \
247 skb_forward_csum(skb); \ 362 skb_forward_csum(skb); \
248 NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ 363 NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \
249 skb_dst(skb)->dev, dst_output); \ 364 skb_dst(skb)->dev, dst_output); \
@@ -258,7 +373,7 @@ ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
258 struct ip_vs_protocol *pp) 373 struct ip_vs_protocol *pp)
259{ 374{
260 /* we do not touch skb and do not need pskb ptr */ 375 /* we do not touch skb and do not need pskb ptr */
261 return NF_ACCEPT; 376 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
262} 377}
263 378
264 379
@@ -271,27 +386,15 @@ int
271ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 386ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
272 struct ip_vs_protocol *pp) 387 struct ip_vs_protocol *pp)
273{ 388{
274 struct net *net = dev_net(skb->dev);
275 struct rtable *rt; /* Route to the other host */ 389 struct rtable *rt; /* Route to the other host */
276 struct iphdr *iph = ip_hdr(skb); 390 struct iphdr *iph = ip_hdr(skb);
277 u8 tos = iph->tos;
278 int mtu; 391 int mtu;
279 struct flowi fl = {
280 .oif = 0,
281 .nl_u = {
282 .ip4_u = {
283 .daddr = iph->daddr,
284 .saddr = 0,
285 .tos = RT_TOS(tos), } },
286 };
287 392
288 EnterFunction(10); 393 EnterFunction(10);
289 394
290 if (ip_route_output_key(net, &rt, &fl)) { 395 if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr,
291 IP_VS_DBG_RL("%s(): ip_route_output error, dest: %pI4\n", 396 RT_TOS(iph->tos), 2)))
292 __func__, &iph->daddr);
293 goto tx_error_icmp; 397 goto tx_error_icmp;
294 }
295 398
296 /* MTU checking */ 399 /* MTU checking */
297 mtu = dst_mtu(&rt->dst); 400 mtu = dst_mtu(&rt->dst);
@@ -319,7 +422,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
319 /* Another hack: avoid icmp_send in ip_fragment */ 422 /* Another hack: avoid icmp_send in ip_fragment */
320 skb->local_df = 1; 423 skb->local_df = 1;
321 424
322 IP_VS_XMIT(NFPROTO_IPV4, skb, cp); 425 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
323 426
324 LeaveFunction(10); 427 LeaveFunction(10);
325 return NF_STOLEN; 428 return NF_STOLEN;
@@ -337,18 +440,14 @@ int
337ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 440ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
338 struct ip_vs_protocol *pp) 441 struct ip_vs_protocol *pp)
339{ 442{
340 struct net *net = dev_net(skb->dev);
341 struct dst_entry *dst;
342 struct rt6_info *rt; /* Route to the other host */ 443 struct rt6_info *rt; /* Route to the other host */
343 struct ipv6hdr *iph = ipv6_hdr(skb); 444 struct ipv6hdr *iph = ipv6_hdr(skb);
344 int mtu; 445 int mtu;
345 446
346 EnterFunction(10); 447 EnterFunction(10);
347 448
348 dst = __ip_vs_route_output_v6(net, &iph->daddr, NULL, 0); 449 if (!(rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr, NULL, 0, 2)))
349 if (!dst)
350 goto tx_error_icmp; 450 goto tx_error_icmp;
351 rt = (struct rt6_info *) dst;
352 451
353 /* MTU checking */ 452 /* MTU checking */
354 mtu = dst_mtu(&rt->dst); 453 mtu = dst_mtu(&rt->dst);
@@ -376,7 +475,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
376 /* Another hack: avoid icmp_send in ip_fragment */ 475 /* Another hack: avoid icmp_send in ip_fragment */
377 skb->local_df = 1; 476 skb->local_df = 1;
378 477
379 IP_VS_XMIT(NFPROTO_IPV6, skb, cp); 478 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
380 479
381 LeaveFunction(10); 480 LeaveFunction(10);
382 return NF_STOLEN; 481 return NF_STOLEN;
@@ -401,6 +500,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
401 struct rtable *rt; /* Route to the other host */ 500 struct rtable *rt; /* Route to the other host */
402 int mtu; 501 int mtu;
403 struct iphdr *iph = ip_hdr(skb); 502 struct iphdr *iph = ip_hdr(skb);
503 int local;
404 504
405 EnterFunction(10); 505 EnterFunction(10);
406 506
@@ -414,16 +514,40 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
414 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); 514 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
415 } 515 }
416 516
417 if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos)))) 517 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
518 RT_TOS(iph->tos), 1|2|4)))
418 goto tx_error_icmp; 519 goto tx_error_icmp;
520 local = rt->rt_flags & RTCF_LOCAL;
521 /*
522 * Avoid duplicate tuple in reply direction for NAT traffic
523 * to local address when connection is sync-ed
524 */
525#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
526 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
527 enum ip_conntrack_info ctinfo;
528 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
529
530 if (ct && !nf_ct_is_untracked(ct)) {
531 IP_VS_DBG_RL_PKT(10, pp, skb, 0, "ip_vs_nat_xmit(): "
532 "stopping DNAT to local address");
533 goto tx_error_put;
534 }
535 }
536#endif
537
538 /* From world but DNAT to loopback address? */
539 if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) {
540 IP_VS_DBG_RL_PKT(1, pp, skb, 0, "ip_vs_nat_xmit(): "
541 "stopping DNAT to loopback address");
542 goto tx_error_put;
543 }
419 544
420 /* MTU checking */ 545 /* MTU checking */
421 mtu = dst_mtu(&rt->dst); 546 mtu = dst_mtu(&rt->dst);
422 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { 547 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
423 ip_rt_put(rt);
424 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 548 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
425 IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for"); 549 IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
426 goto tx_error; 550 goto tx_error_put;
427 } 551 }
428 552
429 /* copy-on-write the packet before mangling it */ 553 /* copy-on-write the packet before mangling it */
@@ -433,16 +557,27 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
433 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 557 if (skb_cow(skb, rt->dst.dev->hard_header_len))
434 goto tx_error_put; 558 goto tx_error_put;
435 559
436 /* drop old route */
437 skb_dst_drop(skb);
438 skb_dst_set(skb, &rt->dst);
439
440 /* mangle the packet */ 560 /* mangle the packet */
441 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) 561 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
442 goto tx_error; 562 goto tx_error_put;
443 ip_hdr(skb)->daddr = cp->daddr.ip; 563 ip_hdr(skb)->daddr = cp->daddr.ip;
444 ip_send_check(ip_hdr(skb)); 564 ip_send_check(ip_hdr(skb));
445 565
566 if (!local) {
567 /* drop old route */
568 skb_dst_drop(skb);
569 skb_dst_set(skb, &rt->dst);
570 } else {
571 ip_rt_put(rt);
572 /*
573 * Some IPv4 replies get local address from routes,
574 * not from iph, so while we DNAT after routing
575 * we need this second input/output route.
576 */
577 if (!__ip_vs_reroute_locally(skb))
578 goto tx_error;
579 }
580
446 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); 581 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
447 582
448 /* FIXME: when application helper enlarges the packet and the length 583 /* FIXME: when application helper enlarges the packet and the length
@@ -452,7 +587,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
452 /* Another hack: avoid icmp_send in ip_fragment */ 587 /* Another hack: avoid icmp_send in ip_fragment */
453 skb->local_df = 1; 588 skb->local_df = 1;
454 589
455 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp); 590 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
456 591
457 LeaveFunction(10); 592 LeaveFunction(10);
458 return NF_STOLEN; 593 return NF_STOLEN;
@@ -475,6 +610,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
475{ 610{
476 struct rt6_info *rt; /* Route to the other host */ 611 struct rt6_info *rt; /* Route to the other host */
477 int mtu; 612 int mtu;
613 int local;
478 614
479 EnterFunction(10); 615 EnterFunction(10);
480 616
@@ -489,18 +625,44 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
489 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); 625 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
490 } 626 }
491 627
492 rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0); 628 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
493 if (!rt) 629 0, 1|2|4)))
494 goto tx_error_icmp; 630 goto tx_error_icmp;
631 local = __ip_vs_is_local_route6(rt);
632 /*
633 * Avoid duplicate tuple in reply direction for NAT traffic
634 * to local address when connection is sync-ed
635 */
636#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
637 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
638 enum ip_conntrack_info ctinfo;
639 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
640
641 if (ct && !nf_ct_is_untracked(ct)) {
642 IP_VS_DBG_RL_PKT(10, pp, skb, 0,
643 "ip_vs_nat_xmit_v6(): "
644 "stopping DNAT to local address");
645 goto tx_error_put;
646 }
647 }
648#endif
649
650 /* From world but DNAT to loopback address? */
651 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
652 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
653 IP_VS_DBG_RL_PKT(1, pp, skb, 0,
654 "ip_vs_nat_xmit_v6(): "
655 "stopping DNAT to loopback address");
656 goto tx_error_put;
657 }
495 658
496 /* MTU checking */ 659 /* MTU checking */
497 mtu = dst_mtu(&rt->dst); 660 mtu = dst_mtu(&rt->dst);
498 if (skb->len > mtu) { 661 if (skb->len > mtu) {
499 dst_release(&rt->dst);
500 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 662 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
501 IP_VS_DBG_RL_PKT(0, pp, skb, 0, 663 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
502 "ip_vs_nat_xmit_v6(): frag needed for"); 664 "ip_vs_nat_xmit_v6(): frag needed for");
503 goto tx_error; 665 goto tx_error_put;
504 } 666 }
505 667
506 /* copy-on-write the packet before mangling it */ 668 /* copy-on-write the packet before mangling it */
@@ -510,14 +672,19 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
510 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 672 if (skb_cow(skb, rt->dst.dev->hard_header_len))
511 goto tx_error_put; 673 goto tx_error_put;
512 674
513 /* drop old route */
514 skb_dst_drop(skb);
515 skb_dst_set(skb, &rt->dst);
516
517 /* mangle the packet */ 675 /* mangle the packet */
518 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) 676 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
519 goto tx_error; 677 goto tx_error;
520 ipv6_hdr(skb)->daddr = cp->daddr.in6; 678 ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &cp->daddr.in6);
679
680 if (!local || !skb->dev) {
681 /* drop the old route when skb is not shared */
682 skb_dst_drop(skb);
683 skb_dst_set(skb, &rt->dst);
684 } else {
685 /* destined to loopback, do we need to change route? */
686 dst_release(&rt->dst);
687 }
521 688
522 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); 689 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
523 690
@@ -528,7 +695,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
528 /* Another hack: avoid icmp_send in ip_fragment */ 695 /* Another hack: avoid icmp_send in ip_fragment */
529 skb->local_df = 1; 696 skb->local_df = 1;
530 697
531 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp); 698 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
532 699
533 LeaveFunction(10); 700 LeaveFunction(10);
534 return NF_STOLEN; 701 return NF_STOLEN;
@@ -588,16 +755,20 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
588 goto tx_error; 755 goto tx_error;
589 } 756 }
590 757
591 if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(tos)))) 758 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
759 RT_TOS(tos), 1|2)))
592 goto tx_error_icmp; 760 goto tx_error_icmp;
761 if (rt->rt_flags & RTCF_LOCAL) {
762 ip_rt_put(rt);
763 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
764 }
593 765
594 tdev = rt->dst.dev; 766 tdev = rt->dst.dev;
595 767
596 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); 768 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
597 if (mtu < 68) { 769 if (mtu < 68) {
598 ip_rt_put(rt);
599 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); 770 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
600 goto tx_error; 771 goto tx_error_put;
601 } 772 }
602 if (skb_dst(skb)) 773 if (skb_dst(skb))
603 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); 774 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
@@ -607,9 +778,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
607 if ((old_iph->frag_off & htons(IP_DF)) 778 if ((old_iph->frag_off & htons(IP_DF))
608 && mtu < ntohs(old_iph->tot_len)) { 779 && mtu < ntohs(old_iph->tot_len)) {
609 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 780 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
610 ip_rt_put(rt);
611 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 781 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
612 goto tx_error; 782 goto tx_error_put;
613 } 783 }
614 784
615 /* 785 /*
@@ -678,6 +848,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
678 kfree_skb(skb); 848 kfree_skb(skb);
679 LeaveFunction(10); 849 LeaveFunction(10);
680 return NF_STOLEN; 850 return NF_STOLEN;
851tx_error_put:
852 ip_rt_put(rt);
853 goto tx_error;
681} 854}
682 855
683#ifdef CONFIG_IP_VS_IPV6 856#ifdef CONFIG_IP_VS_IPV6
@@ -703,27 +876,29 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
703 goto tx_error; 876 goto tx_error;
704 } 877 }
705 878
706 rt = __ip_vs_get_out_rt_v6(skb, cp, &saddr, 1); 879 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
707 if (!rt) 880 &saddr, 1, 1|2)))
708 goto tx_error_icmp; 881 goto tx_error_icmp;
882 if (__ip_vs_is_local_route6(rt)) {
883 dst_release(&rt->dst);
884 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
885 }
709 886
710 tdev = rt->dst.dev; 887 tdev = rt->dst.dev;
711 888
712 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); 889 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
713 if (mtu < IPV6_MIN_MTU) { 890 if (mtu < IPV6_MIN_MTU) {
714 dst_release(&rt->dst);
715 IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, 891 IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
716 IPV6_MIN_MTU); 892 IPV6_MIN_MTU);
717 goto tx_error; 893 goto tx_error_put;
718 } 894 }
719 if (skb_dst(skb)) 895 if (skb_dst(skb))
720 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); 896 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
721 897
722 if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { 898 if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
723 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 899 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
724 dst_release(&rt->dst);
725 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 900 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
726 goto tx_error; 901 goto tx_error_put;
727 } 902 }
728 903
729 /* 904 /*
@@ -789,6 +964,9 @@ tx_error:
789 kfree_skb(skb); 964 kfree_skb(skb);
790 LeaveFunction(10); 965 LeaveFunction(10);
791 return NF_STOLEN; 966 return NF_STOLEN;
967tx_error_put:
968 dst_release(&rt->dst);
969 goto tx_error;
792} 970}
793#endif 971#endif
794 972
@@ -807,8 +985,13 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
807 985
808 EnterFunction(10); 986 EnterFunction(10);
809 987
810 if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos)))) 988 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
989 RT_TOS(iph->tos), 1|2)))
811 goto tx_error_icmp; 990 goto tx_error_icmp;
991 if (rt->rt_flags & RTCF_LOCAL) {
992 ip_rt_put(rt);
993 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
994 }
812 995
813 /* MTU checking */ 996 /* MTU checking */
814 mtu = dst_mtu(&rt->dst); 997 mtu = dst_mtu(&rt->dst);
@@ -836,7 +1019,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
836 /* Another hack: avoid icmp_send in ip_fragment */ 1019 /* Another hack: avoid icmp_send in ip_fragment */
837 skb->local_df = 1; 1020 skb->local_df = 1;
838 1021
839 IP_VS_XMIT(NFPROTO_IPV4, skb, cp); 1022 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
840 1023
841 LeaveFunction(10); 1024 LeaveFunction(10);
842 return NF_STOLEN; 1025 return NF_STOLEN;
@@ -859,9 +1042,13 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
859 1042
860 EnterFunction(10); 1043 EnterFunction(10);
861 1044
862 rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0); 1045 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
863 if (!rt) 1046 0, 1|2)))
864 goto tx_error_icmp; 1047 goto tx_error_icmp;
1048 if (__ip_vs_is_local_route6(rt)) {
1049 dst_release(&rt->dst);
1050 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
1051 }
865 1052
866 /* MTU checking */ 1053 /* MTU checking */
867 mtu = dst_mtu(&rt->dst); 1054 mtu = dst_mtu(&rt->dst);
@@ -889,7 +1076,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
889 /* Another hack: avoid icmp_send in ip_fragment */ 1076 /* Another hack: avoid icmp_send in ip_fragment */
890 skb->local_df = 1; 1077 skb->local_df = 1;
891 1078
892 IP_VS_XMIT(NFPROTO_IPV6, skb, cp); 1079 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
893 1080
894 LeaveFunction(10); 1081 LeaveFunction(10);
895 return NF_STOLEN; 1082 return NF_STOLEN;
@@ -915,6 +1102,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
915 struct rtable *rt; /* Route to the other host */ 1102 struct rtable *rt; /* Route to the other host */
916 int mtu; 1103 int mtu;
917 int rc; 1104 int rc;
1105 int local;
918 1106
919 EnterFunction(10); 1107 EnterFunction(10);
920 1108
@@ -935,16 +1123,43 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
935 * mangle and send the packet here (only for VS/NAT) 1123 * mangle and send the packet here (only for VS/NAT)
936 */ 1124 */
937 1125
938 if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(ip_hdr(skb)->tos)))) 1126 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
1127 RT_TOS(ip_hdr(skb)->tos), 1|2|4)))
939 goto tx_error_icmp; 1128 goto tx_error_icmp;
1129 local = rt->rt_flags & RTCF_LOCAL;
1130
1131 /*
1132 * Avoid duplicate tuple in reply direction for NAT traffic
1133 * to local address when connection is sync-ed
1134 */
1135#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
1136 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1137 enum ip_conntrack_info ctinfo;
1138 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
1139
1140 if (ct && !nf_ct_is_untracked(ct)) {
1141 IP_VS_DBG(10, "%s(): "
1142 "stopping DNAT to local address %pI4\n",
1143 __func__, &cp->daddr.ip);
1144 goto tx_error_put;
1145 }
1146 }
1147#endif
1148
1149 /* From world but DNAT to loopback address? */
1150 if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) {
1151 IP_VS_DBG(1, "%s(): "
1152 "stopping DNAT to loopback %pI4\n",
1153 __func__, &cp->daddr.ip);
1154 goto tx_error_put;
1155 }
940 1156
941 /* MTU checking */ 1157 /* MTU checking */
942 mtu = dst_mtu(&rt->dst); 1158 mtu = dst_mtu(&rt->dst);
943 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { 1159 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
944 ip_rt_put(rt);
945 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 1160 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
946 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1161 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
947 goto tx_error; 1162 goto tx_error_put;
948 } 1163 }
949 1164
950 /* copy-on-write the packet before mangling it */ 1165 /* copy-on-write the packet before mangling it */
@@ -954,16 +1169,27 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
954 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 1169 if (skb_cow(skb, rt->dst.dev->hard_header_len))
955 goto tx_error_put; 1170 goto tx_error_put;
956 1171
957 /* drop the old route when skb is not shared */
958 skb_dst_drop(skb);
959 skb_dst_set(skb, &rt->dst);
960
961 ip_vs_nat_icmp(skb, pp, cp, 0); 1172 ip_vs_nat_icmp(skb, pp, cp, 0);
962 1173
1174 if (!local) {
1175 /* drop the old route when skb is not shared */
1176 skb_dst_drop(skb);
1177 skb_dst_set(skb, &rt->dst);
1178 } else {
1179 ip_rt_put(rt);
1180 /*
1181 * Some IPv4 replies get local address from routes,
1182 * not from iph, so while we DNAT after routing
1183 * we need this second input/output route.
1184 */
1185 if (!__ip_vs_reroute_locally(skb))
1186 goto tx_error;
1187 }
1188
963 /* Another hack: avoid icmp_send in ip_fragment */ 1189 /* Another hack: avoid icmp_send in ip_fragment */
964 skb->local_df = 1; 1190 skb->local_df = 1;
965 1191
966 IP_VS_XMIT(NFPROTO_IPV4, skb, cp); 1192 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
967 1193
968 rc = NF_STOLEN; 1194 rc = NF_STOLEN;
969 goto out; 1195 goto out;
@@ -989,6 +1215,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
989 struct rt6_info *rt; /* Route to the other host */ 1215 struct rt6_info *rt; /* Route to the other host */
990 int mtu; 1216 int mtu;
991 int rc; 1217 int rc;
1218 int local;
992 1219
993 EnterFunction(10); 1220 EnterFunction(10);
994 1221
@@ -1009,17 +1236,44 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1009 * mangle and send the packet here (only for VS/NAT) 1236 * mangle and send the packet here (only for VS/NAT)
1010 */ 1237 */
1011 1238
1012 rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0); 1239 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
1013 if (!rt) 1240 0, 1|2|4)))
1014 goto tx_error_icmp; 1241 goto tx_error_icmp;
1015 1242
1243 local = __ip_vs_is_local_route6(rt);
1244 /*
1245 * Avoid duplicate tuple in reply direction for NAT traffic
1246 * to local address when connection is sync-ed
1247 */
1248#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
1249 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1250 enum ip_conntrack_info ctinfo;
1251 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
1252
1253 if (ct && !nf_ct_is_untracked(ct)) {
1254 IP_VS_DBG(10, "%s(): "
1255 "stopping DNAT to local address %pI6\n",
1256 __func__, &cp->daddr.in6);
1257 goto tx_error_put;
1258 }
1259 }
1260#endif
1261
1262 /* From world but DNAT to loopback address? */
1263 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
1264 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
1265 IP_VS_DBG(1, "%s(): "
1266 "stopping DNAT to loopback %pI6\n",
1267 __func__, &cp->daddr.in6);
1268 goto tx_error_put;
1269 }
1270
1016 /* MTU checking */ 1271 /* MTU checking */
1017 mtu = dst_mtu(&rt->dst); 1272 mtu = dst_mtu(&rt->dst);
1018 if (skb->len > mtu) { 1273 if (skb->len > mtu) {
1019 dst_release(&rt->dst);
1020 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1274 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1021 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1275 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1022 goto tx_error; 1276 goto tx_error_put;
1023 } 1277 }
1024 1278
1025 /* copy-on-write the packet before mangling it */ 1279 /* copy-on-write the packet before mangling it */
@@ -1029,16 +1283,21 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1029 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 1283 if (skb_cow(skb, rt->dst.dev->hard_header_len))
1030 goto tx_error_put; 1284 goto tx_error_put;
1031 1285
1032 /* drop the old route when skb is not shared */
1033 skb_dst_drop(skb);
1034 skb_dst_set(skb, &rt->dst);
1035
1036 ip_vs_nat_icmp_v6(skb, pp, cp, 0); 1286 ip_vs_nat_icmp_v6(skb, pp, cp, 0);
1037 1287
1288 if (!local || !skb->dev) {
1289 /* drop the old route when skb is not shared */
1290 skb_dst_drop(skb);
1291 skb_dst_set(skb, &rt->dst);
1292 } else {
1293 /* destined to loopback, do we need to change route? */
1294 dst_release(&rt->dst);
1295 }
1296
1038 /* Another hack: avoid icmp_send in ip_fragment */ 1297 /* Another hack: avoid icmp_send in ip_fragment */
1039 skb->local_df = 1; 1298 skb->local_df = 1;
1040 1299
1041 IP_VS_XMIT(NFPROTO_IPV6, skb, cp); 1300 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
1042 1301
1043 rc = NF_STOLEN; 1302 rc = NF_STOLEN;
1044 goto out; 1303 goto out;