aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c171
1 files changed, 92 insertions, 79 deletions
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 8817afa34e6a..b0bd8afbf368 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -26,6 +26,7 @@
26#include <net/route.h> /* for ip_route_output */ 26#include <net/route.h> /* for ip_route_output */
27#include <net/ipv6.h> 27#include <net/ipv6.h>
28#include <net/ip6_route.h> 28#include <net/ip6_route.h>
29#include <net/addrconf.h>
29#include <linux/icmpv6.h> 30#include <linux/icmpv6.h>
30#include <linux/netfilter.h> 31#include <linux/netfilter.h>
31#include <linux/netfilter_ipv4.h> 32#include <linux/netfilter_ipv4.h>
@@ -37,26 +38,27 @@
37 * Destination cache to speed up outgoing route lookup 38 * Destination cache to speed up outgoing route lookup
38 */ 39 */
39static inline void 40static inline void
40__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst) 41__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst,
42 u32 dst_cookie)
41{ 43{
42 struct dst_entry *old_dst; 44 struct dst_entry *old_dst;
43 45
44 old_dst = dest->dst_cache; 46 old_dst = dest->dst_cache;
45 dest->dst_cache = dst; 47 dest->dst_cache = dst;
46 dest->dst_rtos = rtos; 48 dest->dst_rtos = rtos;
49 dest->dst_cookie = dst_cookie;
47 dst_release(old_dst); 50 dst_release(old_dst);
48} 51}
49 52
50static inline struct dst_entry * 53static inline struct dst_entry *
51__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie) 54__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
52{ 55{
53 struct dst_entry *dst = dest->dst_cache; 56 struct dst_entry *dst = dest->dst_cache;
54 57
55 if (!dst) 58 if (!dst)
56 return NULL; 59 return NULL;
57 if ((dst->obsolete 60 if ((dst->obsolete || rtos != dest->dst_rtos) &&
58 || (dest->af == AF_INET && rtos != dest->dst_rtos)) && 61 dst->ops->check(dst, dest->dst_cookie) == NULL) {
59 dst->ops->check(dst, cookie) == NULL) {
60 dest->dst_cache = NULL; 62 dest->dst_cache = NULL;
61 dst_release(dst); 63 dst_release(dst);
62 return NULL; 64 return NULL;
@@ -66,15 +68,16 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
66} 68}
67 69
68static struct rtable * 70static struct rtable *
69__ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) 71__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_conn *cp, u32 rtos)
70{ 72{
73 struct net *net = dev_net(skb->dev);
71 struct rtable *rt; /* Route to the other host */ 74 struct rtable *rt; /* Route to the other host */
72 struct ip_vs_dest *dest = cp->dest; 75 struct ip_vs_dest *dest = cp->dest;
73 76
74 if (dest) { 77 if (dest) {
75 spin_lock(&dest->dst_lock); 78 spin_lock(&dest->dst_lock);
76 if (!(rt = (struct rtable *) 79 if (!(rt = (struct rtable *)
77 __ip_vs_dst_check(dest, rtos, 0))) { 80 __ip_vs_dst_check(dest, rtos))) {
78 struct flowi fl = { 81 struct flowi fl = {
79 .oif = 0, 82 .oif = 0,
80 .nl_u = { 83 .nl_u = {
@@ -84,13 +87,13 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
84 .tos = rtos, } }, 87 .tos = rtos, } },
85 }; 88 };
86 89
87 if (ip_route_output_key(&init_net, &rt, &fl)) { 90 if (ip_route_output_key(net, &rt, &fl)) {
88 spin_unlock(&dest->dst_lock); 91 spin_unlock(&dest->dst_lock);
89 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", 92 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
90 &dest->addr.ip); 93 &dest->addr.ip);
91 return NULL; 94 return NULL;
92 } 95 }
93 __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst)); 96 __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
94 IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n", 97 IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
95 &dest->addr.ip, 98 &dest->addr.ip,
96 atomic_read(&rt->dst.__refcnt), rtos); 99 atomic_read(&rt->dst.__refcnt), rtos);
@@ -106,7 +109,7 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
106 .tos = rtos, } }, 109 .tos = rtos, } },
107 }; 110 };
108 111
109 if (ip_route_output_key(&init_net, &rt, &fl)) { 112 if (ip_route_output_key(net, &rt, &fl)) {
110 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", 113 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
111 &cp->daddr.ip); 114 &cp->daddr.ip);
112 return NULL; 115 return NULL;
@@ -117,62 +120,79 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
117} 120}
118 121
119#ifdef CONFIG_IP_VS_IPV6 122#ifdef CONFIG_IP_VS_IPV6
123
124static struct dst_entry *
125__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
126 struct in6_addr *ret_saddr, int do_xfrm)
127{
128 struct dst_entry *dst;
129 struct flowi fl = {
130 .oif = 0,
131 .nl_u = {
132 .ip6_u = {
133 .daddr = *daddr,
134 },
135 },
136 };
137
138 dst = ip6_route_output(net, NULL, &fl);
139 if (dst->error)
140 goto out_err;
141 if (!ret_saddr)
142 return dst;
143 if (ipv6_addr_any(&fl.fl6_src) &&
144 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
145 &fl.fl6_dst, 0, &fl.fl6_src) < 0)
146 goto out_err;
147 if (do_xfrm && xfrm_lookup(net, &dst, &fl, NULL, 0) < 0)
148 goto out_err;
149 ipv6_addr_copy(ret_saddr, &fl.fl6_src);
150 return dst;
151
152out_err:
153 dst_release(dst);
154 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
155 return NULL;
156}
157
120static struct rt6_info * 158static struct rt6_info *
121__ip_vs_get_out_rt_v6(struct ip_vs_conn *cp) 159__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
160 struct in6_addr *ret_saddr, int do_xfrm)
122{ 161{
162 struct net *net = dev_net(skb->dev);
123 struct rt6_info *rt; /* Route to the other host */ 163 struct rt6_info *rt; /* Route to the other host */
124 struct ip_vs_dest *dest = cp->dest; 164 struct ip_vs_dest *dest = cp->dest;
165 struct dst_entry *dst;
125 166
126 if (dest) { 167 if (dest) {
127 spin_lock(&dest->dst_lock); 168 spin_lock(&dest->dst_lock);
128 rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0, 0); 169 rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0);
129 if (!rt) { 170 if (!rt) {
130 struct flowi fl = { 171 u32 cookie;
131 .oif = 0,
132 .nl_u = {
133 .ip6_u = {
134 .daddr = dest->addr.in6,
135 .saddr = {
136 .s6_addr32 =
137 { 0, 0, 0, 0 },
138 },
139 },
140 },
141 };
142 172
143 rt = (struct rt6_info *)ip6_route_output(&init_net, 173 dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
144 NULL, &fl); 174 &dest->dst_saddr,
145 if (!rt) { 175 do_xfrm);
176 if (!dst) {
146 spin_unlock(&dest->dst_lock); 177 spin_unlock(&dest->dst_lock);
147 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n",
148 &dest->addr.in6);
149 return NULL; 178 return NULL;
150 } 179 }
151 __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst)); 180 rt = (struct rt6_info *) dst;
152 IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n", 181 cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
153 &dest->addr.in6, 182 __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie);
183 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
184 &dest->addr.in6, &dest->dst_saddr,
154 atomic_read(&rt->dst.__refcnt)); 185 atomic_read(&rt->dst.__refcnt));
155 } 186 }
187 if (ret_saddr)
188 ipv6_addr_copy(ret_saddr, &dest->dst_saddr);
156 spin_unlock(&dest->dst_lock); 189 spin_unlock(&dest->dst_lock);
157 } else { 190 } else {
158 struct flowi fl = { 191 dst = __ip_vs_route_output_v6(net, &cp->daddr.in6, ret_saddr,
159 .oif = 0, 192 do_xfrm);
160 .nl_u = { 193 if (!dst)
161 .ip6_u = {
162 .daddr = cp->daddr.in6,
163 .saddr = {
164 .s6_addr32 = { 0, 0, 0, 0 },
165 },
166 },
167 },
168 };
169
170 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
171 if (!rt) {
172 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n",
173 &cp->daddr.in6);
174 return NULL; 194 return NULL;
175 } 195 rt = (struct rt6_info *) dst;
176 } 196 }
177 197
178 return rt; 198 return rt;
@@ -248,6 +268,7 @@ int
248ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 268ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
249 struct ip_vs_protocol *pp) 269 struct ip_vs_protocol *pp)
250{ 270{
271 struct net *net = dev_net(skb->dev);
251 struct rtable *rt; /* Route to the other host */ 272 struct rtable *rt; /* Route to the other host */
252 struct iphdr *iph = ip_hdr(skb); 273 struct iphdr *iph = ip_hdr(skb);
253 u8 tos = iph->tos; 274 u8 tos = iph->tos;
@@ -263,7 +284,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
263 284
264 EnterFunction(10); 285 EnterFunction(10);
265 286
266 if (ip_route_output_key(&init_net, &rt, &fl)) { 287 if (ip_route_output_key(net, &rt, &fl)) {
267 IP_VS_DBG_RL("%s(): ip_route_output error, dest: %pI4\n", 288 IP_VS_DBG_RL("%s(): ip_route_output error, dest: %pI4\n",
268 __func__, &iph->daddr); 289 __func__, &iph->daddr);
269 goto tx_error_icmp; 290 goto tx_error_icmp;
@@ -313,25 +334,18 @@ int
313ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 334ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
314 struct ip_vs_protocol *pp) 335 struct ip_vs_protocol *pp)
315{ 336{
337 struct net *net = dev_net(skb->dev);
338 struct dst_entry *dst;
316 struct rt6_info *rt; /* Route to the other host */ 339 struct rt6_info *rt; /* Route to the other host */
317 struct ipv6hdr *iph = ipv6_hdr(skb); 340 struct ipv6hdr *iph = ipv6_hdr(skb);
318 int mtu; 341 int mtu;
319 struct flowi fl = {
320 .oif = 0,
321 .nl_u = {
322 .ip6_u = {
323 .daddr = iph->daddr,
324 .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
325 };
326 342
327 EnterFunction(10); 343 EnterFunction(10);
328 344
329 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); 345 dst = __ip_vs_route_output_v6(net, &iph->daddr, NULL, 0);
330 if (!rt) { 346 if (!dst)
331 IP_VS_DBG_RL("%s(): ip6_route_output error, dest: %pI6\n",
332 __func__, &iph->daddr);
333 goto tx_error_icmp; 347 goto tx_error_icmp;
334 } 348 rt = (struct rt6_info *) dst;
335 349
336 /* MTU checking */ 350 /* MTU checking */
337 mtu = dst_mtu(&rt->dst); 351 mtu = dst_mtu(&rt->dst);
@@ -397,7 +411,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
397 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); 411 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
398 } 412 }
399 413
400 if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos)))) 414 if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos))))
401 goto tx_error_icmp; 415 goto tx_error_icmp;
402 416
403 /* MTU checking */ 417 /* MTU checking */
@@ -472,7 +486,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
472 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); 486 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
473 } 487 }
474 488
475 rt = __ip_vs_get_out_rt_v6(cp); 489 rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0);
476 if (!rt) 490 if (!rt)
477 goto tx_error_icmp; 491 goto tx_error_icmp;
478 492
@@ -557,7 +571,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
557 struct iphdr *old_iph = ip_hdr(skb); 571 struct iphdr *old_iph = ip_hdr(skb);
558 u8 tos = old_iph->tos; 572 u8 tos = old_iph->tos;
559 __be16 df = old_iph->frag_off; 573 __be16 df = old_iph->frag_off;
560 sk_buff_data_t old_transport_header = skb->transport_header;
561 struct iphdr *iph; /* Our new IP header */ 574 struct iphdr *iph; /* Our new IP header */
562 unsigned int max_headroom; /* The extra header space needed */ 575 unsigned int max_headroom; /* The extra header space needed */
563 int mtu; 576 int mtu;
@@ -572,7 +585,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
572 goto tx_error; 585 goto tx_error;
573 } 586 }
574 587
575 if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos)))) 588 if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(tos))))
576 goto tx_error_icmp; 589 goto tx_error_icmp;
577 590
578 tdev = rt->dst.dev; 591 tdev = rt->dst.dev;
@@ -616,7 +629,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
616 old_iph = ip_hdr(skb); 629 old_iph = ip_hdr(skb);
617 } 630 }
618 631
619 skb->transport_header = old_transport_header; 632 skb->transport_header = skb->network_header;
620 633
621 /* fix old IP header checksum */ 634 /* fix old IP header checksum */
622 ip_send_check(old_iph); 635 ip_send_check(old_iph);
@@ -670,9 +683,9 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
670 struct ip_vs_protocol *pp) 683 struct ip_vs_protocol *pp)
671{ 684{
672 struct rt6_info *rt; /* Route to the other host */ 685 struct rt6_info *rt; /* Route to the other host */
686 struct in6_addr saddr; /* Source for tunnel */
673 struct net_device *tdev; /* Device to other host */ 687 struct net_device *tdev; /* Device to other host */
674 struct ipv6hdr *old_iph = ipv6_hdr(skb); 688 struct ipv6hdr *old_iph = ipv6_hdr(skb);
675 sk_buff_data_t old_transport_header = skb->transport_header;
676 struct ipv6hdr *iph; /* Our new IP header */ 689 struct ipv6hdr *iph; /* Our new IP header */
677 unsigned int max_headroom; /* The extra header space needed */ 690 unsigned int max_headroom; /* The extra header space needed */
678 int mtu; 691 int mtu;
@@ -687,17 +700,17 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
687 goto tx_error; 700 goto tx_error;
688 } 701 }
689 702
690 rt = __ip_vs_get_out_rt_v6(cp); 703 rt = __ip_vs_get_out_rt_v6(skb, cp, &saddr, 1);
691 if (!rt) 704 if (!rt)
692 goto tx_error_icmp; 705 goto tx_error_icmp;
693 706
694 tdev = rt->dst.dev; 707 tdev = rt->dst.dev;
695 708
696 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); 709 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
697 /* TODO IPv6: do we need this check in IPv6? */ 710 if (mtu < IPV6_MIN_MTU) {
698 if (mtu < 1280) {
699 dst_release(&rt->dst); 711 dst_release(&rt->dst);
700 IP_VS_DBG_RL("%s(): mtu less than 1280\n", __func__); 712 IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
713 IPV6_MIN_MTU);
701 goto tx_error; 714 goto tx_error;
702 } 715 }
703 if (skb_dst(skb)) 716 if (skb_dst(skb))
@@ -730,7 +743,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
730 old_iph = ipv6_hdr(skb); 743 old_iph = ipv6_hdr(skb);
731 } 744 }
732 745
733 skb->transport_header = old_transport_header; 746 skb->transport_header = skb->network_header;
734 747
735 skb_push(skb, sizeof(struct ipv6hdr)); 748 skb_push(skb, sizeof(struct ipv6hdr));
736 skb_reset_network_header(skb); 749 skb_reset_network_header(skb);
@@ -750,8 +763,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
750 be16_add_cpu(&iph->payload_len, sizeof(*old_iph)); 763 be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
751 iph->priority = old_iph->priority; 764 iph->priority = old_iph->priority;
752 memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl)); 765 memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
753 iph->daddr = rt->rt6i_dst.addr; 766 ipv6_addr_copy(&iph->daddr, &cp->daddr.in6);
754 iph->saddr = cp->vaddr.in6; /* rt->rt6i_src.addr; */ 767 ipv6_addr_copy(&iph->saddr, &saddr);
755 iph->hop_limit = old_iph->hop_limit; 768 iph->hop_limit = old_iph->hop_limit;
756 769
757 /* Another hack: avoid icmp_send in ip_fragment */ 770 /* Another hack: avoid icmp_send in ip_fragment */
@@ -791,7 +804,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
791 804
792 EnterFunction(10); 805 EnterFunction(10);
793 806
794 if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos)))) 807 if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos))))
795 goto tx_error_icmp; 808 goto tx_error_icmp;
796 809
797 /* MTU checking */ 810 /* MTU checking */
@@ -843,7 +856,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
843 856
844 EnterFunction(10); 857 EnterFunction(10);
845 858
846 rt = __ip_vs_get_out_rt_v6(cp); 859 rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0);
847 if (!rt) 860 if (!rt)
848 goto tx_error_icmp; 861 goto tx_error_icmp;
849 862
@@ -919,7 +932,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
919 * mangle and send the packet here (only for VS/NAT) 932 * mangle and send the packet here (only for VS/NAT)
920 */ 933 */
921 934
922 if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos)))) 935 if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(ip_hdr(skb)->tos))))
923 goto tx_error_icmp; 936 goto tx_error_icmp;
924 937
925 /* MTU checking */ 938 /* MTU checking */
@@ -993,7 +1006,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
993 * mangle and send the packet here (only for VS/NAT) 1006 * mangle and send the packet here (only for VS/NAT)
994 */ 1007 */
995 1008
996 rt = __ip_vs_get_out_rt_v6(cp); 1009 rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0);
997 if (!rt) 1010 if (!rt)
998 goto tx_error_icmp; 1011 goto tx_error_icmp;
999 1012