aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/net/ip_vs.h195
-rw-r--r--net/netfilter/ipvs/Kconfig7
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c15
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c404
-rw-r--r--net/netfilter/ipvs/ip_vs_dh.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_pe_sip.c18
-rw-r--r--net/netfilter/ipvs/ip_vs_proto.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_ah_esp.c9
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c42
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c40
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_udp.c41
-rw-r--r--net/netfilter/ipvs/ip_vs_sched.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_sh.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c73
-rw-r--r--net/netfilter/xt_ipvs.c4
17 files changed, 502 insertions, 362 deletions
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index ee75ccdf5188..68c69d54d392 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -22,7 +22,10 @@
22#include <linux/ip.h> 22#include <linux/ip.h>
23#include <linux/ipv6.h> /* for struct ipv6hdr */ 23#include <linux/ipv6.h> /* for struct ipv6hdr */
24#include <net/ipv6.h> 24#include <net/ipv6.h>
25#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 25#if IS_ENABLED(CONFIG_IP_VS_IPV6)
26#include <linux/netfilter_ipv6/ip6_tables.h>
27#endif
28#if IS_ENABLED(CONFIG_NF_CONNTRACK)
26#include <net/netfilter/nf_conntrack.h> 29#include <net/netfilter/nf_conntrack.h>
27#endif 30#endif
28#include <net/net_namespace.h> /* Netw namespace */ 31#include <net/net_namespace.h> /* Netw namespace */
@@ -103,30 +106,117 @@ static inline struct net *seq_file_single_net(struct seq_file *seq)
103/* Connections' size value needed by ip_vs_ctl.c */ 106/* Connections' size value needed by ip_vs_ctl.c */
104extern int ip_vs_conn_tab_size; 107extern int ip_vs_conn_tab_size;
105 108
106
107struct ip_vs_iphdr { 109struct ip_vs_iphdr {
108 int len; 110 __u32 len; /* IPv4 simply where L4 starts
109 __u8 protocol; 111 IPv6 where L4 Transport Header starts */
112 __u32 thoff_reasm; /* Transport Header Offset in nfct_reasm skb */
113 __u16 fragoffs; /* IPv6 fragment offset, 0 if first frag (or not frag)*/
114 __s16 protocol;
115 __s32 flags;
110 union nf_inet_addr saddr; 116 union nf_inet_addr saddr;
111 union nf_inet_addr daddr; 117 union nf_inet_addr daddr;
112}; 118};
113 119
120/* Dependency to module: nf_defrag_ipv6 */
121#if defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE)
122static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb)
123{
124 return skb->nfct_reasm;
125}
126static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset,
127 int len, void *buffer,
128 const struct ip_vs_iphdr *ipvsh)
129{
130 if (unlikely(ipvsh->fragoffs && skb_nfct_reasm(skb)))
131 return skb_header_pointer(skb_nfct_reasm(skb),
132 ipvsh->thoff_reasm, len, buffer);
133
134 return skb_header_pointer(skb, offset, len, buffer);
135}
136#else
137static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb)
138{
139 return NULL;
140}
141static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset,
142 int len, void *buffer,
143 const struct ip_vs_iphdr *ipvsh)
144{
145 return skb_header_pointer(skb, offset, len, buffer);
146}
147#endif
148
114static inline void 149static inline void
115ip_vs_fill_iphdr(int af, const void *nh, struct ip_vs_iphdr *iphdr) 150ip_vs_fill_ip4hdr(const void *nh, struct ip_vs_iphdr *iphdr)
151{
152 const struct iphdr *iph = nh;
153
154 iphdr->len = iph->ihl * 4;
155 iphdr->fragoffs = 0;
156 iphdr->protocol = iph->protocol;
157 iphdr->saddr.ip = iph->saddr;
158 iphdr->daddr.ip = iph->daddr;
159}
160
161/* This function handles filling *ip_vs_iphdr, both for IPv4 and IPv6.
162 * IPv6 requires some extra work, as finding proper header position,
163 * depend on the IPv6 extension headers.
164 */
165static inline void
166ip_vs_fill_iph_skb(int af, const struct sk_buff *skb, struct ip_vs_iphdr *iphdr)
116{ 167{
117#ifdef CONFIG_IP_VS_IPV6 168#ifdef CONFIG_IP_VS_IPV6
118 if (af == AF_INET6) { 169 if (af == AF_INET6) {
119 const struct ipv6hdr *iph = nh; 170 const struct ipv6hdr *iph =
120 iphdr->len = sizeof(struct ipv6hdr); 171 (struct ipv6hdr *)skb_network_header(skb);
121 iphdr->protocol = iph->nexthdr;
122 iphdr->saddr.in6 = iph->saddr; 172 iphdr->saddr.in6 = iph->saddr;
123 iphdr->daddr.in6 = iph->daddr; 173 iphdr->daddr.in6 = iph->daddr;
174 /* ipv6_find_hdr() updates len, flags, thoff_reasm */
175 iphdr->thoff_reasm = 0;
176 iphdr->len = 0;
177 iphdr->flags = 0;
178 iphdr->protocol = ipv6_find_hdr(skb, &iphdr->len, -1,
179 &iphdr->fragoffs,
180 &iphdr->flags);
181 /* get proto from re-assembled packet and it's offset */
182 if (skb_nfct_reasm(skb))
183 iphdr->protocol = ipv6_find_hdr(skb_nfct_reasm(skb),
184 &iphdr->thoff_reasm,
185 -1, NULL, NULL);
186
124 } else 187 } else
125#endif 188#endif
126 { 189 {
127 const struct iphdr *iph = nh; 190 const struct iphdr *iph =
128 iphdr->len = iph->ihl * 4; 191 (struct iphdr *)skb_network_header(skb);
129 iphdr->protocol = iph->protocol; 192 iphdr->len = iph->ihl * 4;
193 iphdr->fragoffs = 0;
194 iphdr->protocol = iph->protocol;
195 iphdr->saddr.ip = iph->saddr;
196 iphdr->daddr.ip = iph->daddr;
197 }
198}
199
200/* This function is a faster version of ip_vs_fill_iph_skb().
201 * Where we only populate {s,d}addr (and avoid calling ipv6_find_hdr()).
202 * This is used by the some of the ip_vs_*_schedule() functions.
203 * (Mostly done to avoid ABI breakage of external schedulers)
204 */
205static inline void
206ip_vs_fill_iph_addr_only(int af, const struct sk_buff *skb,
207 struct ip_vs_iphdr *iphdr)
208{
209#ifdef CONFIG_IP_VS_IPV6
210 if (af == AF_INET6) {
211 const struct ipv6hdr *iph =
212 (struct ipv6hdr *)skb_network_header(skb);
213 iphdr->saddr.in6 = iph->saddr;
214 iphdr->daddr.in6 = iph->daddr;
215 } else
216#endif
217 {
218 const struct iphdr *iph =
219 (struct iphdr *)skb_network_header(skb);
130 iphdr->saddr.ip = iph->saddr; 220 iphdr->saddr.ip = iph->saddr;
131 iphdr->daddr.ip = iph->daddr; 221 iphdr->daddr.ip = iph->daddr;
132 } 222 }
@@ -165,7 +255,7 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
165 int len; 255 int len;
166#ifdef CONFIG_IP_VS_IPV6 256#ifdef CONFIG_IP_VS_IPV6
167 if (af == AF_INET6) 257 if (af == AF_INET6)
168 len = snprintf(&buf[*idx], buf_len - *idx, "[%pI6]", 258 len = snprintf(&buf[*idx], buf_len - *idx, "[%pI6c]",
169 &addr->in6) + 1; 259 &addr->in6) + 1;
170 else 260 else
171#endif 261#endif
@@ -398,27 +488,26 @@ struct ip_vs_protocol {
398 488
399 int (*conn_schedule)(int af, struct sk_buff *skb, 489 int (*conn_schedule)(int af, struct sk_buff *skb,
400 struct ip_vs_proto_data *pd, 490 struct ip_vs_proto_data *pd,
401 int *verdict, struct ip_vs_conn **cpp); 491 int *verdict, struct ip_vs_conn **cpp,
492 struct ip_vs_iphdr *iph);
402 493
403 struct ip_vs_conn * 494 struct ip_vs_conn *
404 (*conn_in_get)(int af, 495 (*conn_in_get)(int af,
405 const struct sk_buff *skb, 496 const struct sk_buff *skb,
406 const struct ip_vs_iphdr *iph, 497 const struct ip_vs_iphdr *iph,
407 unsigned int proto_off,
408 int inverse); 498 int inverse);
409 499
410 struct ip_vs_conn * 500 struct ip_vs_conn *
411 (*conn_out_get)(int af, 501 (*conn_out_get)(int af,
412 const struct sk_buff *skb, 502 const struct sk_buff *skb,
413 const struct ip_vs_iphdr *iph, 503 const struct ip_vs_iphdr *iph,
414 unsigned int proto_off,
415 int inverse); 504 int inverse);
416 505
417 int (*snat_handler)(struct sk_buff *skb, 506 int (*snat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp,
418 struct ip_vs_protocol *pp, struct ip_vs_conn *cp); 507 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph);
419 508
420 int (*dnat_handler)(struct sk_buff *skb, 509 int (*dnat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp,
421 struct ip_vs_protocol *pp, struct ip_vs_conn *cp); 510 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph);
422 511
423 int (*csum_check)(int af, struct sk_buff *skb, 512 int (*csum_check)(int af, struct sk_buff *skb,
424 struct ip_vs_protocol *pp); 513 struct ip_vs_protocol *pp);
@@ -518,7 +607,7 @@ struct ip_vs_conn {
518 NF_ACCEPT can be returned when destination is local. 607 NF_ACCEPT can be returned when destination is local.
519 */ 608 */
520 int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp, 609 int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp,
521 struct ip_vs_protocol *pp); 610 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
522 611
523 /* Note: we can group the following members into a structure, 612 /* Note: we can group the following members into a structure,
524 in order to save more space, and the following members are 613 in order to save more space, and the following members are
@@ -769,13 +858,11 @@ struct ip_vs_app {
769 858
770 struct ip_vs_conn * 859 struct ip_vs_conn *
771 (*conn_in_get)(const struct sk_buff *skb, struct ip_vs_app *app, 860 (*conn_in_get)(const struct sk_buff *skb, struct ip_vs_app *app,
772 const struct iphdr *iph, unsigned int proto_off, 861 const struct iphdr *iph, int inverse);
773 int inverse);
774 862
775 struct ip_vs_conn * 863 struct ip_vs_conn *
776 (*conn_out_get)(const struct sk_buff *skb, struct ip_vs_app *app, 864 (*conn_out_get)(const struct sk_buff *skb, struct ip_vs_app *app,
777 const struct iphdr *iph, unsigned int proto_off, 865 const struct iphdr *iph, int inverse);
778 int inverse);
779 866
780 int (*state_transition)(struct ip_vs_conn *cp, int direction, 867 int (*state_transition)(struct ip_vs_conn *cp, int direction,
781 const struct sk_buff *skb, 868 const struct sk_buff *skb,
@@ -1074,14 +1161,12 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p);
1074 1161
1075struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, 1162struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
1076 const struct ip_vs_iphdr *iph, 1163 const struct ip_vs_iphdr *iph,
1077 unsigned int proto_off,
1078 int inverse); 1164 int inverse);
1079 1165
1080struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p); 1166struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p);
1081 1167
1082struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, 1168struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
1083 const struct ip_vs_iphdr *iph, 1169 const struct ip_vs_iphdr *iph,
1084 unsigned int proto_off,
1085 int inverse); 1170 int inverse);
1086 1171
1087/* put back the conn without restarting its timer */ 1172/* put back the conn without restarting its timer */
@@ -1254,9 +1339,10 @@ extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
1254extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); 1339extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
1255extern struct ip_vs_conn * 1340extern struct ip_vs_conn *
1256ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, 1341ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
1257 struct ip_vs_proto_data *pd, int *ignored); 1342 struct ip_vs_proto_data *pd, int *ignored,
1343 struct ip_vs_iphdr *iph);
1258extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, 1344extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
1259 struct ip_vs_proto_data *pd); 1345 struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph);
1260 1346
1261extern void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg); 1347extern void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg);
1262 1348
@@ -1315,33 +1401,38 @@ extern void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
1315/* 1401/*
1316 * Various IPVS packet transmitters (from ip_vs_xmit.c) 1402 * Various IPVS packet transmitters (from ip_vs_xmit.c)
1317 */ 1403 */
1318extern int ip_vs_null_xmit 1404extern int ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1319(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); 1405 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
1320extern int ip_vs_bypass_xmit 1406extern int ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1321(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); 1407 struct ip_vs_protocol *pp,
1322extern int ip_vs_nat_xmit 1408 struct ip_vs_iphdr *iph);
1323(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); 1409extern int ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1324extern int ip_vs_tunnel_xmit 1410 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
1325(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); 1411extern int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1326extern int ip_vs_dr_xmit 1412 struct ip_vs_protocol *pp,
1327(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); 1413 struct ip_vs_iphdr *iph);
1328extern int ip_vs_icmp_xmit 1414extern int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1329(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, 1415 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
1330 int offset, unsigned int hooknum); 1416extern int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1417 struct ip_vs_protocol *pp, int offset,
1418 unsigned int hooknum, struct ip_vs_iphdr *iph);
1331extern void ip_vs_dst_reset(struct ip_vs_dest *dest); 1419extern void ip_vs_dst_reset(struct ip_vs_dest *dest);
1332 1420
1333#ifdef CONFIG_IP_VS_IPV6 1421#ifdef CONFIG_IP_VS_IPV6
1334extern int ip_vs_bypass_xmit_v6 1422extern int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1335(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); 1423 struct ip_vs_protocol *pp,
1336extern int ip_vs_nat_xmit_v6 1424 struct ip_vs_iphdr *iph);
1337(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); 1425extern int ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1338extern int ip_vs_tunnel_xmit_v6 1426 struct ip_vs_protocol *pp,
1339(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); 1427 struct ip_vs_iphdr *iph);
1340extern int ip_vs_dr_xmit_v6 1428extern int ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1341(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); 1429 struct ip_vs_protocol *pp,
1342extern int ip_vs_icmp_xmit_v6 1430 struct ip_vs_iphdr *iph);
1343(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, 1431extern int ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1344 int offset, unsigned int hooknum); 1432 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
1433extern int ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1434 struct ip_vs_protocol *pp, int offset,
1435 unsigned int hooknum, struct ip_vs_iphdr *iph);
1345#endif 1436#endif
1346 1437
1347#ifdef CONFIG_SYSCTL 1438#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 8b2cffdfdd99..0c3b1670b0d1 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -28,12 +28,11 @@ if IP_VS
28config IP_VS_IPV6 28config IP_VS_IPV6
29 bool "IPv6 support for IPVS" 29 bool "IPv6 support for IPVS"
30 depends on IPV6 = y || IP_VS = IPV6 30 depends on IPV6 = y || IP_VS = IPV6
31 select IP6_NF_IPTABLES
31 ---help--- 32 ---help---
32 Add IPv6 support to IPVS. This is incomplete and might be dangerous. 33 Add IPv6 support to IPVS.
33 34
34 See http://www.mindbasket.com/ipvs for more information. 35 Say Y if unsure.
35
36 Say N if unsure.
37 36
38config IP_VS_DEBUG 37config IP_VS_DEBUG
39 bool "IP virtual server debugging" 38 bool "IP virtual server debugging"
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 1548df9a7524..30e764ad021f 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -308,13 +308,12 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
308static int 308static int
309ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, 309ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
310 const struct ip_vs_iphdr *iph, 310 const struct ip_vs_iphdr *iph,
311 unsigned int proto_off, int inverse, 311 int inverse, struct ip_vs_conn_param *p)
312 struct ip_vs_conn_param *p)
313{ 312{
314 __be16 _ports[2], *pptr; 313 __be16 _ports[2], *pptr;
315 struct net *net = skb_net(skb); 314 struct net *net = skb_net(skb);
316 315
317 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); 316 pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
318 if (pptr == NULL) 317 if (pptr == NULL)
319 return 1; 318 return 1;
320 319
@@ -329,12 +328,11 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
329 328
330struct ip_vs_conn * 329struct ip_vs_conn *
331ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, 330ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
332 const struct ip_vs_iphdr *iph, 331 const struct ip_vs_iphdr *iph, int inverse)
333 unsigned int proto_off, int inverse)
334{ 332{
335 struct ip_vs_conn_param p; 333 struct ip_vs_conn_param p;
336 334
337 if (ip_vs_conn_fill_param_proto(af, skb, iph, proto_off, inverse, &p)) 335 if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p))
338 return NULL; 336 return NULL;
339 337
340 return ip_vs_conn_in_get(&p); 338 return ip_vs_conn_in_get(&p);
@@ -432,12 +430,11 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
432 430
433struct ip_vs_conn * 431struct ip_vs_conn *
434ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, 432ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
435 const struct ip_vs_iphdr *iph, 433 const struct ip_vs_iphdr *iph, int inverse)
436 unsigned int proto_off, int inverse)
437{ 434{
438 struct ip_vs_conn_param p; 435 struct ip_vs_conn_param p;
439 436
440 if (ip_vs_conn_fill_param_proto(af, skb, iph, proto_off, inverse, &p)) 437 if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p))
441 return NULL; 438 return NULL;
442 439
443 return ip_vs_conn_out_get(&p); 440 return ip_vs_conn_out_get(&p);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 58918e20f9d5..fb45640dc1fb 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -222,11 +222,10 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
222 */ 222 */
223static struct ip_vs_conn * 223static struct ip_vs_conn *
224ip_vs_sched_persist(struct ip_vs_service *svc, 224ip_vs_sched_persist(struct ip_vs_service *svc,
225 struct sk_buff *skb, 225 struct sk_buff *skb, __be16 src_port, __be16 dst_port,
226 __be16 src_port, __be16 dst_port, int *ignored) 226 int *ignored, struct ip_vs_iphdr *iph)
227{ 227{
228 struct ip_vs_conn *cp = NULL; 228 struct ip_vs_conn *cp = NULL;
229 struct ip_vs_iphdr iph;
230 struct ip_vs_dest *dest; 229 struct ip_vs_dest *dest;
231 struct ip_vs_conn *ct; 230 struct ip_vs_conn *ct;
232 __be16 dport = 0; /* destination port to forward */ 231 __be16 dport = 0; /* destination port to forward */
@@ -236,20 +235,18 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
236 union nf_inet_addr snet; /* source network of the client, 235 union nf_inet_addr snet; /* source network of the client,
237 after masking */ 236 after masking */
238 237
239 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
240
241 /* Mask saddr with the netmask to adjust template granularity */ 238 /* Mask saddr with the netmask to adjust template granularity */
242#ifdef CONFIG_IP_VS_IPV6 239#ifdef CONFIG_IP_VS_IPV6
243 if (svc->af == AF_INET6) 240 if (svc->af == AF_INET6)
244 ipv6_addr_prefix(&snet.in6, &iph.saddr.in6, svc->netmask); 241 ipv6_addr_prefix(&snet.in6, &iph->saddr.in6, svc->netmask);
245 else 242 else
246#endif 243#endif
247 snet.ip = iph.saddr.ip & svc->netmask; 244 snet.ip = iph->saddr.ip & svc->netmask;
248 245
249 IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " 246 IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
250 "mnet %s\n", 247 "mnet %s\n",
251 IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port), 248 IP_VS_DBG_ADDR(svc->af, &iph->saddr), ntohs(src_port),
252 IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port), 249 IP_VS_DBG_ADDR(svc->af, &iph->daddr), ntohs(dst_port),
253 IP_VS_DBG_ADDR(svc->af, &snet)); 250 IP_VS_DBG_ADDR(svc->af, &snet));
254 251
255 /* 252 /*
@@ -266,8 +263,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
266 * is created for other persistent services. 263 * is created for other persistent services.
267 */ 264 */
268 { 265 {
269 int protocol = iph.protocol; 266 int protocol = iph->protocol;
270 const union nf_inet_addr *vaddr = &iph.daddr; 267 const union nf_inet_addr *vaddr = &iph->daddr;
271 __be16 vport = 0; 268 __be16 vport = 0;
272 269
273 if (dst_port == svc->port) { 270 if (dst_port == svc->port) {
@@ -342,14 +339,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
342 dport = dest->port; 339 dport = dest->port;
343 340
344 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET 341 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
345 && iph.protocol == IPPROTO_UDP)? 342 && iph->protocol == IPPROTO_UDP) ?
346 IP_VS_CONN_F_ONE_PACKET : 0; 343 IP_VS_CONN_F_ONE_PACKET : 0;
347 344
348 /* 345 /*
349 * Create a new connection according to the template 346 * Create a new connection according to the template
350 */ 347 */
351 ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr, 348 ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr,
352 src_port, &iph.daddr, dst_port, &param); 349 src_port, &iph->daddr, dst_port, &param);
353 350
354 cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark); 351 cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark);
355 if (cp == NULL) { 352 if (cp == NULL) {
@@ -392,18 +389,20 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
392 */ 389 */
393struct ip_vs_conn * 390struct ip_vs_conn *
394ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, 391ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
395 struct ip_vs_proto_data *pd, int *ignored) 392 struct ip_vs_proto_data *pd, int *ignored,
393 struct ip_vs_iphdr *iph)
396{ 394{
397 struct ip_vs_protocol *pp = pd->pp; 395 struct ip_vs_protocol *pp = pd->pp;
398 struct ip_vs_conn *cp = NULL; 396 struct ip_vs_conn *cp = NULL;
399 struct ip_vs_iphdr iph;
400 struct ip_vs_dest *dest; 397 struct ip_vs_dest *dest;
401 __be16 _ports[2], *pptr; 398 __be16 _ports[2], *pptr;
402 unsigned int flags; 399 unsigned int flags;
403 400
404 *ignored = 1; 401 *ignored = 1;
405 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 402 /*
406 pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); 403 * IPv6 frags, only the first hit here.
404 */
405 pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
407 if (pptr == NULL) 406 if (pptr == NULL)
408 return NULL; 407 return NULL;
409 408
@@ -423,7 +422,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
423 * Do not schedule replies from local real server. 422 * Do not schedule replies from local real server.
424 */ 423 */
425 if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && 424 if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
426 (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) { 425 (cp = pp->conn_in_get(svc->af, skb, iph, 1))) {
427 IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, 426 IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
428 "Not scheduling reply for existing connection"); 427 "Not scheduling reply for existing connection");
429 __ip_vs_conn_put(cp); 428 __ip_vs_conn_put(cp);
@@ -434,7 +433,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
434 * Persistent service 433 * Persistent service
435 */ 434 */
436 if (svc->flags & IP_VS_SVC_F_PERSISTENT) 435 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
437 return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored); 436 return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored,
437 iph);
438 438
439 *ignored = 0; 439 *ignored = 0;
440 440
@@ -456,7 +456,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
456 } 456 }
457 457
458 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET 458 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
459 && iph.protocol == IPPROTO_UDP)? 459 && iph->protocol == IPPROTO_UDP) ?
460 IP_VS_CONN_F_ONE_PACKET : 0; 460 IP_VS_CONN_F_ONE_PACKET : 0;
461 461
462 /* 462 /*
@@ -465,9 +465,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
465 { 465 {
466 struct ip_vs_conn_param p; 466 struct ip_vs_conn_param p;
467 467
468 ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, 468 ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
469 &iph.saddr, pptr[0], &iph.daddr, pptr[1], 469 &iph->saddr, pptr[0], &iph->daddr,
470 &p); 470 pptr[1], &p);
471 cp = ip_vs_conn_new(&p, &dest->addr, 471 cp = ip_vs_conn_new(&p, &dest->addr,
472 dest->port ? dest->port : pptr[1], 472 dest->port ? dest->port : pptr[1],
473 flags, dest, skb->mark); 473 flags, dest, skb->mark);
@@ -496,19 +496,16 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
496 * no destination is available for a new connection. 496 * no destination is available for a new connection.
497 */ 497 */
498int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, 498int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
499 struct ip_vs_proto_data *pd) 499 struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph)
500{ 500{
501 __be16 _ports[2], *pptr; 501 __be16 _ports[2], *pptr;
502 struct ip_vs_iphdr iph;
503#ifdef CONFIG_SYSCTL 502#ifdef CONFIG_SYSCTL
504 struct net *net; 503 struct net *net;
505 struct netns_ipvs *ipvs; 504 struct netns_ipvs *ipvs;
506 int unicast; 505 int unicast;
507#endif 506#endif
508 507
509 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 508 pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
510
511 pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
512 if (pptr == NULL) { 509 if (pptr == NULL) {
513 ip_vs_service_put(svc); 510 ip_vs_service_put(svc);
514 return NF_DROP; 511 return NF_DROP;
@@ -519,10 +516,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
519 516
520#ifdef CONFIG_IP_VS_IPV6 517#ifdef CONFIG_IP_VS_IPV6
521 if (svc->af == AF_INET6) 518 if (svc->af == AF_INET6)
522 unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST; 519 unicast = ipv6_addr_type(&iph->daddr.in6) & IPV6_ADDR_UNICAST;
523 else 520 else
524#endif 521#endif
525 unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST); 522 unicast = (inet_addr_type(net, iph->daddr.ip) == RTN_UNICAST);
526 523
527 /* if it is fwmark-based service, the cache_bypass sysctl is up 524 /* if it is fwmark-based service, the cache_bypass sysctl is up
528 and the destination is a non-local unicast, then create 525 and the destination is a non-local unicast, then create
@@ -532,7 +529,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
532 int ret; 529 int ret;
533 struct ip_vs_conn *cp; 530 struct ip_vs_conn *cp;
534 unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && 531 unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
535 iph.protocol == IPPROTO_UDP)? 532 iph->protocol == IPPROTO_UDP) ?
536 IP_VS_CONN_F_ONE_PACKET : 0; 533 IP_VS_CONN_F_ONE_PACKET : 0;
537 union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; 534 union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } };
538 535
@@ -542,9 +539,9 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
542 IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); 539 IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
543 { 540 {
544 struct ip_vs_conn_param p; 541 struct ip_vs_conn_param p;
545 ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, 542 ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
546 &iph.saddr, pptr[0], 543 &iph->saddr, pptr[0],
547 &iph.daddr, pptr[1], &p); 544 &iph->daddr, pptr[1], &p);
548 cp = ip_vs_conn_new(&p, &daddr, 0, 545 cp = ip_vs_conn_new(&p, &daddr, 0,
549 IP_VS_CONN_F_BYPASS | flags, 546 IP_VS_CONN_F_BYPASS | flags,
550 NULL, skb->mark); 547 NULL, skb->mark);
@@ -559,7 +556,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
559 ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); 556 ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
560 557
561 /* transmit the first SYN packet */ 558 /* transmit the first SYN packet */
562 ret = cp->packet_xmit(skb, cp, pd->pp); 559 ret = cp->packet_xmit(skb, cp, pd->pp, iph);
563 /* do not touch skb anymore */ 560 /* do not touch skb anymore */
564 561
565 atomic_inc(&cp->in_pkts); 562 atomic_inc(&cp->in_pkts);
@@ -654,14 +651,6 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
654 return err; 651 return err;
655} 652}
656 653
657#ifdef CONFIG_IP_VS_IPV6
658static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user)
659{
660 /* TODO IPv6: Find out what to do here for IPv6 */
661 return 0;
662}
663#endif
664
665static int ip_vs_route_me_harder(int af, struct sk_buff *skb) 654static int ip_vs_route_me_harder(int af, struct sk_buff *skb)
666{ 655{
667#ifdef CONFIG_IP_VS_IPV6 656#ifdef CONFIG_IP_VS_IPV6
@@ -732,10 +721,19 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
732 struct ip_vs_conn *cp, int inout) 721 struct ip_vs_conn *cp, int inout)
733{ 722{
734 struct ipv6hdr *iph = ipv6_hdr(skb); 723 struct ipv6hdr *iph = ipv6_hdr(skb);
735 unsigned int icmp_offset = sizeof(struct ipv6hdr); 724 unsigned int icmp_offset = 0;
736 struct icmp6hdr *icmph = (struct icmp6hdr *)(skb_network_header(skb) + 725 unsigned int offs = 0; /* header offset*/
737 icmp_offset); 726 int protocol;
738 struct ipv6hdr *ciph = (struct ipv6hdr *)(icmph + 1); 727 struct icmp6hdr *icmph;
728 struct ipv6hdr *ciph;
729 unsigned short fragoffs;
730
731 ipv6_find_hdr(skb, &icmp_offset, IPPROTO_ICMPV6, &fragoffs, NULL);
732 icmph = (struct icmp6hdr *)(skb_network_header(skb) + icmp_offset);
733 offs = icmp_offset + sizeof(struct icmp6hdr);
734 ciph = (struct ipv6hdr *)(skb_network_header(skb) + offs);
735
736 protocol = ipv6_find_hdr(skb, &offs, -1, &fragoffs, NULL);
739 737
740 if (inout) { 738 if (inout) {
741 iph->saddr = cp->vaddr.in6; 739 iph->saddr = cp->vaddr.in6;
@@ -746,10 +744,13 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
746 } 744 }
747 745
748 /* the TCP/UDP/SCTP port */ 746 /* the TCP/UDP/SCTP port */
749 if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr || 747 if (!fragoffs && (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol ||
750 IPPROTO_SCTP == ciph->nexthdr) { 748 IPPROTO_SCTP == protocol)) {
751 __be16 *ports = (void *)ciph + sizeof(struct ipv6hdr); 749 __be16 *ports = (void *)(skb_network_header(skb) + offs);
752 750
751 IP_VS_DBG(11, "%s() changed port %d to %d\n", __func__,
752 ntohs(inout ? ports[1] : ports[0]),
753 ntohs(inout ? cp->vport : cp->dport));
753 if (inout) 754 if (inout)
754 ports[1] = cp->vport; 755 ports[1] = cp->vport;
755 else 756 else
@@ -898,51 +899,35 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
898 IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset, 899 IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
899 "Checking outgoing ICMP for"); 900 "Checking outgoing ICMP for");
900 901
901 offset += cih->ihl * 4; 902 ip_vs_fill_ip4hdr(cih, &ciph);
902 903 ciph.len += offset;
903 ip_vs_fill_iphdr(AF_INET, cih, &ciph);
904 /* The embedded headers contain source and dest in reverse order */ 904 /* The embedded headers contain source and dest in reverse order */
905 cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1); 905 cp = pp->conn_out_get(AF_INET, skb, &ciph, 1);
906 if (!cp) 906 if (!cp)
907 return NF_ACCEPT; 907 return NF_ACCEPT;
908 908
909 snet.ip = iph->saddr; 909 snet.ip = iph->saddr;
910 return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp, 910 return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp,
911 pp, offset, ihl); 911 pp, ciph.len, ihl);
912} 912}
913 913
914#ifdef CONFIG_IP_VS_IPV6 914#ifdef CONFIG_IP_VS_IPV6
915static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, 915static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
916 unsigned int hooknum) 916 unsigned int hooknum, struct ip_vs_iphdr *ipvsh)
917{ 917{
918 struct ipv6hdr *iph;
919 struct icmp6hdr _icmph, *ic; 918 struct icmp6hdr _icmph, *ic;
920 struct ipv6hdr _ciph, *cih; /* The ip header contained 919 struct ipv6hdr _ip6h, *ip6h; /* The ip header contained within ICMP */
921 within the ICMP */ 920 struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */
922 struct ip_vs_iphdr ciph;
923 struct ip_vs_conn *cp; 921 struct ip_vs_conn *cp;
924 struct ip_vs_protocol *pp; 922 struct ip_vs_protocol *pp;
925 unsigned int offset;
926 union nf_inet_addr snet; 923 union nf_inet_addr snet;
924 unsigned int writable;
927 925
928 *related = 1; 926 *related = 1;
929 927 ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph, ipvsh);
930 /* reassemble IP fragments */
931 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
932 if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum)))
933 return NF_STOLEN;
934 }
935
936 iph = ipv6_hdr(skb);
937 offset = sizeof(struct ipv6hdr);
938 ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
939 if (ic == NULL) 928 if (ic == NULL)
940 return NF_DROP; 929 return NF_DROP;
941 930
942 IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) %pI6->%pI6\n",
943 ic->icmp6_type, ntohs(icmpv6_id(ic)),
944 &iph->saddr, &iph->daddr);
945
946 /* 931 /*
947 * Work through seeing if this is for us. 932 * Work through seeing if this is for us.
948 * These checks are supposed to be in an order that means easy 933 * These checks are supposed to be in an order that means easy
@@ -950,42 +935,45 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
950 * this means that some packets will manage to get a long way 935 * this means that some packets will manage to get a long way
951 * down this stack and then be rejected, but that's life. 936 * down this stack and then be rejected, but that's life.
952 */ 937 */
953 if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) && 938 if (ic->icmp6_type & ICMPV6_INFOMSG_MASK) {
954 (ic->icmp6_type != ICMPV6_PKT_TOOBIG) &&
955 (ic->icmp6_type != ICMPV6_TIME_EXCEED)) {
956 *related = 0; 939 *related = 0;
957 return NF_ACCEPT; 940 return NF_ACCEPT;
958 } 941 }
942 /* Fragment header that is before ICMP header tells us that:
943 * it's not an error message since they can't be fragmented.
944 */
945 if (ipvsh->flags & IP6T_FH_F_FRAG)
946 return NF_DROP;
947
948 IP_VS_DBG(8, "Outgoing ICMPv6 (%d,%d) %pI6c->%pI6c\n",
949 ic->icmp6_type, ntohs(icmpv6_id(ic)),
950 &ipvsh->saddr, &ipvsh->daddr);
959 951
960 /* Now find the contained IP header */ 952 /* Now find the contained IP header */
961 offset += sizeof(_icmph); 953 ciph.len = ipvsh->len + sizeof(_icmph);
962 cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); 954 ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h);
963 if (cih == NULL) 955 if (ip6h == NULL)
964 return NF_ACCEPT; /* The packet looks wrong, ignore */ 956 return NF_ACCEPT; /* The packet looks wrong, ignore */
965 957 ciph.saddr.in6 = ip6h->saddr; /* conn_out_get() handles reverse order */
966 pp = ip_vs_proto_get(cih->nexthdr); 958 ciph.daddr.in6 = ip6h->daddr;
959 /* skip possible IPv6 exthdrs of contained IPv6 packet */
960 ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL);
961 if (ciph.protocol < 0)
962 return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */
963
964 pp = ip_vs_proto_get(ciph.protocol);
967 if (!pp) 965 if (!pp)
968 return NF_ACCEPT; 966 return NF_ACCEPT;
969 967
970 /* Is the embedded protocol header present? */
971 /* TODO: we don't support fragmentation at the moment anyways */
972 if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
973 return NF_ACCEPT;
974
975 IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
976 "Checking outgoing ICMPv6 for");
977
978 offset += sizeof(struct ipv6hdr);
979
980 ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
981 /* The embedded headers contain source and dest in reverse order */ 968 /* The embedded headers contain source and dest in reverse order */
982 cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1); 969 cp = pp->conn_out_get(AF_INET6, skb, &ciph, 1);
983 if (!cp) 970 if (!cp)
984 return NF_ACCEPT; 971 return NF_ACCEPT;
985 972
986 snet.in6 = iph->saddr; 973 snet.in6 = ciph.saddr.in6;
987 return handle_response_icmp(AF_INET6, skb, &snet, cih->nexthdr, cp, 974 writable = ciph.len;
988 pp, offset, sizeof(struct ipv6hdr)); 975 return handle_response_icmp(AF_INET6, skb, &snet, ciph.protocol, cp,
976 pp, writable, sizeof(struct ipv6hdr));
989} 977}
990#endif 978#endif
991 979
@@ -1018,17 +1006,17 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
1018 */ 1006 */
1019static unsigned int 1007static unsigned int
1020handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 1008handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
1021 struct ip_vs_conn *cp, int ihl) 1009 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
1022{ 1010{
1023 struct ip_vs_protocol *pp = pd->pp; 1011 struct ip_vs_protocol *pp = pd->pp;
1024 1012
1025 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); 1013 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
1026 1014
1027 if (!skb_make_writable(skb, ihl)) 1015 if (!skb_make_writable(skb, iph->len))
1028 goto drop; 1016 goto drop;
1029 1017
1030 /* mangle the packet */ 1018 /* mangle the packet */
1031 if (pp->snat_handler && !pp->snat_handler(skb, pp, cp)) 1019 if (pp->snat_handler && !pp->snat_handler(skb, pp, cp, iph))
1032 goto drop; 1020 goto drop;
1033 1021
1034#ifdef CONFIG_IP_VS_IPV6 1022#ifdef CONFIG_IP_VS_IPV6
@@ -1115,17 +1103,22 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1115 if (!net_ipvs(net)->enable) 1103 if (!net_ipvs(net)->enable)
1116 return NF_ACCEPT; 1104 return NF_ACCEPT;
1117 1105
1118 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1106 ip_vs_fill_iph_skb(af, skb, &iph);
1119#ifdef CONFIG_IP_VS_IPV6 1107#ifdef CONFIG_IP_VS_IPV6
1120 if (af == AF_INET6) { 1108 if (af == AF_INET6) {
1109 if (!iph.fragoffs && skb_nfct_reasm(skb)) {
1110 struct sk_buff *reasm = skb_nfct_reasm(skb);
1111 /* Save fw mark for coming frags */
1112 reasm->ipvs_property = 1;
1113 reasm->mark = skb->mark;
1114 }
1121 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { 1115 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
1122 int related; 1116 int related;
1123 int verdict = ip_vs_out_icmp_v6(skb, &related, 1117 int verdict = ip_vs_out_icmp_v6(skb, &related,
1124 hooknum); 1118 hooknum, &iph);
1125 1119
1126 if (related) 1120 if (related)
1127 return verdict; 1121 return verdict;
1128 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1129 } 1122 }
1130 } else 1123 } else
1131#endif 1124#endif
@@ -1135,7 +1128,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1135 1128
1136 if (related) 1129 if (related)
1137 return verdict; 1130 return verdict;
1138 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1139 } 1131 }
1140 1132
1141 pd = ip_vs_proto_data_get(net, iph.protocol); 1133 pd = ip_vs_proto_data_get(net, iph.protocol);
@@ -1145,39 +1137,31 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1145 1137
1146 /* reassemble IP fragments */ 1138 /* reassemble IP fragments */
1147#ifdef CONFIG_IP_VS_IPV6 1139#ifdef CONFIG_IP_VS_IPV6
1148 if (af == AF_INET6) { 1140 if (af == AF_INET)
1149 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
1150 if (ip_vs_gather_frags_v6(skb,
1151 ip_vs_defrag_user(hooknum)))
1152 return NF_STOLEN;
1153 }
1154
1155 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1156 } else
1157#endif 1141#endif
1158 if (unlikely(ip_is_fragment(ip_hdr(skb)) && !pp->dont_defrag)) { 1142 if (unlikely(ip_is_fragment(ip_hdr(skb)) && !pp->dont_defrag)) {
1159 if (ip_vs_gather_frags(skb, 1143 if (ip_vs_gather_frags(skb,
1160 ip_vs_defrag_user(hooknum))) 1144 ip_vs_defrag_user(hooknum)))
1161 return NF_STOLEN; 1145 return NF_STOLEN;
1162 1146
1163 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1147 ip_vs_fill_ip4hdr(skb_network_header(skb), &iph);
1164 } 1148 }
1165 1149
1166 /* 1150 /*
1167 * Check if the packet belongs to an existing entry 1151 * Check if the packet belongs to an existing entry
1168 */ 1152 */
1169 cp = pp->conn_out_get(af, skb, &iph, iph.len, 0); 1153 cp = pp->conn_out_get(af, skb, &iph, 0);
1170 1154
1171 if (likely(cp)) 1155 if (likely(cp))
1172 return handle_response(af, skb, pd, cp, iph.len); 1156 return handle_response(af, skb, pd, cp, &iph);
1173 if (sysctl_nat_icmp_send(net) && 1157 if (sysctl_nat_icmp_send(net) &&
1174 (pp->protocol == IPPROTO_TCP || 1158 (pp->protocol == IPPROTO_TCP ||
1175 pp->protocol == IPPROTO_UDP || 1159 pp->protocol == IPPROTO_UDP ||
1176 pp->protocol == IPPROTO_SCTP)) { 1160 pp->protocol == IPPROTO_SCTP)) {
1177 __be16 _ports[2], *pptr; 1161 __be16 _ports[2], *pptr;
1178 1162
1179 pptr = skb_header_pointer(skb, iph.len, 1163 pptr = frag_safe_skb_hp(skb, iph.len,
1180 sizeof(_ports), _ports); 1164 sizeof(_ports), _ports, &iph);
1181 if (pptr == NULL) 1165 if (pptr == NULL)
1182 return NF_ACCEPT; /* Not for me */ 1166 return NF_ACCEPT; /* Not for me */
1183 if (ip_vs_lookup_real_service(net, af, iph.protocol, 1167 if (ip_vs_lookup_real_service(net, af, iph.protocol,
@@ -1375,13 +1359,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1375 "Checking incoming ICMP for"); 1359 "Checking incoming ICMP for");
1376 1360
1377 offset2 = offset; 1361 offset2 = offset;
1378 offset += cih->ihl * 4; 1362 ip_vs_fill_ip4hdr(cih, &ciph);
1379 1363 ciph.len += offset;
1380 ip_vs_fill_iphdr(AF_INET, cih, &ciph); 1364 offset = ciph.len;
1381 /* The embedded headers contain source and dest in reverse order. 1365 /* The embedded headers contain source and dest in reverse order.
1382 * For IPIP this is error for request, not for reply. 1366 * For IPIP this is error for request, not for reply.
1383 */ 1367 */
1384 cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, ipip ? 0 : 1); 1368 cp = pp->conn_in_get(AF_INET, skb, &ciph, ipip ? 0 : 1);
1385 if (!cp) 1369 if (!cp)
1386 return NF_ACCEPT; 1370 return NF_ACCEPT;
1387 1371
@@ -1450,7 +1434,7 @@ ignore_ipip:
1450 ip_vs_in_stats(cp, skb); 1434 ip_vs_in_stats(cp, skb);
1451 if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) 1435 if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
1452 offset += 2 * sizeof(__u16); 1436 offset += 2 * sizeof(__u16);
1453 verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum); 1437 verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum, &ciph);
1454 1438
1455out: 1439out:
1456 __ip_vs_conn_put(cp); 1440 __ip_vs_conn_put(cp);
@@ -1459,38 +1443,24 @@ out:
1459} 1443}
1460 1444
1461#ifdef CONFIG_IP_VS_IPV6 1445#ifdef CONFIG_IP_VS_IPV6
1462static int 1446static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
1463ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) 1447 unsigned int hooknum, struct ip_vs_iphdr *iph)
1464{ 1448{
1465 struct net *net = NULL; 1449 struct net *net = NULL;
1466 struct ipv6hdr *iph; 1450 struct ipv6hdr _ip6h, *ip6h;
1467 struct icmp6hdr _icmph, *ic; 1451 struct icmp6hdr _icmph, *ic;
1468 struct ipv6hdr _ciph, *cih; /* The ip header contained 1452 struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */
1469 within the ICMP */
1470 struct ip_vs_iphdr ciph;
1471 struct ip_vs_conn *cp; 1453 struct ip_vs_conn *cp;
1472 struct ip_vs_protocol *pp; 1454 struct ip_vs_protocol *pp;
1473 struct ip_vs_proto_data *pd; 1455 struct ip_vs_proto_data *pd;
1474 unsigned int offset, verdict; 1456 unsigned int offs_ciph, writable, verdict;
1475 1457
1476 *related = 1; 1458 *related = 1;
1477 1459
1478 /* reassemble IP fragments */ 1460 ic = frag_safe_skb_hp(skb, iph->len, sizeof(_icmph), &_icmph, iph);
1479 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
1480 if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum)))
1481 return NF_STOLEN;
1482 }
1483
1484 iph = ipv6_hdr(skb);
1485 offset = sizeof(struct ipv6hdr);
1486 ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
1487 if (ic == NULL) 1461 if (ic == NULL)
1488 return NF_DROP; 1462 return NF_DROP;
1489 1463
1490 IP_VS_DBG(12, "Incoming ICMPv6 (%d,%d) %pI6->%pI6\n",
1491 ic->icmp6_type, ntohs(icmpv6_id(ic)),
1492 &iph->saddr, &iph->daddr);
1493
1494 /* 1464 /*
1495 * Work through seeing if this is for us. 1465 * Work through seeing if this is for us.
1496 * These checks are supposed to be in an order that means easy 1466 * These checks are supposed to be in an order that means easy
@@ -1498,47 +1468,71 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1498 * this means that some packets will manage to get a long way 1468 * this means that some packets will manage to get a long way
1499 * down this stack and then be rejected, but that's life. 1469 * down this stack and then be rejected, but that's life.
1500 */ 1470 */
1501 if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) && 1471 if (ic->icmp6_type & ICMPV6_INFOMSG_MASK) {
1502 (ic->icmp6_type != ICMPV6_PKT_TOOBIG) &&
1503 (ic->icmp6_type != ICMPV6_TIME_EXCEED)) {
1504 *related = 0; 1472 *related = 0;
1505 return NF_ACCEPT; 1473 return NF_ACCEPT;
1506 } 1474 }
1475 /* Fragment header that is before ICMP header tells us that:
1476 * it's not an error message since they can't be fragmented.
1477 */
1478 if (iph->flags & IP6T_FH_F_FRAG)
1479 return NF_DROP;
1480
1481 IP_VS_DBG(8, "Incoming ICMPv6 (%d,%d) %pI6c->%pI6c\n",
1482 ic->icmp6_type, ntohs(icmpv6_id(ic)),
1483 &iph->saddr, &iph->daddr);
1507 1484
1508 /* Now find the contained IP header */ 1485 /* Now find the contained IP header */
1509 offset += sizeof(_icmph); 1486 ciph.len = iph->len + sizeof(_icmph);
1510 cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); 1487 offs_ciph = ciph.len; /* Save ip header offset */
1511 if (cih == NULL) 1488 ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h);
1489 if (ip6h == NULL)
1512 return NF_ACCEPT; /* The packet looks wrong, ignore */ 1490 return NF_ACCEPT; /* The packet looks wrong, ignore */
1491 ciph.saddr.in6 = ip6h->saddr; /* conn_in_get() handles reverse order */
1492 ciph.daddr.in6 = ip6h->daddr;
1493 /* skip possible IPv6 exthdrs of contained IPv6 packet */
1494 ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL);
1495 if (ciph.protocol < 0)
1496 return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */
1513 1497
1514 net = skb_net(skb); 1498 net = skb_net(skb);
1515 pd = ip_vs_proto_data_get(net, cih->nexthdr); 1499 pd = ip_vs_proto_data_get(net, ciph.protocol);
1516 if (!pd) 1500 if (!pd)
1517 return NF_ACCEPT; 1501 return NF_ACCEPT;
1518 pp = pd->pp; 1502 pp = pd->pp;
1519 1503
1520 /* Is the embedded protocol header present? */ 1504 /* Cannot handle fragmented embedded protocol */
1521 /* TODO: we don't support fragmentation at the moment anyways */ 1505 if (ciph.fragoffs)
1522 if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
1523 return NF_ACCEPT; 1506 return NF_ACCEPT;
1524 1507
1525 IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset, 1508 IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offs_ciph,
1526 "Checking incoming ICMPv6 for"); 1509 "Checking incoming ICMPv6 for");
1527 1510
1528 offset += sizeof(struct ipv6hdr); 1511 /* The embedded headers contain source and dest in reverse order
1512 * if not from localhost
1513 */
1514 cp = pp->conn_in_get(AF_INET6, skb, &ciph,
1515 (hooknum == NF_INET_LOCAL_OUT) ? 0 : 1);
1529 1516
1530 ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
1531 /* The embedded headers contain source and dest in reverse order */
1532 cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1);
1533 if (!cp) 1517 if (!cp)
1534 return NF_ACCEPT; 1518 return NF_ACCEPT;
1519 /* VS/TUN, VS/DR and LOCALNODE just let it go */
1520 if ((hooknum == NF_INET_LOCAL_OUT) &&
1521 (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)) {
1522 __ip_vs_conn_put(cp);
1523 return NF_ACCEPT;
1524 }
1535 1525
1536 /* do the statistics and put it back */ 1526 /* do the statistics and put it back */
1537 ip_vs_in_stats(cp, skb); 1527 ip_vs_in_stats(cp, skb);
1538 if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr || 1528
1539 IPPROTO_SCTP == cih->nexthdr) 1529 /* Need to mangle contained IPv6 header in ICMPv6 packet */
1540 offset += 2 * sizeof(__u16); 1530 writable = ciph.len;
1541 verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset, hooknum); 1531 if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol ||
1532 IPPROTO_SCTP == ciph.protocol)
1533 writable += 2 * sizeof(__u16); /* Also mangle ports */
1534
1535 verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, writable, hooknum, &ciph);
1542 1536
1543 __ip_vs_conn_put(cp); 1537 __ip_vs_conn_put(cp);
1544 1538
@@ -1574,7 +1568,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1574 if (unlikely((skb->pkt_type != PACKET_HOST && 1568 if (unlikely((skb->pkt_type != PACKET_HOST &&
1575 hooknum != NF_INET_LOCAL_OUT) || 1569 hooknum != NF_INET_LOCAL_OUT) ||
1576 !skb_dst(skb))) { 1570 !skb_dst(skb))) {
1577 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1571 ip_vs_fill_iph_skb(af, skb, &iph);
1578 IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s" 1572 IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s"
1579 " ignored in hook %u\n", 1573 " ignored in hook %u\n",
1580 skb->pkt_type, iph.protocol, 1574 skb->pkt_type, iph.protocol,
@@ -1586,7 +1580,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1586 if (!net_ipvs(net)->enable) 1580 if (!net_ipvs(net)->enable)
1587 return NF_ACCEPT; 1581 return NF_ACCEPT;
1588 1582
1589 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1583 ip_vs_fill_iph_skb(af, skb, &iph);
1590 1584
1591 /* Bad... Do not break raw sockets */ 1585 /* Bad... Do not break raw sockets */
1592 if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT && 1586 if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
@@ -1600,13 +1594,19 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1600 1594
1601#ifdef CONFIG_IP_VS_IPV6 1595#ifdef CONFIG_IP_VS_IPV6
1602 if (af == AF_INET6) { 1596 if (af == AF_INET6) {
1597 if (!iph.fragoffs && skb_nfct_reasm(skb)) {
1598 struct sk_buff *reasm = skb_nfct_reasm(skb);
1599 /* Save fw mark for coming frags. */
1600 reasm->ipvs_property = 1;
1601 reasm->mark = skb->mark;
1602 }
1603 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { 1603 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
1604 int related; 1604 int related;
1605 int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum); 1605 int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum,
1606 &iph);
1606 1607
1607 if (related) 1608 if (related)
1608 return verdict; 1609 return verdict;
1609 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1610 } 1610 }
1611 } else 1611 } else
1612#endif 1612#endif
@@ -1616,7 +1616,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1616 1616
1617 if (related) 1617 if (related)
1618 return verdict; 1618 return verdict;
1619 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1620 } 1619 }
1621 1620
1622 /* Protocol supported? */ 1621 /* Protocol supported? */
@@ -1627,12 +1626,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1627 /* 1626 /*
1628 * Check if the packet belongs to an existing connection entry 1627 * Check if the packet belongs to an existing connection entry
1629 */ 1628 */
1630 cp = pp->conn_in_get(af, skb, &iph, iph.len, 0); 1629 cp = pp->conn_in_get(af, skb, &iph, 0);
1631 1630 if (unlikely(!cp) && !iph.fragoffs) {
1632 if (unlikely(!cp)) { 1631 /* No (second) fragments need to enter here, as nf_defrag_ipv6
1632 * replayed fragment zero will already have created the cp
1633 */
1633 int v; 1634 int v;
1634 1635
1635 if (!pp->conn_schedule(af, skb, pd, &v, &cp)) 1636 /* Schedule and create new connection entry into &cp */
1637 if (!pp->conn_schedule(af, skb, pd, &v, &cp, &iph))
1636 return v; 1638 return v;
1637 } 1639 }
1638 1640
@@ -1640,6 +1642,14 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1640 /* sorry, all this trouble for a no-hit :) */ 1642 /* sorry, all this trouble for a no-hit :) */
1641 IP_VS_DBG_PKT(12, af, pp, skb, 0, 1643 IP_VS_DBG_PKT(12, af, pp, skb, 0,
1642 "ip_vs_in: packet continues traversal as normal"); 1644 "ip_vs_in: packet continues traversal as normal");
1645 if (iph.fragoffs && !skb_nfct_reasm(skb)) {
1646 /* Fragment that couldn't be mapped to a conn entry
1647 * and don't have any pointer to a reasm skb
1648 * is missing module nf_defrag_ipv6
1649 */
1650 IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
1651 IP_VS_DBG_PKT(7, af, pp, skb, 0, "unhandled fragment");
1652 }
1643 return NF_ACCEPT; 1653 return NF_ACCEPT;
1644 } 1654 }
1645 1655
@@ -1662,7 +1672,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1662 ip_vs_in_stats(cp, skb); 1672 ip_vs_in_stats(cp, skb);
1663 ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); 1673 ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
1664 if (cp->packet_xmit) 1674 if (cp->packet_xmit)
1665 ret = cp->packet_xmit(skb, cp, pp); 1675 ret = cp->packet_xmit(skb, cp, pp, &iph);
1666 /* do not touch skb anymore */ 1676 /* do not touch skb anymore */
1667 else { 1677 else {
1668 IP_VS_DBG_RL("warning: packet_xmit is null"); 1678 IP_VS_DBG_RL("warning: packet_xmit is null");
@@ -1724,6 +1734,38 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
1724#ifdef CONFIG_IP_VS_IPV6 1734#ifdef CONFIG_IP_VS_IPV6
1725 1735
1726/* 1736/*
1737 * AF_INET6 fragment handling
1738 * Copy info from first fragment, to the rest of them.
1739 */
1740static unsigned int
1741ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb,
1742 const struct net_device *in,
1743 const struct net_device *out,
1744 int (*okfn)(struct sk_buff *))
1745{
1746 struct sk_buff *reasm = skb_nfct_reasm(skb);
1747 struct net *net;
1748
1749 /* Skip if not a "replay" from nf_ct_frag6_output or first fragment.
1750 * ipvs_property is set when checking first fragment
1751 * in ip_vs_in() and ip_vs_out().
1752 */
1753 if (reasm)
1754 IP_VS_DBG(2, "Fragment recv prop:%d\n", reasm->ipvs_property);
1755 if (!reasm || !reasm->ipvs_property)
1756 return NF_ACCEPT;
1757
1758 net = skb_net(skb);
1759 if (!net_ipvs(net)->enable)
1760 return NF_ACCEPT;
1761
1762 /* Copy stored fw mark, saved in ip_vs_{in,out} */
1763 skb->mark = reasm->mark;
1764
1765 return NF_ACCEPT;
1766}
1767
1768/*
1727 * AF_INET6 handler in NF_INET_LOCAL_IN chain 1769 * AF_INET6 handler in NF_INET_LOCAL_IN chain
1728 * Schedule and forward packets from remote clients 1770 * Schedule and forward packets from remote clients
1729 */ 1771 */
@@ -1793,8 +1835,10 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
1793{ 1835{
1794 int r; 1836 int r;
1795 struct net *net; 1837 struct net *net;
1838 struct ip_vs_iphdr iphdr;
1796 1839
1797 if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6) 1840 ip_vs_fill_iph_skb(AF_INET6, skb, &iphdr);
1841 if (iphdr.protocol != IPPROTO_ICMPV6)
1798 return NF_ACCEPT; 1842 return NF_ACCEPT;
1799 1843
1800 /* ipvs enabled in this netns ? */ 1844 /* ipvs enabled in this netns ? */
@@ -1802,7 +1846,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
1802 if (!net_ipvs(net)->enable) 1846 if (!net_ipvs(net)->enable)
1803 return NF_ACCEPT; 1847 return NF_ACCEPT;
1804 1848
1805 return ip_vs_in_icmp_v6(skb, &r, hooknum); 1849 return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr);
1806} 1850}
1807#endif 1851#endif
1808 1852
@@ -1860,6 +1904,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1860 .priority = 100, 1904 .priority = 100,
1861 }, 1905 },
1862#ifdef CONFIG_IP_VS_IPV6 1906#ifdef CONFIG_IP_VS_IPV6
1907 /* After mangle & nat fetch 2:nd fragment and following */
1908 {
1909 .hook = ip_vs_preroute_frag6,
1910 .owner = THIS_MODULE,
1911 .pf = NFPROTO_IPV6,
1912 .hooknum = NF_INET_PRE_ROUTING,
1913 .priority = NF_IP6_PRI_NAT_DST + 1,
1914 },
1863 /* After packet filtering, change source only for VS/NAT */ 1915 /* After packet filtering, change source only for VS/NAT */
1864 { 1916 {
1865 .hook = ip_vs_reply6, 1917 .hook = ip_vs_reply6,
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
index 8b7dca9ea422..7f3b0cc00b7a 100644
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -215,7 +215,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
215 struct ip_vs_dh_bucket *tbl; 215 struct ip_vs_dh_bucket *tbl;
216 struct ip_vs_iphdr iph; 216 struct ip_vs_iphdr iph;
217 217
218 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 218 ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
219 219
220 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 220 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
221 221
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index df646ccf08a7..cbd37489ac77 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -479,7 +479,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
479 struct ip_vs_dest *dest = NULL; 479 struct ip_vs_dest *dest = NULL;
480 struct ip_vs_lblc_entry *en; 480 struct ip_vs_lblc_entry *en;
481 481
482 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 482 ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
483 483
484 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 484 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
485 485
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 570e31ea427a..161b67972e3f 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -649,7 +649,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
649 struct ip_vs_dest *dest = NULL; 649 struct ip_vs_dest *dest = NULL;
650 struct ip_vs_lblcr_entry *en; 650 struct ip_vs_lblcr_entry *en;
651 651
652 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 652 ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
653 653
654 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 654 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
655 655
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 1aa5cac748c4..12475ef88daf 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -68,23 +68,31 @@ static int get_callid(const char *dptr, unsigned int dataoff,
68static int 68static int
69ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) 69ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
70{ 70{
71 struct sk_buff *reasm = skb_nfct_reasm(skb);
71 struct ip_vs_iphdr iph; 72 struct ip_vs_iphdr iph;
72 unsigned int dataoff, datalen, matchoff, matchlen; 73 unsigned int dataoff, datalen, matchoff, matchlen;
73 const char *dptr; 74 const char *dptr;
74 int retc; 75 int retc;
75 76
76 ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph); 77 ip_vs_fill_iph_skb(p->af, skb, &iph);
77 78
78 /* Only useful with UDP */ 79 /* Only useful with UDP */
79 if (iph.protocol != IPPROTO_UDP) 80 if (iph.protocol != IPPROTO_UDP)
80 return -EINVAL; 81 return -EINVAL;
82 /* todo: IPv6 fragments:
83 * I think this only should be done for the first fragment. /HS
84 */
85 if (reasm) {
86 skb = reasm;
87 dataoff = iph.thoff_reasm + sizeof(struct udphdr);
88 } else
89 dataoff = iph.len + sizeof(struct udphdr);
81 90
82 /* No Data ? */
83 dataoff = iph.len + sizeof(struct udphdr);
84 if (dataoff >= skb->len) 91 if (dataoff >= skb->len)
85 return -EINVAL; 92 return -EINVAL;
86 93 /* todo: Check if this will mess-up the reasm skb !!! /HS */
87 if ((retc=skb_linearize(skb)) < 0) 94 retc = skb_linearize(skb);
95 if (retc < 0)
88 return retc; 96 return retc;
89 dptr = skb->data + dataoff; 97 dptr = skb->data + dataoff;
90 datalen = skb->len - dataoff; 98 datalen = skb->len - dataoff;
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 50d82186da87..939f7fbe9b46 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -280,17 +280,17 @@ ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp,
280 if (ih == NULL) 280 if (ih == NULL)
281 sprintf(buf, "TRUNCATED"); 281 sprintf(buf, "TRUNCATED");
282 else if (ih->nexthdr == IPPROTO_FRAGMENT) 282 else if (ih->nexthdr == IPPROTO_FRAGMENT)
283 sprintf(buf, "%pI6->%pI6 frag", &ih->saddr, &ih->daddr); 283 sprintf(buf, "%pI6c->%pI6c frag", &ih->saddr, &ih->daddr);
284 else { 284 else {
285 __be16 _ports[2], *pptr; 285 __be16 _ports[2], *pptr;
286 286
287 pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr), 287 pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr),
288 sizeof(_ports), _ports); 288 sizeof(_ports), _ports);
289 if (pptr == NULL) 289 if (pptr == NULL)
290 sprintf(buf, "TRUNCATED %pI6->%pI6", 290 sprintf(buf, "TRUNCATED %pI6c->%pI6c",
291 &ih->saddr, &ih->daddr); 291 &ih->saddr, &ih->daddr);
292 else 292 else
293 sprintf(buf, "%pI6:%u->%pI6:%u", 293 sprintf(buf, "%pI6c:%u->%pI6c:%u",
294 &ih->saddr, ntohs(pptr[0]), 294 &ih->saddr, ntohs(pptr[0]),
295 &ih->daddr, ntohs(pptr[1])); 295 &ih->daddr, ntohs(pptr[1]));
296 } 296 }
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 5b8eb8b12c3e..5de3dd312c0f 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -57,7 +57,7 @@ ah_esp_conn_fill_param_proto(struct net *net, int af,
57 57
58static struct ip_vs_conn * 58static struct ip_vs_conn *
59ah_esp_conn_in_get(int af, const struct sk_buff *skb, 59ah_esp_conn_in_get(int af, const struct sk_buff *skb,
60 const struct ip_vs_iphdr *iph, unsigned int proto_off, 60 const struct ip_vs_iphdr *iph,
61 int inverse) 61 int inverse)
62{ 62{
63 struct ip_vs_conn *cp; 63 struct ip_vs_conn *cp;
@@ -85,9 +85,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb,
85 85
86static struct ip_vs_conn * 86static struct ip_vs_conn *
87ah_esp_conn_out_get(int af, const struct sk_buff *skb, 87ah_esp_conn_out_get(int af, const struct sk_buff *skb,
88 const struct ip_vs_iphdr *iph, 88 const struct ip_vs_iphdr *iph, int inverse)
89 unsigned int proto_off,
90 int inverse)
91{ 89{
92 struct ip_vs_conn *cp; 90 struct ip_vs_conn *cp;
93 struct ip_vs_conn_param p; 91 struct ip_vs_conn_param p;
@@ -110,7 +108,8 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
110 108
111static int 109static int
112ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 110ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
113 int *verdict, struct ip_vs_conn **cpp) 111 int *verdict, struct ip_vs_conn **cpp,
112 struct ip_vs_iphdr *iph)
114{ 113{
115 /* 114 /*
116 * AH/ESP is only related traffic. Pass the packet to IP stack. 115 * AH/ESP is only related traffic. Pass the packet to IP stack.
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 9f3fb751c491..746048b13ef3 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -10,28 +10,26 @@
10 10
11static int 11static int
12sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 12sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
13 int *verdict, struct ip_vs_conn **cpp) 13 int *verdict, struct ip_vs_conn **cpp,
14 struct ip_vs_iphdr *iph)
14{ 15{
15 struct net *net; 16 struct net *net;
16 struct ip_vs_service *svc; 17 struct ip_vs_service *svc;
17 sctp_chunkhdr_t _schunkh, *sch; 18 sctp_chunkhdr_t _schunkh, *sch;
18 sctp_sctphdr_t *sh, _sctph; 19 sctp_sctphdr_t *sh, _sctph;
19 struct ip_vs_iphdr iph;
20 20
21 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 21 sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
22
23 sh = skb_header_pointer(skb, iph.len, sizeof(_sctph), &_sctph);
24 if (sh == NULL) 22 if (sh == NULL)
25 return 0; 23 return 0;
26 24
27 sch = skb_header_pointer(skb, iph.len + sizeof(sctp_sctphdr_t), 25 sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t),
28 sizeof(_schunkh), &_schunkh); 26 sizeof(_schunkh), &_schunkh);
29 if (sch == NULL) 27 if (sch == NULL)
30 return 0; 28 return 0;
31 net = skb_net(skb); 29 net = skb_net(skb);
32 if ((sch->type == SCTP_CID_INIT) && 30 if ((sch->type == SCTP_CID_INIT) &&
33 (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, 31 (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
34 &iph.daddr, sh->dest))) { 32 &iph->daddr, sh->dest))) {
35 int ignored; 33 int ignored;
36 34
37 if (ip_vs_todrop(net_ipvs(net))) { 35 if (ip_vs_todrop(net_ipvs(net))) {
@@ -47,10 +45,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
47 * Let the virtual server select a real server for the 45 * Let the virtual server select a real server for the
48 * incoming connection, and create a connection entry. 46 * incoming connection, and create a connection entry.
49 */ 47 */
50 *cpp = ip_vs_schedule(svc, skb, pd, &ignored); 48 *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
51 if (!*cpp && ignored <= 0) { 49 if (!*cpp && ignored <= 0) {
52 if (!ignored) 50 if (!ignored)
53 *verdict = ip_vs_leave(svc, skb, pd); 51 *verdict = ip_vs_leave(svc, skb, pd, iph);
54 else { 52 else {
55 ip_vs_service_put(svc); 53 ip_vs_service_put(svc);
56 *verdict = NF_DROP; 54 *verdict = NF_DROP;
@@ -64,20 +62,18 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
64} 62}
65 63
66static int 64static int
67sctp_snat_handler(struct sk_buff *skb, 65sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
68 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 66 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
69{ 67{
70 sctp_sctphdr_t *sctph; 68 sctp_sctphdr_t *sctph;
71 unsigned int sctphoff; 69 unsigned int sctphoff = iph->len;
72 struct sk_buff *iter; 70 struct sk_buff *iter;
73 __be32 crc32; 71 __be32 crc32;
74 72
75#ifdef CONFIG_IP_VS_IPV6 73#ifdef CONFIG_IP_VS_IPV6
76 if (cp->af == AF_INET6) 74 if (cp->af == AF_INET6 && iph->fragoffs)
77 sctphoff = sizeof(struct ipv6hdr); 75 return 1;
78 else
79#endif 76#endif
80 sctphoff = ip_hdrlen(skb);
81 77
82 /* csum_check requires unshared skb */ 78 /* csum_check requires unshared skb */
83 if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) 79 if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
@@ -108,20 +104,18 @@ sctp_snat_handler(struct sk_buff *skb,
108} 104}
109 105
110static int 106static int
111sctp_dnat_handler(struct sk_buff *skb, 107sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
112 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 108 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
113{ 109{
114 sctp_sctphdr_t *sctph; 110 sctp_sctphdr_t *sctph;
115 unsigned int sctphoff; 111 unsigned int sctphoff = iph->len;
116 struct sk_buff *iter; 112 struct sk_buff *iter;
117 __be32 crc32; 113 __be32 crc32;
118 114
119#ifdef CONFIG_IP_VS_IPV6 115#ifdef CONFIG_IP_VS_IPV6
120 if (cp->af == AF_INET6) 116 if (cp->af == AF_INET6 && iph->fragoffs)
121 sctphoff = sizeof(struct ipv6hdr); 117 return 1;
122 else
123#endif 118#endif
124 sctphoff = ip_hdrlen(skb);
125 119
126 /* csum_check requires unshared skb */ 120 /* csum_check requires unshared skb */
127 if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) 121 if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index cd609cc62721..9af653a75825 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -33,16 +33,14 @@
33 33
34static int 34static int
35tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 35tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
36 int *verdict, struct ip_vs_conn **cpp) 36 int *verdict, struct ip_vs_conn **cpp,
37 struct ip_vs_iphdr *iph)
37{ 38{
38 struct net *net; 39 struct net *net;
39 struct ip_vs_service *svc; 40 struct ip_vs_service *svc;
40 struct tcphdr _tcph, *th; 41 struct tcphdr _tcph, *th;
41 struct ip_vs_iphdr iph;
42 42
43 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 43 th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
44
45 th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph);
46 if (th == NULL) { 44 if (th == NULL) {
47 *verdict = NF_DROP; 45 *verdict = NF_DROP;
48 return 0; 46 return 0;
@@ -50,8 +48,8 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
50 net = skb_net(skb); 48 net = skb_net(skb);
51 /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ 49 /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
52 if (th->syn && 50 if (th->syn &&
53 (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, 51 (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
54 &iph.daddr, th->dest))) { 52 &iph->daddr, th->dest))) {
55 int ignored; 53 int ignored;
56 54
57 if (ip_vs_todrop(net_ipvs(net))) { 55 if (ip_vs_todrop(net_ipvs(net))) {
@@ -68,10 +66,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
68 * Let the virtual server select a real server for the 66 * Let the virtual server select a real server for the
69 * incoming connection, and create a connection entry. 67 * incoming connection, and create a connection entry.
70 */ 68 */
71 *cpp = ip_vs_schedule(svc, skb, pd, &ignored); 69 *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
72 if (!*cpp && ignored <= 0) { 70 if (!*cpp && ignored <= 0) {
73 if (!ignored) 71 if (!ignored)
74 *verdict = ip_vs_leave(svc, skb, pd); 72 *verdict = ip_vs_leave(svc, skb, pd, iph);
75 else { 73 else {
76 ip_vs_service_put(svc); 74 ip_vs_service_put(svc);
77 *verdict = NF_DROP; 75 *verdict = NF_DROP;
@@ -128,20 +126,18 @@ tcp_partial_csum_update(int af, struct tcphdr *tcph,
128 126
129 127
130static int 128static int
131tcp_snat_handler(struct sk_buff *skb, 129tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
132 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 130 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
133{ 131{
134 struct tcphdr *tcph; 132 struct tcphdr *tcph;
135 unsigned int tcphoff; 133 unsigned int tcphoff = iph->len;
136 int oldlen; 134 int oldlen;
137 int payload_csum = 0; 135 int payload_csum = 0;
138 136
139#ifdef CONFIG_IP_VS_IPV6 137#ifdef CONFIG_IP_VS_IPV6
140 if (cp->af == AF_INET6) 138 if (cp->af == AF_INET6 && iph->fragoffs)
141 tcphoff = sizeof(struct ipv6hdr); 139 return 1;
142 else
143#endif 140#endif
144 tcphoff = ip_hdrlen(skb);
145 oldlen = skb->len - tcphoff; 141 oldlen = skb->len - tcphoff;
146 142
147 /* csum_check requires unshared skb */ 143 /* csum_check requires unshared skb */
@@ -208,20 +204,18 @@ tcp_snat_handler(struct sk_buff *skb,
208 204
209 205
210static int 206static int
211tcp_dnat_handler(struct sk_buff *skb, 207tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
212 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 208 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
213{ 209{
214 struct tcphdr *tcph; 210 struct tcphdr *tcph;
215 unsigned int tcphoff; 211 unsigned int tcphoff = iph->len;
216 int oldlen; 212 int oldlen;
217 int payload_csum = 0; 213 int payload_csum = 0;
218 214
219#ifdef CONFIG_IP_VS_IPV6 215#ifdef CONFIG_IP_VS_IPV6
220 if (cp->af == AF_INET6) 216 if (cp->af == AF_INET6 && iph->fragoffs)
221 tcphoff = sizeof(struct ipv6hdr); 217 return 1;
222 else
223#endif 218#endif
224 tcphoff = ip_hdrlen(skb);
225 oldlen = skb->len - tcphoff; 219 oldlen = skb->len - tcphoff;
226 220
227 /* csum_check requires unshared skb */ 221 /* csum_check requires unshared skb */
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 2fedb2dcb3d1..503a842c90d2 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -30,23 +30,22 @@
30 30
31static int 31static int
32udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 32udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
33 int *verdict, struct ip_vs_conn **cpp) 33 int *verdict, struct ip_vs_conn **cpp,
34 struct ip_vs_iphdr *iph)
34{ 35{
35 struct net *net; 36 struct net *net;
36 struct ip_vs_service *svc; 37 struct ip_vs_service *svc;
37 struct udphdr _udph, *uh; 38 struct udphdr _udph, *uh;
38 struct ip_vs_iphdr iph;
39 39
40 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 40 /* IPv6 fragments, only first fragment will hit this */
41 41 uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
42 uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph);
43 if (uh == NULL) { 42 if (uh == NULL) {
44 *verdict = NF_DROP; 43 *verdict = NF_DROP;
45 return 0; 44 return 0;
46 } 45 }
47 net = skb_net(skb); 46 net = skb_net(skb);
48 svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, 47 svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
49 &iph.daddr, uh->dest); 48 &iph->daddr, uh->dest);
50 if (svc) { 49 if (svc) {
51 int ignored; 50 int ignored;
52 51
@@ -64,10 +63,10 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
64 * Let the virtual server select a real server for the 63 * Let the virtual server select a real server for the
65 * incoming connection, and create a connection entry. 64 * incoming connection, and create a connection entry.
66 */ 65 */
67 *cpp = ip_vs_schedule(svc, skb, pd, &ignored); 66 *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
68 if (!*cpp && ignored <= 0) { 67 if (!*cpp && ignored <= 0) {
69 if (!ignored) 68 if (!ignored)
70 *verdict = ip_vs_leave(svc, skb, pd); 69 *verdict = ip_vs_leave(svc, skb, pd, iph);
71 else { 70 else {
72 ip_vs_service_put(svc); 71 ip_vs_service_put(svc);
73 *verdict = NF_DROP; 72 *verdict = NF_DROP;
@@ -125,20 +124,18 @@ udp_partial_csum_update(int af, struct udphdr *uhdr,
125 124
126 125
127static int 126static int
128udp_snat_handler(struct sk_buff *skb, 127udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
129 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 128 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
130{ 129{
131 struct udphdr *udph; 130 struct udphdr *udph;
132 unsigned int udphoff; 131 unsigned int udphoff = iph->len;
133 int oldlen; 132 int oldlen;
134 int payload_csum = 0; 133 int payload_csum = 0;
135 134
136#ifdef CONFIG_IP_VS_IPV6 135#ifdef CONFIG_IP_VS_IPV6
137 if (cp->af == AF_INET6) 136 if (cp->af == AF_INET6 && iph->fragoffs)
138 udphoff = sizeof(struct ipv6hdr); 137 return 1;
139 else
140#endif 138#endif
141 udphoff = ip_hdrlen(skb);
142 oldlen = skb->len - udphoff; 139 oldlen = skb->len - udphoff;
143 140
144 /* csum_check requires unshared skb */ 141 /* csum_check requires unshared skb */
@@ -210,20 +207,18 @@ udp_snat_handler(struct sk_buff *skb,
210 207
211 208
212static int 209static int
213udp_dnat_handler(struct sk_buff *skb, 210udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
214 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 211 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
215{ 212{
216 struct udphdr *udph; 213 struct udphdr *udph;
217 unsigned int udphoff; 214 unsigned int udphoff = iph->len;
218 int oldlen; 215 int oldlen;
219 int payload_csum = 0; 216 int payload_csum = 0;
220 217
221#ifdef CONFIG_IP_VS_IPV6 218#ifdef CONFIG_IP_VS_IPV6
222 if (cp->af == AF_INET6) 219 if (cp->af == AF_INET6 && iph->fragoffs)
223 udphoff = sizeof(struct ipv6hdr); 220 return 1;
224 else
225#endif 221#endif
226 udphoff = ip_hdrlen(skb);
227 oldlen = skb->len - udphoff; 222 oldlen = skb->len - udphoff;
228 223
229 /* csum_check requires unshared skb */ 224 /* csum_check requires unshared skb */
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index 08dbdd5bc18f..d6bf20d6cdbe 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -159,7 +159,7 @@ void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg)
159 svc->fwmark, msg); 159 svc->fwmark, msg);
160#ifdef CONFIG_IP_VS_IPV6 160#ifdef CONFIG_IP_VS_IPV6
161 } else if (svc->af == AF_INET6) { 161 } else if (svc->af == AF_INET6) {
162 IP_VS_ERR_RL("%s: %s [%pI6]:%d - %s\n", 162 IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n",
163 svc->scheduler->name, 163 svc->scheduler->name,
164 ip_vs_proto_name(svc->protocol), 164 ip_vs_proto_name(svc->protocol),
165 &svc->addr.in6, ntohs(svc->port), msg); 165 &svc->addr.in6, ntohs(svc->port), msg);
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index 05126521743e..e33126994628 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -228,7 +228,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
228 struct ip_vs_sh_bucket *tbl; 228 struct ip_vs_sh_bucket *tbl;
229 struct ip_vs_iphdr iph; 229 struct ip_vs_iphdr iph;
230 230
231 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 231 ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
232 232
233 IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); 233 IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
234 234
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index cc4c8095681a..12008b47e5ca 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -338,7 +338,7 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
338 local = __ip_vs_is_local_route6(rt); 338 local = __ip_vs_is_local_route6(rt);
339 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) & 339 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
340 rt_mode)) { 340 rt_mode)) {
341 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n", 341 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n",
342 local ? "local":"non-local", daddr); 342 local ? "local":"non-local", daddr);
343 dst_release(&rt->dst); 343 dst_release(&rt->dst);
344 return NULL; 344 return NULL;
@@ -346,8 +346,8 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
346 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) && 346 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
347 !((ort = (struct rt6_info *) skb_dst(skb)) && 347 !((ort = (struct rt6_info *) skb_dst(skb)) &&
348 __ip_vs_is_local_route6(ort))) { 348 __ip_vs_is_local_route6(ort))) {
349 IP_VS_DBG_RL("Redirect from non-local address %pI6 to local " 349 IP_VS_DBG_RL("Redirect from non-local address %pI6c to local "
350 "requires NAT method, dest: %pI6\n", 350 "requires NAT method, dest: %pI6c\n",
351 &ipv6_hdr(skb)->daddr, daddr); 351 &ipv6_hdr(skb)->daddr, daddr);
352 dst_release(&rt->dst); 352 dst_release(&rt->dst);
353 return NULL; 353 return NULL;
@@ -355,8 +355,8 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
355 if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) && 355 if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
356 ipv6_addr_type(&ipv6_hdr(skb)->saddr) & 356 ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
357 IPV6_ADDR_LOOPBACK)) { 357 IPV6_ADDR_LOOPBACK)) {
358 IP_VS_DBG_RL("Stopping traffic from loopback address %pI6 " 358 IP_VS_DBG_RL("Stopping traffic from loopback address %pI6c "
359 "to non-local address, dest: %pI6\n", 359 "to non-local address, dest: %pI6c\n",
360 &ipv6_hdr(skb)->saddr, daddr); 360 &ipv6_hdr(skb)->saddr, daddr);
361 dst_release(&rt->dst); 361 dst_release(&rt->dst);
362 return NULL; 362 return NULL;
@@ -427,7 +427,7 @@ do { \
427 */ 427 */
428int 428int
429ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 429ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
430 struct ip_vs_protocol *pp) 430 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
431{ 431{
432 /* we do not touch skb and do not need pskb ptr */ 432 /* we do not touch skb and do not need pskb ptr */
433 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); 433 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
@@ -441,7 +441,7 @@ ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
441 */ 441 */
442int 442int
443ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 443ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
444 struct ip_vs_protocol *pp) 444 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
445{ 445{
446 struct rtable *rt; /* Route to the other host */ 446 struct rtable *rt; /* Route to the other host */
447 struct iphdr *iph = ip_hdr(skb); 447 struct iphdr *iph = ip_hdr(skb);
@@ -496,16 +496,16 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
496#ifdef CONFIG_IP_VS_IPV6 496#ifdef CONFIG_IP_VS_IPV6
497int 497int
498ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 498ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
499 struct ip_vs_protocol *pp) 499 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
500{ 500{
501 struct rt6_info *rt; /* Route to the other host */ 501 struct rt6_info *rt; /* Route to the other host */
502 struct ipv6hdr *iph = ipv6_hdr(skb);
503 int mtu; 502 int mtu;
504 503
505 EnterFunction(10); 504 EnterFunction(10);
506 505
507 if (!(rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr, NULL, 0, 506 rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr.in6, NULL, 0,
508 IP_VS_RT_MODE_NON_LOCAL))) 507 IP_VS_RT_MODE_NON_LOCAL);
508 if (!rt)
509 goto tx_error_icmp; 509 goto tx_error_icmp;
510 510
511 /* MTU checking */ 511 /* MTU checking */
@@ -516,7 +516,9 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
516 516
517 skb->dev = net->loopback_dev; 517 skb->dev = net->loopback_dev;
518 } 518 }
519 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 519 /* only send ICMP too big on first fragment */
520 if (!iph->fragoffs)
521 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
520 dst_release(&rt->dst); 522 dst_release(&rt->dst);
521 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 523 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
522 goto tx_error; 524 goto tx_error;
@@ -559,7 +561,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
559 */ 561 */
560int 562int
561ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 563ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
562 struct ip_vs_protocol *pp) 564 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
563{ 565{
564 struct rtable *rt; /* Route to the other host */ 566 struct rtable *rt; /* Route to the other host */
565 int mtu; 567 int mtu;
@@ -629,7 +631,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
629 goto tx_error_put; 631 goto tx_error_put;
630 632
631 /* mangle the packet */ 633 /* mangle the packet */
632 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) 634 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
633 goto tx_error_put; 635 goto tx_error_put;
634 ip_hdr(skb)->daddr = cp->daddr.ip; 636 ip_hdr(skb)->daddr = cp->daddr.ip;
635 ip_send_check(ip_hdr(skb)); 637 ip_send_check(ip_hdr(skb));
@@ -677,7 +679,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
677#ifdef CONFIG_IP_VS_IPV6 679#ifdef CONFIG_IP_VS_IPV6
678int 680int
679ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 681ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
680 struct ip_vs_protocol *pp) 682 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
681{ 683{
682 struct rt6_info *rt; /* Route to the other host */ 684 struct rt6_info *rt; /* Route to the other host */
683 int mtu; 685 int mtu;
@@ -686,10 +688,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
686 EnterFunction(10); 688 EnterFunction(10);
687 689
688 /* check if it is a connection of no-client-port */ 690 /* check if it is a connection of no-client-port */
689 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { 691 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !iph->fragoffs)) {
690 __be16 _pt, *p; 692 __be16 _pt, *p;
691 p = skb_header_pointer(skb, sizeof(struct ipv6hdr), 693 p = skb_header_pointer(skb, iph->len, sizeof(_pt), &_pt);
692 sizeof(_pt), &_pt);
693 if (p == NULL) 694 if (p == NULL)
694 goto tx_error; 695 goto tx_error;
695 ip_vs_conn_fill_cport(cp, *p); 696 ip_vs_conn_fill_cport(cp, *p);
@@ -737,7 +738,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
737 738
738 skb->dev = net->loopback_dev; 739 skb->dev = net->loopback_dev;
739 } 740 }
740 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 741 /* only send ICMP too big on first fragment */
742 if (!iph->fragoffs)
743 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
741 IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0, 744 IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
742 "ip_vs_nat_xmit_v6(): frag needed for"); 745 "ip_vs_nat_xmit_v6(): frag needed for");
743 goto tx_error_put; 746 goto tx_error_put;
@@ -751,7 +754,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
751 goto tx_error_put; 754 goto tx_error_put;
752 755
753 /* mangle the packet */ 756 /* mangle the packet */
754 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) 757 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, iph))
755 goto tx_error; 758 goto tx_error;
756 ipv6_hdr(skb)->daddr = cp->daddr.in6; 759 ipv6_hdr(skb)->daddr = cp->daddr.in6;
757 760
@@ -812,7 +815,7 @@ tx_error_put:
812 */ 815 */
813int 816int
814ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 817ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
815 struct ip_vs_protocol *pp) 818 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
816{ 819{
817 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); 820 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
818 struct rtable *rt; /* Route to the other host */ 821 struct rtable *rt; /* Route to the other host */
@@ -932,7 +935,7 @@ tx_error_put:
932#ifdef CONFIG_IP_VS_IPV6 935#ifdef CONFIG_IP_VS_IPV6
933int 936int
934ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 937ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
935 struct ip_vs_protocol *pp) 938 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
936{ 939{
937 struct rt6_info *rt; /* Route to the other host */ 940 struct rt6_info *rt; /* Route to the other host */
938 struct in6_addr saddr; /* Source for tunnel */ 941 struct in6_addr saddr; /* Source for tunnel */
@@ -972,7 +975,9 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
972 975
973 skb->dev = net->loopback_dev; 976 skb->dev = net->loopback_dev;
974 } 977 }
975 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 978 /* only send ICMP too big on first fragment */
979 if (!ipvsh->fragoffs)
980 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
976 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 981 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
977 goto tx_error_put; 982 goto tx_error_put;
978 } 983 }
@@ -1053,7 +1058,7 @@ tx_error_put:
1053 */ 1058 */
1054int 1059int
1055ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 1060ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1056 struct ip_vs_protocol *pp) 1061 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
1057{ 1062{
1058 struct rtable *rt; /* Route to the other host */ 1063 struct rtable *rt; /* Route to the other host */
1059 struct iphdr *iph = ip_hdr(skb); 1064 struct iphdr *iph = ip_hdr(skb);
@@ -1115,7 +1120,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1115#ifdef CONFIG_IP_VS_IPV6 1120#ifdef CONFIG_IP_VS_IPV6
1116int 1121int
1117ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 1122ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1118 struct ip_vs_protocol *pp) 1123 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
1119{ 1124{
1120 struct rt6_info *rt; /* Route to the other host */ 1125 struct rt6_info *rt; /* Route to the other host */
1121 int mtu; 1126 int mtu;
@@ -1139,7 +1144,9 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1139 1144
1140 skb->dev = net->loopback_dev; 1145 skb->dev = net->loopback_dev;
1141 } 1146 }
1142 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1147 /* only send ICMP too big on first fragment */
1148 if (!iph->fragoffs)
1149 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1143 dst_release(&rt->dst); 1150 dst_release(&rt->dst);
1144 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1151 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1145 goto tx_error; 1152 goto tx_error;
@@ -1183,7 +1190,8 @@ tx_error:
1183 */ 1190 */
1184int 1191int
1185ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 1192ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1186 struct ip_vs_protocol *pp, int offset, unsigned int hooknum) 1193 struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
1194 struct ip_vs_iphdr *iph)
1187{ 1195{
1188 struct rtable *rt; /* Route to the other host */ 1196 struct rtable *rt; /* Route to the other host */
1189 int mtu; 1197 int mtu;
@@ -1198,7 +1206,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1198 translate address/port back */ 1206 translate address/port back */
1199 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { 1207 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1200 if (cp->packet_xmit) 1208 if (cp->packet_xmit)
1201 rc = cp->packet_xmit(skb, cp, pp); 1209 rc = cp->packet_xmit(skb, cp, pp, iph);
1202 else 1210 else
1203 rc = NF_ACCEPT; 1211 rc = NF_ACCEPT;
1204 /* do not touch skb anymore */ 1212 /* do not touch skb anymore */
@@ -1304,7 +1312,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1304#ifdef CONFIG_IP_VS_IPV6 1312#ifdef CONFIG_IP_VS_IPV6
1305int 1313int
1306ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 1314ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1307 struct ip_vs_protocol *pp, int offset, unsigned int hooknum) 1315 struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
1316 struct ip_vs_iphdr *iph)
1308{ 1317{
1309 struct rt6_info *rt; /* Route to the other host */ 1318 struct rt6_info *rt; /* Route to the other host */
1310 int mtu; 1319 int mtu;
@@ -1319,7 +1328,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1319 translate address/port back */ 1328 translate address/port back */
1320 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { 1329 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1321 if (cp->packet_xmit) 1330 if (cp->packet_xmit)
1322 rc = cp->packet_xmit(skb, cp, pp); 1331 rc = cp->packet_xmit(skb, cp, pp, iph);
1323 else 1332 else
1324 rc = NF_ACCEPT; 1333 rc = NF_ACCEPT;
1325 /* do not touch skb anymore */ 1334 /* do not touch skb anymore */
@@ -1375,7 +1384,9 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1375 1384
1376 skb->dev = net->loopback_dev; 1385 skb->dev = net->loopback_dev;
1377 } 1386 }
1378 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1387 /* only send ICMP too big on first fragment */
1388 if (!iph->fragoffs)
1389 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1379 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1390 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1380 goto tx_error_put; 1391 goto tx_error_put;
1381 } 1392 }
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
index bb10b0717f1b..8d47c3780fda 100644
--- a/net/netfilter/xt_ipvs.c
+++ b/net/netfilter/xt_ipvs.c
@@ -67,7 +67,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
67 goto out; 67 goto out;
68 } 68 }
69 69
70 ip_vs_fill_iphdr(family, skb_network_header(skb), &iph); 70 ip_vs_fill_iph_skb(family, skb, &iph);
71 71
72 if (data->bitmask & XT_IPVS_PROTO) 72 if (data->bitmask & XT_IPVS_PROTO)
73 if ((iph.protocol == data->l4proto) ^ 73 if ((iph.protocol == data->l4proto) ^
@@ -85,7 +85,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
85 /* 85 /*
86 * Check if the packet belongs to an existing entry 86 * Check if the packet belongs to an existing entry
87 */ 87 */
88 cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */); 88 cp = pp->conn_out_get(family, skb, &iph, 1 /* inverse */);
89 if (unlikely(cp == NULL)) { 89 if (unlikely(cp == NULL)) {
90 match = false; 90 match = false;
91 goto out; 91 goto out;