aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2012-10-26 14:40:55 -0400
committer David S. Miller <davem@davemloft.net> 2012-10-26 14:40:55 -0400
commit f019948dbb388eacbc0fb61db1d30e686224df77 (patch)
tree 77aac489eb5de3682c5ae67501627170509d3f6f /net
parent dc95a2c00671cf383cd037d943fbfe178f9ba81a (diff)
parent 54d83efa44aac87983f973abb42c508a25a2b554 (diff)
Merge branch 'master' of git://1984.lsi.us.es/nf-next
Pablo Neira Ayuso says: ==================== The following changeset contains updates for IPVS from Jesper Dangaard Brouer that did not reach the previous merge window in time. More specifically, these are updates to improve IPv6 support in IPVS. Most notably, some of the existing code handled the IPv6 extension headers incorrectly; this batch corrects that handling and provides better fragmentation handling. Jesper has promised more follow-up patches to refine this after this batch hits net-next, so there is more yet to come. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- net/netfilter/ipvs/Kconfig | 7
-rw-r--r-- net/netfilter/ipvs/ip_vs_conn.c | 15
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c | 404
-rw-r--r-- net/netfilter/ipvs/ip_vs_dh.c | 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_lblc.c | 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_lblcr.c | 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_pe_sip.c | 18
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto.c | 6
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 9
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_sctp.c | 42
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_tcp.c | 40
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_udp.c | 41
-rw-r--r-- net/netfilter/ipvs/ip_vs_sched.c | 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_sh.c | 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_xmit.c | 73
-rw-r--r-- net/netfilter/xt_ipvs.c | 4
16 files changed, 359 insertions, 310 deletions
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 8b2cffdfdd99..0c3b1670b0d1 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -28,12 +28,11 @@ if IP_VS
28config IP_VS_IPV6 28config IP_VS_IPV6
29 bool "IPv6 support for IPVS" 29 bool "IPv6 support for IPVS"
30 depends on IPV6 = y || IP_VS = IPV6 30 depends on IPV6 = y || IP_VS = IPV6
31 select IP6_NF_IPTABLES
31 ---help--- 32 ---help---
32 Add IPv6 support to IPVS. This is incomplete and might be dangerous. 33 Add IPv6 support to IPVS.
33 34
34 See http://www.mindbasket.com/ipvs for more information. 35 Say Y if unsure.
35
36 Say N if unsure.
37 36
38config IP_VS_DEBUG 37config IP_VS_DEBUG
39 bool "IP virtual server debugging" 38 bool "IP virtual server debugging"
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 1548df9a7524..30e764ad021f 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -308,13 +308,12 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
308static int 308static int
309ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, 309ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
310 const struct ip_vs_iphdr *iph, 310 const struct ip_vs_iphdr *iph,
311 unsigned int proto_off, int inverse, 311 int inverse, struct ip_vs_conn_param *p)
312 struct ip_vs_conn_param *p)
313{ 312{
314 __be16 _ports[2], *pptr; 313 __be16 _ports[2], *pptr;
315 struct net *net = skb_net(skb); 314 struct net *net = skb_net(skb);
316 315
317 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); 316 pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
318 if (pptr == NULL) 317 if (pptr == NULL)
319 return 1; 318 return 1;
320 319
@@ -329,12 +328,11 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
329 328
330struct ip_vs_conn * 329struct ip_vs_conn *
331ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, 330ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
332 const struct ip_vs_iphdr *iph, 331 const struct ip_vs_iphdr *iph, int inverse)
333 unsigned int proto_off, int inverse)
334{ 332{
335 struct ip_vs_conn_param p; 333 struct ip_vs_conn_param p;
336 334
337 if (ip_vs_conn_fill_param_proto(af, skb, iph, proto_off, inverse, &p)) 335 if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p))
338 return NULL; 336 return NULL;
339 337
340 return ip_vs_conn_in_get(&p); 338 return ip_vs_conn_in_get(&p);
@@ -432,12 +430,11 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
432 430
433struct ip_vs_conn * 431struct ip_vs_conn *
434ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, 432ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
435 const struct ip_vs_iphdr *iph, 433 const struct ip_vs_iphdr *iph, int inverse)
436 unsigned int proto_off, int inverse)
437{ 434{
438 struct ip_vs_conn_param p; 435 struct ip_vs_conn_param p;
439 436
440 if (ip_vs_conn_fill_param_proto(af, skb, iph, proto_off, inverse, &p)) 437 if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p))
441 return NULL; 438 return NULL;
442 439
443 return ip_vs_conn_out_get(&p); 440 return ip_vs_conn_out_get(&p);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 58918e20f9d5..fb45640dc1fb 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -222,11 +222,10 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
222 */ 222 */
223static struct ip_vs_conn * 223static struct ip_vs_conn *
224ip_vs_sched_persist(struct ip_vs_service *svc, 224ip_vs_sched_persist(struct ip_vs_service *svc,
225 struct sk_buff *skb, 225 struct sk_buff *skb, __be16 src_port, __be16 dst_port,
226 __be16 src_port, __be16 dst_port, int *ignored) 226 int *ignored, struct ip_vs_iphdr *iph)
227{ 227{
228 struct ip_vs_conn *cp = NULL; 228 struct ip_vs_conn *cp = NULL;
229 struct ip_vs_iphdr iph;
230 struct ip_vs_dest *dest; 229 struct ip_vs_dest *dest;
231 struct ip_vs_conn *ct; 230 struct ip_vs_conn *ct;
232 __be16 dport = 0; /* destination port to forward */ 231 __be16 dport = 0; /* destination port to forward */
@@ -236,20 +235,18 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
236 union nf_inet_addr snet; /* source network of the client, 235 union nf_inet_addr snet; /* source network of the client,
237 after masking */ 236 after masking */
238 237
239 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
240
241 /* Mask saddr with the netmask to adjust template granularity */ 238 /* Mask saddr with the netmask to adjust template granularity */
242#ifdef CONFIG_IP_VS_IPV6 239#ifdef CONFIG_IP_VS_IPV6
243 if (svc->af == AF_INET6) 240 if (svc->af == AF_INET6)
244 ipv6_addr_prefix(&snet.in6, &iph.saddr.in6, svc->netmask); 241 ipv6_addr_prefix(&snet.in6, &iph->saddr.in6, svc->netmask);
245 else 242 else
246#endif 243#endif
247 snet.ip = iph.saddr.ip & svc->netmask; 244 snet.ip = iph->saddr.ip & svc->netmask;
248 245
249 IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " 246 IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
250 "mnet %s\n", 247 "mnet %s\n",
251 IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port), 248 IP_VS_DBG_ADDR(svc->af, &iph->saddr), ntohs(src_port),
252 IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port), 249 IP_VS_DBG_ADDR(svc->af, &iph->daddr), ntohs(dst_port),
253 IP_VS_DBG_ADDR(svc->af, &snet)); 250 IP_VS_DBG_ADDR(svc->af, &snet));
254 251
255 /* 252 /*
@@ -266,8 +263,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
266 * is created for other persistent services. 263 * is created for other persistent services.
267 */ 264 */
268 { 265 {
269 int protocol = iph.protocol; 266 int protocol = iph->protocol;
270 const union nf_inet_addr *vaddr = &iph.daddr; 267 const union nf_inet_addr *vaddr = &iph->daddr;
271 __be16 vport = 0; 268 __be16 vport = 0;
272 269
273 if (dst_port == svc->port) { 270 if (dst_port == svc->port) {
@@ -342,14 +339,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
342 dport = dest->port; 339 dport = dest->port;
343 340
344 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET 341 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
345 && iph.protocol == IPPROTO_UDP)? 342 && iph->protocol == IPPROTO_UDP) ?
346 IP_VS_CONN_F_ONE_PACKET : 0; 343 IP_VS_CONN_F_ONE_PACKET : 0;
347 344
348 /* 345 /*
349 * Create a new connection according to the template 346 * Create a new connection according to the template
350 */ 347 */
351 ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr, 348 ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr,
352 src_port, &iph.daddr, dst_port, &param); 349 src_port, &iph->daddr, dst_port, &param);
353 350
354 cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark); 351 cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark);
355 if (cp == NULL) { 352 if (cp == NULL) {
@@ -392,18 +389,20 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
392 */ 389 */
393struct ip_vs_conn * 390struct ip_vs_conn *
394ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, 391ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
395 struct ip_vs_proto_data *pd, int *ignored) 392 struct ip_vs_proto_data *pd, int *ignored,
393 struct ip_vs_iphdr *iph)
396{ 394{
397 struct ip_vs_protocol *pp = pd->pp; 395 struct ip_vs_protocol *pp = pd->pp;
398 struct ip_vs_conn *cp = NULL; 396 struct ip_vs_conn *cp = NULL;
399 struct ip_vs_iphdr iph;
400 struct ip_vs_dest *dest; 397 struct ip_vs_dest *dest;
401 __be16 _ports[2], *pptr; 398 __be16 _ports[2], *pptr;
402 unsigned int flags; 399 unsigned int flags;
403 400
404 *ignored = 1; 401 *ignored = 1;
405 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 402 /*
406 pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); 403 * IPv6 frags, only the first hit here.
404 */
405 pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
407 if (pptr == NULL) 406 if (pptr == NULL)
408 return NULL; 407 return NULL;
409 408
@@ -423,7 +422,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
423 * Do not schedule replies from local real server. 422 * Do not schedule replies from local real server.
424 */ 423 */
425 if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && 424 if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
426 (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) { 425 (cp = pp->conn_in_get(svc->af, skb, iph, 1))) {
427 IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, 426 IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
428 "Not scheduling reply for existing connection"); 427 "Not scheduling reply for existing connection");
429 __ip_vs_conn_put(cp); 428 __ip_vs_conn_put(cp);
@@ -434,7 +433,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
434 * Persistent service 433 * Persistent service
435 */ 434 */
436 if (svc->flags & IP_VS_SVC_F_PERSISTENT) 435 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
437 return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored); 436 return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored,
437 iph);
438 438
439 *ignored = 0; 439 *ignored = 0;
440 440
@@ -456,7 +456,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
456 } 456 }
457 457
458 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET 458 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
459 && iph.protocol == IPPROTO_UDP)? 459 && iph->protocol == IPPROTO_UDP) ?
460 IP_VS_CONN_F_ONE_PACKET : 0; 460 IP_VS_CONN_F_ONE_PACKET : 0;
461 461
462 /* 462 /*
@@ -465,9 +465,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
465 { 465 {
466 struct ip_vs_conn_param p; 466 struct ip_vs_conn_param p;
467 467
468 ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, 468 ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
469 &iph.saddr, pptr[0], &iph.daddr, pptr[1], 469 &iph->saddr, pptr[0], &iph->daddr,
470 &p); 470 pptr[1], &p);
471 cp = ip_vs_conn_new(&p, &dest->addr, 471 cp = ip_vs_conn_new(&p, &dest->addr,
472 dest->port ? dest->port : pptr[1], 472 dest->port ? dest->port : pptr[1],
473 flags, dest, skb->mark); 473 flags, dest, skb->mark);
@@ -496,19 +496,16 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
496 * no destination is available for a new connection. 496 * no destination is available for a new connection.
497 */ 497 */
498int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, 498int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
499 struct ip_vs_proto_data *pd) 499 struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph)
500{ 500{
501 __be16 _ports[2], *pptr; 501 __be16 _ports[2], *pptr;
502 struct ip_vs_iphdr iph;
503#ifdef CONFIG_SYSCTL 502#ifdef CONFIG_SYSCTL
504 struct net *net; 503 struct net *net;
505 struct netns_ipvs *ipvs; 504 struct netns_ipvs *ipvs;
506 int unicast; 505 int unicast;
507#endif 506#endif
508 507
509 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 508 pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
510
511 pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
512 if (pptr == NULL) { 509 if (pptr == NULL) {
513 ip_vs_service_put(svc); 510 ip_vs_service_put(svc);
514 return NF_DROP; 511 return NF_DROP;
@@ -519,10 +516,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
519 516
520#ifdef CONFIG_IP_VS_IPV6 517#ifdef CONFIG_IP_VS_IPV6
521 if (svc->af == AF_INET6) 518 if (svc->af == AF_INET6)
522 unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST; 519 unicast = ipv6_addr_type(&iph->daddr.in6) & IPV6_ADDR_UNICAST;
523 else 520 else
524#endif 521#endif
525 unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST); 522 unicast = (inet_addr_type(net, iph->daddr.ip) == RTN_UNICAST);
526 523
527 /* if it is fwmark-based service, the cache_bypass sysctl is up 524 /* if it is fwmark-based service, the cache_bypass sysctl is up
528 and the destination is a non-local unicast, then create 525 and the destination is a non-local unicast, then create
@@ -532,7 +529,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
532 int ret; 529 int ret;
533 struct ip_vs_conn *cp; 530 struct ip_vs_conn *cp;
534 unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && 531 unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
535 iph.protocol == IPPROTO_UDP)? 532 iph->protocol == IPPROTO_UDP) ?
536 IP_VS_CONN_F_ONE_PACKET : 0; 533 IP_VS_CONN_F_ONE_PACKET : 0;
537 union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; 534 union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } };
538 535
@@ -542,9 +539,9 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
542 IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); 539 IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
543 { 540 {
544 struct ip_vs_conn_param p; 541 struct ip_vs_conn_param p;
545 ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, 542 ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
546 &iph.saddr, pptr[0], 543 &iph->saddr, pptr[0],
547 &iph.daddr, pptr[1], &p); 544 &iph->daddr, pptr[1], &p);
548 cp = ip_vs_conn_new(&p, &daddr, 0, 545 cp = ip_vs_conn_new(&p, &daddr, 0,
549 IP_VS_CONN_F_BYPASS | flags, 546 IP_VS_CONN_F_BYPASS | flags,
550 NULL, skb->mark); 547 NULL, skb->mark);
@@ -559,7 +556,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
559 ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); 556 ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
560 557
561 /* transmit the first SYN packet */ 558 /* transmit the first SYN packet */
562 ret = cp->packet_xmit(skb, cp, pd->pp); 559 ret = cp->packet_xmit(skb, cp, pd->pp, iph);
563 /* do not touch skb anymore */ 560 /* do not touch skb anymore */
564 561
565 atomic_inc(&cp->in_pkts); 562 atomic_inc(&cp->in_pkts);
@@ -654,14 +651,6 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
654 return err; 651 return err;
655} 652}
656 653
657#ifdef CONFIG_IP_VS_IPV6
658static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user)
659{
660 /* TODO IPv6: Find out what to do here for IPv6 */
661 return 0;
662}
663#endif
664
665static int ip_vs_route_me_harder(int af, struct sk_buff *skb) 654static int ip_vs_route_me_harder(int af, struct sk_buff *skb)
666{ 655{
667#ifdef CONFIG_IP_VS_IPV6 656#ifdef CONFIG_IP_VS_IPV6
@@ -732,10 +721,19 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
732 struct ip_vs_conn *cp, int inout) 721 struct ip_vs_conn *cp, int inout)
733{ 722{
734 struct ipv6hdr *iph = ipv6_hdr(skb); 723 struct ipv6hdr *iph = ipv6_hdr(skb);
735 unsigned int icmp_offset = sizeof(struct ipv6hdr); 724 unsigned int icmp_offset = 0;
736 struct icmp6hdr *icmph = (struct icmp6hdr *)(skb_network_header(skb) + 725 unsigned int offs = 0; /* header offset*/
737 icmp_offset); 726 int protocol;
738 struct ipv6hdr *ciph = (struct ipv6hdr *)(icmph + 1); 727 struct icmp6hdr *icmph;
728 struct ipv6hdr *ciph;
729 unsigned short fragoffs;
730
731 ipv6_find_hdr(skb, &icmp_offset, IPPROTO_ICMPV6, &fragoffs, NULL);
732 icmph = (struct icmp6hdr *)(skb_network_header(skb) + icmp_offset);
733 offs = icmp_offset + sizeof(struct icmp6hdr);
734 ciph = (struct ipv6hdr *)(skb_network_header(skb) + offs);
735
736 protocol = ipv6_find_hdr(skb, &offs, -1, &fragoffs, NULL);
739 737
740 if (inout) { 738 if (inout) {
741 iph->saddr = cp->vaddr.in6; 739 iph->saddr = cp->vaddr.in6;
@@ -746,10 +744,13 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
746 } 744 }
747 745
748 /* the TCP/UDP/SCTP port */ 746 /* the TCP/UDP/SCTP port */
749 if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr || 747 if (!fragoffs && (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol ||
750 IPPROTO_SCTP == ciph->nexthdr) { 748 IPPROTO_SCTP == protocol)) {
751 __be16 *ports = (void *)ciph + sizeof(struct ipv6hdr); 749 __be16 *ports = (void *)(skb_network_header(skb) + offs);
752 750
751 IP_VS_DBG(11, "%s() changed port %d to %d\n", __func__,
752 ntohs(inout ? ports[1] : ports[0]),
753 ntohs(inout ? cp->vport : cp->dport));
753 if (inout) 754 if (inout)
754 ports[1] = cp->vport; 755 ports[1] = cp->vport;
755 else 756 else
@@ -898,51 +899,35 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
898 IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset, 899 IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
899 "Checking outgoing ICMP for"); 900 "Checking outgoing ICMP for");
900 901
901 offset += cih->ihl * 4; 902 ip_vs_fill_ip4hdr(cih, &ciph);
902 903 ciph.len += offset;
903 ip_vs_fill_iphdr(AF_INET, cih, &ciph);
904 /* The embedded headers contain source and dest in reverse order */ 904 /* The embedded headers contain source and dest in reverse order */
905 cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1); 905 cp = pp->conn_out_get(AF_INET, skb, &ciph, 1);
906 if (!cp) 906 if (!cp)
907 return NF_ACCEPT; 907 return NF_ACCEPT;
908 908
909 snet.ip = iph->saddr; 909 snet.ip = iph->saddr;
910 return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp, 910 return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp,
911 pp, offset, ihl); 911 pp, ciph.len, ihl);
912} 912}
913 913
914#ifdef CONFIG_IP_VS_IPV6 914#ifdef CONFIG_IP_VS_IPV6
915static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, 915static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
916 unsigned int hooknum) 916 unsigned int hooknum, struct ip_vs_iphdr *ipvsh)
917{ 917{
918 struct ipv6hdr *iph;
919 struct icmp6hdr _icmph, *ic; 918 struct icmp6hdr _icmph, *ic;
920 struct ipv6hdr _ciph, *cih; /* The ip header contained 919 struct ipv6hdr _ip6h, *ip6h; /* The ip header contained within ICMP */
921 within the ICMP */ 920 struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */
922 struct ip_vs_iphdr ciph;
923 struct ip_vs_conn *cp; 921 struct ip_vs_conn *cp;
924 struct ip_vs_protocol *pp; 922 struct ip_vs_protocol *pp;
925 unsigned int offset;
926 union nf_inet_addr snet; 923 union nf_inet_addr snet;
924 unsigned int writable;
927 925
928 *related = 1; 926 *related = 1;
929 927 ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph, ipvsh);
930 /* reassemble IP fragments */
931 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
932 if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum)))
933 return NF_STOLEN;
934 }
935
936 iph = ipv6_hdr(skb);
937 offset = sizeof(struct ipv6hdr);
938 ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
939 if (ic == NULL) 928 if (ic == NULL)
940 return NF_DROP; 929 return NF_DROP;
941 930
942 IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) %pI6->%pI6\n",
943 ic->icmp6_type, ntohs(icmpv6_id(ic)),
944 &iph->saddr, &iph->daddr);
945
946 /* 931 /*
947 * Work through seeing if this is for us. 932 * Work through seeing if this is for us.
948 * These checks are supposed to be in an order that means easy 933 * These checks are supposed to be in an order that means easy
@@ -950,42 +935,45 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
950 * this means that some packets will manage to get a long way 935 * this means that some packets will manage to get a long way
951 * down this stack and then be rejected, but that's life. 936 * down this stack and then be rejected, but that's life.
952 */ 937 */
953 if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) && 938 if (ic->icmp6_type & ICMPV6_INFOMSG_MASK) {
954 (ic->icmp6_type != ICMPV6_PKT_TOOBIG) &&
955 (ic->icmp6_type != ICMPV6_TIME_EXCEED)) {
956 *related = 0; 939 *related = 0;
957 return NF_ACCEPT; 940 return NF_ACCEPT;
958 } 941 }
942 /* Fragment header that is before ICMP header tells us that:
943 * it's not an error message since they can't be fragmented.
944 */
945 if (ipvsh->flags & IP6T_FH_F_FRAG)
946 return NF_DROP;
947
948 IP_VS_DBG(8, "Outgoing ICMPv6 (%d,%d) %pI6c->%pI6c\n",
949 ic->icmp6_type, ntohs(icmpv6_id(ic)),
950 &ipvsh->saddr, &ipvsh->daddr);
959 951
960 /* Now find the contained IP header */ 952 /* Now find the contained IP header */
961 offset += sizeof(_icmph); 953 ciph.len = ipvsh->len + sizeof(_icmph);
962 cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); 954 ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h);
963 if (cih == NULL) 955 if (ip6h == NULL)
964 return NF_ACCEPT; /* The packet looks wrong, ignore */ 956 return NF_ACCEPT; /* The packet looks wrong, ignore */
965 957 ciph.saddr.in6 = ip6h->saddr; /* conn_out_get() handles reverse order */
966 pp = ip_vs_proto_get(cih->nexthdr); 958 ciph.daddr.in6 = ip6h->daddr;
959 /* skip possible IPv6 exthdrs of contained IPv6 packet */
960 ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL);
961 if (ciph.protocol < 0)
962 return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */
963
964 pp = ip_vs_proto_get(ciph.protocol);
967 if (!pp) 965 if (!pp)
968 return NF_ACCEPT; 966 return NF_ACCEPT;
969 967
970 /* Is the embedded protocol header present? */
971 /* TODO: we don't support fragmentation at the moment anyways */
972 if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
973 return NF_ACCEPT;
974
975 IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
976 "Checking outgoing ICMPv6 for");
977
978 offset += sizeof(struct ipv6hdr);
979
980 ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
981 /* The embedded headers contain source and dest in reverse order */ 968 /* The embedded headers contain source and dest in reverse order */
982 cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1); 969 cp = pp->conn_out_get(AF_INET6, skb, &ciph, 1);
983 if (!cp) 970 if (!cp)
984 return NF_ACCEPT; 971 return NF_ACCEPT;
985 972
986 snet.in6 = iph->saddr; 973 snet.in6 = ciph.saddr.in6;
987 return handle_response_icmp(AF_INET6, skb, &snet, cih->nexthdr, cp, 974 writable = ciph.len;
988 pp, offset, sizeof(struct ipv6hdr)); 975 return handle_response_icmp(AF_INET6, skb, &snet, ciph.protocol, cp,
976 pp, writable, sizeof(struct ipv6hdr));
989} 977}
990#endif 978#endif
991 979
@@ -1018,17 +1006,17 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
1018 */ 1006 */
1019static unsigned int 1007static unsigned int
1020handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 1008handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
1021 struct ip_vs_conn *cp, int ihl) 1009 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
1022{ 1010{
1023 struct ip_vs_protocol *pp = pd->pp; 1011 struct ip_vs_protocol *pp = pd->pp;
1024 1012
1025 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); 1013 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
1026 1014
1027 if (!skb_make_writable(skb, ihl)) 1015 if (!skb_make_writable(skb, iph->len))
1028 goto drop; 1016 goto drop;
1029 1017
1030 /* mangle the packet */ 1018 /* mangle the packet */
1031 if (pp->snat_handler && !pp->snat_handler(skb, pp, cp)) 1019 if (pp->snat_handler && !pp->snat_handler(skb, pp, cp, iph))
1032 goto drop; 1020 goto drop;
1033 1021
1034#ifdef CONFIG_IP_VS_IPV6 1022#ifdef CONFIG_IP_VS_IPV6
@@ -1115,17 +1103,22 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1115 if (!net_ipvs(net)->enable) 1103 if (!net_ipvs(net)->enable)
1116 return NF_ACCEPT; 1104 return NF_ACCEPT;
1117 1105
1118 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1106 ip_vs_fill_iph_skb(af, skb, &iph);
1119#ifdef CONFIG_IP_VS_IPV6 1107#ifdef CONFIG_IP_VS_IPV6
1120 if (af == AF_INET6) { 1108 if (af == AF_INET6) {
1109 if (!iph.fragoffs && skb_nfct_reasm(skb)) {
1110 struct sk_buff *reasm = skb_nfct_reasm(skb);
1111 /* Save fw mark for coming frags */
1112 reasm->ipvs_property = 1;
1113 reasm->mark = skb->mark;
1114 }
1121 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { 1115 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
1122 int related; 1116 int related;
1123 int verdict = ip_vs_out_icmp_v6(skb, &related, 1117 int verdict = ip_vs_out_icmp_v6(skb, &related,
1124 hooknum); 1118 hooknum, &iph);
1125 1119
1126 if (related) 1120 if (related)
1127 return verdict; 1121 return verdict;
1128 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1129 } 1122 }
1130 } else 1123 } else
1131#endif 1124#endif
@@ -1135,7 +1128,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1135 1128
1136 if (related) 1129 if (related)
1137 return verdict; 1130 return verdict;
1138 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1139 } 1131 }
1140 1132
1141 pd = ip_vs_proto_data_get(net, iph.protocol); 1133 pd = ip_vs_proto_data_get(net, iph.protocol);
@@ -1145,39 +1137,31 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1145 1137
1146 /* reassemble IP fragments */ 1138 /* reassemble IP fragments */
1147#ifdef CONFIG_IP_VS_IPV6 1139#ifdef CONFIG_IP_VS_IPV6
1148 if (af == AF_INET6) { 1140 if (af == AF_INET)
1149 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
1150 if (ip_vs_gather_frags_v6(skb,
1151 ip_vs_defrag_user(hooknum)))
1152 return NF_STOLEN;
1153 }
1154
1155 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1156 } else
1157#endif 1141#endif
1158 if (unlikely(ip_is_fragment(ip_hdr(skb)) && !pp->dont_defrag)) { 1142 if (unlikely(ip_is_fragment(ip_hdr(skb)) && !pp->dont_defrag)) {
1159 if (ip_vs_gather_frags(skb, 1143 if (ip_vs_gather_frags(skb,
1160 ip_vs_defrag_user(hooknum))) 1144 ip_vs_defrag_user(hooknum)))
1161 return NF_STOLEN; 1145 return NF_STOLEN;
1162 1146
1163 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1147 ip_vs_fill_ip4hdr(skb_network_header(skb), &iph);
1164 } 1148 }
1165 1149
1166 /* 1150 /*
1167 * Check if the packet belongs to an existing entry 1151 * Check if the packet belongs to an existing entry
1168 */ 1152 */
1169 cp = pp->conn_out_get(af, skb, &iph, iph.len, 0); 1153 cp = pp->conn_out_get(af, skb, &iph, 0);
1170 1154
1171 if (likely(cp)) 1155 if (likely(cp))
1172 return handle_response(af, skb, pd, cp, iph.len); 1156 return handle_response(af, skb, pd, cp, &iph);
1173 if (sysctl_nat_icmp_send(net) && 1157 if (sysctl_nat_icmp_send(net) &&
1174 (pp->protocol == IPPROTO_TCP || 1158 (pp->protocol == IPPROTO_TCP ||
1175 pp->protocol == IPPROTO_UDP || 1159 pp->protocol == IPPROTO_UDP ||
1176 pp->protocol == IPPROTO_SCTP)) { 1160 pp->protocol == IPPROTO_SCTP)) {
1177 __be16 _ports[2], *pptr; 1161 __be16 _ports[2], *pptr;
1178 1162
1179 pptr = skb_header_pointer(skb, iph.len, 1163 pptr = frag_safe_skb_hp(skb, iph.len,
1180 sizeof(_ports), _ports); 1164 sizeof(_ports), _ports, &iph);
1181 if (pptr == NULL) 1165 if (pptr == NULL)
1182 return NF_ACCEPT; /* Not for me */ 1166 return NF_ACCEPT; /* Not for me */
1183 if (ip_vs_lookup_real_service(net, af, iph.protocol, 1167 if (ip_vs_lookup_real_service(net, af, iph.protocol,
@@ -1375,13 +1359,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1375 "Checking incoming ICMP for"); 1359 "Checking incoming ICMP for");
1376 1360
1377 offset2 = offset; 1361 offset2 = offset;
1378 offset += cih->ihl * 4; 1362 ip_vs_fill_ip4hdr(cih, &ciph);
1379 1363 ciph.len += offset;
1380 ip_vs_fill_iphdr(AF_INET, cih, &ciph); 1364 offset = ciph.len;
1381 /* The embedded headers contain source and dest in reverse order. 1365 /* The embedded headers contain source and dest in reverse order.
1382 * For IPIP this is error for request, not for reply. 1366 * For IPIP this is error for request, not for reply.
1383 */ 1367 */
1384 cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, ipip ? 0 : 1); 1368 cp = pp->conn_in_get(AF_INET, skb, &ciph, ipip ? 0 : 1);
1385 if (!cp) 1369 if (!cp)
1386 return NF_ACCEPT; 1370 return NF_ACCEPT;
1387 1371
@@ -1450,7 +1434,7 @@ ignore_ipip:
1450 ip_vs_in_stats(cp, skb); 1434 ip_vs_in_stats(cp, skb);
1451 if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) 1435 if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
1452 offset += 2 * sizeof(__u16); 1436 offset += 2 * sizeof(__u16);
1453 verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum); 1437 verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum, &ciph);
1454 1438
1455out: 1439out:
1456 __ip_vs_conn_put(cp); 1440 __ip_vs_conn_put(cp);
@@ -1459,38 +1443,24 @@ out:
1459} 1443}
1460 1444
1461#ifdef CONFIG_IP_VS_IPV6 1445#ifdef CONFIG_IP_VS_IPV6
1462static int 1446static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
1463ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) 1447 unsigned int hooknum, struct ip_vs_iphdr *iph)
1464{ 1448{
1465 struct net *net = NULL; 1449 struct net *net = NULL;
1466 struct ipv6hdr *iph; 1450 struct ipv6hdr _ip6h, *ip6h;
1467 struct icmp6hdr _icmph, *ic; 1451 struct icmp6hdr _icmph, *ic;
1468 struct ipv6hdr _ciph, *cih; /* The ip header contained 1452 struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */
1469 within the ICMP */
1470 struct ip_vs_iphdr ciph;
1471 struct ip_vs_conn *cp; 1453 struct ip_vs_conn *cp;
1472 struct ip_vs_protocol *pp; 1454 struct ip_vs_protocol *pp;
1473 struct ip_vs_proto_data *pd; 1455 struct ip_vs_proto_data *pd;
1474 unsigned int offset, verdict; 1456 unsigned int offs_ciph, writable, verdict;
1475 1457
1476 *related = 1; 1458 *related = 1;
1477 1459
1478 /* reassemble IP fragments */ 1460 ic = frag_safe_skb_hp(skb, iph->len, sizeof(_icmph), &_icmph, iph);
1479 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
1480 if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum)))
1481 return NF_STOLEN;
1482 }
1483
1484 iph = ipv6_hdr(skb);
1485 offset = sizeof(struct ipv6hdr);
1486 ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
1487 if (ic == NULL) 1461 if (ic == NULL)
1488 return NF_DROP; 1462 return NF_DROP;
1489 1463
1490 IP_VS_DBG(12, "Incoming ICMPv6 (%d,%d) %pI6->%pI6\n",
1491 ic->icmp6_type, ntohs(icmpv6_id(ic)),
1492 &iph->saddr, &iph->daddr);
1493
1494 /* 1464 /*
1495 * Work through seeing if this is for us. 1465 * Work through seeing if this is for us.
1496 * These checks are supposed to be in an order that means easy 1466 * These checks are supposed to be in an order that means easy
@@ -1498,47 +1468,71 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1498 * this means that some packets will manage to get a long way 1468 * this means that some packets will manage to get a long way
1499 * down this stack and then be rejected, but that's life. 1469 * down this stack and then be rejected, but that's life.
1500 */ 1470 */
1501 if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) && 1471 if (ic->icmp6_type & ICMPV6_INFOMSG_MASK) {
1502 (ic->icmp6_type != ICMPV6_PKT_TOOBIG) &&
1503 (ic->icmp6_type != ICMPV6_TIME_EXCEED)) {
1504 *related = 0; 1472 *related = 0;
1505 return NF_ACCEPT; 1473 return NF_ACCEPT;
1506 } 1474 }
1475 /* Fragment header that is before ICMP header tells us that:
1476 * it's not an error message since they can't be fragmented.
1477 */
1478 if (iph->flags & IP6T_FH_F_FRAG)
1479 return NF_DROP;
1480
1481 IP_VS_DBG(8, "Incoming ICMPv6 (%d,%d) %pI6c->%pI6c\n",
1482 ic->icmp6_type, ntohs(icmpv6_id(ic)),
1483 &iph->saddr, &iph->daddr);
1507 1484
1508 /* Now find the contained IP header */ 1485 /* Now find the contained IP header */
1509 offset += sizeof(_icmph); 1486 ciph.len = iph->len + sizeof(_icmph);
1510 cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); 1487 offs_ciph = ciph.len; /* Save ip header offset */
1511 if (cih == NULL) 1488 ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h);
1489 if (ip6h == NULL)
1512 return NF_ACCEPT; /* The packet looks wrong, ignore */ 1490 return NF_ACCEPT; /* The packet looks wrong, ignore */
1491 ciph.saddr.in6 = ip6h->saddr; /* conn_in_get() handles reverse order */
1492 ciph.daddr.in6 = ip6h->daddr;
1493 /* skip possible IPv6 exthdrs of contained IPv6 packet */
1494 ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL);
1495 if (ciph.protocol < 0)
1496 return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */
1513 1497
1514 net = skb_net(skb); 1498 net = skb_net(skb);
1515 pd = ip_vs_proto_data_get(net, cih->nexthdr); 1499 pd = ip_vs_proto_data_get(net, ciph.protocol);
1516 if (!pd) 1500 if (!pd)
1517 return NF_ACCEPT; 1501 return NF_ACCEPT;
1518 pp = pd->pp; 1502 pp = pd->pp;
1519 1503
1520 /* Is the embedded protocol header present? */ 1504 /* Cannot handle fragmented embedded protocol */
1521 /* TODO: we don't support fragmentation at the moment anyways */ 1505 if (ciph.fragoffs)
1522 if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
1523 return NF_ACCEPT; 1506 return NF_ACCEPT;
1524 1507
1525 IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset, 1508 IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offs_ciph,
1526 "Checking incoming ICMPv6 for"); 1509 "Checking incoming ICMPv6 for");
1527 1510
1528 offset += sizeof(struct ipv6hdr); 1511 /* The embedded headers contain source and dest in reverse order
1512 * if not from localhost
1513 */
1514 cp = pp->conn_in_get(AF_INET6, skb, &ciph,
1515 (hooknum == NF_INET_LOCAL_OUT) ? 0 : 1);
1529 1516
1530 ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
1531 /* The embedded headers contain source and dest in reverse order */
1532 cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1);
1533 if (!cp) 1517 if (!cp)
1534 return NF_ACCEPT; 1518 return NF_ACCEPT;
1519 /* VS/TUN, VS/DR and LOCALNODE just let it go */
1520 if ((hooknum == NF_INET_LOCAL_OUT) &&
1521 (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)) {
1522 __ip_vs_conn_put(cp);
1523 return NF_ACCEPT;
1524 }
1535 1525
1536 /* do the statistics and put it back */ 1526 /* do the statistics and put it back */
1537 ip_vs_in_stats(cp, skb); 1527 ip_vs_in_stats(cp, skb);
1538 if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr || 1528
1539 IPPROTO_SCTP == cih->nexthdr) 1529 /* Need to mangle contained IPv6 header in ICMPv6 packet */
1540 offset += 2 * sizeof(__u16); 1530 writable = ciph.len;
1541 verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset, hooknum); 1531 if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol ||
1532 IPPROTO_SCTP == ciph.protocol)
1533 writable += 2 * sizeof(__u16); /* Also mangle ports */
1534
1535 verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, writable, hooknum, &ciph);
1542 1536
1543 __ip_vs_conn_put(cp); 1537 __ip_vs_conn_put(cp);
1544 1538
@@ -1574,7 +1568,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1574 if (unlikely((skb->pkt_type != PACKET_HOST && 1568 if (unlikely((skb->pkt_type != PACKET_HOST &&
1575 hooknum != NF_INET_LOCAL_OUT) || 1569 hooknum != NF_INET_LOCAL_OUT) ||
1576 !skb_dst(skb))) { 1570 !skb_dst(skb))) {
1577 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1571 ip_vs_fill_iph_skb(af, skb, &iph);
1578 IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s" 1572 IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s"
1579 " ignored in hook %u\n", 1573 " ignored in hook %u\n",
1580 skb->pkt_type, iph.protocol, 1574 skb->pkt_type, iph.protocol,
@@ -1586,7 +1580,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1586 if (!net_ipvs(net)->enable) 1580 if (!net_ipvs(net)->enable)
1587 return NF_ACCEPT; 1581 return NF_ACCEPT;
1588 1582
1589 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1583 ip_vs_fill_iph_skb(af, skb, &iph);
1590 1584
1591 /* Bad... Do not break raw sockets */ 1585 /* Bad... Do not break raw sockets */
1592 if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT && 1586 if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
@@ -1600,13 +1594,19 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1600 1594
1601#ifdef CONFIG_IP_VS_IPV6 1595#ifdef CONFIG_IP_VS_IPV6
1602 if (af == AF_INET6) { 1596 if (af == AF_INET6) {
1597 if (!iph.fragoffs && skb_nfct_reasm(skb)) {
1598 struct sk_buff *reasm = skb_nfct_reasm(skb);
1599 /* Save fw mark for coming frags. */
1600 reasm->ipvs_property = 1;
1601 reasm->mark = skb->mark;
1602 }
1603 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { 1603 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
1604 int related; 1604 int related;
1605 int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum); 1605 int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum,
1606 &iph);
1606 1607
1607 if (related) 1608 if (related)
1608 return verdict; 1609 return verdict;
1609 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1610 } 1610 }
1611 } else 1611 } else
1612#endif 1612#endif
@@ -1616,7 +1616,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1616 1616
1617 if (related) 1617 if (related)
1618 return verdict; 1618 return verdict;
1619 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1620 } 1619 }
1621 1620
1622 /* Protocol supported? */ 1621 /* Protocol supported? */
@@ -1627,12 +1626,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1627 /* 1626 /*
1628 * Check if the packet belongs to an existing connection entry 1627 * Check if the packet belongs to an existing connection entry
1629 */ 1628 */
1630 cp = pp->conn_in_get(af, skb, &iph, iph.len, 0); 1629 cp = pp->conn_in_get(af, skb, &iph, 0);
1631 1630 if (unlikely(!cp) && !iph.fragoffs) {
1632 if (unlikely(!cp)) { 1631 /* No (second) fragments need to enter here, as nf_defrag_ipv6
1632 * replayed fragment zero will already have created the cp
1633 */
1633 int v; 1634 int v;
1634 1635
1635 if (!pp->conn_schedule(af, skb, pd, &v, &cp)) 1636 /* Schedule and create new connection entry into &cp */
1637 if (!pp->conn_schedule(af, skb, pd, &v, &cp, &iph))
1636 return v; 1638 return v;
1637 } 1639 }
1638 1640
@@ -1640,6 +1642,14 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1640 /* sorry, all this trouble for a no-hit :) */ 1642 /* sorry, all this trouble for a no-hit :) */
1641 IP_VS_DBG_PKT(12, af, pp, skb, 0, 1643 IP_VS_DBG_PKT(12, af, pp, skb, 0,
1642 "ip_vs_in: packet continues traversal as normal"); 1644 "ip_vs_in: packet continues traversal as normal");
1645 if (iph.fragoffs && !skb_nfct_reasm(skb)) {
1646 /* Fragment that couldn't be mapped to a conn entry
1647 * and don't have any pointer to a reasm skb
1648 * is missing module nf_defrag_ipv6
1649 */
1650 IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
1651 IP_VS_DBG_PKT(7, af, pp, skb, 0, "unhandled fragment");
1652 }
1643 return NF_ACCEPT; 1653 return NF_ACCEPT;
1644 } 1654 }
1645 1655
@@ -1662,7 +1672,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1662 ip_vs_in_stats(cp, skb); 1672 ip_vs_in_stats(cp, skb);
1663 ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); 1673 ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
1664 if (cp->packet_xmit) 1674 if (cp->packet_xmit)
1665 ret = cp->packet_xmit(skb, cp, pp); 1675 ret = cp->packet_xmit(skb, cp, pp, &iph);
1666 /* do not touch skb anymore */ 1676 /* do not touch skb anymore */
1667 else { 1677 else {
1668 IP_VS_DBG_RL("warning: packet_xmit is null"); 1678 IP_VS_DBG_RL("warning: packet_xmit is null");
@@ -1724,6 +1734,38 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
1724#ifdef CONFIG_IP_VS_IPV6 1734#ifdef CONFIG_IP_VS_IPV6
1725 1735
1726/* 1736/*
1737 * AF_INET6 fragment handling
1738 * Copy info from first fragment, to the rest of them.
1739 */
1740static unsigned int
1741ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb,
1742 const struct net_device *in,
1743 const struct net_device *out,
1744 int (*okfn)(struct sk_buff *))
1745{
1746 struct sk_buff *reasm = skb_nfct_reasm(skb);
1747 struct net *net;
1748
1749 /* Skip if not a "replay" from nf_ct_frag6_output or first fragment.
1750 * ipvs_property is set when checking first fragment
1751 * in ip_vs_in() and ip_vs_out().
1752 */
1753 if (reasm)
1754 IP_VS_DBG(2, "Fragment recv prop:%d\n", reasm->ipvs_property);
1755 if (!reasm || !reasm->ipvs_property)
1756 return NF_ACCEPT;
1757
1758 net = skb_net(skb);
1759 if (!net_ipvs(net)->enable)
1760 return NF_ACCEPT;
1761
1762 /* Copy stored fw mark, saved in ip_vs_{in,out} */
1763 skb->mark = reasm->mark;
1764
1765 return NF_ACCEPT;
1766}
1767
1768/*
1727 * AF_INET6 handler in NF_INET_LOCAL_IN chain 1769 * AF_INET6 handler in NF_INET_LOCAL_IN chain
1728 * Schedule and forward packets from remote clients 1770 * Schedule and forward packets from remote clients
1729 */ 1771 */
@@ -1793,8 +1835,10 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
1793{ 1835{
1794 int r; 1836 int r;
1795 struct net *net; 1837 struct net *net;
1838 struct ip_vs_iphdr iphdr;
1796 1839
1797 if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6) 1840 ip_vs_fill_iph_skb(AF_INET6, skb, &iphdr);
1841 if (iphdr.protocol != IPPROTO_ICMPV6)
1798 return NF_ACCEPT; 1842 return NF_ACCEPT;
1799 1843
1800 /* ipvs enabled in this netns ? */ 1844 /* ipvs enabled in this netns ? */
@@ -1802,7 +1846,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
1802 if (!net_ipvs(net)->enable) 1846 if (!net_ipvs(net)->enable)
1803 return NF_ACCEPT; 1847 return NF_ACCEPT;
1804 1848
1805 return ip_vs_in_icmp_v6(skb, &r, hooknum); 1849 return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr);
1806} 1850}
1807#endif 1851#endif
1808 1852
@@ -1860,6 +1904,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1860 .priority = 100, 1904 .priority = 100,
1861 }, 1905 },
1862#ifdef CONFIG_IP_VS_IPV6 1906#ifdef CONFIG_IP_VS_IPV6
1907 /* After mangle & nat fetch 2:nd fragment and following */
1908 {
1909 .hook = ip_vs_preroute_frag6,
1910 .owner = THIS_MODULE,
1911 .pf = NFPROTO_IPV6,
1912 .hooknum = NF_INET_PRE_ROUTING,
1913 .priority = NF_IP6_PRI_NAT_DST + 1,
1914 },
1863 /* After packet filtering, change source only for VS/NAT */ 1915 /* After packet filtering, change source only for VS/NAT */
1864 { 1916 {
1865 .hook = ip_vs_reply6, 1917 .hook = ip_vs_reply6,
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
index 8b7dca9ea422..7f3b0cc00b7a 100644
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -215,7 +215,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
215 struct ip_vs_dh_bucket *tbl; 215 struct ip_vs_dh_bucket *tbl;
216 struct ip_vs_iphdr iph; 216 struct ip_vs_iphdr iph;
217 217
218 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 218 ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
219 219
220 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 220 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
221 221
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index df646ccf08a7..cbd37489ac77 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -479,7 +479,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
479 struct ip_vs_dest *dest = NULL; 479 struct ip_vs_dest *dest = NULL;
480 struct ip_vs_lblc_entry *en; 480 struct ip_vs_lblc_entry *en;
481 481
482 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 482 ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
483 483
484 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 484 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
485 485
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 570e31ea427a..161b67972e3f 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -649,7 +649,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
649 struct ip_vs_dest *dest = NULL; 649 struct ip_vs_dest *dest = NULL;
650 struct ip_vs_lblcr_entry *en; 650 struct ip_vs_lblcr_entry *en;
651 651
652 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 652 ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
653 653
654 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 654 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
655 655
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 1aa5cac748c4..12475ef88daf 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -68,23 +68,31 @@ static int get_callid(const char *dptr, unsigned int dataoff,
68static int 68static int
69ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) 69ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
70{ 70{
71 struct sk_buff *reasm = skb_nfct_reasm(skb);
71 struct ip_vs_iphdr iph; 72 struct ip_vs_iphdr iph;
72 unsigned int dataoff, datalen, matchoff, matchlen; 73 unsigned int dataoff, datalen, matchoff, matchlen;
73 const char *dptr; 74 const char *dptr;
74 int retc; 75 int retc;
75 76
76 ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph); 77 ip_vs_fill_iph_skb(p->af, skb, &iph);
77 78
78 /* Only useful with UDP */ 79 /* Only useful with UDP */
79 if (iph.protocol != IPPROTO_UDP) 80 if (iph.protocol != IPPROTO_UDP)
80 return -EINVAL; 81 return -EINVAL;
82 /* todo: IPv6 fragments:
83 * I think this only should be done for the first fragment. /HS
84 */
85 if (reasm) {
86 skb = reasm;
87 dataoff = iph.thoff_reasm + sizeof(struct udphdr);
88 } else
89 dataoff = iph.len + sizeof(struct udphdr);
81 90
82 /* No Data ? */
83 dataoff = iph.len + sizeof(struct udphdr);
84 if (dataoff >= skb->len) 91 if (dataoff >= skb->len)
85 return -EINVAL; 92 return -EINVAL;
86 93 /* todo: Check if this will mess-up the reasm skb !!! /HS */
87 if ((retc=skb_linearize(skb)) < 0) 94 retc = skb_linearize(skb);
95 if (retc < 0)
88 return retc; 96 return retc;
89 dptr = skb->data + dataoff; 97 dptr = skb->data + dataoff;
90 datalen = skb->len - dataoff; 98 datalen = skb->len - dataoff;
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 50d82186da87..939f7fbe9b46 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -280,17 +280,17 @@ ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp,
280 if (ih == NULL) 280 if (ih == NULL)
281 sprintf(buf, "TRUNCATED"); 281 sprintf(buf, "TRUNCATED");
282 else if (ih->nexthdr == IPPROTO_FRAGMENT) 282 else if (ih->nexthdr == IPPROTO_FRAGMENT)
283 sprintf(buf, "%pI6->%pI6 frag", &ih->saddr, &ih->daddr); 283 sprintf(buf, "%pI6c->%pI6c frag", &ih->saddr, &ih->daddr);
284 else { 284 else {
285 __be16 _ports[2], *pptr; 285 __be16 _ports[2], *pptr;
286 286
287 pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr), 287 pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr),
288 sizeof(_ports), _ports); 288 sizeof(_ports), _ports);
289 if (pptr == NULL) 289 if (pptr == NULL)
290 sprintf(buf, "TRUNCATED %pI6->%pI6", 290 sprintf(buf, "TRUNCATED %pI6c->%pI6c",
291 &ih->saddr, &ih->daddr); 291 &ih->saddr, &ih->daddr);
292 else 292 else
293 sprintf(buf, "%pI6:%u->%pI6:%u", 293 sprintf(buf, "%pI6c:%u->%pI6c:%u",
294 &ih->saddr, ntohs(pptr[0]), 294 &ih->saddr, ntohs(pptr[0]),
295 &ih->daddr, ntohs(pptr[1])); 295 &ih->daddr, ntohs(pptr[1]));
296 } 296 }
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 5b8eb8b12c3e..5de3dd312c0f 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -57,7 +57,7 @@ ah_esp_conn_fill_param_proto(struct net *net, int af,
57 57
58static struct ip_vs_conn * 58static struct ip_vs_conn *
59ah_esp_conn_in_get(int af, const struct sk_buff *skb, 59ah_esp_conn_in_get(int af, const struct sk_buff *skb,
60 const struct ip_vs_iphdr *iph, unsigned int proto_off, 60 const struct ip_vs_iphdr *iph,
61 int inverse) 61 int inverse)
62{ 62{
63 struct ip_vs_conn *cp; 63 struct ip_vs_conn *cp;
@@ -85,9 +85,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb,
85 85
86static struct ip_vs_conn * 86static struct ip_vs_conn *
87ah_esp_conn_out_get(int af, const struct sk_buff *skb, 87ah_esp_conn_out_get(int af, const struct sk_buff *skb,
88 const struct ip_vs_iphdr *iph, 88 const struct ip_vs_iphdr *iph, int inverse)
89 unsigned int proto_off,
90 int inverse)
91{ 89{
92 struct ip_vs_conn *cp; 90 struct ip_vs_conn *cp;
93 struct ip_vs_conn_param p; 91 struct ip_vs_conn_param p;
@@ -110,7 +108,8 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
110 108
111static int 109static int
112ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 110ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
113 int *verdict, struct ip_vs_conn **cpp) 111 int *verdict, struct ip_vs_conn **cpp,
112 struct ip_vs_iphdr *iph)
114{ 113{
115 /* 114 /*
116 * AH/ESP is only related traffic. Pass the packet to IP stack. 115 * AH/ESP is only related traffic. Pass the packet to IP stack.
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 9f3fb751c491..746048b13ef3 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -10,28 +10,26 @@
10 10
11static int 11static int
12sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 12sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
13 int *verdict, struct ip_vs_conn **cpp) 13 int *verdict, struct ip_vs_conn **cpp,
14 struct ip_vs_iphdr *iph)
14{ 15{
15 struct net *net; 16 struct net *net;
16 struct ip_vs_service *svc; 17 struct ip_vs_service *svc;
17 sctp_chunkhdr_t _schunkh, *sch; 18 sctp_chunkhdr_t _schunkh, *sch;
18 sctp_sctphdr_t *sh, _sctph; 19 sctp_sctphdr_t *sh, _sctph;
19 struct ip_vs_iphdr iph;
20 20
21 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 21 sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
22
23 sh = skb_header_pointer(skb, iph.len, sizeof(_sctph), &_sctph);
24 if (sh == NULL) 22 if (sh == NULL)
25 return 0; 23 return 0;
26 24
27 sch = skb_header_pointer(skb, iph.len + sizeof(sctp_sctphdr_t), 25 sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t),
28 sizeof(_schunkh), &_schunkh); 26 sizeof(_schunkh), &_schunkh);
29 if (sch == NULL) 27 if (sch == NULL)
30 return 0; 28 return 0;
31 net = skb_net(skb); 29 net = skb_net(skb);
32 if ((sch->type == SCTP_CID_INIT) && 30 if ((sch->type == SCTP_CID_INIT) &&
33 (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, 31 (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
34 &iph.daddr, sh->dest))) { 32 &iph->daddr, sh->dest))) {
35 int ignored; 33 int ignored;
36 34
37 if (ip_vs_todrop(net_ipvs(net))) { 35 if (ip_vs_todrop(net_ipvs(net))) {
@@ -47,10 +45,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
47 * Let the virtual server select a real server for the 45 * Let the virtual server select a real server for the
48 * incoming connection, and create a connection entry. 46 * incoming connection, and create a connection entry.
49 */ 47 */
50 *cpp = ip_vs_schedule(svc, skb, pd, &ignored); 48 *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
51 if (!*cpp && ignored <= 0) { 49 if (!*cpp && ignored <= 0) {
52 if (!ignored) 50 if (!ignored)
53 *verdict = ip_vs_leave(svc, skb, pd); 51 *verdict = ip_vs_leave(svc, skb, pd, iph);
54 else { 52 else {
55 ip_vs_service_put(svc); 53 ip_vs_service_put(svc);
56 *verdict = NF_DROP; 54 *verdict = NF_DROP;
@@ -64,20 +62,18 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
64} 62}
65 63
66static int 64static int
67sctp_snat_handler(struct sk_buff *skb, 65sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
68 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 66 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
69{ 67{
70 sctp_sctphdr_t *sctph; 68 sctp_sctphdr_t *sctph;
71 unsigned int sctphoff; 69 unsigned int sctphoff = iph->len;
72 struct sk_buff *iter; 70 struct sk_buff *iter;
73 __be32 crc32; 71 __be32 crc32;
74 72
75#ifdef CONFIG_IP_VS_IPV6 73#ifdef CONFIG_IP_VS_IPV6
76 if (cp->af == AF_INET6) 74 if (cp->af == AF_INET6 && iph->fragoffs)
77 sctphoff = sizeof(struct ipv6hdr); 75 return 1;
78 else
79#endif 76#endif
80 sctphoff = ip_hdrlen(skb);
81 77
82 /* csum_check requires unshared skb */ 78 /* csum_check requires unshared skb */
83 if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) 79 if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
@@ -108,20 +104,18 @@ sctp_snat_handler(struct sk_buff *skb,
108} 104}
109 105
110static int 106static int
111sctp_dnat_handler(struct sk_buff *skb, 107sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
112 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 108 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
113{ 109{
114 sctp_sctphdr_t *sctph; 110 sctp_sctphdr_t *sctph;
115 unsigned int sctphoff; 111 unsigned int sctphoff = iph->len;
116 struct sk_buff *iter; 112 struct sk_buff *iter;
117 __be32 crc32; 113 __be32 crc32;
118 114
119#ifdef CONFIG_IP_VS_IPV6 115#ifdef CONFIG_IP_VS_IPV6
120 if (cp->af == AF_INET6) 116 if (cp->af == AF_INET6 && iph->fragoffs)
121 sctphoff = sizeof(struct ipv6hdr); 117 return 1;
122 else
123#endif 118#endif
124 sctphoff = ip_hdrlen(skb);
125 119
126 /* csum_check requires unshared skb */ 120 /* csum_check requires unshared skb */
127 if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) 121 if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index cd609cc62721..9af653a75825 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -33,16 +33,14 @@
33 33
34static int 34static int
35tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 35tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
36 int *verdict, struct ip_vs_conn **cpp) 36 int *verdict, struct ip_vs_conn **cpp,
37 struct ip_vs_iphdr *iph)
37{ 38{
38 struct net *net; 39 struct net *net;
39 struct ip_vs_service *svc; 40 struct ip_vs_service *svc;
40 struct tcphdr _tcph, *th; 41 struct tcphdr _tcph, *th;
41 struct ip_vs_iphdr iph;
42 42
43 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 43 th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
44
45 th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph);
46 if (th == NULL) { 44 if (th == NULL) {
47 *verdict = NF_DROP; 45 *verdict = NF_DROP;
48 return 0; 46 return 0;
@@ -50,8 +48,8 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
50 net = skb_net(skb); 48 net = skb_net(skb);
51 /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ 49 /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
52 if (th->syn && 50 if (th->syn &&
53 (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, 51 (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
54 &iph.daddr, th->dest))) { 52 &iph->daddr, th->dest))) {
55 int ignored; 53 int ignored;
56 54
57 if (ip_vs_todrop(net_ipvs(net))) { 55 if (ip_vs_todrop(net_ipvs(net))) {
@@ -68,10 +66,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
68 * Let the virtual server select a real server for the 66 * Let the virtual server select a real server for the
69 * incoming connection, and create a connection entry. 67 * incoming connection, and create a connection entry.
70 */ 68 */
71 *cpp = ip_vs_schedule(svc, skb, pd, &ignored); 69 *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
72 if (!*cpp && ignored <= 0) { 70 if (!*cpp && ignored <= 0) {
73 if (!ignored) 71 if (!ignored)
74 *verdict = ip_vs_leave(svc, skb, pd); 72 *verdict = ip_vs_leave(svc, skb, pd, iph);
75 else { 73 else {
76 ip_vs_service_put(svc); 74 ip_vs_service_put(svc);
77 *verdict = NF_DROP; 75 *verdict = NF_DROP;
@@ -128,20 +126,18 @@ tcp_partial_csum_update(int af, struct tcphdr *tcph,
128 126
129 127
130static int 128static int
131tcp_snat_handler(struct sk_buff *skb, 129tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
132 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 130 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
133{ 131{
134 struct tcphdr *tcph; 132 struct tcphdr *tcph;
135 unsigned int tcphoff; 133 unsigned int tcphoff = iph->len;
136 int oldlen; 134 int oldlen;
137 int payload_csum = 0; 135 int payload_csum = 0;
138 136
139#ifdef CONFIG_IP_VS_IPV6 137#ifdef CONFIG_IP_VS_IPV6
140 if (cp->af == AF_INET6) 138 if (cp->af == AF_INET6 && iph->fragoffs)
141 tcphoff = sizeof(struct ipv6hdr); 139 return 1;
142 else
143#endif 140#endif
144 tcphoff = ip_hdrlen(skb);
145 oldlen = skb->len - tcphoff; 141 oldlen = skb->len - tcphoff;
146 142
147 /* csum_check requires unshared skb */ 143 /* csum_check requires unshared skb */
@@ -208,20 +204,18 @@ tcp_snat_handler(struct sk_buff *skb,
208 204
209 205
210static int 206static int
211tcp_dnat_handler(struct sk_buff *skb, 207tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
212 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 208 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
213{ 209{
214 struct tcphdr *tcph; 210 struct tcphdr *tcph;
215 unsigned int tcphoff; 211 unsigned int tcphoff = iph->len;
216 int oldlen; 212 int oldlen;
217 int payload_csum = 0; 213 int payload_csum = 0;
218 214
219#ifdef CONFIG_IP_VS_IPV6 215#ifdef CONFIG_IP_VS_IPV6
220 if (cp->af == AF_INET6) 216 if (cp->af == AF_INET6 && iph->fragoffs)
221 tcphoff = sizeof(struct ipv6hdr); 217 return 1;
222 else
223#endif 218#endif
224 tcphoff = ip_hdrlen(skb);
225 oldlen = skb->len - tcphoff; 219 oldlen = skb->len - tcphoff;
226 220
227 /* csum_check requires unshared skb */ 221 /* csum_check requires unshared skb */
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 2fedb2dcb3d1..503a842c90d2 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -30,23 +30,22 @@
30 30
31static int 31static int
32udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 32udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
33 int *verdict, struct ip_vs_conn **cpp) 33 int *verdict, struct ip_vs_conn **cpp,
34 struct ip_vs_iphdr *iph)
34{ 35{
35 struct net *net; 36 struct net *net;
36 struct ip_vs_service *svc; 37 struct ip_vs_service *svc;
37 struct udphdr _udph, *uh; 38 struct udphdr _udph, *uh;
38 struct ip_vs_iphdr iph;
39 39
40 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 40 /* IPv6 fragments, only first fragment will hit this */
41 41 uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
42 uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph);
43 if (uh == NULL) { 42 if (uh == NULL) {
44 *verdict = NF_DROP; 43 *verdict = NF_DROP;
45 return 0; 44 return 0;
46 } 45 }
47 net = skb_net(skb); 46 net = skb_net(skb);
48 svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, 47 svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
49 &iph.daddr, uh->dest); 48 &iph->daddr, uh->dest);
50 if (svc) { 49 if (svc) {
51 int ignored; 50 int ignored;
52 51
@@ -64,10 +63,10 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
64 * Let the virtual server select a real server for the 63 * Let the virtual server select a real server for the
65 * incoming connection, and create a connection entry. 64 * incoming connection, and create a connection entry.
66 */ 65 */
67 *cpp = ip_vs_schedule(svc, skb, pd, &ignored); 66 *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
68 if (!*cpp && ignored <= 0) { 67 if (!*cpp && ignored <= 0) {
69 if (!ignored) 68 if (!ignored)
70 *verdict = ip_vs_leave(svc, skb, pd); 69 *verdict = ip_vs_leave(svc, skb, pd, iph);
71 else { 70 else {
72 ip_vs_service_put(svc); 71 ip_vs_service_put(svc);
73 *verdict = NF_DROP; 72 *verdict = NF_DROP;
@@ -125,20 +124,18 @@ udp_partial_csum_update(int af, struct udphdr *uhdr,
125 124
126 125
127static int 126static int
128udp_snat_handler(struct sk_buff *skb, 127udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
129 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 128 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
130{ 129{
131 struct udphdr *udph; 130 struct udphdr *udph;
132 unsigned int udphoff; 131 unsigned int udphoff = iph->len;
133 int oldlen; 132 int oldlen;
134 int payload_csum = 0; 133 int payload_csum = 0;
135 134
136#ifdef CONFIG_IP_VS_IPV6 135#ifdef CONFIG_IP_VS_IPV6
137 if (cp->af == AF_INET6) 136 if (cp->af == AF_INET6 && iph->fragoffs)
138 udphoff = sizeof(struct ipv6hdr); 137 return 1;
139 else
140#endif 138#endif
141 udphoff = ip_hdrlen(skb);
142 oldlen = skb->len - udphoff; 139 oldlen = skb->len - udphoff;
143 140
144 /* csum_check requires unshared skb */ 141 /* csum_check requires unshared skb */
@@ -210,20 +207,18 @@ udp_snat_handler(struct sk_buff *skb,
210 207
211 208
212static int 209static int
213udp_dnat_handler(struct sk_buff *skb, 210udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
214 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 211 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
215{ 212{
216 struct udphdr *udph; 213 struct udphdr *udph;
217 unsigned int udphoff; 214 unsigned int udphoff = iph->len;
218 int oldlen; 215 int oldlen;
219 int payload_csum = 0; 216 int payload_csum = 0;
220 217
221#ifdef CONFIG_IP_VS_IPV6 218#ifdef CONFIG_IP_VS_IPV6
222 if (cp->af == AF_INET6) 219 if (cp->af == AF_INET6 && iph->fragoffs)
223 udphoff = sizeof(struct ipv6hdr); 220 return 1;
224 else
225#endif 221#endif
226 udphoff = ip_hdrlen(skb);
227 oldlen = skb->len - udphoff; 222 oldlen = skb->len - udphoff;
228 223
229 /* csum_check requires unshared skb */ 224 /* csum_check requires unshared skb */
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index 08dbdd5bc18f..d6bf20d6cdbe 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -159,7 +159,7 @@ void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg)
159 svc->fwmark, msg); 159 svc->fwmark, msg);
160#ifdef CONFIG_IP_VS_IPV6 160#ifdef CONFIG_IP_VS_IPV6
161 } else if (svc->af == AF_INET6) { 161 } else if (svc->af == AF_INET6) {
162 IP_VS_ERR_RL("%s: %s [%pI6]:%d - %s\n", 162 IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n",
163 svc->scheduler->name, 163 svc->scheduler->name,
164 ip_vs_proto_name(svc->protocol), 164 ip_vs_proto_name(svc->protocol),
165 &svc->addr.in6, ntohs(svc->port), msg); 165 &svc->addr.in6, ntohs(svc->port), msg);
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index 05126521743e..e33126994628 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -228,7 +228,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
228 struct ip_vs_sh_bucket *tbl; 228 struct ip_vs_sh_bucket *tbl;
229 struct ip_vs_iphdr iph; 229 struct ip_vs_iphdr iph;
230 230
231 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 231 ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
232 232
233 IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); 233 IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
234 234
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index cc4c8095681a..12008b47e5ca 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -338,7 +338,7 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
338 local = __ip_vs_is_local_route6(rt); 338 local = __ip_vs_is_local_route6(rt);
339 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) & 339 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
340 rt_mode)) { 340 rt_mode)) {
341 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n", 341 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n",
342 local ? "local":"non-local", daddr); 342 local ? "local":"non-local", daddr);
343 dst_release(&rt->dst); 343 dst_release(&rt->dst);
344 return NULL; 344 return NULL;
@@ -346,8 +346,8 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
346 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) && 346 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
347 !((ort = (struct rt6_info *) skb_dst(skb)) && 347 !((ort = (struct rt6_info *) skb_dst(skb)) &&
348 __ip_vs_is_local_route6(ort))) { 348 __ip_vs_is_local_route6(ort))) {
349 IP_VS_DBG_RL("Redirect from non-local address %pI6 to local " 349 IP_VS_DBG_RL("Redirect from non-local address %pI6c to local "
350 "requires NAT method, dest: %pI6\n", 350 "requires NAT method, dest: %pI6c\n",
351 &ipv6_hdr(skb)->daddr, daddr); 351 &ipv6_hdr(skb)->daddr, daddr);
352 dst_release(&rt->dst); 352 dst_release(&rt->dst);
353 return NULL; 353 return NULL;
@@ -355,8 +355,8 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
355 if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) && 355 if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
356 ipv6_addr_type(&ipv6_hdr(skb)->saddr) & 356 ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
357 IPV6_ADDR_LOOPBACK)) { 357 IPV6_ADDR_LOOPBACK)) {
358 IP_VS_DBG_RL("Stopping traffic from loopback address %pI6 " 358 IP_VS_DBG_RL("Stopping traffic from loopback address %pI6c "
359 "to non-local address, dest: %pI6\n", 359 "to non-local address, dest: %pI6c\n",
360 &ipv6_hdr(skb)->saddr, daddr); 360 &ipv6_hdr(skb)->saddr, daddr);
361 dst_release(&rt->dst); 361 dst_release(&rt->dst);
362 return NULL; 362 return NULL;
@@ -427,7 +427,7 @@ do { \
427 */ 427 */
428int 428int
429ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 429ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
430 struct ip_vs_protocol *pp) 430 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
431{ 431{
432 /* we do not touch skb and do not need pskb ptr */ 432 /* we do not touch skb and do not need pskb ptr */
433 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); 433 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
@@ -441,7 +441,7 @@ ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
441 */ 441 */
442int 442int
443ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 443ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
444 struct ip_vs_protocol *pp) 444 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
445{ 445{
446 struct rtable *rt; /* Route to the other host */ 446 struct rtable *rt; /* Route to the other host */
447 struct iphdr *iph = ip_hdr(skb); 447 struct iphdr *iph = ip_hdr(skb);
@@ -496,16 +496,16 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
496#ifdef CONFIG_IP_VS_IPV6 496#ifdef CONFIG_IP_VS_IPV6
497int 497int
498ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 498ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
499 struct ip_vs_protocol *pp) 499 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
500{ 500{
501 struct rt6_info *rt; /* Route to the other host */ 501 struct rt6_info *rt; /* Route to the other host */
502 struct ipv6hdr *iph = ipv6_hdr(skb);
503 int mtu; 502 int mtu;
504 503
505 EnterFunction(10); 504 EnterFunction(10);
506 505
507 if (!(rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr, NULL, 0, 506 rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr.in6, NULL, 0,
508 IP_VS_RT_MODE_NON_LOCAL))) 507 IP_VS_RT_MODE_NON_LOCAL);
508 if (!rt)
509 goto tx_error_icmp; 509 goto tx_error_icmp;
510 510
511 /* MTU checking */ 511 /* MTU checking */
@@ -516,7 +516,9 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
516 516
517 skb->dev = net->loopback_dev; 517 skb->dev = net->loopback_dev;
518 } 518 }
519 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 519 /* only send ICMP too big on first fragment */
520 if (!iph->fragoffs)
521 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
520 dst_release(&rt->dst); 522 dst_release(&rt->dst);
521 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 523 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
522 goto tx_error; 524 goto tx_error;
@@ -559,7 +561,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
559 */ 561 */
560int 562int
561ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 563ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
562 struct ip_vs_protocol *pp) 564 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
563{ 565{
564 struct rtable *rt; /* Route to the other host */ 566 struct rtable *rt; /* Route to the other host */
565 int mtu; 567 int mtu;
@@ -629,7 +631,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
629 goto tx_error_put; 631 goto tx_error_put;
630 632
631 /* mangle the packet */ 633 /* mangle the packet */
632 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) 634 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
633 goto tx_error_put; 635 goto tx_error_put;
634 ip_hdr(skb)->daddr = cp->daddr.ip; 636 ip_hdr(skb)->daddr = cp->daddr.ip;
635 ip_send_check(ip_hdr(skb)); 637 ip_send_check(ip_hdr(skb));
@@ -677,7 +679,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
677#ifdef CONFIG_IP_VS_IPV6 679#ifdef CONFIG_IP_VS_IPV6
678int 680int
679ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 681ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
680 struct ip_vs_protocol *pp) 682 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
681{ 683{
682 struct rt6_info *rt; /* Route to the other host */ 684 struct rt6_info *rt; /* Route to the other host */
683 int mtu; 685 int mtu;
@@ -686,10 +688,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
686 EnterFunction(10); 688 EnterFunction(10);
687 689
688 /* check if it is a connection of no-client-port */ 690 /* check if it is a connection of no-client-port */
689 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { 691 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !iph->fragoffs)) {
690 __be16 _pt, *p; 692 __be16 _pt, *p;
691 p = skb_header_pointer(skb, sizeof(struct ipv6hdr), 693 p = skb_header_pointer(skb, iph->len, sizeof(_pt), &_pt);
692 sizeof(_pt), &_pt);
693 if (p == NULL) 694 if (p == NULL)
694 goto tx_error; 695 goto tx_error;
695 ip_vs_conn_fill_cport(cp, *p); 696 ip_vs_conn_fill_cport(cp, *p);
@@ -737,7 +738,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
737 738
738 skb->dev = net->loopback_dev; 739 skb->dev = net->loopback_dev;
739 } 740 }
740 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 741 /* only send ICMP too big on first fragment */
742 if (!iph->fragoffs)
743 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
741 IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0, 744 IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
742 "ip_vs_nat_xmit_v6(): frag needed for"); 745 "ip_vs_nat_xmit_v6(): frag needed for");
743 goto tx_error_put; 746 goto tx_error_put;
@@ -751,7 +754,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
751 goto tx_error_put; 754 goto tx_error_put;
752 755
753 /* mangle the packet */ 756 /* mangle the packet */
754 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) 757 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, iph))
755 goto tx_error; 758 goto tx_error;
756 ipv6_hdr(skb)->daddr = cp->daddr.in6; 759 ipv6_hdr(skb)->daddr = cp->daddr.in6;
757 760
@@ -812,7 +815,7 @@ tx_error_put:
812 */ 815 */
813int 816int
814ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 817ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
815 struct ip_vs_protocol *pp) 818 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
816{ 819{
817 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); 820 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
818 struct rtable *rt; /* Route to the other host */ 821 struct rtable *rt; /* Route to the other host */
@@ -932,7 +935,7 @@ tx_error_put:
932#ifdef CONFIG_IP_VS_IPV6 935#ifdef CONFIG_IP_VS_IPV6
933int 936int
934ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 937ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
935 struct ip_vs_protocol *pp) 938 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
936{ 939{
937 struct rt6_info *rt; /* Route to the other host */ 940 struct rt6_info *rt; /* Route to the other host */
938 struct in6_addr saddr; /* Source for tunnel */ 941 struct in6_addr saddr; /* Source for tunnel */
@@ -972,7 +975,9 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
972 975
973 skb->dev = net->loopback_dev; 976 skb->dev = net->loopback_dev;
974 } 977 }
975 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 978 /* only send ICMP too big on first fragment */
979 if (!ipvsh->fragoffs)
980 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
976 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 981 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
977 goto tx_error_put; 982 goto tx_error_put;
978 } 983 }
@@ -1053,7 +1058,7 @@ tx_error_put:
1053 */ 1058 */
1054int 1059int
1055ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 1060ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1056 struct ip_vs_protocol *pp) 1061 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
1057{ 1062{
1058 struct rtable *rt; /* Route to the other host */ 1063 struct rtable *rt; /* Route to the other host */
1059 struct iphdr *iph = ip_hdr(skb); 1064 struct iphdr *iph = ip_hdr(skb);
@@ -1115,7 +1120,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1115#ifdef CONFIG_IP_VS_IPV6 1120#ifdef CONFIG_IP_VS_IPV6
1116int 1121int
1117ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 1122ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1118 struct ip_vs_protocol *pp) 1123 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
1119{ 1124{
1120 struct rt6_info *rt; /* Route to the other host */ 1125 struct rt6_info *rt; /* Route to the other host */
1121 int mtu; 1126 int mtu;
@@ -1139,7 +1144,9 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1139 1144
1140 skb->dev = net->loopback_dev; 1145 skb->dev = net->loopback_dev;
1141 } 1146 }
1142 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1147 /* only send ICMP too big on first fragment */
1148 if (!iph->fragoffs)
1149 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1143 dst_release(&rt->dst); 1150 dst_release(&rt->dst);
1144 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1151 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1145 goto tx_error; 1152 goto tx_error;
@@ -1183,7 +1190,8 @@ tx_error:
1183 */ 1190 */
1184int 1191int
1185ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 1192ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1186 struct ip_vs_protocol *pp, int offset, unsigned int hooknum) 1193 struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
1194 struct ip_vs_iphdr *iph)
1187{ 1195{
1188 struct rtable *rt; /* Route to the other host */ 1196 struct rtable *rt; /* Route to the other host */
1189 int mtu; 1197 int mtu;
@@ -1198,7 +1206,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1198 translate address/port back */ 1206 translate address/port back */
1199 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { 1207 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1200 if (cp->packet_xmit) 1208 if (cp->packet_xmit)
1201 rc = cp->packet_xmit(skb, cp, pp); 1209 rc = cp->packet_xmit(skb, cp, pp, iph);
1202 else 1210 else
1203 rc = NF_ACCEPT; 1211 rc = NF_ACCEPT;
1204 /* do not touch skb anymore */ 1212 /* do not touch skb anymore */
@@ -1304,7 +1312,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1304#ifdef CONFIG_IP_VS_IPV6 1312#ifdef CONFIG_IP_VS_IPV6
1305int 1313int
1306ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 1314ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1307 struct ip_vs_protocol *pp, int offset, unsigned int hooknum) 1315 struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
1316 struct ip_vs_iphdr *iph)
1308{ 1317{
1309 struct rt6_info *rt; /* Route to the other host */ 1318 struct rt6_info *rt; /* Route to the other host */
1310 int mtu; 1319 int mtu;
@@ -1319,7 +1328,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1319 translate address/port back */ 1328 translate address/port back */
1320 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { 1329 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1321 if (cp->packet_xmit) 1330 if (cp->packet_xmit)
1322 rc = cp->packet_xmit(skb, cp, pp); 1331 rc = cp->packet_xmit(skb, cp, pp, iph);
1323 else 1332 else
1324 rc = NF_ACCEPT; 1333 rc = NF_ACCEPT;
1325 /* do not touch skb anymore */ 1334 /* do not touch skb anymore */
@@ -1375,7 +1384,9 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1375 1384
1376 skb->dev = net->loopback_dev; 1385 skb->dev = net->loopback_dev;
1377 } 1386 }
1378 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1387 /* only send ICMP too big on first fragment */
1388 if (!iph->fragoffs)
1389 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1379 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1390 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1380 goto tx_error_put; 1391 goto tx_error_put;
1381 } 1392 }
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
index bb10b0717f1b..8d47c3780fda 100644
--- a/net/netfilter/xt_ipvs.c
+++ b/net/netfilter/xt_ipvs.c
@@ -67,7 +67,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
67 goto out; 67 goto out;
68 } 68 }
69 69
70 ip_vs_fill_iphdr(family, skb_network_header(skb), &iph); 70 ip_vs_fill_iph_skb(family, skb, &iph);
71 71
72 if (data->bitmask & XT_IPVS_PROTO) 72 if (data->bitmask & XT_IPVS_PROTO)
73 if ((iph.protocol == data->l4proto) ^ 73 if ((iph.protocol == data->l4proto) ^
@@ -85,7 +85,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
85 /* 85 /*
86 * Check if the packet belongs to an existing entry 86 * Check if the packet belongs to an existing entry
87 */ 87 */
88 cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */); 88 cp = pp->conn_out_get(family, skb, &iph, 1 /* inverse */);
89 if (unlikely(cp == NULL)) { 89 if (unlikely(cp == NULL)) {
90 match = false; 90 match = false;
91 goto out; 91 goto out;