aboutsummaryrefslogtreecommitdiffstats
path: root/net/netfilter
diff options
context:
space:
mode:
author: Pablo Neira Ayuso <pablo@netfilter.org> 2012-10-22 06:30:41 -0400
committer: Pablo Neira Ayuso <pablo@netfilter.org> 2012-10-22 06:30:41 -0400
commit: bcc58c4d9141160d6448e4589acbd46e5c647518 (patch)
tree: 28f189f32feef5ac28b1a59268d42c415f63a2b5 /net/netfilter
parent: 7fe0b14b725d6d09a1d9e1409bd465cb88b587f9 (diff)
parent: 92eec78d25aee6bbc9bd295f51c022ddfa80cdd9 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs-next
Pull updates from Jesper Dangaard Brouer for IPVS mostly targeted to improve IPv6 support (7 commits):
- ipvs: Trivial changes, use compressed IPv6 address in output
- ipvs: IPv6 extend ICMPv6 handling for future types
- ipvs: Use config macro IS_ENABLED()
- ipvs: Fix faulty IPv6 extension header handling in IPVS
- ipvs: Complete IPv6 fragment handling for IPVS
- ipvs: API change to avoid rescan of IPv6 exthdr
- ipvs: SIP fragment handling
Diffstat (limited to 'net/netfilter')
-rw-r--r-- net/netfilter/ipvs/Kconfig 7
-rw-r--r-- net/netfilter/ipvs/ip_vs_conn.c 15
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c 404
-rw-r--r-- net/netfilter/ipvs/ip_vs_dh.c 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_lblc.c 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_lblcr.c 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_pe_sip.c 18
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto.c 6
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_ah_esp.c 9
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_sctp.c 42
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_tcp.c 40
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_udp.c 41
-rw-r--r-- net/netfilter/ipvs/ip_vs_sched.c 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_sh.c 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_xmit.c 73
-rw-r--r-- net/netfilter/xt_ipvs.c 4
16 files changed, 359 insertions, 310 deletions
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 8b2cffdfdd9..0c3b1670b0d 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -28,12 +28,11 @@ if IP_VS
28config IP_VS_IPV6 28config IP_VS_IPV6
29 bool "IPv6 support for IPVS" 29 bool "IPv6 support for IPVS"
30 depends on IPV6 = y || IP_VS = IPV6 30 depends on IPV6 = y || IP_VS = IPV6
31 select IP6_NF_IPTABLES
31 ---help--- 32 ---help---
32 Add IPv6 support to IPVS. This is incomplete and might be dangerous. 33 Add IPv6 support to IPVS.
33 34
34 See http://www.mindbasket.com/ipvs for more information. 35 Say Y if unsure.
35
36 Say N if unsure.
37 36
38config IP_VS_DEBUG 37config IP_VS_DEBUG
39 bool "IP virtual server debugging" 38 bool "IP virtual server debugging"
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 1548df9a752..30e764ad021 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -308,13 +308,12 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
308static int 308static int
309ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, 309ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
310 const struct ip_vs_iphdr *iph, 310 const struct ip_vs_iphdr *iph,
311 unsigned int proto_off, int inverse, 311 int inverse, struct ip_vs_conn_param *p)
312 struct ip_vs_conn_param *p)
313{ 312{
314 __be16 _ports[2], *pptr; 313 __be16 _ports[2], *pptr;
315 struct net *net = skb_net(skb); 314 struct net *net = skb_net(skb);
316 315
317 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); 316 pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
318 if (pptr == NULL) 317 if (pptr == NULL)
319 return 1; 318 return 1;
320 319
@@ -329,12 +328,11 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
329 328
330struct ip_vs_conn * 329struct ip_vs_conn *
331ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, 330ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
332 const struct ip_vs_iphdr *iph, 331 const struct ip_vs_iphdr *iph, int inverse)
333 unsigned int proto_off, int inverse)
334{ 332{
335 struct ip_vs_conn_param p; 333 struct ip_vs_conn_param p;
336 334
337 if (ip_vs_conn_fill_param_proto(af, skb, iph, proto_off, inverse, &p)) 335 if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p))
338 return NULL; 336 return NULL;
339 337
340 return ip_vs_conn_in_get(&p); 338 return ip_vs_conn_in_get(&p);
@@ -432,12 +430,11 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
432 430
433struct ip_vs_conn * 431struct ip_vs_conn *
434ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, 432ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
435 const struct ip_vs_iphdr *iph, 433 const struct ip_vs_iphdr *iph, int inverse)
436 unsigned int proto_off, int inverse)
437{ 434{
438 struct ip_vs_conn_param p; 435 struct ip_vs_conn_param p;
439 436
440 if (ip_vs_conn_fill_param_proto(af, skb, iph, proto_off, inverse, &p)) 437 if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p))
441 return NULL; 438 return NULL;
442 439
443 return ip_vs_conn_out_get(&p); 440 return ip_vs_conn_out_get(&p);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 58918e20f9d..fb45640dc1f 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -222,11 +222,10 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
222 */ 222 */
223static struct ip_vs_conn * 223static struct ip_vs_conn *
224ip_vs_sched_persist(struct ip_vs_service *svc, 224ip_vs_sched_persist(struct ip_vs_service *svc,
225 struct sk_buff *skb, 225 struct sk_buff *skb, __be16 src_port, __be16 dst_port,
226 __be16 src_port, __be16 dst_port, int *ignored) 226 int *ignored, struct ip_vs_iphdr *iph)
227{ 227{
228 struct ip_vs_conn *cp = NULL; 228 struct ip_vs_conn *cp = NULL;
229 struct ip_vs_iphdr iph;
230 struct ip_vs_dest *dest; 229 struct ip_vs_dest *dest;
231 struct ip_vs_conn *ct; 230 struct ip_vs_conn *ct;
232 __be16 dport = 0; /* destination port to forward */ 231 __be16 dport = 0; /* destination port to forward */
@@ -236,20 +235,18 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
236 union nf_inet_addr snet; /* source network of the client, 235 union nf_inet_addr snet; /* source network of the client,
237 after masking */ 236 after masking */
238 237
239 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
240
241 /* Mask saddr with the netmask to adjust template granularity */ 238 /* Mask saddr with the netmask to adjust template granularity */
242#ifdef CONFIG_IP_VS_IPV6 239#ifdef CONFIG_IP_VS_IPV6
243 if (svc->af == AF_INET6) 240 if (svc->af == AF_INET6)
244 ipv6_addr_prefix(&snet.in6, &iph.saddr.in6, svc->netmask); 241 ipv6_addr_prefix(&snet.in6, &iph->saddr.in6, svc->netmask);
245 else 242 else
246#endif 243#endif
247 snet.ip = iph.saddr.ip & svc->netmask; 244 snet.ip = iph->saddr.ip & svc->netmask;
248 245
249 IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " 246 IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
250 "mnet %s\n", 247 "mnet %s\n",
251 IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port), 248 IP_VS_DBG_ADDR(svc->af, &iph->saddr), ntohs(src_port),
252 IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port), 249 IP_VS_DBG_ADDR(svc->af, &iph->daddr), ntohs(dst_port),
253 IP_VS_DBG_ADDR(svc->af, &snet)); 250 IP_VS_DBG_ADDR(svc->af, &snet));
254 251
255 /* 252 /*
@@ -266,8 +263,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
266 * is created for other persistent services. 263 * is created for other persistent services.
267 */ 264 */
268 { 265 {
269 int protocol = iph.protocol; 266 int protocol = iph->protocol;
270 const union nf_inet_addr *vaddr = &iph.daddr; 267 const union nf_inet_addr *vaddr = &iph->daddr;
271 __be16 vport = 0; 268 __be16 vport = 0;
272 269
273 if (dst_port == svc->port) { 270 if (dst_port == svc->port) {
@@ -342,14 +339,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
342 dport = dest->port; 339 dport = dest->port;
343 340
344 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET 341 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
345 && iph.protocol == IPPROTO_UDP)? 342 && iph->protocol == IPPROTO_UDP) ?
346 IP_VS_CONN_F_ONE_PACKET : 0; 343 IP_VS_CONN_F_ONE_PACKET : 0;
347 344
348 /* 345 /*
349 * Create a new connection according to the template 346 * Create a new connection according to the template
350 */ 347 */
351 ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr, 348 ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr,
352 src_port, &iph.daddr, dst_port, &param); 349 src_port, &iph->daddr, dst_port, &param);
353 350
354 cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark); 351 cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark);
355 if (cp == NULL) { 352 if (cp == NULL) {
@@ -392,18 +389,20 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
392 */ 389 */
393struct ip_vs_conn * 390struct ip_vs_conn *
394ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, 391ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
395 struct ip_vs_proto_data *pd, int *ignored) 392 struct ip_vs_proto_data *pd, int *ignored,
393 struct ip_vs_iphdr *iph)
396{ 394{
397 struct ip_vs_protocol *pp = pd->pp; 395 struct ip_vs_protocol *pp = pd->pp;
398 struct ip_vs_conn *cp = NULL; 396 struct ip_vs_conn *cp = NULL;
399 struct ip_vs_iphdr iph;
400 struct ip_vs_dest *dest; 397 struct ip_vs_dest *dest;
401 __be16 _ports[2], *pptr; 398 __be16 _ports[2], *pptr;
402 unsigned int flags; 399 unsigned int flags;
403 400
404 *ignored = 1; 401 *ignored = 1;
405 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 402 /*
406 pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); 403 * IPv6 frags, only the first hit here.
404 */
405 pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
407 if (pptr == NULL) 406 if (pptr == NULL)
408 return NULL; 407 return NULL;
409 408
@@ -423,7 +422,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
423 * Do not schedule replies from local real server. 422 * Do not schedule replies from local real server.
424 */ 423 */
425 if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && 424 if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
426 (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) { 425 (cp = pp->conn_in_get(svc->af, skb, iph, 1))) {
427 IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, 426 IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
428 "Not scheduling reply for existing connection"); 427 "Not scheduling reply for existing connection");
429 __ip_vs_conn_put(cp); 428 __ip_vs_conn_put(cp);
@@ -434,7 +433,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
434 * Persistent service 433 * Persistent service
435 */ 434 */
436 if (svc->flags & IP_VS_SVC_F_PERSISTENT) 435 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
437 return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored); 436 return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored,
437 iph);
438 438
439 *ignored = 0; 439 *ignored = 0;
440 440
@@ -456,7 +456,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
456 } 456 }
457 457
458 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET 458 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
459 && iph.protocol == IPPROTO_UDP)? 459 && iph->protocol == IPPROTO_UDP) ?
460 IP_VS_CONN_F_ONE_PACKET : 0; 460 IP_VS_CONN_F_ONE_PACKET : 0;
461 461
462 /* 462 /*
@@ -465,9 +465,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
465 { 465 {
466 struct ip_vs_conn_param p; 466 struct ip_vs_conn_param p;
467 467
468 ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, 468 ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
469 &iph.saddr, pptr[0], &iph.daddr, pptr[1], 469 &iph->saddr, pptr[0], &iph->daddr,
470 &p); 470 pptr[1], &p);
471 cp = ip_vs_conn_new(&p, &dest->addr, 471 cp = ip_vs_conn_new(&p, &dest->addr,
472 dest->port ? dest->port : pptr[1], 472 dest->port ? dest->port : pptr[1],
473 flags, dest, skb->mark); 473 flags, dest, skb->mark);
@@ -496,19 +496,16 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
496 * no destination is available for a new connection. 496 * no destination is available for a new connection.
497 */ 497 */
498int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, 498int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
499 struct ip_vs_proto_data *pd) 499 struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph)
500{ 500{
501 __be16 _ports[2], *pptr; 501 __be16 _ports[2], *pptr;
502 struct ip_vs_iphdr iph;
503#ifdef CONFIG_SYSCTL 502#ifdef CONFIG_SYSCTL
504 struct net *net; 503 struct net *net;
505 struct netns_ipvs *ipvs; 504 struct netns_ipvs *ipvs;
506 int unicast; 505 int unicast;
507#endif 506#endif
508 507
509 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 508 pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
510
511 pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
512 if (pptr == NULL) { 509 if (pptr == NULL) {
513 ip_vs_service_put(svc); 510 ip_vs_service_put(svc);
514 return NF_DROP; 511 return NF_DROP;
@@ -519,10 +516,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
519 516
520#ifdef CONFIG_IP_VS_IPV6 517#ifdef CONFIG_IP_VS_IPV6
521 if (svc->af == AF_INET6) 518 if (svc->af == AF_INET6)
522 unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST; 519 unicast = ipv6_addr_type(&iph->daddr.in6) & IPV6_ADDR_UNICAST;
523 else 520 else
524#endif 521#endif
525 unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST); 522 unicast = (inet_addr_type(net, iph->daddr.ip) == RTN_UNICAST);
526 523
527 /* if it is fwmark-based service, the cache_bypass sysctl is up 524 /* if it is fwmark-based service, the cache_bypass sysctl is up
528 and the destination is a non-local unicast, then create 525 and the destination is a non-local unicast, then create
@@ -532,7 +529,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
532 int ret; 529 int ret;
533 struct ip_vs_conn *cp; 530 struct ip_vs_conn *cp;
534 unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && 531 unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
535 iph.protocol == IPPROTO_UDP)? 532 iph->protocol == IPPROTO_UDP) ?
536 IP_VS_CONN_F_ONE_PACKET : 0; 533 IP_VS_CONN_F_ONE_PACKET : 0;
537 union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; 534 union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } };
538 535
@@ -542,9 +539,9 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
542 IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); 539 IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
543 { 540 {
544 struct ip_vs_conn_param p; 541 struct ip_vs_conn_param p;
545 ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, 542 ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
546 &iph.saddr, pptr[0], 543 &iph->saddr, pptr[0],
547 &iph.daddr, pptr[1], &p); 544 &iph->daddr, pptr[1], &p);
548 cp = ip_vs_conn_new(&p, &daddr, 0, 545 cp = ip_vs_conn_new(&p, &daddr, 0,
549 IP_VS_CONN_F_BYPASS | flags, 546 IP_VS_CONN_F_BYPASS | flags,
550 NULL, skb->mark); 547 NULL, skb->mark);
@@ -559,7 +556,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
559 ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); 556 ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
560 557
561 /* transmit the first SYN packet */ 558 /* transmit the first SYN packet */
562 ret = cp->packet_xmit(skb, cp, pd->pp); 559 ret = cp->packet_xmit(skb, cp, pd->pp, iph);
563 /* do not touch skb anymore */ 560 /* do not touch skb anymore */
564 561
565 atomic_inc(&cp->in_pkts); 562 atomic_inc(&cp->in_pkts);
@@ -654,14 +651,6 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
654 return err; 651 return err;
655} 652}
656 653
657#ifdef CONFIG_IP_VS_IPV6
658static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user)
659{
660 /* TODO IPv6: Find out what to do here for IPv6 */
661 return 0;
662}
663#endif
664
665static int ip_vs_route_me_harder(int af, struct sk_buff *skb) 654static int ip_vs_route_me_harder(int af, struct sk_buff *skb)
666{ 655{
667#ifdef CONFIG_IP_VS_IPV6 656#ifdef CONFIG_IP_VS_IPV6
@@ -732,10 +721,19 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
732 struct ip_vs_conn *cp, int inout) 721 struct ip_vs_conn *cp, int inout)
733{ 722{
734 struct ipv6hdr *iph = ipv6_hdr(skb); 723 struct ipv6hdr *iph = ipv6_hdr(skb);
735 unsigned int icmp_offset = sizeof(struct ipv6hdr); 724 unsigned int icmp_offset = 0;
736 struct icmp6hdr *icmph = (struct icmp6hdr *)(skb_network_header(skb) + 725 unsigned int offs = 0; /* header offset*/
737 icmp_offset); 726 int protocol;
738 struct ipv6hdr *ciph = (struct ipv6hdr *)(icmph + 1); 727 struct icmp6hdr *icmph;
728 struct ipv6hdr *ciph;
729 unsigned short fragoffs;
730
731 ipv6_find_hdr(skb, &icmp_offset, IPPROTO_ICMPV6, &fragoffs, NULL);
732 icmph = (struct icmp6hdr *)(skb_network_header(skb) + icmp_offset);
733 offs = icmp_offset + sizeof(struct icmp6hdr);
734 ciph = (struct ipv6hdr *)(skb_network_header(skb) + offs);
735
736 protocol = ipv6_find_hdr(skb, &offs, -1, &fragoffs, NULL);
739 737
740 if (inout) { 738 if (inout) {
741 iph->saddr = cp->vaddr.in6; 739 iph->saddr = cp->vaddr.in6;
@@ -746,10 +744,13 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
746 } 744 }
747 745
748 /* the TCP/UDP/SCTP port */ 746 /* the TCP/UDP/SCTP port */
749 if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr || 747 if (!fragoffs && (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol ||
750 IPPROTO_SCTP == ciph->nexthdr) { 748 IPPROTO_SCTP == protocol)) {
751 __be16 *ports = (void *)ciph + sizeof(struct ipv6hdr); 749 __be16 *ports = (void *)(skb_network_header(skb) + offs);
752 750
751 IP_VS_DBG(11, "%s() changed port %d to %d\n", __func__,
752 ntohs(inout ? ports[1] : ports[0]),
753 ntohs(inout ? cp->vport : cp->dport));
753 if (inout) 754 if (inout)
754 ports[1] = cp->vport; 755 ports[1] = cp->vport;
755 else 756 else
@@ -898,51 +899,35 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
898 IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset, 899 IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
899 "Checking outgoing ICMP for"); 900 "Checking outgoing ICMP for");
900 901
901 offset += cih->ihl * 4; 902 ip_vs_fill_ip4hdr(cih, &ciph);
902 903 ciph.len += offset;
903 ip_vs_fill_iphdr(AF_INET, cih, &ciph);
904 /* The embedded headers contain source and dest in reverse order */ 904 /* The embedded headers contain source and dest in reverse order */
905 cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1); 905 cp = pp->conn_out_get(AF_INET, skb, &ciph, 1);
906 if (!cp) 906 if (!cp)
907 return NF_ACCEPT; 907 return NF_ACCEPT;
908 908
909 snet.ip = iph->saddr; 909 snet.ip = iph->saddr;
910 return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp, 910 return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp,
911 pp, offset, ihl); 911 pp, ciph.len, ihl);
912} 912}
913 913
914#ifdef CONFIG_IP_VS_IPV6 914#ifdef CONFIG_IP_VS_IPV6
915static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, 915static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
916 unsigned int hooknum) 916 unsigned int hooknum, struct ip_vs_iphdr *ipvsh)
917{ 917{
918 struct ipv6hdr *iph;
919 struct icmp6hdr _icmph, *ic; 918 struct icmp6hdr _icmph, *ic;
920 struct ipv6hdr _ciph, *cih; /* The ip header contained 919 struct ipv6hdr _ip6h, *ip6h; /* The ip header contained within ICMP */
921 within the ICMP */ 920 struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */
922 struct ip_vs_iphdr ciph;
923 struct ip_vs_conn *cp; 921 struct ip_vs_conn *cp;
924 struct ip_vs_protocol *pp; 922 struct ip_vs_protocol *pp;
925 unsigned int offset;
926 union nf_inet_addr snet; 923 union nf_inet_addr snet;
924 unsigned int writable;
927 925
928 *related = 1; 926 *related = 1;
929 927 ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph, ipvsh);
930 /* reassemble IP fragments */
931 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
932 if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum)))
933 return NF_STOLEN;
934 }
935
936 iph = ipv6_hdr(skb);
937 offset = sizeof(struct ipv6hdr);
938 ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
939 if (ic == NULL) 928 if (ic == NULL)
940 return NF_DROP; 929 return NF_DROP;
941 930
942 IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) %pI6->%pI6\n",
943 ic->icmp6_type, ntohs(icmpv6_id(ic)),
944 &iph->saddr, &iph->daddr);
945
946 /* 931 /*
947 * Work through seeing if this is for us. 932 * Work through seeing if this is for us.
948 * These checks are supposed to be in an order that means easy 933 * These checks are supposed to be in an order that means easy
@@ -950,42 +935,45 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
950 * this means that some packets will manage to get a long way 935 * this means that some packets will manage to get a long way
951 * down this stack and then be rejected, but that's life. 936 * down this stack and then be rejected, but that's life.
952 */ 937 */
953 if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) && 938 if (ic->icmp6_type & ICMPV6_INFOMSG_MASK) {
954 (ic->icmp6_type != ICMPV6_PKT_TOOBIG) &&
955 (ic->icmp6_type != ICMPV6_TIME_EXCEED)) {
956 *related = 0; 939 *related = 0;
957 return NF_ACCEPT; 940 return NF_ACCEPT;
958 } 941 }
942 /* Fragment header that is before ICMP header tells us that:
943 * it's not an error message since they can't be fragmented.
944 */
945 if (ipvsh->flags & IP6T_FH_F_FRAG)
946 return NF_DROP;
947
948 IP_VS_DBG(8, "Outgoing ICMPv6 (%d,%d) %pI6c->%pI6c\n",
949 ic->icmp6_type, ntohs(icmpv6_id(ic)),
950 &ipvsh->saddr, &ipvsh->daddr);
959 951
960 /* Now find the contained IP header */ 952 /* Now find the contained IP header */
961 offset += sizeof(_icmph); 953 ciph.len = ipvsh->len + sizeof(_icmph);
962 cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); 954 ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h);
963 if (cih == NULL) 955 if (ip6h == NULL)
964 return NF_ACCEPT; /* The packet looks wrong, ignore */ 956 return NF_ACCEPT; /* The packet looks wrong, ignore */
965 957 ciph.saddr.in6 = ip6h->saddr; /* conn_out_get() handles reverse order */
966 pp = ip_vs_proto_get(cih->nexthdr); 958 ciph.daddr.in6 = ip6h->daddr;
959 /* skip possible IPv6 exthdrs of contained IPv6 packet */
960 ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL);
961 if (ciph.protocol < 0)
962 return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */
963
964 pp = ip_vs_proto_get(ciph.protocol);
967 if (!pp) 965 if (!pp)
968 return NF_ACCEPT; 966 return NF_ACCEPT;
969 967
970 /* Is the embedded protocol header present? */
971 /* TODO: we don't support fragmentation at the moment anyways */
972 if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
973 return NF_ACCEPT;
974
975 IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
976 "Checking outgoing ICMPv6 for");
977
978 offset += sizeof(struct ipv6hdr);
979
980 ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
981 /* The embedded headers contain source and dest in reverse order */ 968 /* The embedded headers contain source and dest in reverse order */
982 cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1); 969 cp = pp->conn_out_get(AF_INET6, skb, &ciph, 1);
983 if (!cp) 970 if (!cp)
984 return NF_ACCEPT; 971 return NF_ACCEPT;
985 972
986 snet.in6 = iph->saddr; 973 snet.in6 = ciph.saddr.in6;
987 return handle_response_icmp(AF_INET6, skb, &snet, cih->nexthdr, cp, 974 writable = ciph.len;
988 pp, offset, sizeof(struct ipv6hdr)); 975 return handle_response_icmp(AF_INET6, skb, &snet, ciph.protocol, cp,
976 pp, writable, sizeof(struct ipv6hdr));
989} 977}
990#endif 978#endif
991 979
@@ -1018,17 +1006,17 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
1018 */ 1006 */
1019static unsigned int 1007static unsigned int
1020handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 1008handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
1021 struct ip_vs_conn *cp, int ihl) 1009 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
1022{ 1010{
1023 struct ip_vs_protocol *pp = pd->pp; 1011 struct ip_vs_protocol *pp = pd->pp;
1024 1012
1025 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); 1013 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
1026 1014
1027 if (!skb_make_writable(skb, ihl)) 1015 if (!skb_make_writable(skb, iph->len))
1028 goto drop; 1016 goto drop;
1029 1017
1030 /* mangle the packet */ 1018 /* mangle the packet */
1031 if (pp->snat_handler && !pp->snat_handler(skb, pp, cp)) 1019 if (pp->snat_handler && !pp->snat_handler(skb, pp, cp, iph))
1032 goto drop; 1020 goto drop;
1033 1021
1034#ifdef CONFIG_IP_VS_IPV6 1022#ifdef CONFIG_IP_VS_IPV6
@@ -1115,17 +1103,22 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1115 if (!net_ipvs(net)->enable) 1103 if (!net_ipvs(net)->enable)
1116 return NF_ACCEPT; 1104 return NF_ACCEPT;
1117 1105
1118 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1106 ip_vs_fill_iph_skb(af, skb, &iph);
1119#ifdef CONFIG_IP_VS_IPV6 1107#ifdef CONFIG_IP_VS_IPV6
1120 if (af == AF_INET6) { 1108 if (af == AF_INET6) {
1109 if (!iph.fragoffs && skb_nfct_reasm(skb)) {
1110 struct sk_buff *reasm = skb_nfct_reasm(skb);
1111 /* Save fw mark for coming frags */
1112 reasm->ipvs_property = 1;
1113 reasm->mark = skb->mark;
1114 }
1121 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { 1115 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
1122 int related; 1116 int related;
1123 int verdict = ip_vs_out_icmp_v6(skb, &related, 1117 int verdict = ip_vs_out_icmp_v6(skb, &related,
1124 hooknum); 1118 hooknum, &iph);
1125 1119
1126 if (related) 1120 if (related)
1127 return verdict; 1121 return verdict;
1128 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1129 } 1122 }
1130 } else 1123 } else
1131#endif 1124#endif
@@ -1135,7 +1128,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1135 1128
1136 if (related) 1129 if (related)
1137 return verdict; 1130 return verdict;
1138 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1139 } 1131 }
1140 1132
1141 pd = ip_vs_proto_data_get(net, iph.protocol); 1133 pd = ip_vs_proto_data_get(net, iph.protocol);
@@ -1145,39 +1137,31 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1145 1137
1146 /* reassemble IP fragments */ 1138 /* reassemble IP fragments */
1147#ifdef CONFIG_IP_VS_IPV6 1139#ifdef CONFIG_IP_VS_IPV6
1148 if (af == AF_INET6) { 1140 if (af == AF_INET)
1149 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
1150 if (ip_vs_gather_frags_v6(skb,
1151 ip_vs_defrag_user(hooknum)))
1152 return NF_STOLEN;
1153 }
1154
1155 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1156 } else
1157#endif 1141#endif
1158 if (unlikely(ip_is_fragment(ip_hdr(skb)) && !pp->dont_defrag)) { 1142 if (unlikely(ip_is_fragment(ip_hdr(skb)) && !pp->dont_defrag)) {
1159 if (ip_vs_gather_frags(skb, 1143 if (ip_vs_gather_frags(skb,
1160 ip_vs_defrag_user(hooknum))) 1144 ip_vs_defrag_user(hooknum)))
1161 return NF_STOLEN; 1145 return NF_STOLEN;
1162 1146
1163 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1147 ip_vs_fill_ip4hdr(skb_network_header(skb), &iph);
1164 } 1148 }
1165 1149
1166 /* 1150 /*
1167 * Check if the packet belongs to an existing entry 1151 * Check if the packet belongs to an existing entry
1168 */ 1152 */
1169 cp = pp->conn_out_get(af, skb, &iph, iph.len, 0); 1153 cp = pp->conn_out_get(af, skb, &iph, 0);
1170 1154
1171 if (likely(cp)) 1155 if (likely(cp))
1172 return handle_response(af, skb, pd, cp, iph.len); 1156 return handle_response(af, skb, pd, cp, &iph);
1173 if (sysctl_nat_icmp_send(net) && 1157 if (sysctl_nat_icmp_send(net) &&
1174 (pp->protocol == IPPROTO_TCP || 1158 (pp->protocol == IPPROTO_TCP ||
1175 pp->protocol == IPPROTO_UDP || 1159 pp->protocol == IPPROTO_UDP ||
1176 pp->protocol == IPPROTO_SCTP)) { 1160 pp->protocol == IPPROTO_SCTP)) {
1177 __be16 _ports[2], *pptr; 1161 __be16 _ports[2], *pptr;
1178 1162
1179 pptr = skb_header_pointer(skb, iph.len, 1163 pptr = frag_safe_skb_hp(skb, iph.len,
1180 sizeof(_ports), _ports); 1164 sizeof(_ports), _ports, &iph);
1181 if (pptr == NULL) 1165 if (pptr == NULL)
1182 return NF_ACCEPT; /* Not for me */ 1166 return NF_ACCEPT; /* Not for me */
1183 if (ip_vs_lookup_real_service(net, af, iph.protocol, 1167 if (ip_vs_lookup_real_service(net, af, iph.protocol,
@@ -1375,13 +1359,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1375 "Checking incoming ICMP for"); 1359 "Checking incoming ICMP for");
1376 1360
1377 offset2 = offset; 1361 offset2 = offset;
1378 offset += cih->ihl * 4; 1362 ip_vs_fill_ip4hdr(cih, &ciph);
1379 1363 ciph.len += offset;
1380 ip_vs_fill_iphdr(AF_INET, cih, &ciph); 1364 offset = ciph.len;
1381 /* The embedded headers contain source and dest in reverse order. 1365 /* The embedded headers contain source and dest in reverse order.
1382 * For IPIP this is error for request, not for reply. 1366 * For IPIP this is error for request, not for reply.
1383 */ 1367 */
1384 cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, ipip ? 0 : 1); 1368 cp = pp->conn_in_get(AF_INET, skb, &ciph, ipip ? 0 : 1);
1385 if (!cp) 1369 if (!cp)
1386 return NF_ACCEPT; 1370 return NF_ACCEPT;
1387 1371
@@ -1450,7 +1434,7 @@ ignore_ipip:
1450 ip_vs_in_stats(cp, skb); 1434 ip_vs_in_stats(cp, skb);
1451 if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) 1435 if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
1452 offset += 2 * sizeof(__u16); 1436 offset += 2 * sizeof(__u16);
1453 verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum); 1437 verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum, &ciph);
1454 1438
1455out: 1439out:
1456 __ip_vs_conn_put(cp); 1440 __ip_vs_conn_put(cp);
@@ -1459,38 +1443,24 @@ out:
1459} 1443}
1460 1444
1461#ifdef CONFIG_IP_VS_IPV6 1445#ifdef CONFIG_IP_VS_IPV6
1462static int 1446static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
1463ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) 1447 unsigned int hooknum, struct ip_vs_iphdr *iph)
1464{ 1448{
1465 struct net *net = NULL; 1449 struct net *net = NULL;
1466 struct ipv6hdr *iph; 1450 struct ipv6hdr _ip6h, *ip6h;
1467 struct icmp6hdr _icmph, *ic; 1451 struct icmp6hdr _icmph, *ic;
1468 struct ipv6hdr _ciph, *cih; /* The ip header contained 1452 struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */
1469 within the ICMP */
1470 struct ip_vs_iphdr ciph;
1471 struct ip_vs_conn *cp; 1453 struct ip_vs_conn *cp;
1472 struct ip_vs_protocol *pp; 1454 struct ip_vs_protocol *pp;
1473 struct ip_vs_proto_data *pd; 1455 struct ip_vs_proto_data *pd;
1474 unsigned int offset, verdict; 1456 unsigned int offs_ciph, writable, verdict;
1475 1457
1476 *related = 1; 1458 *related = 1;
1477 1459
1478 /* reassemble IP fragments */ 1460 ic = frag_safe_skb_hp(skb, iph->len, sizeof(_icmph), &_icmph, iph);
1479 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
1480 if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum)))
1481 return NF_STOLEN;
1482 }
1483
1484 iph = ipv6_hdr(skb);
1485 offset = sizeof(struct ipv6hdr);
1486 ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
1487 if (ic == NULL) 1461 if (ic == NULL)
1488 return NF_DROP; 1462 return NF_DROP;
1489 1463
1490 IP_VS_DBG(12, "Incoming ICMPv6 (%d,%d) %pI6->%pI6\n",
1491 ic->icmp6_type, ntohs(icmpv6_id(ic)),
1492 &iph->saddr, &iph->daddr);
1493
1494 /* 1464 /*
1495 * Work through seeing if this is for us. 1465 * Work through seeing if this is for us.
1496 * These checks are supposed to be in an order that means easy 1466 * These checks are supposed to be in an order that means easy
@@ -1498,47 +1468,71 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1498 * this means that some packets will manage to get a long way 1468 * this means that some packets will manage to get a long way
1499 * down this stack and then be rejected, but that's life. 1469 * down this stack and then be rejected, but that's life.
1500 */ 1470 */
1501 if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) && 1471 if (ic->icmp6_type & ICMPV6_INFOMSG_MASK) {
1502 (ic->icmp6_type != ICMPV6_PKT_TOOBIG) &&
1503 (ic->icmp6_type != ICMPV6_TIME_EXCEED)) {
1504 *related = 0; 1472 *related = 0;
1505 return NF_ACCEPT; 1473 return NF_ACCEPT;
1506 } 1474 }
1475 /* Fragment header that is before ICMP header tells us that:
1476 * it's not an error message since they can't be fragmented.
1477 */
1478 if (iph->flags & IP6T_FH_F_FRAG)
1479 return NF_DROP;
1480
1481 IP_VS_DBG(8, "Incoming ICMPv6 (%d,%d) %pI6c->%pI6c\n",
1482 ic->icmp6_type, ntohs(icmpv6_id(ic)),
1483 &iph->saddr, &iph->daddr);
1507 1484
1508 /* Now find the contained IP header */ 1485 /* Now find the contained IP header */
1509 offset += sizeof(_icmph); 1486 ciph.len = iph->len + sizeof(_icmph);
1510 cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); 1487 offs_ciph = ciph.len; /* Save ip header offset */
1511 if (cih == NULL) 1488 ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h);
1489 if (ip6h == NULL)
1512 return NF_ACCEPT; /* The packet looks wrong, ignore */ 1490 return NF_ACCEPT; /* The packet looks wrong, ignore */
1491 ciph.saddr.in6 = ip6h->saddr; /* conn_in_get() handles reverse order */
1492 ciph.daddr.in6 = ip6h->daddr;
1493 /* skip possible IPv6 exthdrs of contained IPv6 packet */
1494 ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL);
1495 if (ciph.protocol < 0)
1496 return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */
1513 1497
1514 net = skb_net(skb); 1498 net = skb_net(skb);
1515 pd = ip_vs_proto_data_get(net, cih->nexthdr); 1499 pd = ip_vs_proto_data_get(net, ciph.protocol);
1516 if (!pd) 1500 if (!pd)
1517 return NF_ACCEPT; 1501 return NF_ACCEPT;
1518 pp = pd->pp; 1502 pp = pd->pp;
1519 1503
1520 /* Is the embedded protocol header present? */ 1504 /* Cannot handle fragmented embedded protocol */
1521 /* TODO: we don't support fragmentation at the moment anyways */ 1505 if (ciph.fragoffs)
1522 if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
1523 return NF_ACCEPT; 1506 return NF_ACCEPT;
1524 1507
1525 IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset, 1508 IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offs_ciph,
1526 "Checking incoming ICMPv6 for"); 1509 "Checking incoming ICMPv6 for");
1527 1510
1528 offset += sizeof(struct ipv6hdr); 1511 /* The embedded headers contain source and dest in reverse order
1512 * if not from localhost
1513 */
1514 cp = pp->conn_in_get(AF_INET6, skb, &ciph,
1515 (hooknum == NF_INET_LOCAL_OUT) ? 0 : 1);
1529 1516
1530 ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
1531 /* The embedded headers contain source and dest in reverse order */
1532 cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1);
1533 if (!cp) 1517 if (!cp)
1534 return NF_ACCEPT; 1518 return NF_ACCEPT;
1519 /* VS/TUN, VS/DR and LOCALNODE just let it go */
1520 if ((hooknum == NF_INET_LOCAL_OUT) &&
1521 (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)) {
1522 __ip_vs_conn_put(cp);
1523 return NF_ACCEPT;
1524 }
1535 1525
1536 /* do the statistics and put it back */ 1526 /* do the statistics and put it back */
1537 ip_vs_in_stats(cp, skb); 1527 ip_vs_in_stats(cp, skb);
1538 if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr || 1528
1539 IPPROTO_SCTP == cih->nexthdr) 1529 /* Need to mangle contained IPv6 header in ICMPv6 packet */
1540 offset += 2 * sizeof(__u16); 1530 writable = ciph.len;
1541 verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset, hooknum); 1531 if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol ||
1532 IPPROTO_SCTP == ciph.protocol)
1533 writable += 2 * sizeof(__u16); /* Also mangle ports */
1534
1535 verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, writable, hooknum, &ciph);
1542 1536
1543 __ip_vs_conn_put(cp); 1537 __ip_vs_conn_put(cp);
1544 1538
@@ -1574,7 +1568,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1574 if (unlikely((skb->pkt_type != PACKET_HOST && 1568 if (unlikely((skb->pkt_type != PACKET_HOST &&
1575 hooknum != NF_INET_LOCAL_OUT) || 1569 hooknum != NF_INET_LOCAL_OUT) ||
1576 !skb_dst(skb))) { 1570 !skb_dst(skb))) {
1577 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1571 ip_vs_fill_iph_skb(af, skb, &iph);
1578 IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s" 1572 IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s"
1579 " ignored in hook %u\n", 1573 " ignored in hook %u\n",
1580 skb->pkt_type, iph.protocol, 1574 skb->pkt_type, iph.protocol,
@@ -1586,7 +1580,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1586 if (!net_ipvs(net)->enable) 1580 if (!net_ipvs(net)->enable)
1587 return NF_ACCEPT; 1581 return NF_ACCEPT;
1588 1582
1589 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1583 ip_vs_fill_iph_skb(af, skb, &iph);
1590 1584
1591 /* Bad... Do not break raw sockets */ 1585 /* Bad... Do not break raw sockets */
1592 if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT && 1586 if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
@@ -1600,13 +1594,19 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1600 1594
1601#ifdef CONFIG_IP_VS_IPV6 1595#ifdef CONFIG_IP_VS_IPV6
1602 if (af == AF_INET6) { 1596 if (af == AF_INET6) {
1597 if (!iph.fragoffs && skb_nfct_reasm(skb)) {
1598 struct sk_buff *reasm = skb_nfct_reasm(skb);
1599 /* Save fw mark for coming frags. */
1600 reasm->ipvs_property = 1;
1601 reasm->mark = skb->mark;
1602 }
1603 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { 1603 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
1604 int related; 1604 int related;
1605 int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum); 1605 int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum,
1606 &iph);
1606 1607
1607 if (related) 1608 if (related)
1608 return verdict; 1609 return verdict;
1609 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1610 } 1610 }
1611 } else 1611 } else
1612#endif 1612#endif
@@ -1616,7 +1616,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1616 1616
1617 if (related) 1617 if (related)
1618 return verdict; 1618 return verdict;
1619 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1620 } 1619 }
1621 1620
1622 /* Protocol supported? */ 1621 /* Protocol supported? */
@@ -1627,12 +1626,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1627 /* 1626 /*
1628 * Check if the packet belongs to an existing connection entry 1627 * Check if the packet belongs to an existing connection entry
1629 */ 1628 */
1630 cp = pp->conn_in_get(af, skb, &iph, iph.len, 0); 1629 cp = pp->conn_in_get(af, skb, &iph, 0);
1631 1630 if (unlikely(!cp) && !iph.fragoffs) {
1632 if (unlikely(!cp)) { 1631 /* No (second) fragments need to enter here, as nf_defrag_ipv6
1632 * replayed fragment zero will already have created the cp
1633 */
1633 int v; 1634 int v;
1634 1635
1635 if (!pp->conn_schedule(af, skb, pd, &v, &cp)) 1636 /* Schedule and create new connection entry into &cp */
1637 if (!pp->conn_schedule(af, skb, pd, &v, &cp, &iph))
1636 return v; 1638 return v;
1637 } 1639 }
1638 1640
@@ -1640,6 +1642,14 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1640 /* sorry, all this trouble for a no-hit :) */ 1642 /* sorry, all this trouble for a no-hit :) */
1641 IP_VS_DBG_PKT(12, af, pp, skb, 0, 1643 IP_VS_DBG_PKT(12, af, pp, skb, 0,
1642 "ip_vs_in: packet continues traversal as normal"); 1644 "ip_vs_in: packet continues traversal as normal");
1645 if (iph.fragoffs && !skb_nfct_reasm(skb)) {
1646 /* Fragment that couldn't be mapped to a conn entry
1647 * and don't have any pointer to a reasm skb
1648 * is missing module nf_defrag_ipv6
1649 */
1650 IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
1651 IP_VS_DBG_PKT(7, af, pp, skb, 0, "unhandled fragment");
1652 }
1643 return NF_ACCEPT; 1653 return NF_ACCEPT;
1644 } 1654 }
1645 1655
@@ -1662,7 +1672,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1662 ip_vs_in_stats(cp, skb); 1672 ip_vs_in_stats(cp, skb);
1663 ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); 1673 ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
1664 if (cp->packet_xmit) 1674 if (cp->packet_xmit)
1665 ret = cp->packet_xmit(skb, cp, pp); 1675 ret = cp->packet_xmit(skb, cp, pp, &iph);
1666 /* do not touch skb anymore */ 1676 /* do not touch skb anymore */
1667 else { 1677 else {
1668 IP_VS_DBG_RL("warning: packet_xmit is null"); 1678 IP_VS_DBG_RL("warning: packet_xmit is null");
@@ -1724,6 +1734,38 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
1724#ifdef CONFIG_IP_VS_IPV6 1734#ifdef CONFIG_IP_VS_IPV6
1725 1735
1726/* 1736/*
1737 * AF_INET6 fragment handling
1738 * Copy info from first fragment, to the rest of them.
1739 */
1740static unsigned int
1741ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb,
1742 const struct net_device *in,
1743 const struct net_device *out,
1744 int (*okfn)(struct sk_buff *))
1745{
1746 struct sk_buff *reasm = skb_nfct_reasm(skb);
1747 struct net *net;
1748
1749 /* Skip if not a "replay" from nf_ct_frag6_output or first fragment.
1750 * ipvs_property is set when checking first fragment
1751 * in ip_vs_in() and ip_vs_out().
1752 */
1753 if (reasm)
1754 IP_VS_DBG(2, "Fragment recv prop:%d\n", reasm->ipvs_property);
1755 if (!reasm || !reasm->ipvs_property)
1756 return NF_ACCEPT;
1757
1758 net = skb_net(skb);
1759 if (!net_ipvs(net)->enable)
1760 return NF_ACCEPT;
1761
1762 /* Copy stored fw mark, saved in ip_vs_{in,out} */
1763 skb->mark = reasm->mark;
1764
1765 return NF_ACCEPT;
1766}
1767
1768/*
1727 * AF_INET6 handler in NF_INET_LOCAL_IN chain 1769 * AF_INET6 handler in NF_INET_LOCAL_IN chain
1728 * Schedule and forward packets from remote clients 1770 * Schedule and forward packets from remote clients
1729 */ 1771 */
@@ -1793,8 +1835,10 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
1793{ 1835{
1794 int r; 1836 int r;
1795 struct net *net; 1837 struct net *net;
1838 struct ip_vs_iphdr iphdr;
1796 1839
1797 if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6) 1840 ip_vs_fill_iph_skb(AF_INET6, skb, &iphdr);
1841 if (iphdr.protocol != IPPROTO_ICMPV6)
1798 return NF_ACCEPT; 1842 return NF_ACCEPT;
1799 1843
1800 /* ipvs enabled in this netns ? */ 1844 /* ipvs enabled in this netns ? */
@@ -1802,7 +1846,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
1802 if (!net_ipvs(net)->enable) 1846 if (!net_ipvs(net)->enable)
1803 return NF_ACCEPT; 1847 return NF_ACCEPT;
1804 1848
1805 return ip_vs_in_icmp_v6(skb, &r, hooknum); 1849 return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr);
1806} 1850}
1807#endif 1851#endif
1808 1852
@@ -1860,6 +1904,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1860 .priority = 100, 1904 .priority = 100,
1861 }, 1905 },
1862#ifdef CONFIG_IP_VS_IPV6 1906#ifdef CONFIG_IP_VS_IPV6
1907 /* After mangle & nat fetch 2:nd fragment and following */
1908 {
1909 .hook = ip_vs_preroute_frag6,
1910 .owner = THIS_MODULE,
1911 .pf = NFPROTO_IPV6,
1912 .hooknum = NF_INET_PRE_ROUTING,
1913 .priority = NF_IP6_PRI_NAT_DST + 1,
1914 },
1863 /* After packet filtering, change source only for VS/NAT */ 1915 /* After packet filtering, change source only for VS/NAT */
1864 { 1916 {
1865 .hook = ip_vs_reply6, 1917 .hook = ip_vs_reply6,
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
index 8b7dca9ea42..7f3b0cc00b7 100644
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -215,7 +215,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
215 struct ip_vs_dh_bucket *tbl; 215 struct ip_vs_dh_bucket *tbl;
216 struct ip_vs_iphdr iph; 216 struct ip_vs_iphdr iph;
217 217
218 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 218 ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
219 219
220 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 220 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
221 221
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index df646ccf08a..cbd37489ac7 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -479,7 +479,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
479 struct ip_vs_dest *dest = NULL; 479 struct ip_vs_dest *dest = NULL;
480 struct ip_vs_lblc_entry *en; 480 struct ip_vs_lblc_entry *en;
481 481
482 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 482 ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
483 483
484 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 484 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
485 485
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 570e31ea427..161b67972e3 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -649,7 +649,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
649 struct ip_vs_dest *dest = NULL; 649 struct ip_vs_dest *dest = NULL;
650 struct ip_vs_lblcr_entry *en; 650 struct ip_vs_lblcr_entry *en;
651 651
652 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 652 ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
653 653
654 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 654 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
655 655
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 1aa5cac748c..12475ef88da 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -68,23 +68,31 @@ static int get_callid(const char *dptr, unsigned int dataoff,
68static int 68static int
69ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) 69ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
70{ 70{
71 struct sk_buff *reasm = skb_nfct_reasm(skb);
71 struct ip_vs_iphdr iph; 72 struct ip_vs_iphdr iph;
72 unsigned int dataoff, datalen, matchoff, matchlen; 73 unsigned int dataoff, datalen, matchoff, matchlen;
73 const char *dptr; 74 const char *dptr;
74 int retc; 75 int retc;
75 76
76 ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph); 77 ip_vs_fill_iph_skb(p->af, skb, &iph);
77 78
78 /* Only useful with UDP */ 79 /* Only useful with UDP */
79 if (iph.protocol != IPPROTO_UDP) 80 if (iph.protocol != IPPROTO_UDP)
80 return -EINVAL; 81 return -EINVAL;
82 /* todo: IPv6 fragments:
83 * I think this only should be done for the first fragment. /HS
84 */
85 if (reasm) {
86 skb = reasm;
87 dataoff = iph.thoff_reasm + sizeof(struct udphdr);
88 } else
89 dataoff = iph.len + sizeof(struct udphdr);
81 90
82 /* No Data ? */
83 dataoff = iph.len + sizeof(struct udphdr);
84 if (dataoff >= skb->len) 91 if (dataoff >= skb->len)
85 return -EINVAL; 92 return -EINVAL;
86 93 /* todo: Check if this will mess-up the reasm skb !!! /HS */
87 if ((retc=skb_linearize(skb)) < 0) 94 retc = skb_linearize(skb);
95 if (retc < 0)
88 return retc; 96 return retc;
89 dptr = skb->data + dataoff; 97 dptr = skb->data + dataoff;
90 datalen = skb->len - dataoff; 98 datalen = skb->len - dataoff;
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 50d82186da8..939f7fbe9b4 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -280,17 +280,17 @@ ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp,
280 if (ih == NULL) 280 if (ih == NULL)
281 sprintf(buf, "TRUNCATED"); 281 sprintf(buf, "TRUNCATED");
282 else if (ih->nexthdr == IPPROTO_FRAGMENT) 282 else if (ih->nexthdr == IPPROTO_FRAGMENT)
283 sprintf(buf, "%pI6->%pI6 frag", &ih->saddr, &ih->daddr); 283 sprintf(buf, "%pI6c->%pI6c frag", &ih->saddr, &ih->daddr);
284 else { 284 else {
285 __be16 _ports[2], *pptr; 285 __be16 _ports[2], *pptr;
286 286
287 pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr), 287 pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr),
288 sizeof(_ports), _ports); 288 sizeof(_ports), _ports);
289 if (pptr == NULL) 289 if (pptr == NULL)
290 sprintf(buf, "TRUNCATED %pI6->%pI6", 290 sprintf(buf, "TRUNCATED %pI6c->%pI6c",
291 &ih->saddr, &ih->daddr); 291 &ih->saddr, &ih->daddr);
292 else 292 else
293 sprintf(buf, "%pI6:%u->%pI6:%u", 293 sprintf(buf, "%pI6c:%u->%pI6c:%u",
294 &ih->saddr, ntohs(pptr[0]), 294 &ih->saddr, ntohs(pptr[0]),
295 &ih->daddr, ntohs(pptr[1])); 295 &ih->daddr, ntohs(pptr[1]));
296 } 296 }
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 5b8eb8b12c3..5de3dd312c0 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -57,7 +57,7 @@ ah_esp_conn_fill_param_proto(struct net *net, int af,
57 57
58static struct ip_vs_conn * 58static struct ip_vs_conn *
59ah_esp_conn_in_get(int af, const struct sk_buff *skb, 59ah_esp_conn_in_get(int af, const struct sk_buff *skb,
60 const struct ip_vs_iphdr *iph, unsigned int proto_off, 60 const struct ip_vs_iphdr *iph,
61 int inverse) 61 int inverse)
62{ 62{
63 struct ip_vs_conn *cp; 63 struct ip_vs_conn *cp;
@@ -85,9 +85,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb,
85 85
86static struct ip_vs_conn * 86static struct ip_vs_conn *
87ah_esp_conn_out_get(int af, const struct sk_buff *skb, 87ah_esp_conn_out_get(int af, const struct sk_buff *skb,
88 const struct ip_vs_iphdr *iph, 88 const struct ip_vs_iphdr *iph, int inverse)
89 unsigned int proto_off,
90 int inverse)
91{ 89{
92 struct ip_vs_conn *cp; 90 struct ip_vs_conn *cp;
93 struct ip_vs_conn_param p; 91 struct ip_vs_conn_param p;
@@ -110,7 +108,8 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
110 108
111static int 109static int
112ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 110ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
113 int *verdict, struct ip_vs_conn **cpp) 111 int *verdict, struct ip_vs_conn **cpp,
112 struct ip_vs_iphdr *iph)
114{ 113{
115 /* 114 /*
116 * AH/ESP is only related traffic. Pass the packet to IP stack. 115 * AH/ESP is only related traffic. Pass the packet to IP stack.
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 9f3fb751c49..746048b13ef 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -10,28 +10,26 @@
10 10
11static int 11static int
12sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 12sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
13 int *verdict, struct ip_vs_conn **cpp) 13 int *verdict, struct ip_vs_conn **cpp,
14 struct ip_vs_iphdr *iph)
14{ 15{
15 struct net *net; 16 struct net *net;
16 struct ip_vs_service *svc; 17 struct ip_vs_service *svc;
17 sctp_chunkhdr_t _schunkh, *sch; 18 sctp_chunkhdr_t _schunkh, *sch;
18 sctp_sctphdr_t *sh, _sctph; 19 sctp_sctphdr_t *sh, _sctph;
19 struct ip_vs_iphdr iph;
20 20
21 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 21 sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
22
23 sh = skb_header_pointer(skb, iph.len, sizeof(_sctph), &_sctph);
24 if (sh == NULL) 22 if (sh == NULL)
25 return 0; 23 return 0;
26 24
27 sch = skb_header_pointer(skb, iph.len + sizeof(sctp_sctphdr_t), 25 sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t),
28 sizeof(_schunkh), &_schunkh); 26 sizeof(_schunkh), &_schunkh);
29 if (sch == NULL) 27 if (sch == NULL)
30 return 0; 28 return 0;
31 net = skb_net(skb); 29 net = skb_net(skb);
32 if ((sch->type == SCTP_CID_INIT) && 30 if ((sch->type == SCTP_CID_INIT) &&
33 (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, 31 (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
34 &iph.daddr, sh->dest))) { 32 &iph->daddr, sh->dest))) {
35 int ignored; 33 int ignored;
36 34
37 if (ip_vs_todrop(net_ipvs(net))) { 35 if (ip_vs_todrop(net_ipvs(net))) {
@@ -47,10 +45,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
47 * Let the virtual server select a real server for the 45 * Let the virtual server select a real server for the
48 * incoming connection, and create a connection entry. 46 * incoming connection, and create a connection entry.
49 */ 47 */
50 *cpp = ip_vs_schedule(svc, skb, pd, &ignored); 48 *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
51 if (!*cpp && ignored <= 0) { 49 if (!*cpp && ignored <= 0) {
52 if (!ignored) 50 if (!ignored)
53 *verdict = ip_vs_leave(svc, skb, pd); 51 *verdict = ip_vs_leave(svc, skb, pd, iph);
54 else { 52 else {
55 ip_vs_service_put(svc); 53 ip_vs_service_put(svc);
56 *verdict = NF_DROP; 54 *verdict = NF_DROP;
@@ -64,20 +62,18 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
64} 62}
65 63
66static int 64static int
67sctp_snat_handler(struct sk_buff *skb, 65sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
68 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 66 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
69{ 67{
70 sctp_sctphdr_t *sctph; 68 sctp_sctphdr_t *sctph;
71 unsigned int sctphoff; 69 unsigned int sctphoff = iph->len;
72 struct sk_buff *iter; 70 struct sk_buff *iter;
73 __be32 crc32; 71 __be32 crc32;
74 72
75#ifdef CONFIG_IP_VS_IPV6 73#ifdef CONFIG_IP_VS_IPV6
76 if (cp->af == AF_INET6) 74 if (cp->af == AF_INET6 && iph->fragoffs)
77 sctphoff = sizeof(struct ipv6hdr); 75 return 1;
78 else
79#endif 76#endif
80 sctphoff = ip_hdrlen(skb);
81 77
82 /* csum_check requires unshared skb */ 78 /* csum_check requires unshared skb */
83 if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) 79 if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
@@ -108,20 +104,18 @@ sctp_snat_handler(struct sk_buff *skb,
108} 104}
109 105
110static int 106static int
111sctp_dnat_handler(struct sk_buff *skb, 107sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
112 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 108 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
113{ 109{
114 sctp_sctphdr_t *sctph; 110 sctp_sctphdr_t *sctph;
115 unsigned int sctphoff; 111 unsigned int sctphoff = iph->len;
116 struct sk_buff *iter; 112 struct sk_buff *iter;
117 __be32 crc32; 113 __be32 crc32;
118 114
119#ifdef CONFIG_IP_VS_IPV6 115#ifdef CONFIG_IP_VS_IPV6
120 if (cp->af == AF_INET6) 116 if (cp->af == AF_INET6 && iph->fragoffs)
121 sctphoff = sizeof(struct ipv6hdr); 117 return 1;
122 else
123#endif 118#endif
124 sctphoff = ip_hdrlen(skb);
125 119
126 /* csum_check requires unshared skb */ 120 /* csum_check requires unshared skb */
127 if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) 121 if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index cd609cc6272..9af653a7582 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -33,16 +33,14 @@
33 33
34static int 34static int
35tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 35tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
36 int *verdict, struct ip_vs_conn **cpp) 36 int *verdict, struct ip_vs_conn **cpp,
37 struct ip_vs_iphdr *iph)
37{ 38{
38 struct net *net; 39 struct net *net;
39 struct ip_vs_service *svc; 40 struct ip_vs_service *svc;
40 struct tcphdr _tcph, *th; 41 struct tcphdr _tcph, *th;
41 struct ip_vs_iphdr iph;
42 42
43 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 43 th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
44
45 th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph);
46 if (th == NULL) { 44 if (th == NULL) {
47 *verdict = NF_DROP; 45 *verdict = NF_DROP;
48 return 0; 46 return 0;
@@ -50,8 +48,8 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
50 net = skb_net(skb); 48 net = skb_net(skb);
51 /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ 49 /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
52 if (th->syn && 50 if (th->syn &&
53 (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, 51 (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
54 &iph.daddr, th->dest))) { 52 &iph->daddr, th->dest))) {
55 int ignored; 53 int ignored;
56 54
57 if (ip_vs_todrop(net_ipvs(net))) { 55 if (ip_vs_todrop(net_ipvs(net))) {
@@ -68,10 +66,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
68 * Let the virtual server select a real server for the 66 * Let the virtual server select a real server for the
69 * incoming connection, and create a connection entry. 67 * incoming connection, and create a connection entry.
70 */ 68 */
71 *cpp = ip_vs_schedule(svc, skb, pd, &ignored); 69 *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
72 if (!*cpp && ignored <= 0) { 70 if (!*cpp && ignored <= 0) {
73 if (!ignored) 71 if (!ignored)
74 *verdict = ip_vs_leave(svc, skb, pd); 72 *verdict = ip_vs_leave(svc, skb, pd, iph);
75 else { 73 else {
76 ip_vs_service_put(svc); 74 ip_vs_service_put(svc);
77 *verdict = NF_DROP; 75 *verdict = NF_DROP;
@@ -128,20 +126,18 @@ tcp_partial_csum_update(int af, struct tcphdr *tcph,
128 126
129 127
130static int 128static int
131tcp_snat_handler(struct sk_buff *skb, 129tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
132 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 130 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
133{ 131{
134 struct tcphdr *tcph; 132 struct tcphdr *tcph;
135 unsigned int tcphoff; 133 unsigned int tcphoff = iph->len;
136 int oldlen; 134 int oldlen;
137 int payload_csum = 0; 135 int payload_csum = 0;
138 136
139#ifdef CONFIG_IP_VS_IPV6 137#ifdef CONFIG_IP_VS_IPV6
140 if (cp->af == AF_INET6) 138 if (cp->af == AF_INET6 && iph->fragoffs)
141 tcphoff = sizeof(struct ipv6hdr); 139 return 1;
142 else
143#endif 140#endif
144 tcphoff = ip_hdrlen(skb);
145 oldlen = skb->len - tcphoff; 141 oldlen = skb->len - tcphoff;
146 142
147 /* csum_check requires unshared skb */ 143 /* csum_check requires unshared skb */
@@ -208,20 +204,18 @@ tcp_snat_handler(struct sk_buff *skb,
208 204
209 205
210static int 206static int
211tcp_dnat_handler(struct sk_buff *skb, 207tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
212 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 208 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
213{ 209{
214 struct tcphdr *tcph; 210 struct tcphdr *tcph;
215 unsigned int tcphoff; 211 unsigned int tcphoff = iph->len;
216 int oldlen; 212 int oldlen;
217 int payload_csum = 0; 213 int payload_csum = 0;
218 214
219#ifdef CONFIG_IP_VS_IPV6 215#ifdef CONFIG_IP_VS_IPV6
220 if (cp->af == AF_INET6) 216 if (cp->af == AF_INET6 && iph->fragoffs)
221 tcphoff = sizeof(struct ipv6hdr); 217 return 1;
222 else
223#endif 218#endif
224 tcphoff = ip_hdrlen(skb);
225 oldlen = skb->len - tcphoff; 219 oldlen = skb->len - tcphoff;
226 220
227 /* csum_check requires unshared skb */ 221 /* csum_check requires unshared skb */
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 2fedb2dcb3d..503a842c90d 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -30,23 +30,22 @@
30 30
31static int 31static int
32udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 32udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
33 int *verdict, struct ip_vs_conn **cpp) 33 int *verdict, struct ip_vs_conn **cpp,
34 struct ip_vs_iphdr *iph)
34{ 35{
35 struct net *net; 36 struct net *net;
36 struct ip_vs_service *svc; 37 struct ip_vs_service *svc;
37 struct udphdr _udph, *uh; 38 struct udphdr _udph, *uh;
38 struct ip_vs_iphdr iph;
39 39
40 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 40 /* IPv6 fragments, only first fragment will hit this */
41 41 uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
42 uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph);
43 if (uh == NULL) { 42 if (uh == NULL) {
44 *verdict = NF_DROP; 43 *verdict = NF_DROP;
45 return 0; 44 return 0;
46 } 45 }
47 net = skb_net(skb); 46 net = skb_net(skb);
48 svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, 47 svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
49 &iph.daddr, uh->dest); 48 &iph->daddr, uh->dest);
50 if (svc) { 49 if (svc) {
51 int ignored; 50 int ignored;
52 51
@@ -64,10 +63,10 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
64 * Let the virtual server select a real server for the 63 * Let the virtual server select a real server for the
65 * incoming connection, and create a connection entry. 64 * incoming connection, and create a connection entry.
66 */ 65 */
67 *cpp = ip_vs_schedule(svc, skb, pd, &ignored); 66 *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
68 if (!*cpp && ignored <= 0) { 67 if (!*cpp && ignored <= 0) {
69 if (!ignored) 68 if (!ignored)
70 *verdict = ip_vs_leave(svc, skb, pd); 69 *verdict = ip_vs_leave(svc, skb, pd, iph);
71 else { 70 else {
72 ip_vs_service_put(svc); 71 ip_vs_service_put(svc);
73 *verdict = NF_DROP; 72 *verdict = NF_DROP;
@@ -125,20 +124,18 @@ udp_partial_csum_update(int af, struct udphdr *uhdr,
125 124
126 125
127static int 126static int
128udp_snat_handler(struct sk_buff *skb, 127udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
129 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 128 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
130{ 129{
131 struct udphdr *udph; 130 struct udphdr *udph;
132 unsigned int udphoff; 131 unsigned int udphoff = iph->len;
133 int oldlen; 132 int oldlen;
134 int payload_csum = 0; 133 int payload_csum = 0;
135 134
136#ifdef CONFIG_IP_VS_IPV6 135#ifdef CONFIG_IP_VS_IPV6
137 if (cp->af == AF_INET6) 136 if (cp->af == AF_INET6 && iph->fragoffs)
138 udphoff = sizeof(struct ipv6hdr); 137 return 1;
139 else
140#endif 138#endif
141 udphoff = ip_hdrlen(skb);
142 oldlen = skb->len - udphoff; 139 oldlen = skb->len - udphoff;
143 140
144 /* csum_check requires unshared skb */ 141 /* csum_check requires unshared skb */
@@ -210,20 +207,18 @@ udp_snat_handler(struct sk_buff *skb,
210 207
211 208
212static int 209static int
213udp_dnat_handler(struct sk_buff *skb, 210udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
214 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 211 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
215{ 212{
216 struct udphdr *udph; 213 struct udphdr *udph;
217 unsigned int udphoff; 214 unsigned int udphoff = iph->len;
218 int oldlen; 215 int oldlen;
219 int payload_csum = 0; 216 int payload_csum = 0;
220 217
221#ifdef CONFIG_IP_VS_IPV6 218#ifdef CONFIG_IP_VS_IPV6
222 if (cp->af == AF_INET6) 219 if (cp->af == AF_INET6 && iph->fragoffs)
223 udphoff = sizeof(struct ipv6hdr); 220 return 1;
224 else
225#endif 221#endif
226 udphoff = ip_hdrlen(skb);
227 oldlen = skb->len - udphoff; 222 oldlen = skb->len - udphoff;
228 223
229 /* csum_check requires unshared skb */ 224 /* csum_check requires unshared skb */
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index 08dbdd5bc18..d6bf20d6cdb 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -159,7 +159,7 @@ void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg)
159 svc->fwmark, msg); 159 svc->fwmark, msg);
160#ifdef CONFIG_IP_VS_IPV6 160#ifdef CONFIG_IP_VS_IPV6
161 } else if (svc->af == AF_INET6) { 161 } else if (svc->af == AF_INET6) {
162 IP_VS_ERR_RL("%s: %s [%pI6]:%d - %s\n", 162 IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n",
163 svc->scheduler->name, 163 svc->scheduler->name,
164 ip_vs_proto_name(svc->protocol), 164 ip_vs_proto_name(svc->protocol),
165 &svc->addr.in6, ntohs(svc->port), msg); 165 &svc->addr.in6, ntohs(svc->port), msg);
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index 05126521743..e3312699462 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -228,7 +228,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
228 struct ip_vs_sh_bucket *tbl; 228 struct ip_vs_sh_bucket *tbl;
229 struct ip_vs_iphdr iph; 229 struct ip_vs_iphdr iph;
230 230
231 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 231 ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
232 232
233 IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); 233 IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
234 234
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 56f6d5d81a7..90122eb0b04 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -335,7 +335,7 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
335 local = __ip_vs_is_local_route6(rt); 335 local = __ip_vs_is_local_route6(rt);
336 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) & 336 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
337 rt_mode)) { 337 rt_mode)) {
338 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n", 338 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n",
339 local ? "local":"non-local", daddr); 339 local ? "local":"non-local", daddr);
340 dst_release(&rt->dst); 340 dst_release(&rt->dst);
341 return NULL; 341 return NULL;
@@ -343,8 +343,8 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
343 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) && 343 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
344 !((ort = (struct rt6_info *) skb_dst(skb)) && 344 !((ort = (struct rt6_info *) skb_dst(skb)) &&
345 __ip_vs_is_local_route6(ort))) { 345 __ip_vs_is_local_route6(ort))) {
346 IP_VS_DBG_RL("Redirect from non-local address %pI6 to local " 346 IP_VS_DBG_RL("Redirect from non-local address %pI6c to local "
347 "requires NAT method, dest: %pI6\n", 347 "requires NAT method, dest: %pI6c\n",
348 &ipv6_hdr(skb)->daddr, daddr); 348 &ipv6_hdr(skb)->daddr, daddr);
349 dst_release(&rt->dst); 349 dst_release(&rt->dst);
350 return NULL; 350 return NULL;
@@ -352,8 +352,8 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
352 if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) && 352 if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
353 ipv6_addr_type(&ipv6_hdr(skb)->saddr) & 353 ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
354 IPV6_ADDR_LOOPBACK)) { 354 IPV6_ADDR_LOOPBACK)) {
355 IP_VS_DBG_RL("Stopping traffic from loopback address %pI6 " 355 IP_VS_DBG_RL("Stopping traffic from loopback address %pI6c "
356 "to non-local address, dest: %pI6\n", 356 "to non-local address, dest: %pI6c\n",
357 &ipv6_hdr(skb)->saddr, daddr); 357 &ipv6_hdr(skb)->saddr, daddr);
358 dst_release(&rt->dst); 358 dst_release(&rt->dst);
359 return NULL; 359 return NULL;
@@ -424,7 +424,7 @@ do { \
424 */ 424 */
425int 425int
426ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 426ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
427 struct ip_vs_protocol *pp) 427 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
428{ 428{
429 /* we do not touch skb and do not need pskb ptr */ 429 /* we do not touch skb and do not need pskb ptr */
430 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); 430 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
@@ -438,7 +438,7 @@ ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
438 */ 438 */
439int 439int
440ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 440ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
441 struct ip_vs_protocol *pp) 441 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
442{ 442{
443 struct rtable *rt; /* Route to the other host */ 443 struct rtable *rt; /* Route to the other host */
444 struct iphdr *iph = ip_hdr(skb); 444 struct iphdr *iph = ip_hdr(skb);
@@ -493,16 +493,16 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
493#ifdef CONFIG_IP_VS_IPV6 493#ifdef CONFIG_IP_VS_IPV6
494int 494int
495ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 495ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
496 struct ip_vs_protocol *pp) 496 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
497{ 497{
498 struct rt6_info *rt; /* Route to the other host */ 498 struct rt6_info *rt; /* Route to the other host */
499 struct ipv6hdr *iph = ipv6_hdr(skb);
500 int mtu; 499 int mtu;
501 500
502 EnterFunction(10); 501 EnterFunction(10);
503 502
504 if (!(rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr, NULL, 0, 503 rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr.in6, NULL, 0,
505 IP_VS_RT_MODE_NON_LOCAL))) 504 IP_VS_RT_MODE_NON_LOCAL);
505 if (!rt)
506 goto tx_error_icmp; 506 goto tx_error_icmp;
507 507
508 /* MTU checking */ 508 /* MTU checking */
@@ -513,7 +513,9 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
513 513
514 skb->dev = net->loopback_dev; 514 skb->dev = net->loopback_dev;
515 } 515 }
516 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 516 /* only send ICMP too big on first fragment */
517 if (!iph->fragoffs)
518 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
517 dst_release(&rt->dst); 519 dst_release(&rt->dst);
518 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 520 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
519 goto tx_error; 521 goto tx_error;
@@ -556,7 +558,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
556 */ 558 */
557int 559int
558ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 560ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
559 struct ip_vs_protocol *pp) 561 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
560{ 562{
561 struct rtable *rt; /* Route to the other host */ 563 struct rtable *rt; /* Route to the other host */
562 int mtu; 564 int mtu;
@@ -626,7 +628,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
626 goto tx_error_put; 628 goto tx_error_put;
627 629
628 /* mangle the packet */ 630 /* mangle the packet */
629 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) 631 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
630 goto tx_error_put; 632 goto tx_error_put;
631 ip_hdr(skb)->daddr = cp->daddr.ip; 633 ip_hdr(skb)->daddr = cp->daddr.ip;
632 ip_send_check(ip_hdr(skb)); 634 ip_send_check(ip_hdr(skb));
@@ -674,7 +676,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
674#ifdef CONFIG_IP_VS_IPV6 676#ifdef CONFIG_IP_VS_IPV6
675int 677int
676ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 678ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
677 struct ip_vs_protocol *pp) 679 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
678{ 680{
679 struct rt6_info *rt; /* Route to the other host */ 681 struct rt6_info *rt; /* Route to the other host */
680 int mtu; 682 int mtu;
@@ -683,10 +685,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
683 EnterFunction(10); 685 EnterFunction(10);
684 686
685 /* check if it is a connection of no-client-port */ 687 /* check if it is a connection of no-client-port */
686 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { 688 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !iph->fragoffs)) {
687 __be16 _pt, *p; 689 __be16 _pt, *p;
688 p = skb_header_pointer(skb, sizeof(struct ipv6hdr), 690 p = skb_header_pointer(skb, iph->len, sizeof(_pt), &_pt);
689 sizeof(_pt), &_pt);
690 if (p == NULL) 691 if (p == NULL)
691 goto tx_error; 692 goto tx_error;
692 ip_vs_conn_fill_cport(cp, *p); 693 ip_vs_conn_fill_cport(cp, *p);
@@ -734,7 +735,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
734 735
735 skb->dev = net->loopback_dev; 736 skb->dev = net->loopback_dev;
736 } 737 }
737 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 738 /* only send ICMP too big on first fragment */
739 if (!iph->fragoffs)
740 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
738 IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0, 741 IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
739 "ip_vs_nat_xmit_v6(): frag needed for"); 742 "ip_vs_nat_xmit_v6(): frag needed for");
740 goto tx_error_put; 743 goto tx_error_put;
@@ -748,7 +751,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
748 goto tx_error_put; 751 goto tx_error_put;
749 752
750 /* mangle the packet */ 753 /* mangle the packet */
751 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) 754 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, iph))
752 goto tx_error; 755 goto tx_error;
753 ipv6_hdr(skb)->daddr = cp->daddr.in6; 756 ipv6_hdr(skb)->daddr = cp->daddr.in6;
754 757
@@ -809,7 +812,7 @@ tx_error_put:
809 */ 812 */
810int 813int
811ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 814ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
812 struct ip_vs_protocol *pp) 815 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
813{ 816{
814 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); 817 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
815 struct rtable *rt; /* Route to the other host */ 818 struct rtable *rt; /* Route to the other host */
@@ -929,7 +932,7 @@ tx_error_put:
929#ifdef CONFIG_IP_VS_IPV6 932#ifdef CONFIG_IP_VS_IPV6
930int 933int
931ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 934ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
932 struct ip_vs_protocol *pp) 935 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
933{ 936{
934 struct rt6_info *rt; /* Route to the other host */ 937 struct rt6_info *rt; /* Route to the other host */
935 struct in6_addr saddr; /* Source for tunnel */ 938 struct in6_addr saddr; /* Source for tunnel */
@@ -969,7 +972,9 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
969 972
970 skb->dev = net->loopback_dev; 973 skb->dev = net->loopback_dev;
971 } 974 }
972 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 975 /* only send ICMP too big on first fragment */
976 if (!ipvsh->fragoffs)
977 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
973 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 978 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
974 goto tx_error_put; 979 goto tx_error_put;
975 } 980 }
@@ -1050,7 +1055,7 @@ tx_error_put:
1050 */ 1055 */
1051int 1056int
1052ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 1057ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1053 struct ip_vs_protocol *pp) 1058 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
1054{ 1059{
1055 struct rtable *rt; /* Route to the other host */ 1060 struct rtable *rt; /* Route to the other host */
1056 struct iphdr *iph = ip_hdr(skb); 1061 struct iphdr *iph = ip_hdr(skb);
@@ -1111,7 +1116,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1111#ifdef CONFIG_IP_VS_IPV6 1116#ifdef CONFIG_IP_VS_IPV6
1112int 1117int
1113ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 1118ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1114 struct ip_vs_protocol *pp) 1119 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
1115{ 1120{
1116 struct rt6_info *rt; /* Route to the other host */ 1121 struct rt6_info *rt; /* Route to the other host */
1117 int mtu; 1122 int mtu;
@@ -1135,7 +1140,9 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1135 1140
1136 skb->dev = net->loopback_dev; 1141 skb->dev = net->loopback_dev;
1137 } 1142 }
1138 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1143 /* only send ICMP too big on first fragment */
1144 if (!iph->fragoffs)
1145 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1139 dst_release(&rt->dst); 1146 dst_release(&rt->dst);
1140 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1147 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1141 goto tx_error; 1148 goto tx_error;
@@ -1179,7 +1186,8 @@ tx_error:
1179 */ 1186 */
1180int 1187int
1181ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 1188ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1182 struct ip_vs_protocol *pp, int offset, unsigned int hooknum) 1189 struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
1190 struct ip_vs_iphdr *iph)
1183{ 1191{
1184 struct rtable *rt; /* Route to the other host */ 1192 struct rtable *rt; /* Route to the other host */
1185 int mtu; 1193 int mtu;
@@ -1194,7 +1202,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1194 translate address/port back */ 1202 translate address/port back */
1195 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { 1203 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1196 if (cp->packet_xmit) 1204 if (cp->packet_xmit)
1197 rc = cp->packet_xmit(skb, cp, pp); 1205 rc = cp->packet_xmit(skb, cp, pp, iph);
1198 else 1206 else
1199 rc = NF_ACCEPT; 1207 rc = NF_ACCEPT;
1200 /* do not touch skb anymore */ 1208 /* do not touch skb anymore */
@@ -1300,7 +1308,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1300#ifdef CONFIG_IP_VS_IPV6 1308#ifdef CONFIG_IP_VS_IPV6
1301int 1309int
1302ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 1310ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1303 struct ip_vs_protocol *pp, int offset, unsigned int hooknum) 1311 struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
1312 struct ip_vs_iphdr *iph)
1304{ 1313{
1305 struct rt6_info *rt; /* Route to the other host */ 1314 struct rt6_info *rt; /* Route to the other host */
1306 int mtu; 1315 int mtu;
@@ -1315,7 +1324,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1315 translate address/port back */ 1324 translate address/port back */
1316 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { 1325 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1317 if (cp->packet_xmit) 1326 if (cp->packet_xmit)
1318 rc = cp->packet_xmit(skb, cp, pp); 1327 rc = cp->packet_xmit(skb, cp, pp, iph);
1319 else 1328 else
1320 rc = NF_ACCEPT; 1329 rc = NF_ACCEPT;
1321 /* do not touch skb anymore */ 1330 /* do not touch skb anymore */
@@ -1371,7 +1380,9 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1371 1380
1372 skb->dev = net->loopback_dev; 1381 skb->dev = net->loopback_dev;
1373 } 1382 }
1374 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1383 /* only send ICMP too big on first fragment */
1384 if (!iph->fragoffs)
1385 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1375 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1386 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1376 goto tx_error_put; 1387 goto tx_error_put;
1377 } 1388 }
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
index bb10b0717f1..8d47c3780fd 100644
--- a/net/netfilter/xt_ipvs.c
+++ b/net/netfilter/xt_ipvs.c
@@ -67,7 +67,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
67 goto out; 67 goto out;
68 } 68 }
69 69
70 ip_vs_fill_iphdr(family, skb_network_header(skb), &iph); 70 ip_vs_fill_iph_skb(family, skb, &iph);
71 71
72 if (data->bitmask & XT_IPVS_PROTO) 72 if (data->bitmask & XT_IPVS_PROTO)
73 if ((iph.protocol == data->l4proto) ^ 73 if ((iph.protocol == data->l4proto) ^
@@ -85,7 +85,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
85 /* 85 /*
86 * Check if the packet belongs to an existing entry 86 * Check if the packet belongs to an existing entry
87 */ 87 */
88 cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */); 88 cp = pp->conn_out_get(family, skb, &iph, 1 /* inverse */);
89 if (unlikely(cp == NULL)) { 89 if (unlikely(cp == NULL)) {
90 match = false; 90 match = false;
91 goto out; 91 goto out;