diff options
author | Hans Schillstrom <hans.schillstrom@ericsson.com> | 2010-11-19 08:25:12 -0500 |
---|---|---|
committer | Simon Horman <horms@verge.net.au> | 2010-11-24 20:42:59 -0500 |
commit | fe5e7a1efb664df0280f10377813d7099fb7eb0f (patch) | |
tree | 72eaf55ec2200327a8260a49c22bc13c60f996a2 | |
parent | 2981bc9a63456500037ca1f434b93a561e63f384 (diff) |
IPVS: Backup, Adding Version 1 receive capability
Functionality improvements
* flags changed from 16 to 32 bits
* fwmark added (32 bits)
* timeout in seconds added (32 bits)
* pe data added (Variable length)
* IPv6 capabilities (3x16 bytes for addresses)
* Version and type in every conn msg.
ip_vs_process_message() now handles Version 1 messages
and will call ip_vs_process_message_v0() for version 0 messages.
ip_vs_proc_conn() is common to both versions and handles the update of the
connection hash.
ip_vs_conn_fill_param_sync() - Version 1 messages only
ip_vs_conn_fill_param_sync_v0() - Version 0 messages only
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
-rw-r--r-- | include/linux/ip_vs.h | 8 | ||||
-rw-r--r-- | include/net/ip_vs.h | 1 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_pe.c | 5 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_sync.c | 549 |
4 files changed, 440 insertions, 123 deletions
diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h index 5f43a3b2e3ad..4deb3834d62c 100644 --- a/include/linux/ip_vs.h +++ b/include/linux/ip_vs.h | |||
@@ -89,6 +89,14 @@ | |||
89 | #define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */ | 89 | #define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */ |
90 | #define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */ | 90 | #define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */ |
91 | 91 | ||
92 | #define IP_VS_CONN_F_BACKUP_MASK (IP_VS_CONN_F_FWD_MASK | \ | ||
93 | IP_VS_CONN_F_NOOUTPUT | \ | ||
94 | IP_VS_CONN_F_INACTIVE | \ | ||
95 | IP_VS_CONN_F_SEQ_MASK | \ | ||
96 | IP_VS_CONN_F_NO_CPORT | \ | ||
97 | IP_VS_CONN_F_TEMPLATE \ | ||
98 | ) | ||
99 | |||
92 | /* Flags that are not sent to backup server start from bit 16 */ | 100 | /* Flags that are not sent to backup server start from bit 16 */ |
93 | #define IP_VS_CONN_F_NFCT (1 << 16) /* use netfilter conntrack */ | 101 | #define IP_VS_CONN_F_NFCT (1 << 16) /* use netfilter conntrack */ |
94 | 102 | ||
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 890f01c215e9..4069484df7bb 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h | |||
@@ -817,6 +817,7 @@ void ip_vs_unbind_pe(struct ip_vs_service *svc); | |||
817 | int register_ip_vs_pe(struct ip_vs_pe *pe); | 817 | int register_ip_vs_pe(struct ip_vs_pe *pe); |
818 | int unregister_ip_vs_pe(struct ip_vs_pe *pe); | 818 | int unregister_ip_vs_pe(struct ip_vs_pe *pe); |
819 | struct ip_vs_pe *ip_vs_pe_getbyname(const char *name); | 819 | struct ip_vs_pe *ip_vs_pe_getbyname(const char *name); |
820 | struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name); | ||
820 | 821 | ||
821 | static inline void ip_vs_pe_get(const struct ip_vs_pe *pe) | 822 | static inline void ip_vs_pe_get(const struct ip_vs_pe *pe) |
822 | { | 823 | { |
diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c index e99f920b93d1..5cf859ccb31b 100644 --- a/net/netfilter/ipvs/ip_vs_pe.c +++ b/net/netfilter/ipvs/ip_vs_pe.c | |||
@@ -29,12 +29,11 @@ void ip_vs_unbind_pe(struct ip_vs_service *svc) | |||
29 | } | 29 | } |
30 | 30 | ||
31 | /* Get pe in the pe list by name */ | 31 | /* Get pe in the pe list by name */ |
32 | static struct ip_vs_pe * | 32 | struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) |
33 | __ip_vs_pe_getbyname(const char *pe_name) | ||
34 | { | 33 | { |
35 | struct ip_vs_pe *pe; | 34 | struct ip_vs_pe *pe; |
36 | 35 | ||
37 | IP_VS_DBG(2, "%s(): pe_name \"%s\"\n", __func__, | 36 | IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__, |
38 | pe_name); | 37 | pe_name); |
39 | 38 | ||
40 | spin_lock_bh(&ip_vs_pe_lock); | 39 | spin_lock_bh(&ip_vs_pe_lock); |
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 566482f227fa..e071508901d1 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c | |||
@@ -35,6 +35,8 @@ | |||
35 | #include <linux/wait.h> | 35 | #include <linux/wait.h> |
36 | #include <linux/kernel.h> | 36 | #include <linux/kernel.h> |
37 | 37 | ||
38 | #include <asm/unaligned.h> /* Used for ntoh_seq and hton_seq */ | ||
39 | |||
38 | #include <net/ip.h> | 40 | #include <net/ip.h> |
39 | #include <net/sock.h> | 41 | #include <net/sock.h> |
40 | 42 | ||
@@ -286,6 +288,16 @@ static struct sockaddr_in mcast_addr = { | |||
286 | .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP), | 288 | .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP), |
287 | }; | 289 | }; |
288 | 290 | ||
291 | /* | ||
292 | * Copy of struct ip_vs_seq | ||
293 | * From unaligned network order to aligned host order | ||
294 | */ | ||
295 | static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho) | ||
296 | { | ||
297 | ho->init_seq = get_unaligned_be32(&no->init_seq); | ||
298 | ho->delta = get_unaligned_be32(&no->delta); | ||
299 | ho->previous_delta = get_unaligned_be32(&no->previous_delta); | ||
300 | } | ||
289 | 301 | ||
290 | static inline struct ip_vs_sync_buff *sb_dequeue(void) | 302 | static inline struct ip_vs_sync_buff *sb_dequeue(void) |
291 | { | 303 | { |
@@ -418,59 +430,186 @@ void ip_vs_sync_conn(const struct ip_vs_conn *cp) | |||
418 | ip_vs_sync_conn(cp->control); | 430 | ip_vs_sync_conn(cp->control); |
419 | } | 431 | } |
420 | 432 | ||
433 | /* | ||
434 | * fill_param used by version 1 | ||
435 | */ | ||
421 | static inline int | 436 | static inline int |
422 | ip_vs_conn_fill_param_sync(int af, int protocol, | 437 | ip_vs_conn_fill_param_sync(int af, union ip_vs_sync_conn *sc, |
423 | const union nf_inet_addr *caddr, __be16 cport, | 438 | struct ip_vs_conn_param *p, |
424 | const union nf_inet_addr *vaddr, __be16 vport, | 439 | __u8 *pe_data, unsigned int pe_data_len, |
425 | struct ip_vs_conn_param *p) | 440 | __u8 *pe_name, unsigned int pe_name_len) |
426 | { | 441 | { |
427 | /* XXX: Need to take into account persistence engine */ | 442 | #ifdef CONFIG_IP_VS_IPV6 |
428 | ip_vs_conn_fill_param(af, protocol, caddr, cport, vaddr, vport, p); | 443 | if (af == AF_INET6) |
444 | ip_vs_conn_fill_param(af, sc->v6.protocol, | ||
445 | (const union nf_inet_addr *)&sc->v6.caddr, | ||
446 | sc->v6.cport, | ||
447 | (const union nf_inet_addr *)&sc->v6.vaddr, | ||
448 | sc->v6.vport, p); | ||
449 | else | ||
450 | #endif | ||
451 | ip_vs_conn_fill_param(af, sc->v4.protocol, | ||
452 | (const union nf_inet_addr *)&sc->v4.caddr, | ||
453 | sc->v4.cport, | ||
454 | (const union nf_inet_addr *)&sc->v4.vaddr, | ||
455 | sc->v4.vport, p); | ||
456 | /* Handle pe data */ | ||
457 | if (pe_data_len) { | ||
458 | if (pe_name_len) { | ||
459 | char buff[IP_VS_PENAME_MAXLEN+1]; | ||
460 | |||
461 | memcpy(buff, pe_name, pe_name_len); | ||
462 | buff[pe_name_len]=0; | ||
463 | p->pe = __ip_vs_pe_getbyname(buff); | ||
464 | if (!p->pe) { | ||
465 | IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n", buff); | ||
466 | return 1; | ||
467 | } | ||
468 | } else { | ||
469 | IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n"); | ||
470 | return 1; | ||
471 | } | ||
472 | |||
473 | p->pe_data = kmalloc(pe_data_len, GFP_ATOMIC); | ||
474 | if (!p->pe_data) { | ||
475 | if (p->pe->module) | ||
476 | module_put(p->pe->module); | ||
477 | return -ENOMEM; | ||
478 | } | ||
479 | memcpy(p->pe_data, pe_data, pe_data_len); | ||
480 | p->pe_data_len = pe_data_len; | ||
481 | } | ||
429 | return 0; | 482 | return 0; |
430 | } | 483 | } |
431 | 484 | ||
432 | /* | 485 | /* |
433 | * Process received multicast message and create the corresponding | 486 | * Connection Add / Update. |
434 | * ip_vs_conn entries. | 487 | * Common for version 0 and 1 reception of backup sync_conns. |
488 | * Param: ... | ||
489 | * timeout is in sec. | ||
490 | */ | ||
491 | static void ip_vs_proc_conn(struct ip_vs_conn_param *param, unsigned flags, | ||
492 | unsigned state, unsigned protocol, unsigned type, | ||
493 | const union nf_inet_addr *daddr, __be16 dport, | ||
494 | unsigned long timeout, __u32 fwmark, | ||
495 | struct ip_vs_sync_conn_options *opt, | ||
496 | struct ip_vs_protocol *pp) | ||
497 | { | ||
498 | struct ip_vs_dest *dest; | ||
499 | struct ip_vs_conn *cp; | ||
500 | |||
501 | |||
502 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) | ||
503 | cp = ip_vs_conn_in_get(param); | ||
504 | else | ||
505 | cp = ip_vs_ct_in_get(param); | ||
506 | |||
507 | if (cp && param->pe_data) /* Free pe_data */ | ||
508 | kfree(param->pe_data); | ||
509 | if (!cp) { | ||
510 | /* | ||
511 | * Find the appropriate destination for the connection. | ||
512 | * If it is not found the connection will remain unbound | ||
513 | * but still handled. | ||
514 | */ | ||
515 | dest = ip_vs_find_dest(type, daddr, dport, param->vaddr, | ||
516 | param->vport, protocol, fwmark); | ||
517 | |||
518 | /* Set the approprite ativity flag */ | ||
519 | if (protocol == IPPROTO_TCP) { | ||
520 | if (state != IP_VS_TCP_S_ESTABLISHED) | ||
521 | flags |= IP_VS_CONN_F_INACTIVE; | ||
522 | else | ||
523 | flags &= ~IP_VS_CONN_F_INACTIVE; | ||
524 | } else if (protocol == IPPROTO_SCTP) { | ||
525 | if (state != IP_VS_SCTP_S_ESTABLISHED) | ||
526 | flags |= IP_VS_CONN_F_INACTIVE; | ||
527 | else | ||
528 | flags &= ~IP_VS_CONN_F_INACTIVE; | ||
529 | } | ||
530 | cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark); | ||
531 | if (dest) | ||
532 | atomic_dec(&dest->refcnt); | ||
533 | if (!cp) { | ||
534 | if (param->pe_data) | ||
535 | kfree(param->pe_data); | ||
536 | IP_VS_DBG(2, "BACKUP, add new conn. failed\n"); | ||
537 | return; | ||
538 | } | ||
539 | } else if (!cp->dest) { | ||
540 | dest = ip_vs_try_bind_dest(cp); | ||
541 | if (dest) | ||
542 | atomic_dec(&dest->refcnt); | ||
543 | } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) && | ||
544 | (cp->state != state)) { | ||
545 | /* update active/inactive flag for the connection */ | ||
546 | dest = cp->dest; | ||
547 | if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && | ||
548 | (state != IP_VS_TCP_S_ESTABLISHED)) { | ||
549 | atomic_dec(&dest->activeconns); | ||
550 | atomic_inc(&dest->inactconns); | ||
551 | cp->flags |= IP_VS_CONN_F_INACTIVE; | ||
552 | } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) && | ||
553 | (state == IP_VS_TCP_S_ESTABLISHED)) { | ||
554 | atomic_inc(&dest->activeconns); | ||
555 | atomic_dec(&dest->inactconns); | ||
556 | cp->flags &= ~IP_VS_CONN_F_INACTIVE; | ||
557 | } | ||
558 | } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) && | ||
559 | (cp->state != state)) { | ||
560 | dest = cp->dest; | ||
561 | if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && | ||
562 | (state != IP_VS_SCTP_S_ESTABLISHED)) { | ||
563 | atomic_dec(&dest->activeconns); | ||
564 | atomic_inc(&dest->inactconns); | ||
565 | cp->flags &= ~IP_VS_CONN_F_INACTIVE; | ||
566 | } | ||
567 | } | ||
568 | |||
569 | if (opt) | ||
570 | memcpy(&cp->in_seq, opt, sizeof(*opt)); | ||
571 | atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]); | ||
572 | cp->state = state; | ||
573 | cp->old_state = cp->state; | ||
574 | /* | ||
575 | * For Ver 0 messages style | ||
576 | * - Not possible to recover the right timeout for templates | ||
577 | * - can not find the right fwmark | ||
578 | * virtual service. If needed, we can do it for | ||
579 | * non-fwmark persistent services. | ||
580 | * Ver 1 messages style. | ||
581 | * - No problem. | ||
582 | */ | ||
583 | if (timeout) { | ||
584 | if (timeout > MAX_SCHEDULE_TIMEOUT / HZ) | ||
585 | timeout = MAX_SCHEDULE_TIMEOUT / HZ; | ||
586 | cp->timeout = timeout*HZ; | ||
587 | } else if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table) | ||
588 | cp->timeout = pp->timeout_table[state]; | ||
589 | else | ||
590 | cp->timeout = (3*60*HZ); | ||
591 | ip_vs_conn_put(cp); | ||
592 | } | ||
593 | |||
594 | /* | ||
595 | * Process received multicast message for Version 0 | ||
435 | */ | 596 | */ |
436 | static void ip_vs_process_message(char *buffer, const size_t buflen) | 597 | static void ip_vs_process_message_v0(const char *buffer, const size_t buflen) |
437 | { | 598 | { |
438 | struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer; | 599 | struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer; |
439 | struct ip_vs_sync_conn_v0 *s; | 600 | struct ip_vs_sync_conn_v0 *s; |
440 | struct ip_vs_sync_conn_options *opt; | 601 | struct ip_vs_sync_conn_options *opt; |
441 | struct ip_vs_conn *cp; | ||
442 | struct ip_vs_protocol *pp; | 602 | struct ip_vs_protocol *pp; |
443 | struct ip_vs_dest *dest; | ||
444 | struct ip_vs_conn_param param; | 603 | struct ip_vs_conn_param param; |
445 | char *p; | 604 | char *p; |
446 | int i; | 605 | int i; |
447 | 606 | ||
448 | if (buflen < sizeof(struct ip_vs_sync_mesg)) { | ||
449 | IP_VS_ERR_RL("sync message header too short\n"); | ||
450 | return; | ||
451 | } | ||
452 | |||
453 | /* Convert size back to host byte order */ | ||
454 | m->size = ntohs(m->size); | ||
455 | |||
456 | if (buflen != m->size) { | ||
457 | IP_VS_ERR_RL("bogus sync message size\n"); | ||
458 | return; | ||
459 | } | ||
460 | |||
461 | /* SyncID sanity check */ | ||
462 | if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) { | ||
463 | IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n", | ||
464 | m->syncid); | ||
465 | return; | ||
466 | } | ||
467 | |||
468 | p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); | 607 | p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); |
469 | for (i=0; i<m->nr_conns; i++) { | 608 | for (i=0; i<m->nr_conns; i++) { |
470 | unsigned flags, state; | 609 | unsigned flags, state; |
471 | 610 | ||
472 | if (p + SIMPLE_CONN_SIZE > buffer+buflen) { | 611 | if (p + SIMPLE_CONN_SIZE > buffer+buflen) { |
473 | IP_VS_ERR_RL("bogus conn in sync message\n"); | 612 | IP_VS_ERR_RL("BACKUP v0, bogus conn\n"); |
474 | return; | 613 | return; |
475 | } | 614 | } |
476 | s = (struct ip_vs_sync_conn_v0 *) p; | 615 | s = (struct ip_vs_sync_conn_v0 *) p; |
@@ -480,7 +619,7 @@ static void ip_vs_process_message(char *buffer, const size_t buflen) | |||
480 | opt = (struct ip_vs_sync_conn_options *)&s[1]; | 619 | opt = (struct ip_vs_sync_conn_options *)&s[1]; |
481 | p += FULL_CONN_SIZE; | 620 | p += FULL_CONN_SIZE; |
482 | if (p > buffer+buflen) { | 621 | if (p > buffer+buflen) { |
483 | IP_VS_ERR_RL("bogus conn options in sync message\n"); | 622 | IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n"); |
484 | return; | 623 | return; |
485 | } | 624 | } |
486 | } else { | 625 | } else { |
@@ -492,12 +631,12 @@ static void ip_vs_process_message(char *buffer, const size_t buflen) | |||
492 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) { | 631 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) { |
493 | pp = ip_vs_proto_get(s->protocol); | 632 | pp = ip_vs_proto_get(s->protocol); |
494 | if (!pp) { | 633 | if (!pp) { |
495 | IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n", | 634 | IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n", |
496 | s->protocol); | 635 | s->protocol); |
497 | continue; | 636 | continue; |
498 | } | 637 | } |
499 | if (state >= pp->num_states) { | 638 | if (state >= pp->num_states) { |
500 | IP_VS_DBG(2, "Invalid %s state %u in sync msg\n", | 639 | IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n", |
501 | pp->name, state); | 640 | pp->name, state); |
502 | continue; | 641 | continue; |
503 | } | 642 | } |
@@ -505,103 +644,273 @@ static void ip_vs_process_message(char *buffer, const size_t buflen) | |||
505 | /* protocol in templates is not used for state/timeout */ | 644 | /* protocol in templates is not used for state/timeout */ |
506 | pp = NULL; | 645 | pp = NULL; |
507 | if (state > 0) { | 646 | if (state > 0) { |
508 | IP_VS_DBG(2, "Invalid template state %u in sync msg\n", | 647 | IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n", |
509 | state); | 648 | state); |
510 | state = 0; | 649 | state = 0; |
511 | } | 650 | } |
512 | } | 651 | } |
513 | 652 | ||
514 | if (ip_vs_conn_fill_param_sync(AF_INET, s->protocol, | 653 | ip_vs_conn_fill_param(AF_INET, s->protocol, |
515 | (union nf_inet_addr *)&s->caddr, | 654 | (const union nf_inet_addr *)&s->caddr, |
516 | s->cport, | 655 | s->cport, |
517 | (union nf_inet_addr *)&s->vaddr, | 656 | (const union nf_inet_addr *)&s->vaddr, |
518 | s->vport, ¶m)) { | 657 | s->vport, ¶m); |
519 | pr_err("ip_vs_conn_fill_param_sync failed"); | 658 | |
520 | return; | 659 | /* Send timeout as Zero */ |
660 | ip_vs_proc_conn(¶m, flags, state, s->protocol, AF_INET, | ||
661 | (union nf_inet_addr *)&s->daddr, s->dport, | ||
662 | 0, 0, opt, pp); | ||
663 | } | ||
664 | } | ||
665 | |||
666 | /* | ||
667 | * Handle options | ||
668 | */ | ||
669 | static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen, | ||
670 | __u32 *opt_flags, | ||
671 | struct ip_vs_sync_conn_options *opt) | ||
672 | { | ||
673 | struct ip_vs_sync_conn_options *topt; | ||
674 | |||
675 | topt = (struct ip_vs_sync_conn_options *)p; | ||
676 | |||
677 | if (plen != sizeof(struct ip_vs_sync_conn_options)) { | ||
678 | IP_VS_DBG(2, "BACKUP, bogus conn options length\n"); | ||
679 | return -EINVAL; | ||
680 | } | ||
681 | if (*opt_flags & IPVS_OPT_F_SEQ_DATA) { | ||
682 | IP_VS_DBG(2, "BACKUP, conn options found twice\n"); | ||
683 | return -EINVAL; | ||
684 | } | ||
685 | ntoh_seq(&topt->in_seq, &opt->in_seq); | ||
686 | ntoh_seq(&topt->out_seq, &opt->out_seq); | ||
687 | *opt_flags |= IPVS_OPT_F_SEQ_DATA; | ||
688 | return 0; | ||
689 | } | ||
690 | |||
691 | static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len, | ||
692 | __u8 **data, unsigned int maxlen, | ||
693 | __u32 *opt_flags, __u32 flag) | ||
694 | { | ||
695 | if (plen > maxlen) { | ||
696 | IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen); | ||
697 | return -EINVAL; | ||
698 | } | ||
699 | if (*opt_flags & flag) { | ||
700 | IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag); | ||
701 | return -EINVAL; | ||
702 | } | ||
703 | *data_len = plen; | ||
704 | *data = p; | ||
705 | *opt_flags |= flag; | ||
706 | return 0; | ||
707 | } | ||
708 | /* | ||
709 | * Process a Version 1 sync. connection | ||
710 | */ | ||
711 | static inline int ip_vs_proc_sync_conn(__u8 *p, __u8 *msg_end) | ||
712 | { | ||
713 | struct ip_vs_sync_conn_options opt; | ||
714 | union ip_vs_sync_conn *s; | ||
715 | struct ip_vs_protocol *pp; | ||
716 | struct ip_vs_conn_param param; | ||
717 | __u32 flags; | ||
718 | unsigned int af, state, pe_data_len=0, pe_name_len=0; | ||
719 | __u8 *pe_data=NULL, *pe_name=NULL; | ||
720 | __u32 opt_flags=0; | ||
721 | int retc=0; | ||
722 | |||
723 | s = (union ip_vs_sync_conn *) p; | ||
724 | |||
725 | if (s->v6.type & STYPE_F_INET6) { | ||
726 | #ifdef CONFIG_IP_VS_IPV6 | ||
727 | af = AF_INET6; | ||
728 | p += sizeof(struct ip_vs_sync_v6); | ||
729 | #else | ||
730 | IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n"); | ||
731 | retc = 10; | ||
732 | goto out; | ||
733 | #endif | ||
734 | } else if (!s->v4.type) { | ||
735 | af = AF_INET; | ||
736 | p += sizeof(struct ip_vs_sync_v4); | ||
737 | } else { | ||
738 | return -10; | ||
739 | } | ||
740 | if (p > msg_end) | ||
741 | return -20; | ||
742 | |||
743 | /* Process optional params check Type & Len. */ | ||
744 | while (p < msg_end) { | ||
745 | int ptype; | ||
746 | int plen; | ||
747 | |||
748 | if (p+2 > msg_end) | ||
749 | return -30; | ||
750 | ptype = *(p++); | ||
751 | plen = *(p++); | ||
752 | |||
753 | if (!plen || ((p + plen) > msg_end)) | ||
754 | return -40; | ||
755 | /* Handle seq option p = param data */ | ||
756 | switch (ptype & ~IPVS_OPT_F_PARAM) { | ||
757 | case IPVS_OPT_SEQ_DATA: | ||
758 | if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt)) | ||
759 | return -50; | ||
760 | break; | ||
761 | |||
762 | case IPVS_OPT_PE_DATA: | ||
763 | if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data, | ||
764 | IP_VS_PEDATA_MAXLEN, &opt_flags, | ||
765 | IPVS_OPT_F_PE_DATA)) | ||
766 | return -60; | ||
767 | break; | ||
768 | |||
769 | case IPVS_OPT_PE_NAME: | ||
770 | if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name, | ||
771 | IP_VS_PENAME_MAXLEN, &opt_flags, | ||
772 | IPVS_OPT_F_PE_NAME)) | ||
773 | return -70; | ||
774 | break; | ||
775 | |||
776 | default: | ||
777 | /* Param data mandatory ? */ | ||
778 | if (!(ptype & IPVS_OPT_F_PARAM)) { | ||
779 | IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n", | ||
780 | ptype & ~IPVS_OPT_F_PARAM); | ||
781 | retc = 20; | ||
782 | goto out; | ||
783 | } | ||
521 | } | 784 | } |
522 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) | 785 | p += plen; /* Next option */ |
523 | cp = ip_vs_conn_in_get(¶m); | 786 | } |
524 | else | 787 | |
525 | cp = ip_vs_ct_in_get(¶m); | 788 | /* Get flags and Mask off unsupported */ |
526 | if (!cp) { | 789 | flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK; |
527 | /* | 790 | flags |= IP_VS_CONN_F_SYNC; |
528 | * Find the appropriate destination for the connection. | 791 | state = ntohs(s->v4.state); |
529 | * If it is not found the connection will remain unbound | 792 | |
530 | * but still handled. | 793 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) { |
531 | */ | 794 | pp = ip_vs_proto_get(s->v4.protocol); |
532 | dest = ip_vs_find_dest(AF_INET, | 795 | if (!pp) { |
533 | (union nf_inet_addr *)&s->daddr, | 796 | IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n", |
534 | s->dport, | 797 | s->v4.protocol); |
535 | (union nf_inet_addr *)&s->vaddr, | 798 | retc = 30; |
536 | s->vport, | 799 | goto out; |
537 | s->protocol, 0); | 800 | } |
538 | /* Set the approprite ativity flag */ | 801 | if (state >= pp->num_states) { |
539 | if (s->protocol == IPPROTO_TCP) { | 802 | IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n", |
540 | if (state != IP_VS_TCP_S_ESTABLISHED) | 803 | pp->name, state); |
541 | flags |= IP_VS_CONN_F_INACTIVE; | 804 | retc = 40; |
542 | else | 805 | goto out; |
543 | flags &= ~IP_VS_CONN_F_INACTIVE; | 806 | } |
544 | } else if (s->protocol == IPPROTO_SCTP) { | 807 | } else { |
545 | if (state != IP_VS_SCTP_S_ESTABLISHED) | 808 | /* protocol in templates is not used for state/timeout */ |
546 | flags |= IP_VS_CONN_F_INACTIVE; | 809 | pp = NULL; |
547 | else | 810 | if (state > 0) { |
548 | flags &= ~IP_VS_CONN_F_INACTIVE; | 811 | IP_VS_DBG(3, "BACKUP, Invalid template state %u\n", |
812 | state); | ||
813 | state = 0; | ||
814 | } | ||
815 | } | ||
816 | if (ip_vs_conn_fill_param_sync(af, s, ¶m, | ||
817 | pe_data, pe_data_len, | ||
818 | pe_name, pe_name_len)) { | ||
819 | retc = 50; | ||
820 | goto out; | ||
821 | } | ||
822 | /* If only IPv4, just silent skip IPv6 */ | ||
823 | if (af == AF_INET) | ||
824 | ip_vs_proc_conn(¶m, flags, state, s->v4.protocol, af, | ||
825 | (union nf_inet_addr *)&s->v4.daddr, s->v4.dport, | ||
826 | ntohl(s->v4.timeout), ntohl(s->v4.fwmark), | ||
827 | (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL), | ||
828 | pp); | ||
829 | #ifdef CONFIG_IP_VS_IPV6 | ||
830 | else | ||
831 | ip_vs_proc_conn(¶m, flags, state, s->v6.protocol, af, | ||
832 | (union nf_inet_addr *)&s->v6.daddr, s->v6.dport, | ||
833 | ntohl(s->v6.timeout), ntohl(s->v6.fwmark), | ||
834 | (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL), | ||
835 | pp); | ||
836 | #endif | ||
837 | return 0; | ||
838 | /* Error exit */ | ||
839 | out: | ||
840 | IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc); | ||
841 | return retc; | ||
842 | |||
843 | } | ||
844 | /* | ||
845 | * Process received multicast message and create the corresponding | ||
846 | * ip_vs_conn entries. | ||
847 | * Handles Version 0 & 1 | ||
848 | */ | ||
849 | static void ip_vs_process_message(__u8 *buffer, const size_t buflen) | ||
850 | { | ||
851 | struct ip_vs_sync_mesg_v2 *m2 = (struct ip_vs_sync_mesg_v2 *)buffer; | ||
852 | __u8 *p, *msg_end; | ||
853 | unsigned int i, nr_conns; | ||
854 | |||
855 | if (buflen < sizeof(struct ip_vs_sync_mesg)) { | ||
856 | IP_VS_DBG(2, "BACKUP, message header too short\n"); | ||
857 | return; | ||
858 | } | ||
859 | /* Convert size back to host byte order */ | ||
860 | m2->size = ntohs(m2->size); | ||
861 | |||
862 | if (buflen != m2->size) { | ||
863 | IP_VS_DBG(2, "BACKUP, bogus message size\n"); | ||
864 | return; | ||
865 | } | ||
866 | /* SyncID sanity check */ | ||
867 | if (ip_vs_backup_syncid != 0 && m2->syncid != ip_vs_backup_syncid) { | ||
868 | IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid); | ||
869 | return; | ||
870 | } | ||
871 | /* Handle version 1 message */ | ||
872 | if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0) | ||
873 | && (m2->spare == 0)) { | ||
874 | |||
875 | msg_end = buffer + sizeof(struct ip_vs_sync_mesg_v2); | ||
876 | nr_conns = m2->nr_conns; | ||
877 | |||
878 | for (i=0; i<nr_conns; i++) { | ||
879 | union ip_vs_sync_conn *s; | ||
880 | unsigned size; | ||
881 | int retc; | ||
882 | |||
883 | p = msg_end; | ||
884 | if (p + sizeof(s->v4) > buffer+buflen) { | ||
885 | IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n"); | ||
886 | return; | ||
549 | } | 887 | } |
550 | cp = ip_vs_conn_new(¶m, | 888 | s = (union ip_vs_sync_conn *)p; |
551 | (union nf_inet_addr *)&s->daddr, | 889 | size = ntohs(s->v4.ver_size) & SVER_MASK; |
552 | s->dport, flags, dest, 0); | 890 | msg_end = p + size; |
553 | if (dest) | 891 | /* Basic sanity checks */ |
554 | atomic_dec(&dest->refcnt); | 892 | if (msg_end > buffer+buflen) { |
555 | if (!cp) { | 893 | IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n"); |
556 | pr_err("ip_vs_conn_new failed\n"); | ||
557 | return; | 894 | return; |
558 | } | 895 | } |
559 | } else if (!cp->dest) { | 896 | if (ntohs(s->v4.ver_size) >> SVER_SHIFT) { |
560 | dest = ip_vs_try_bind_dest(cp); | 897 | IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n", |
561 | if (dest) | 898 | ntohs(s->v4.ver_size) >> SVER_SHIFT); |
562 | atomic_dec(&dest->refcnt); | 899 | return; |
563 | } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) && | ||
564 | (cp->state != state)) { | ||
565 | /* update active/inactive flag for the connection */ | ||
566 | dest = cp->dest; | ||
567 | if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && | ||
568 | (state != IP_VS_TCP_S_ESTABLISHED)) { | ||
569 | atomic_dec(&dest->activeconns); | ||
570 | atomic_inc(&dest->inactconns); | ||
571 | cp->flags |= IP_VS_CONN_F_INACTIVE; | ||
572 | } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) && | ||
573 | (state == IP_VS_TCP_S_ESTABLISHED)) { | ||
574 | atomic_inc(&dest->activeconns); | ||
575 | atomic_dec(&dest->inactconns); | ||
576 | cp->flags &= ~IP_VS_CONN_F_INACTIVE; | ||
577 | } | 900 | } |
578 | } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) && | 901 | /* Process a single sync_conn */ |
579 | (cp->state != state)) { | 902 | if ((retc=ip_vs_proc_sync_conn(p, msg_end)) < 0) { |
580 | dest = cp->dest; | 903 | IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n", |
581 | if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && | 904 | retc); |
582 | (state != IP_VS_SCTP_S_ESTABLISHED)) { | 905 | return; |
583 | atomic_dec(&dest->activeconns); | ||
584 | atomic_inc(&dest->inactconns); | ||
585 | cp->flags &= ~IP_VS_CONN_F_INACTIVE; | ||
586 | } | 906 | } |
907 | /* Make sure we have 32 bit alignment */ | ||
908 | msg_end = p + ((size + 3) & ~3); | ||
587 | } | 909 | } |
588 | 910 | } else { | |
589 | if (opt) | 911 | /* Old type of message */ |
590 | memcpy(&cp->in_seq, opt, sizeof(*opt)); | 912 | ip_vs_process_message_v0(buffer, buflen); |
591 | atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]); | 913 | return; |
592 | cp->state = state; | ||
593 | cp->old_state = cp->state; | ||
594 | /* | ||
595 | * We can not recover the right timeout for templates | ||
596 | * in all cases, we can not find the right fwmark | ||
597 | * virtual service. If needed, we can do it for | ||
598 | * non-fwmark persistent services. | ||
599 | */ | ||
600 | if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table) | ||
601 | cp->timeout = pp->timeout_table[state]; | ||
602 | else | ||
603 | cp->timeout = (3*60*HZ); | ||
604 | ip_vs_conn_put(cp); | ||
605 | } | 914 | } |
606 | } | 915 | } |
607 | 916 | ||