diff options
-rw-r--r-- | include/net/ip_vs.h | 2 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_ctl.c | 28 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_sync.c | 134 |
3 files changed, 163 insertions, 1 deletions
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index a715f3db179a..d858264217ba 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h | |||
@@ -883,7 +883,9 @@ extern int sysctl_ip_vs_conntrack; | |||
883 | extern int sysctl_ip_vs_snat_reroute; | 883 | extern int sysctl_ip_vs_snat_reroute; |
884 | extern struct ip_vs_stats ip_vs_stats; | 884 | extern struct ip_vs_stats ip_vs_stats; |
885 | extern const struct ctl_path net_vs_ctl_path[]; | 885 | extern const struct ctl_path net_vs_ctl_path[]; |
886 | extern int sysctl_ip_vs_sync_ver; | ||
886 | 887 | ||
888 | extern void ip_vs_sync_switch_mode(int mode); | ||
887 | extern struct ip_vs_service * | 889 | extern struct ip_vs_service * |
888 | ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, | 890 | ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, |
889 | const union nf_inet_addr *vaddr, __be16 vport); | 891 | const union nf_inet_addr *vaddr, __be16 vport); |
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index a5bd00279047..d12a13c497ba 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c | |||
@@ -92,7 +92,7 @@ int sysctl_ip_vs_nat_icmp_send = 0; | |||
92 | int sysctl_ip_vs_conntrack; | 92 | int sysctl_ip_vs_conntrack; |
93 | #endif | 93 | #endif |
94 | int sysctl_ip_vs_snat_reroute = 1; | 94 | int sysctl_ip_vs_snat_reroute = 1; |
95 | 95 | int sysctl_ip_vs_sync_ver = 1; /* Default version of sync proto */ | |
96 | 96 | ||
97 | #ifdef CONFIG_IP_VS_DEBUG | 97 | #ifdef CONFIG_IP_VS_DEBUG |
98 | static int sysctl_ip_vs_debug_level = 0; | 98 | static int sysctl_ip_vs_debug_level = 0; |
@@ -1536,6 +1536,25 @@ proc_do_sync_threshold(ctl_table *table, int write, | |||
1536 | return rc; | 1536 | return rc; |
1537 | } | 1537 | } |
1538 | 1538 | ||
1539 | static int | ||
1540 | proc_do_sync_mode(ctl_table *table, int write, | ||
1541 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
1542 | { | ||
1543 | int *valp = table->data; | ||
1544 | int val = *valp; | ||
1545 | int rc; | ||
1546 | |||
1547 | rc = proc_dointvec(table, write, buffer, lenp, ppos); | ||
1548 | if (write && (*valp != val)) { | ||
1549 | if ((*valp < 0) || (*valp > 1)) { | ||
1550 | /* Restore the correct value */ | ||
1551 | *valp = val; | ||
1552 | } else { | ||
1553 | ip_vs_sync_switch_mode(val); | ||
1554 | } | ||
1555 | } | ||
1556 | return rc; | ||
1557 | } | ||
1539 | 1558 | ||
1540 | /* | 1559 | /* |
1541 | * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) | 1560 | * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) |
@@ -1602,6 +1621,13 @@ static struct ctl_table vs_vars[] = { | |||
1602 | .mode = 0644, | 1621 | .mode = 0644, |
1603 | .proc_handler = &proc_dointvec, | 1622 | .proc_handler = &proc_dointvec, |
1604 | }, | 1623 | }, |
1624 | { | ||
1625 | .procname = "sync_version", | ||
1626 | .data = &sysctl_ip_vs_sync_ver, | ||
1627 | .maxlen = sizeof(int), | ||
1628 | .mode = 0644, | ||
1629 | .proc_handler = &proc_do_sync_mode, | ||
1630 | }, | ||
1605 | #if 0 | 1631 | #if 0 |
1606 | { | 1632 | { |
1607 | .procname = "timeout_established", | 1633 | .procname = "timeout_established", |
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index df5abf0e25af..c1c167ab73ee 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c | |||
@@ -5,6 +5,18 @@ | |||
5 | * high-performance and highly available server based on a | 5 | * high-performance and highly available server based on a |
6 | * cluster of servers. | 6 | * cluster of servers. |
7 | * | 7 | * |
8 | * Version 1 is capable of handling both version 0 and 1 messages. | ||
9 | * Version 0 is the plain old format. | ||
10 | * Note: Version 0 receivers will just drop Version 1 messages. | ||
11 | * Version 1 is capable of handling IPv6, persistence data, | ||
12 | * time-outs, and firewall marks. | ||
13 | * In Version 1, "ip_vs_sync_conn_options" will be sent in network order. | ||
14 | * Version 0 can be turned on by sysctl -w net.ipv4.vs.sync_version=0 | ||
15 | * | ||
16 | * Definitions Message: is a complete datagram | ||
17 | * Sync_conn: is a part of a Message | ||
18 | * Param Data is an option to a Sync_conn. | ||
19 | * | ||
8 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | 20 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> |
9 | * | 21 | * |
10 | * ip_vs_sync: sync connection info from master load balancer to backups | 22 | * ip_vs_sync: sync connection info from master load balancer to backups |
@@ -15,6 +27,8 @@ | |||
15 | * Alexandre Cassen : Added SyncID support for incoming sync | 27 | * Alexandre Cassen : Added SyncID support for incoming sync |
16 | * messages filtering. | 28 | * messages filtering. |
17 | * Justin Ossevoort : Fix endian problem on sync message size. | 29 | * Justin Ossevoort : Fix endian problem on sync message size. |
30 | * Hans Schillstrom : Added Version 1: i.e. IPv6, | ||
31 | * Persistence support, fwmark and time-out. | ||
18 | */ | 32 | */ |
19 | 33 | ||
20 | #define KMSG_COMPONENT "IPVS" | 34 | #define KMSG_COMPONENT "IPVS" |
@@ -392,6 +406,121 @@ get_curr_sync_buff(unsigned long time) | |||
392 | } | 406 | } |
393 | 407 | ||
394 | /* | 408 | /* |
409 | * Switch mode from sending version 0 or 1 | ||
410 | * - must handle sync_buf | ||
411 | */ | ||
412 | void ip_vs_sync_switch_mode(int mode) { | ||
413 | |||
414 | if (!ip_vs_sync_state & IP_VS_STATE_MASTER) | ||
415 | return; | ||
416 | if (mode == sysctl_ip_vs_sync_ver || !curr_sb) | ||
417 | return; | ||
418 | |||
419 | spin_lock_bh(&curr_sb_lock); | ||
420 | /* Buffer empty ? then let buf_create do the job */ | ||
421 | if ( curr_sb->mesg->size <= sizeof(struct ip_vs_sync_mesg)) { | ||
422 | kfree(curr_sb); | ||
423 | curr_sb = NULL; | ||
424 | } else { | ||
425 | spin_lock_bh(&ip_vs_sync_lock); | ||
426 | if (ip_vs_sync_state & IP_VS_STATE_MASTER) | ||
427 | list_add_tail(&curr_sb->list, &ip_vs_sync_queue); | ||
428 | else | ||
429 | ip_vs_sync_buff_release(curr_sb); | ||
430 | spin_unlock_bh(&ip_vs_sync_lock); | ||
431 | } | ||
432 | spin_unlock_bh(&curr_sb_lock); | ||
433 | } | ||
434 | |||
435 | /* | ||
436 | * Create a new sync buffer for Version 0 proto. | ||
437 | */ | ||
438 | static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create_v0(void) | ||
439 | { | ||
440 | struct ip_vs_sync_buff *sb; | ||
441 | struct ip_vs_sync_mesg_v0 *mesg; | ||
442 | |||
443 | if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) | ||
444 | return NULL; | ||
445 | |||
446 | if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) { | ||
447 | kfree(sb); | ||
448 | return NULL; | ||
449 | } | ||
450 | mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg; | ||
451 | mesg->nr_conns = 0; | ||
452 | mesg->syncid = ip_vs_master_syncid; | ||
453 | mesg->size = 4; | ||
454 | sb->head = (unsigned char *)mesg + 4; | ||
455 | sb->end = (unsigned char *)mesg + sync_send_mesg_maxlen; | ||
456 | sb->firstuse = jiffies; | ||
457 | return sb; | ||
458 | } | ||
459 | |||
460 | /* | ||
461 | * Version 0 , could be switched in by sys_ctl. | ||
462 | * Add an ip_vs_conn information into the current sync_buff. | ||
463 | */ | ||
464 | void ip_vs_sync_conn_v0(struct ip_vs_conn *cp) | ||
465 | { | ||
466 | struct ip_vs_sync_mesg_v0 *m; | ||
467 | struct ip_vs_sync_conn_v0 *s; | ||
468 | int len; | ||
469 | |||
470 | if (unlikely(cp->af != AF_INET)) | ||
471 | return; | ||
472 | /* Do not sync ONE PACKET */ | ||
473 | if (cp->flags & IP_VS_CONN_F_ONE_PACKET) | ||
474 | return; | ||
475 | |||
476 | spin_lock(&curr_sb_lock); | ||
477 | if (!curr_sb) { | ||
478 | if (!(curr_sb=ip_vs_sync_buff_create_v0())) { | ||
479 | spin_unlock(&curr_sb_lock); | ||
480 | pr_err("ip_vs_sync_buff_create failed.\n"); | ||
481 | return; | ||
482 | } | ||
483 | } | ||
484 | |||
485 | len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : | ||
486 | SIMPLE_CONN_SIZE; | ||
487 | m = (struct ip_vs_sync_mesg_v0 *)curr_sb->mesg; | ||
488 | s = (struct ip_vs_sync_conn_v0 *)curr_sb->head; | ||
489 | |||
490 | /* copy members */ | ||
491 | s->reserved = 0; | ||
492 | s->protocol = cp->protocol; | ||
493 | s->cport = cp->cport; | ||
494 | s->vport = cp->vport; | ||
495 | s->dport = cp->dport; | ||
496 | s->caddr = cp->caddr.ip; | ||
497 | s->vaddr = cp->vaddr.ip; | ||
498 | s->daddr = cp->daddr.ip; | ||
499 | s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED); | ||
500 | s->state = htons(cp->state); | ||
501 | if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { | ||
502 | struct ip_vs_sync_conn_options *opt = | ||
503 | (struct ip_vs_sync_conn_options *)&s[1]; | ||
504 | memcpy(opt, &cp->in_seq, sizeof(*opt)); | ||
505 | } | ||
506 | |||
507 | m->nr_conns++; | ||
508 | m->size += len; | ||
509 | curr_sb->head += len; | ||
510 | |||
511 | /* check if there is a space for next one */ | ||
512 | if (curr_sb->head + FULL_CONN_SIZE > curr_sb->end) { | ||
513 | sb_queue_tail(curr_sb); | ||
514 | curr_sb = NULL; | ||
515 | } | ||
516 | spin_unlock(&curr_sb_lock); | ||
517 | |||
518 | /* synchronize its controller if it has */ | ||
519 | if (cp->control) | ||
520 | ip_vs_sync_conn(cp->control); | ||
521 | } | ||
522 | |||
523 | /* | ||
395 | * Add an ip_vs_conn information into the current sync_buff. | 524 | * Add an ip_vs_conn information into the current sync_buff. |
396 | * Called by ip_vs_in. | 525 | * Called by ip_vs_in. |
397 | * Sending Version 1 messages | 526 | * Sending Version 1 messages |
@@ -403,6 +532,11 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp) | |||
403 | __u8 *p; | 532 | __u8 *p; |
404 | unsigned int len, pe_name_len, pad; | 533 | unsigned int len, pe_name_len, pad; |
405 | 534 | ||
535 | /* Handle old version of the protocol */ | ||
536 | if (sysctl_ip_vs_sync_ver == 0) { | ||
537 | ip_vs_sync_conn_v0(cp); | ||
538 | return; | ||
539 | } | ||
406 | /* Do not sync ONE PACKET */ | 540 | /* Do not sync ONE PACKET */ |
407 | if (cp->flags & IP_VS_CONN_F_ONE_PACKET) | 541 | if (cp->flags & IP_VS_CONN_F_ONE_PACKET) |
408 | goto control; | 542 | goto control; |