diff options
author | Hans Schillstrom <hans.schillstrom@ericsson.com> | 2010-11-19 08:25:13 -0500 |
---|---|---|
committer | Simon Horman <horms@verge.net.au> | 2010-11-24 20:42:59 -0500 |
commit | 986a075795339c5ea1122ce9290dfd5504252eb0 (patch) | |
tree | dc908479b0259f311892a9e46d865207ce0dbedc | |
parent | fe5e7a1efb664df0280f10377813d7099fb7eb0f (diff) |
IPVS: Backup, Change sending to Version 1 format
Enable sending of Version 1 format messages and remove Version 0 sending.
Affected functions,
ip_vs_sync_buff_create()
ip_vs_sync_conn()
ip_vs_core.c removal of IPv4 check.
*v5
Just check cp->pe_data_len in ip_vs_sync_conn
Check if padding needed before adding a new sync_conn
to the buffer, i.e. avoid sending padding at the end.
*v4
moved sanity check and pe_name_len after sloop.
use cp->pe instead of cp->dest->svc->pe
Store the real length in each sync_conn, not the padded length;
however, the total size of a sync_msg includes padding.
*v3
Sending ip_vs_sync_conn_options in network order.
Sending Templates for ONE_PACKET conn.
Renaming of ip_vs_sync_mesg to ip_vs_sync_mesg_v0
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
-rw-r--r-- | include/net/ip_vs.h | 2 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_core.c | 13 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_sync.c | 189 |
3 files changed, 156 insertions, 48 deletions
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 4069484df7bb..a715f3db179a 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h | |||
@@ -919,7 +919,7 @@ extern char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; | |||
919 | extern char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; | 919 | extern char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; |
920 | extern int start_sync_thread(int state, char *mcast_ifn, __u8 syncid); | 920 | extern int start_sync_thread(int state, char *mcast_ifn, __u8 syncid); |
921 | extern int stop_sync_thread(int state); | 921 | extern int stop_sync_thread(int state); |
922 | extern void ip_vs_sync_conn(const struct ip_vs_conn *cp); | 922 | extern void ip_vs_sync_conn(struct ip_vs_conn *cp); |
923 | 923 | ||
924 | 924 | ||
925 | /* | 925 | /* |
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 3445da6e8c95..5287771d0647 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c | |||
@@ -1560,9 +1560,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) | |||
1560 | * | 1560 | * |
1561 | * Sync connection if it is about to close to | 1561 | * Sync connection if it is about to close to |
1562 | * encorage the standby servers to update the connections timeout | 1562 | * encorage the standby servers to update the connections timeout |
1563 | * | ||
1564 | * For ONE_PKT let ip_vs_sync_conn() do the filter work. | ||
1563 | */ | 1565 | */ |
1564 | pkts = atomic_add_return(1, &cp->in_pkts); | 1566 | if (cp->flags & IP_VS_CONN_F_ONE_PACKET) |
1565 | if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) && | 1567 | pkts = sysctl_ip_vs_sync_threshold[0]; |
1568 | else | ||
1569 | pkts = atomic_add_return(1, &cp->in_pkts); | ||
1570 | |||
1571 | if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && | ||
1566 | cp->protocol == IPPROTO_SCTP) { | 1572 | cp->protocol == IPPROTO_SCTP) { |
1567 | if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && | 1573 | if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && |
1568 | (pkts % sysctl_ip_vs_sync_threshold[1] | 1574 | (pkts % sysctl_ip_vs_sync_threshold[1] |
@@ -1577,8 +1583,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) | |||
1577 | } | 1583 | } |
1578 | 1584 | ||
1579 | /* Keep this block last: TCP and others with pp->num_states <= 1 */ | 1585 | /* Keep this block last: TCP and others with pp->num_states <= 1 */ |
1580 | else if (af == AF_INET && | 1586 | else if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && |
1581 | (ip_vs_sync_state & IP_VS_STATE_MASTER) && | ||
1582 | (((cp->protocol != IPPROTO_TCP || | 1587 | (((cp->protocol != IPPROTO_TCP || |
1583 | cp->state == IP_VS_TCP_S_ESTABLISHED) && | 1588 | cp->state == IP_VS_TCP_S_ESTABLISHED) && |
1584 | (pkts % sysctl_ip_vs_sync_threshold[1] | 1589 | (pkts % sysctl_ip_vs_sync_threshold[1] |
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index e071508901d1..df5abf0e25af 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c | |||
@@ -226,7 +226,7 @@ struct ip_vs_sync_thread_data { | |||
226 | #define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */ | 226 | #define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */ |
227 | 227 | ||
228 | /* Version 0 header */ | 228 | /* Version 0 header */ |
229 | struct ip_vs_sync_mesg { | 229 | struct ip_vs_sync_mesg_v0 { |
230 | __u8 nr_conns; | 230 | __u8 nr_conns; |
231 | __u8 syncid; | 231 | __u8 syncid; |
232 | __u16 size; | 232 | __u16 size; |
@@ -235,7 +235,7 @@ struct ip_vs_sync_mesg { | |||
235 | }; | 235 | }; |
236 | 236 | ||
237 | /* Version 1 header */ | 237 | /* Version 1 header */ |
238 | struct ip_vs_sync_mesg_v2 { | 238 | struct ip_vs_sync_mesg { |
239 | __u8 reserved; /* must be zero */ | 239 | __u8 reserved; /* must be zero */ |
240 | __u8 syncid; | 240 | __u8 syncid; |
241 | __u16 size; | 241 | __u16 size; |
@@ -299,6 +299,17 @@ static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho) | |||
299 | ho->previous_delta = get_unaligned_be32(&no->previous_delta); | 299 | ho->previous_delta = get_unaligned_be32(&no->previous_delta); |
300 | } | 300 | } |
301 | 301 | ||
302 | /* | ||
303 | * Copy of struct ip_vs_seq | ||
304 | * From Aligned host order to unaligned network order | ||
305 | */ | ||
306 | static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no) | ||
307 | { | ||
308 | put_unaligned_be32(ho->init_seq, &no->init_seq); | ||
309 | put_unaligned_be32(ho->delta, &no->delta); | ||
310 | put_unaligned_be32(ho->previous_delta, &no->previous_delta); | ||
311 | } | ||
312 | |||
302 | static inline struct ip_vs_sync_buff *sb_dequeue(void) | 313 | static inline struct ip_vs_sync_buff *sb_dequeue(void) |
303 | { | 314 | { |
304 | struct ip_vs_sync_buff *sb; | 315 | struct ip_vs_sync_buff *sb; |
@@ -317,6 +328,9 @@ static inline struct ip_vs_sync_buff *sb_dequeue(void) | |||
317 | return sb; | 328 | return sb; |
318 | } | 329 | } |
319 | 330 | ||
331 | /* | ||
332 | * Create a new sync buffer for Version 1 proto. | ||
333 | */ | ||
320 | static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void) | 334 | static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void) |
321 | { | 335 | { |
322 | struct ip_vs_sync_buff *sb; | 336 | struct ip_vs_sync_buff *sb; |
@@ -328,11 +342,15 @@ static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void) | |||
328 | kfree(sb); | 342 | kfree(sb); |
329 | return NULL; | 343 | return NULL; |
330 | } | 344 | } |
331 | sb->mesg->nr_conns = 0; | 345 | sb->mesg->reserved = 0; /* old nr_conns i.e. must be zeo now */ |
346 | sb->mesg->version = SYNC_PROTO_VER; | ||
332 | sb->mesg->syncid = ip_vs_master_syncid; | 347 | sb->mesg->syncid = ip_vs_master_syncid; |
333 | sb->mesg->size = 4; | 348 | sb->mesg->size = sizeof(struct ip_vs_sync_mesg); |
334 | sb->head = (unsigned char *)sb->mesg + 4; | 349 | sb->mesg->nr_conns = 0; |
350 | sb->mesg->spare = 0; | ||
351 | sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg); | ||
335 | sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen; | 352 | sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen; |
353 | |||
336 | sb->firstuse = jiffies; | 354 | sb->firstuse = jiffies; |
337 | return sb; | 355 | return sb; |
338 | } | 356 | } |
@@ -373,18 +391,60 @@ get_curr_sync_buff(unsigned long time) | |||
373 | return sb; | 391 | return sb; |
374 | } | 392 | } |
375 | 393 | ||
376 | |||
377 | /* | 394 | /* |
378 | * Add an ip_vs_conn information into the current sync_buff. | 395 | * Add an ip_vs_conn information into the current sync_buff. |
379 | * Called by ip_vs_in. | 396 | * Called by ip_vs_in. |
397 | * Sending Version 1 messages | ||
380 | */ | 398 | */ |
381 | void ip_vs_sync_conn(const struct ip_vs_conn *cp) | 399 | void ip_vs_sync_conn(struct ip_vs_conn *cp) |
382 | { | 400 | { |
383 | struct ip_vs_sync_mesg *m; | 401 | struct ip_vs_sync_mesg *m; |
384 | struct ip_vs_sync_conn_v0 *s; | 402 | union ip_vs_sync_conn *s; |
385 | int len; | 403 | __u8 *p; |
404 | unsigned int len, pe_name_len, pad; | ||
405 | |||
406 | /* Do not sync ONE PACKET */ | ||
407 | if (cp->flags & IP_VS_CONN_F_ONE_PACKET) | ||
408 | goto control; | ||
409 | sloop: | ||
410 | /* Sanity checks */ | ||
411 | pe_name_len = 0; | ||
412 | if (cp->pe_data_len) { | ||
413 | if (!cp->pe_data || !cp->dest) { | ||
414 | IP_VS_ERR_RL("SYNC, connection pe_data invalid\n"); | ||
415 | return; | ||
416 | } | ||
417 | pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN); | ||
418 | } | ||
386 | 419 | ||
387 | spin_lock(&curr_sb_lock); | 420 | spin_lock(&curr_sb_lock); |
421 | |||
422 | #ifdef CONFIG_IP_VS_IPV6 | ||
423 | if (cp->af == AF_INET6) | ||
424 | len = sizeof(struct ip_vs_sync_v6); | ||
425 | else | ||
426 | #endif | ||
427 | len = sizeof(struct ip_vs_sync_v4); | ||
428 | |||
429 | if (cp->flags & IP_VS_CONN_F_SEQ_MASK) | ||
430 | len += sizeof(struct ip_vs_sync_conn_options) + 2; | ||
431 | |||
432 | if (cp->pe_data_len) | ||
433 | len += cp->pe_data_len + 2; /* + Param hdr field */ | ||
434 | if (pe_name_len) | ||
435 | len += pe_name_len + 2; | ||
436 | |||
437 | /* check if there is a space for this one */ | ||
438 | pad = 0; | ||
439 | if (curr_sb) { | ||
440 | pad = (4 - (size_t)curr_sb->head) & 3; | ||
441 | if (curr_sb->head + len + pad > curr_sb->end) { | ||
442 | sb_queue_tail(curr_sb); | ||
443 | curr_sb = NULL; | ||
444 | pad = 0; | ||
445 | } | ||
446 | } | ||
447 | |||
388 | if (!curr_sb) { | 448 | if (!curr_sb) { |
389 | if (!(curr_sb=ip_vs_sync_buff_create())) { | 449 | if (!(curr_sb=ip_vs_sync_buff_create())) { |
390 | spin_unlock(&curr_sb_lock); | 450 | spin_unlock(&curr_sb_lock); |
@@ -393,41 +453,84 @@ void ip_vs_sync_conn(const struct ip_vs_conn *cp) | |||
393 | } | 453 | } |
394 | } | 454 | } |
395 | 455 | ||
396 | len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : | ||
397 | SIMPLE_CONN_SIZE; | ||
398 | m = curr_sb->mesg; | 456 | m = curr_sb->mesg; |
399 | s = (struct ip_vs_sync_conn_v0 *)curr_sb->head; | 457 | p = curr_sb->head; |
400 | 458 | curr_sb->head += pad + len; | |
401 | /* copy members */ | 459 | m->size += pad + len; |
402 | s->protocol = cp->protocol; | 460 | /* Add ev. padding from prev. sync_conn */ |
403 | s->cport = cp->cport; | 461 | while (pad--) |
404 | s->vport = cp->vport; | 462 | *(p++) = 0; |
405 | s->dport = cp->dport; | 463 | |
406 | s->caddr = cp->caddr.ip; | 464 | s = (union ip_vs_sync_conn *)p; |
407 | s->vaddr = cp->vaddr.ip; | 465 | |
408 | s->daddr = cp->daddr.ip; | 466 | /* Set message type & copy members */ |
409 | s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED); | 467 | s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0); |
410 | s->state = htons(cp->state); | 468 | s->v4.ver_size = htons(len & SVER_MASK); /* Version 0 */ |
411 | if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { | 469 | s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED); |
412 | struct ip_vs_sync_conn_options *opt = | 470 | s->v4.state = htons(cp->state); |
413 | (struct ip_vs_sync_conn_options *)&s[1]; | 471 | s->v4.protocol = cp->protocol; |
414 | memcpy(opt, &cp->in_seq, sizeof(*opt)); | 472 | s->v4.cport = cp->cport; |
415 | } | 473 | s->v4.vport = cp->vport; |
416 | 474 | s->v4.dport = cp->dport; | |
475 | s->v4.fwmark = htonl(cp->fwmark); | ||
476 | s->v4.timeout = htonl(cp->timeout / HZ); | ||
417 | m->nr_conns++; | 477 | m->nr_conns++; |
418 | m->size += len; | ||
419 | curr_sb->head += len; | ||
420 | 478 | ||
421 | /* check if there is a space for next one */ | 479 | #ifdef CONFIG_IP_VS_IPV6 |
422 | if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) { | 480 | if (cp->af == AF_INET6) { |
423 | sb_queue_tail(curr_sb); | 481 | p += sizeof(struct ip_vs_sync_v6); |
424 | curr_sb = NULL; | 482 | ipv6_addr_copy(&s->v6.caddr, &cp->caddr.in6); |
483 | ipv6_addr_copy(&s->v6.vaddr, &cp->vaddr.in6); | ||
484 | ipv6_addr_copy(&s->v6.daddr, &cp->daddr.in6); | ||
485 | } else | ||
486 | #endif | ||
487 | { | ||
488 | p += sizeof(struct ip_vs_sync_v4); /* options ptr */ | ||
489 | s->v4.caddr = cp->caddr.ip; | ||
490 | s->v4.vaddr = cp->vaddr.ip; | ||
491 | s->v4.daddr = cp->daddr.ip; | ||
492 | } | ||
493 | if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { | ||
494 | *(p++) = IPVS_OPT_SEQ_DATA; | ||
495 | *(p++) = sizeof(struct ip_vs_sync_conn_options); | ||
496 | hton_seq((struct ip_vs_seq *)p, &cp->in_seq); | ||
497 | p += sizeof(struct ip_vs_seq); | ||
498 | hton_seq((struct ip_vs_seq *)p, &cp->out_seq); | ||
499 | p += sizeof(struct ip_vs_seq); | ||
425 | } | 500 | } |
501 | /* Handle pe data */ | ||
502 | if (cp->pe_data_len && cp->pe_data) { | ||
503 | *(p++) = IPVS_OPT_PE_DATA; | ||
504 | *(p++) = cp->pe_data_len; | ||
505 | memcpy(p, cp->pe_data, cp->pe_data_len); | ||
506 | p += cp->pe_data_len; | ||
507 | if (pe_name_len) { | ||
508 | /* Add PE_NAME */ | ||
509 | *(p++) = IPVS_OPT_PE_NAME; | ||
510 | *(p++) = pe_name_len; | ||
511 | memcpy(p, cp->pe->name, pe_name_len); | ||
512 | p += pe_name_len; | ||
513 | } | ||
514 | } | ||
515 | |||
426 | spin_unlock(&curr_sb_lock); | 516 | spin_unlock(&curr_sb_lock); |
427 | 517 | ||
518 | control: | ||
428 | /* synchronize its controller if it has */ | 519 | /* synchronize its controller if it has */ |
429 | if (cp->control) | 520 | cp = cp->control; |
430 | ip_vs_sync_conn(cp->control); | 521 | if (!cp) |
522 | return; | ||
523 | /* | ||
524 | * Reduce sync rate for templates | ||
525 | * i.e only increment in_pkts for Templates. | ||
526 | */ | ||
527 | if (cp->flags & IP_VS_CONN_F_TEMPLATE) { | ||
528 | int pkts = atomic_add_return(1, &cp->in_pkts); | ||
529 | |||
530 | if (pkts % sysctl_ip_vs_sync_threshold[1] != 1) | ||
531 | return; | ||
532 | } | ||
533 | goto sloop; | ||
431 | } | 534 | } |
432 | 535 | ||
433 | /* | 536 | /* |
@@ -596,7 +699,7 @@ static void ip_vs_proc_conn(struct ip_vs_conn_param *param, unsigned flags, | |||
596 | */ | 699 | */ |
597 | static void ip_vs_process_message_v0(const char *buffer, const size_t buflen) | 700 | static void ip_vs_process_message_v0(const char *buffer, const size_t buflen) |
598 | { | 701 | { |
599 | struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer; | 702 | struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer; |
600 | struct ip_vs_sync_conn_v0 *s; | 703 | struct ip_vs_sync_conn_v0 *s; |
601 | struct ip_vs_sync_conn_options *opt; | 704 | struct ip_vs_sync_conn_options *opt; |
602 | struct ip_vs_protocol *pp; | 705 | struct ip_vs_protocol *pp; |
@@ -604,7 +707,7 @@ static void ip_vs_process_message_v0(const char *buffer, const size_t buflen) | |||
604 | char *p; | 707 | char *p; |
605 | int i; | 708 | int i; |
606 | 709 | ||
607 | p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); | 710 | p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0); |
608 | for (i=0; i<m->nr_conns; i++) { | 711 | for (i=0; i<m->nr_conns; i++) { |
609 | unsigned flags, state; | 712 | unsigned flags, state; |
610 | 713 | ||
@@ -848,11 +951,11 @@ out: | |||
848 | */ | 951 | */ |
849 | static void ip_vs_process_message(__u8 *buffer, const size_t buflen) | 952 | static void ip_vs_process_message(__u8 *buffer, const size_t buflen) |
850 | { | 953 | { |
851 | struct ip_vs_sync_mesg_v2 *m2 = (struct ip_vs_sync_mesg_v2 *)buffer; | 954 | struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer; |
852 | __u8 *p, *msg_end; | 955 | __u8 *p, *msg_end; |
853 | unsigned int i, nr_conns; | 956 | int i, nr_conns; |
854 | 957 | ||
855 | if (buflen < sizeof(struct ip_vs_sync_mesg)) { | 958 | if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) { |
856 | IP_VS_DBG(2, "BACKUP, message header too short\n"); | 959 | IP_VS_DBG(2, "BACKUP, message header too short\n"); |
857 | return; | 960 | return; |
858 | } | 961 | } |
@@ -872,7 +975,7 @@ static void ip_vs_process_message(__u8 *buffer, const size_t buflen) | |||
872 | if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0) | 975 | if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0) |
873 | && (m2->spare == 0)) { | 976 | && (m2->spare == 0)) { |
874 | 977 | ||
875 | msg_end = buffer + sizeof(struct ip_vs_sync_mesg_v2); | 978 | msg_end = buffer + sizeof(struct ip_vs_sync_mesg); |
876 | nr_conns = m2->nr_conns; | 979 | nr_conns = m2->nr_conns; |
877 | 980 | ||
878 | for (i=0; i<nr_conns; i++) { | 981 | for (i=0; i<nr_conns; i++) { |