about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Hans Schillstrom <hans.schillstrom@ericsson.com> 2010-11-19 08:25:13 -0500
committer: Simon Horman <horms@verge.net.au> 2010-11-24 20:42:59 -0500
commit: 986a075795339c5ea1122ce9290dfd5504252eb0 (patch)
tree: dc908479b0259f311892a9e46d865207ce0dbedc
parent: fe5e7a1efb664df0280f10377813d7099fb7eb0f (diff)
IPVS: Backup, Change sending to Version 1 format
Enable sending of the Version 1 format and remove Version 0 sending.

Affected functions:
  ip_vs_sync_buff_create()
  ip_vs_sync_conn()

ip_vs_core.c: removal of the IPv4 check.

*v5
  Just check cp->pe_data_len in ip_vs_sync_conn().
  Check if padding is needed before adding a new sync_conn to the buffer,
  i.e. avoid sending padding at the end.

*v4
  Moved sanity check and pe_name_len after sloop.
  Use cp->pe instead of cp->dest->svc->pe.
  Real length in each sync_conn, not padded length; however, the total
  size of a sync_msg includes padding.

*v3
  Sending ip_vs_sync_conn_options in network order.
  Sending Templates for ONE_PACKET conn.
  Renaming of ip_vs_sync_mesg to ip_vs_sync_mesg_v0.

Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
-rw-r--r--  include/net/ip_vs.h                2
-rw-r--r--  net/netfilter/ipvs/ip_vs_core.c   13
-rw-r--r--  net/netfilter/ipvs/ip_vs_sync.c  189
3 files changed, 156 insertions, 48 deletions
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 4069484df7bb..a715f3db179a 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -919,7 +919,7 @@ extern char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
919extern char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; 919extern char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
920extern int start_sync_thread(int state, char *mcast_ifn, __u8 syncid); 920extern int start_sync_thread(int state, char *mcast_ifn, __u8 syncid);
921extern int stop_sync_thread(int state); 921extern int stop_sync_thread(int state);
922extern void ip_vs_sync_conn(const struct ip_vs_conn *cp); 922extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
923 923
924 924
925/* 925/*
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 3445da6e8c95..5287771d0647 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1560,9 +1560,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1560 * 1560 *
1561 * Sync connection if it is about to close to 1561 * Sync connection if it is about to close to
1562 * encorage the standby servers to update the connections timeout 1562 * encorage the standby servers to update the connections timeout
1563 *
1564 * For ONE_PKT let ip_vs_sync_conn() do the filter work.
1563 */ 1565 */
1564 pkts = atomic_add_return(1, &cp->in_pkts); 1566 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
1565 if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) && 1567 pkts = sysctl_ip_vs_sync_threshold[0];
1568 else
1569 pkts = atomic_add_return(1, &cp->in_pkts);
1570
1571 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) &&
1566 cp->protocol == IPPROTO_SCTP) { 1572 cp->protocol == IPPROTO_SCTP) {
1567 if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && 1573 if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
1568 (pkts % sysctl_ip_vs_sync_threshold[1] 1574 (pkts % sysctl_ip_vs_sync_threshold[1]
@@ -1577,8 +1583,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1577 } 1583 }
1578 1584
1579 /* Keep this block last: TCP and others with pp->num_states <= 1 */ 1585 /* Keep this block last: TCP and others with pp->num_states <= 1 */
1580 else if (af == AF_INET && 1586 else if ((ip_vs_sync_state & IP_VS_STATE_MASTER) &&
1581 (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
1582 (((cp->protocol != IPPROTO_TCP || 1587 (((cp->protocol != IPPROTO_TCP ||
1583 cp->state == IP_VS_TCP_S_ESTABLISHED) && 1588 cp->state == IP_VS_TCP_S_ESTABLISHED) &&
1584 (pkts % sysctl_ip_vs_sync_threshold[1] 1589 (pkts % sysctl_ip_vs_sync_threshold[1]
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index e071508901d1..df5abf0e25af 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -226,7 +226,7 @@ struct ip_vs_sync_thread_data {
226#define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */ 226#define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */
227 227
228/* Version 0 header */ 228/* Version 0 header */
229struct ip_vs_sync_mesg { 229struct ip_vs_sync_mesg_v0 {
230 __u8 nr_conns; 230 __u8 nr_conns;
231 __u8 syncid; 231 __u8 syncid;
232 __u16 size; 232 __u16 size;
@@ -235,7 +235,7 @@ struct ip_vs_sync_mesg {
235}; 235};
236 236
237/* Version 1 header */ 237/* Version 1 header */
238struct ip_vs_sync_mesg_v2 { 238struct ip_vs_sync_mesg {
239 __u8 reserved; /* must be zero */ 239 __u8 reserved; /* must be zero */
240 __u8 syncid; 240 __u8 syncid;
241 __u16 size; 241 __u16 size;
@@ -299,6 +299,17 @@ static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
299 ho->previous_delta = get_unaligned_be32(&no->previous_delta); 299 ho->previous_delta = get_unaligned_be32(&no->previous_delta);
300} 300}
301 301
302/*
303 * Copy of struct ip_vs_seq
304 * From Aligned host order to unaligned network order
305 */
306static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
307{
308 put_unaligned_be32(ho->init_seq, &no->init_seq);
309 put_unaligned_be32(ho->delta, &no->delta);
310 put_unaligned_be32(ho->previous_delta, &no->previous_delta);
311}
312
302static inline struct ip_vs_sync_buff *sb_dequeue(void) 313static inline struct ip_vs_sync_buff *sb_dequeue(void)
303{ 314{
304 struct ip_vs_sync_buff *sb; 315 struct ip_vs_sync_buff *sb;
@@ -317,6 +328,9 @@ static inline struct ip_vs_sync_buff *sb_dequeue(void)
317 return sb; 328 return sb;
318} 329}
319 330
331/*
332 * Create a new sync buffer for Version 1 proto.
333 */
320static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void) 334static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void)
321{ 335{
322 struct ip_vs_sync_buff *sb; 336 struct ip_vs_sync_buff *sb;
@@ -328,11 +342,15 @@ static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void)
328 kfree(sb); 342 kfree(sb);
329 return NULL; 343 return NULL;
330 } 344 }
331 sb->mesg->nr_conns = 0; 345 sb->mesg->reserved = 0; /* old nr_conns i.e. must be zeo now */
346 sb->mesg->version = SYNC_PROTO_VER;
332 sb->mesg->syncid = ip_vs_master_syncid; 347 sb->mesg->syncid = ip_vs_master_syncid;
333 sb->mesg->size = 4; 348 sb->mesg->size = sizeof(struct ip_vs_sync_mesg);
334 sb->head = (unsigned char *)sb->mesg + 4; 349 sb->mesg->nr_conns = 0;
350 sb->mesg->spare = 0;
351 sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
335 sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen; 352 sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen;
353
336 sb->firstuse = jiffies; 354 sb->firstuse = jiffies;
337 return sb; 355 return sb;
338} 356}
@@ -373,18 +391,60 @@ get_curr_sync_buff(unsigned long time)
373 return sb; 391 return sb;
374} 392}
375 393
376
377/* 394/*
378 * Add an ip_vs_conn information into the current sync_buff. 395 * Add an ip_vs_conn information into the current sync_buff.
379 * Called by ip_vs_in. 396 * Called by ip_vs_in.
397 * Sending Version 1 messages
380 */ 398 */
381void ip_vs_sync_conn(const struct ip_vs_conn *cp) 399void ip_vs_sync_conn(struct ip_vs_conn *cp)
382{ 400{
383 struct ip_vs_sync_mesg *m; 401 struct ip_vs_sync_mesg *m;
384 struct ip_vs_sync_conn_v0 *s; 402 union ip_vs_sync_conn *s;
385 int len; 403 __u8 *p;
404 unsigned int len, pe_name_len, pad;
405
406 /* Do not sync ONE PACKET */
407 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
408 goto control;
409sloop:
410 /* Sanity checks */
411 pe_name_len = 0;
412 if (cp->pe_data_len) {
413 if (!cp->pe_data || !cp->dest) {
414 IP_VS_ERR_RL("SYNC, connection pe_data invalid\n");
415 return;
416 }
417 pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
418 }
386 419
387 spin_lock(&curr_sb_lock); 420 spin_lock(&curr_sb_lock);
421
422#ifdef CONFIG_IP_VS_IPV6
423 if (cp->af == AF_INET6)
424 len = sizeof(struct ip_vs_sync_v6);
425 else
426#endif
427 len = sizeof(struct ip_vs_sync_v4);
428
429 if (cp->flags & IP_VS_CONN_F_SEQ_MASK)
430 len += sizeof(struct ip_vs_sync_conn_options) + 2;
431
432 if (cp->pe_data_len)
433 len += cp->pe_data_len + 2; /* + Param hdr field */
434 if (pe_name_len)
435 len += pe_name_len + 2;
436
437 /* check if there is a space for this one */
438 pad = 0;
439 if (curr_sb) {
440 pad = (4 - (size_t)curr_sb->head) & 3;
441 if (curr_sb->head + len + pad > curr_sb->end) {
442 sb_queue_tail(curr_sb);
443 curr_sb = NULL;
444 pad = 0;
445 }
446 }
447
388 if (!curr_sb) { 448 if (!curr_sb) {
389 if (!(curr_sb=ip_vs_sync_buff_create())) { 449 if (!(curr_sb=ip_vs_sync_buff_create())) {
390 spin_unlock(&curr_sb_lock); 450 spin_unlock(&curr_sb_lock);
@@ -393,41 +453,84 @@ void ip_vs_sync_conn(const struct ip_vs_conn *cp)
393 } 453 }
394 } 454 }
395 455
396 len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
397 SIMPLE_CONN_SIZE;
398 m = curr_sb->mesg; 456 m = curr_sb->mesg;
399 s = (struct ip_vs_sync_conn_v0 *)curr_sb->head; 457 p = curr_sb->head;
400 458 curr_sb->head += pad + len;
401 /* copy members */ 459 m->size += pad + len;
402 s->protocol = cp->protocol; 460 /* Add ev. padding from prev. sync_conn */
403 s->cport = cp->cport; 461 while (pad--)
404 s->vport = cp->vport; 462 *(p++) = 0;
405 s->dport = cp->dport; 463
406 s->caddr = cp->caddr.ip; 464 s = (union ip_vs_sync_conn *)p;
407 s->vaddr = cp->vaddr.ip; 465
408 s->daddr = cp->daddr.ip; 466 /* Set message type & copy members */
409 s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED); 467 s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0);
410 s->state = htons(cp->state); 468 s->v4.ver_size = htons(len & SVER_MASK); /* Version 0 */
411 if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { 469 s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED);
412 struct ip_vs_sync_conn_options *opt = 470 s->v4.state = htons(cp->state);
413 (struct ip_vs_sync_conn_options *)&s[1]; 471 s->v4.protocol = cp->protocol;
414 memcpy(opt, &cp->in_seq, sizeof(*opt)); 472 s->v4.cport = cp->cport;
415 } 473 s->v4.vport = cp->vport;
416 474 s->v4.dport = cp->dport;
475 s->v4.fwmark = htonl(cp->fwmark);
476 s->v4.timeout = htonl(cp->timeout / HZ);
417 m->nr_conns++; 477 m->nr_conns++;
418 m->size += len;
419 curr_sb->head += len;
420 478
421 /* check if there is a space for next one */ 479#ifdef CONFIG_IP_VS_IPV6
422 if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) { 480 if (cp->af == AF_INET6) {
423 sb_queue_tail(curr_sb); 481 p += sizeof(struct ip_vs_sync_v6);
424 curr_sb = NULL; 482 ipv6_addr_copy(&s->v6.caddr, &cp->caddr.in6);
483 ipv6_addr_copy(&s->v6.vaddr, &cp->vaddr.in6);
484 ipv6_addr_copy(&s->v6.daddr, &cp->daddr.in6);
485 } else
486#endif
487 {
488 p += sizeof(struct ip_vs_sync_v4); /* options ptr */
489 s->v4.caddr = cp->caddr.ip;
490 s->v4.vaddr = cp->vaddr.ip;
491 s->v4.daddr = cp->daddr.ip;
492 }
493 if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
494 *(p++) = IPVS_OPT_SEQ_DATA;
495 *(p++) = sizeof(struct ip_vs_sync_conn_options);
496 hton_seq((struct ip_vs_seq *)p, &cp->in_seq);
497 p += sizeof(struct ip_vs_seq);
498 hton_seq((struct ip_vs_seq *)p, &cp->out_seq);
499 p += sizeof(struct ip_vs_seq);
425 } 500 }
501 /* Handle pe data */
502 if (cp->pe_data_len && cp->pe_data) {
503 *(p++) = IPVS_OPT_PE_DATA;
504 *(p++) = cp->pe_data_len;
505 memcpy(p, cp->pe_data, cp->pe_data_len);
506 p += cp->pe_data_len;
507 if (pe_name_len) {
508 /* Add PE_NAME */
509 *(p++) = IPVS_OPT_PE_NAME;
510 *(p++) = pe_name_len;
511 memcpy(p, cp->pe->name, pe_name_len);
512 p += pe_name_len;
513 }
514 }
515
426 spin_unlock(&curr_sb_lock); 516 spin_unlock(&curr_sb_lock);
427 517
518control:
428 /* synchronize its controller if it has */ 519 /* synchronize its controller if it has */
429 if (cp->control) 520 cp = cp->control;
430 ip_vs_sync_conn(cp->control); 521 if (!cp)
522 return;
523 /*
524 * Reduce sync rate for templates
525 * i.e only increment in_pkts for Templates.
526 */
527 if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
528 int pkts = atomic_add_return(1, &cp->in_pkts);
529
530 if (pkts % sysctl_ip_vs_sync_threshold[1] != 1)
531 return;
532 }
533 goto sloop;
431} 534}
432 535
433/* 536/*
@@ -596,7 +699,7 @@ static void ip_vs_proc_conn(struct ip_vs_conn_param *param, unsigned flags,
596 */ 699 */
597static void ip_vs_process_message_v0(const char *buffer, const size_t buflen) 700static void ip_vs_process_message_v0(const char *buffer, const size_t buflen)
598{ 701{
599 struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer; 702 struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
600 struct ip_vs_sync_conn_v0 *s; 703 struct ip_vs_sync_conn_v0 *s;
601 struct ip_vs_sync_conn_options *opt; 704 struct ip_vs_sync_conn_options *opt;
602 struct ip_vs_protocol *pp; 705 struct ip_vs_protocol *pp;
@@ -604,7 +707,7 @@ static void ip_vs_process_message_v0(const char *buffer, const size_t buflen)
604 char *p; 707 char *p;
605 int i; 708 int i;
606 709
607 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); 710 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);
608 for (i=0; i<m->nr_conns; i++) { 711 for (i=0; i<m->nr_conns; i++) {
609 unsigned flags, state; 712 unsigned flags, state;
610 713
@@ -848,11 +951,11 @@ out:
848 */ 951 */
849static void ip_vs_process_message(__u8 *buffer, const size_t buflen) 952static void ip_vs_process_message(__u8 *buffer, const size_t buflen)
850{ 953{
851 struct ip_vs_sync_mesg_v2 *m2 = (struct ip_vs_sync_mesg_v2 *)buffer; 954 struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
852 __u8 *p, *msg_end; 955 __u8 *p, *msg_end;
853 unsigned int i, nr_conns; 956 int i, nr_conns;
854 957
855 if (buflen < sizeof(struct ip_vs_sync_mesg)) { 958 if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) {
856 IP_VS_DBG(2, "BACKUP, message header too short\n"); 959 IP_VS_DBG(2, "BACKUP, message header too short\n");
857 return; 960 return;
858 } 961 }
@@ -872,7 +975,7 @@ static void ip_vs_process_message(__u8 *buffer, const size_t buflen)
872 if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0) 975 if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0)
873 && (m2->spare == 0)) { 976 && (m2->spare == 0)) {
874 977
875 msg_end = buffer + sizeof(struct ip_vs_sync_mesg_v2); 978 msg_end = buffer + sizeof(struct ip_vs_sync_mesg);
876 nr_conns = m2->nr_conns; 979 nr_conns = m2->nr_conns;
877 980
878 for (i=0; i<nr_conns; i++) { 981 for (i=0; i<nr_conns; i++) {