aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRumen G. Bogdanovski <rumen@voicecho.com>2007-11-07 05:35:54 -0500
committerDavid S. Miller <davem@sunset.davemloft.net>2007-11-07 07:15:09 -0500
commit1e356f9cdfa885c78791d5d6e5d2baef22f01853 (patch)
tree5ddd3064dd27fcca0ca3538842021cccf8ff74d9
parentc183783e28969e92f3df23f8b7e18d5c3e5bc8dd (diff)
[IPVS]: Bind connections on stanby if the destination exists
This patch fixes the problem with node overload on director fail-over. Given the scenario: 2 nodes each accepting 3 connections at a time and 2 directors, director failover occurs when the nodes are fully loaded (6 connections to the cluster) in this case the new director will assign another 6 connections to the cluster, If the same real servers exist there. The problem turned to be in not binding the inherited connections to the real servers (destinations) on the backup director. Therefore: "ipvsadm -l" reports 0 connections: root@test2:~# ipvsadm -l IP Virtual Server version 1.2.1 (size=4096) Prot LocalAddress:Port Scheduler Flags -> RemoteAddress:Port Forward Weight ActiveConn InActConn TCP test2.local:5999 wlc -> node473.local:5999 Route 1000 0 0 -> node484.local:5999 Route 1000 0 0 while "ipvs -lnc" is right root@test2:~# ipvsadm -lnc IPVS connection entries pro expire state source virtual destination TCP 14:56 ESTABLISHED 192.168.0.10:39164 192.168.0.222:5999 192.168.0.51:5999 TCP 14:59 ESTABLISHED 192.168.0.10:39165 192.168.0.222:5999 192.168.0.52:5999 So the patch I am sending fixes the problem by binding the received connections to the appropriate service on the backup director, if it exists, else the connection will be handled the old way. So if the master and the backup directors are synchronized in terms of real services there will be no problem with server over-committing since new connections will not be created on the nonexistent real services on the backup. However if the service is created later on the backup, the binding will be performed when the next connection update is received. With this patch the inherited connections will show as inactive on the backup: root@test2:~# ipvsadm -l IP Virtual Server version 1.2.1 (size=4096) Prot LocalAddress:Port Scheduler Flags -> RemoteAddress:Port Forward Weight ActiveConn InActConn TCP test2.local:5999 wlc -> node473.local:5999 Route 1000 0 1 -> node484.local:5999 Route 1000 0 1 rumen@test2:~$ cat /proc/net/ip_vs IP Virtual Server version 1.2.1 (size=4096) Prot LocalAddress:Port Scheduler Flags -> RemoteAddress:Port Forward Weight ActiveConn InActConn TCP C0A800DE:176F wlc -> C0A80033:176F Route 1000 0 1 -> C0A80032:176F Route 1000 0 1 Regards, Rumen Bogdanovski Acked-by: Julian Anastasov <ja@ssi.bg> Signed-off-by: Rumen G. Bogdanovski <rumen@voicecho.com> Signed-off-by: Simon Horman <horms@verge.net.au>
-rw-r--r--include/net/ip_vs.h4
-rw-r--r--net/ipv4/ipvs/ip_vs_conn.c19
-rw-r--r--net/ipv4/ipvs/ip_vs_ctl.c26
-rw-r--r--net/ipv4/ipvs/ip_vs_sync.c24
4 files changed, 69 insertions, 4 deletions
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 41870564df8e..1fd1ee896f39 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -901,6 +901,10 @@ extern int ip_vs_use_count_inc(void);
901extern void ip_vs_use_count_dec(void); 901extern void ip_vs_use_count_dec(void);
902extern int ip_vs_control_init(void); 902extern int ip_vs_control_init(void);
903extern void ip_vs_control_cleanup(void); 903extern void ip_vs_control_cleanup(void);
904extern struct ip_vs_dest *
905ip_vs_find_dest(__be32 daddr, __be16 dport,
906 __be32 vaddr, __be16 vport, __u16 protocol);
907extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
904 908
905 909
906/* 910/*
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 4b702f708d30..b7eeae622d9b 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -426,6 +426,25 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
426 426
427 427
428/* 428/*
429 * Check if there is a destination for the connection, if so
430 * bind the connection to the destination.
431 */
432struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
433{
434 struct ip_vs_dest *dest;
435
436 if ((cp) && (!cp->dest)) {
437 dest = ip_vs_find_dest(cp->daddr, cp->dport,
438 cp->vaddr, cp->vport, cp->protocol);
439 ip_vs_bind_dest(cp, dest);
440 return dest;
441 } else
442 return NULL;
443}
444EXPORT_SYMBOL(ip_vs_try_bind_dest);
445
446
447/*
429 * Unbind a connection entry with its VS destination 448 * Unbind a connection entry with its VS destination
430 * Called by the ip_vs_conn_expire function. 449 * Called by the ip_vs_conn_expire function.
431 */ 450 */
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 7345fc252a23..3c4d22a468ec 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -579,6 +579,32 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
579 return NULL; 579 return NULL;
580} 580}
581 581
582/*
583 * Find destination by {daddr,dport,vaddr,protocol}
584 * Cretaed to be used in ip_vs_process_message() in
585 * the backup synchronization daemon. It finds the
586 * destination to be bound to the received connection
587 * on the backup.
588 *
589 * ip_vs_lookup_real_service() looked promissing, but
590 * seems not working as expected.
591 */
592struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
593 __be32 vaddr, __be16 vport, __u16 protocol)
594{
595 struct ip_vs_dest *dest;
596 struct ip_vs_service *svc;
597
598 svc = ip_vs_service_get(0, protocol, vaddr, vport);
599 if (!svc)
600 return NULL;
601 dest = ip_vs_lookup_dest(svc, daddr, dport);
602 if (dest)
603 atomic_inc(&dest->refcnt);
604 ip_vs_service_put(svc);
605 return dest;
606}
607EXPORT_SYMBOL(ip_vs_find_dest);
582 608
583/* 609/*
584 * Lookup dest by {svc,addr,port} in the destination trash. 610 * Lookup dest by {svc,addr,port} in the destination trash.
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 0d4d9721cbd4..b1694d67abb9 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -284,6 +284,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
284 struct ip_vs_sync_conn_options *opt; 284 struct ip_vs_sync_conn_options *opt;
285 struct ip_vs_conn *cp; 285 struct ip_vs_conn *cp;
286 struct ip_vs_protocol *pp; 286 struct ip_vs_protocol *pp;
287 struct ip_vs_dest *dest;
287 char *p; 288 char *p;
288 int i; 289 int i;
289 290
@@ -317,20 +318,35 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
317 s->caddr, s->cport, 318 s->caddr, s->cport,
318 s->vaddr, s->vport); 319 s->vaddr, s->vport);
319 if (!cp) { 320 if (!cp) {
321 /*
322 * Find the appropriate destination for the connection.
323 * If it is not found the connection will remain unbound
324 * but still handled.
325 */
326 dest = ip_vs_find_dest(s->daddr, s->dport,
327 s->vaddr, s->vport,
328 s->protocol);
320 cp = ip_vs_conn_new(s->protocol, 329 cp = ip_vs_conn_new(s->protocol,
321 s->caddr, s->cport, 330 s->caddr, s->cport,
322 s->vaddr, s->vport, 331 s->vaddr, s->vport,
323 s->daddr, s->dport, 332 s->daddr, s->dport,
324 flags, NULL); 333 flags, dest);
334 if (dest)
335 atomic_dec(&dest->refcnt);
325 if (!cp) { 336 if (!cp) {
326 IP_VS_ERR("ip_vs_conn_new failed\n"); 337 IP_VS_ERR("ip_vs_conn_new failed\n");
327 return; 338 return;
328 } 339 }
329 cp->state = ntohs(s->state); 340 cp->state = ntohs(s->state);
330 } else if (!cp->dest) { 341 } else if (!cp->dest) {
331 /* it is an entry created by the synchronization */ 342 dest = ip_vs_try_bind_dest(cp);
332 cp->state = ntohs(s->state); 343 if (!dest) {
333 cp->flags = flags | IP_VS_CONN_F_HASHED; 344 /* it is an unbound entry created by
345 * synchronization */
346 cp->state = ntohs(s->state);
347 cp->flags = flags | IP_VS_CONN_F_HASHED;
348 } else
349 atomic_dec(&dest->refcnt);
334 } /* Note that we don't touch its state and flags 350 } /* Note that we don't touch its state and flags
335 if it is a normal entry. */ 351 if it is a normal entry. */
336 352