aboutsummaryrefslogtreecommitdiffstats
path: root/fs/dlm/rcom.c
diff options
context:
space:
mode:
author: David Teigland <teigland@redhat.com>, 2007-09-27 16:53:38 -0400
committer: Steven Whitehouse <swhiteho@redhat.com>, 2007-10-10 03:56:38 -0400
commit: c36258b5925e6cf6bf72904635100593573bfcff (patch)
tree: 565f1ce29a7f8a2cd1c25f2d36c932727adbdbc2 /fs/dlm/rcom.c
parent: b434eda6fda5bcdcc2dd918e5ffbf7184f2d4e17 (diff)
[DLM] block dlm_recv in recovery transition
Introduce a per-lockspace rwsem that's held in read mode by dlm_recv threads while working in the dlm. This allows dlm_recv activity to be suspended when the lockspace transitions to, from and between recovery cycles.

The specific bug prompting this change is one where an in-progress recovery cycle is aborted by a new recovery cycle. While dlm_recv was processing a recovery message, the recovery cycle was aborted and dlm_recoverd began cleaning up. dlm_recv decremented recover_locks_count on an rsb after dlm_recoverd had reset it to zero. This is fixed by suspending dlm_recv (taking write lock on the rwsem) before aborting the current recovery.

The transitions to/from normal and recovery modes are simplified by using this new ability to block dlm_recv. The switch from normal to recovery mode means dlm_recv goes from processing locking messages, to saving them for later, and vice versa. Races are avoided by blocking dlm_recv when setting the flag that switches between modes.

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs/dlm/rcom.c')
-rw-r--r-- fs/dlm/rcom.c 36
1 files changed, 8 insertions, 28 deletions
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 188b91c027e4..ae2fd97fa4ad 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2005 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -386,7 +386,10 @@ static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
386 dlm_recover_process_copy(ls, rc_in); 386 dlm_recover_process_copy(ls, rc_in);
387} 387}
388 388
389static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) 389/* If the lockspace doesn't exist then still send a status message
390 back; it's possible that it just doesn't have its global_id yet. */
391
392int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
390{ 393{
391 struct dlm_rcom *rc; 394 struct dlm_rcom *rc;
392 struct rcom_config *rf; 395 struct rcom_config *rf;
@@ -446,28 +449,11 @@ static int is_old_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
446 return rv; 449 return rv;
447} 450}
448 451
449/* Called by dlm_recvd; corresponds to dlm_receive_message() but special 452/* Called by dlm_recv; corresponds to dlm_receive_message() but special
450 recovery-only comms are sent through here. */ 453 recovery-only comms are sent through here. */
451 454
452void dlm_receive_rcom(struct dlm_header *hd, int nodeid) 455void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
453{ 456{
454 struct dlm_rcom *rc = (struct dlm_rcom *) hd;
455 struct dlm_ls *ls;
456
457 dlm_rcom_in(rc);
458
459 /* If the lockspace doesn't exist then still send a status message
460 back; it's possible that it just doesn't have its global_id yet. */
461
462 ls = dlm_find_lockspace_global(hd->h_lockspace);
463 if (!ls) {
464 log_print("lockspace %x from %d type %x not found",
465 hd->h_lockspace, nodeid, rc->rc_type);
466 if (rc->rc_type == DLM_RCOM_STATUS)
467 send_ls_not_ready(nodeid, rc);
468 return;
469 }
470
471 if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) { 457 if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) {
472 log_debug(ls, "ignoring recovery message %x from %d", 458 log_debug(ls, "ignoring recovery message %x from %d",
473 rc->rc_type, nodeid); 459 rc->rc_type, nodeid);
@@ -477,12 +463,6 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
477 if (is_old_reply(ls, rc)) 463 if (is_old_reply(ls, rc))
478 goto out; 464 goto out;
479 465
480 if (nodeid != rc->rc_header.h_nodeid) {
481 log_error(ls, "bad rcom nodeid %d from %d",
482 rc->rc_header.h_nodeid, nodeid);
483 goto out;
484 }
485
486 switch (rc->rc_type) { 466 switch (rc->rc_type) {
487 case DLM_RCOM_STATUS: 467 case DLM_RCOM_STATUS:
488 receive_rcom_status(ls, rc); 468 receive_rcom_status(ls, rc);
@@ -520,6 +500,6 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
520 DLM_ASSERT(0, printk("rc_type=%x\n", rc->rc_type);); 500 DLM_ASSERT(0, printk("rc_type=%x\n", rc->rc_type););
521 } 501 }
522 out: 502 out:
523 dlm_put_lockspace(ls); 503 return;
524} 504}
525 505