diff options
author | David Teigland <teigland@redhat.com> | 2007-09-27 16:53:38 -0400 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2007-10-10 03:56:38 -0400 |
commit | c36258b5925e6cf6bf72904635100593573bfcff (patch) | |
tree | 565f1ce29a7f8a2cd1c25f2d36c932727adbdbc2 /fs/dlm/rcom.c | |
parent | b434eda6fda5bcdcc2dd918e5ffbf7184f2d4e17 (diff) |
[DLM] block dlm_recv in recovery transition
Introduce a per-lockspace rwsem that's held in read mode by dlm_recv
threads while working in the dlm. This allows dlm_recv activity to be
suspended when the lockspace transitions to, from and between recovery
cycles.
The specific bug prompting this change is one where an in-progress
recovery cycle is aborted by a new recovery cycle. While dlm_recv was
processing a recovery message, the recovery cycle was aborted and
dlm_recoverd began cleaning up. dlm_recv decremented recover_locks_count
on an rsb after dlm_recoverd had reset it to zero. This is fixed by
suspending dlm_recv (taking the write lock on the rwsem) before aborting
the current recovery.
The transitions to/from normal and recovery modes are simplified by using
this new ability to block dlm_recv. Switching from normal to recovery
mode means dlm_recv goes from processing locking messages to saving them
for later; switching back does the reverse. Races are avoided by blocking
dlm_recv when setting the flag that switches between modes.
Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs/dlm/rcom.c')
-rw-r--r-- | fs/dlm/rcom.c | 36 |
1 file changed, 8 insertions, 28 deletions
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 188b91c027e4..ae2fd97fa4ad 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -386,7 +386,10 @@ static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) | |||
386 | dlm_recover_process_copy(ls, rc_in); | 386 | dlm_recover_process_copy(ls, rc_in); |
387 | } | 387 | } |
388 | 388 | ||
389 | static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) | 389 | /* If the lockspace doesn't exist then still send a status message |
390 | back; it's possible that it just doesn't have its global_id yet. */ | ||
391 | |||
392 | int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) | ||
390 | { | 393 | { |
391 | struct dlm_rcom *rc; | 394 | struct dlm_rcom *rc; |
392 | struct rcom_config *rf; | 395 | struct rcom_config *rf; |
@@ -446,28 +449,11 @@ static int is_old_reply(struct dlm_ls *ls, struct dlm_rcom *rc) | |||
446 | return rv; | 449 | return rv; |
447 | } | 450 | } |
448 | 451 | ||
449 | /* Called by dlm_recvd; corresponds to dlm_receive_message() but special | 452 | /* Called by dlm_recv; corresponds to dlm_receive_message() but special |
450 | recovery-only comms are sent through here. */ | 453 | recovery-only comms are sent through here. */ |
451 | 454 | ||
452 | void dlm_receive_rcom(struct dlm_header *hd, int nodeid) | 455 | void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) |
453 | { | 456 | { |
454 | struct dlm_rcom *rc = (struct dlm_rcom *) hd; | ||
455 | struct dlm_ls *ls; | ||
456 | |||
457 | dlm_rcom_in(rc); | ||
458 | |||
459 | /* If the lockspace doesn't exist then still send a status message | ||
460 | back; it's possible that it just doesn't have its global_id yet. */ | ||
461 | |||
462 | ls = dlm_find_lockspace_global(hd->h_lockspace); | ||
463 | if (!ls) { | ||
464 | log_print("lockspace %x from %d type %x not found", | ||
465 | hd->h_lockspace, nodeid, rc->rc_type); | ||
466 | if (rc->rc_type == DLM_RCOM_STATUS) | ||
467 | send_ls_not_ready(nodeid, rc); | ||
468 | return; | ||
469 | } | ||
470 | |||
471 | if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) { | 457 | if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) { |
472 | log_debug(ls, "ignoring recovery message %x from %d", | 458 | log_debug(ls, "ignoring recovery message %x from %d", |
473 | rc->rc_type, nodeid); | 459 | rc->rc_type, nodeid); |
@@ -477,12 +463,6 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid) | |||
477 | if (is_old_reply(ls, rc)) | 463 | if (is_old_reply(ls, rc)) |
478 | goto out; | 464 | goto out; |
479 | 465 | ||
480 | if (nodeid != rc->rc_header.h_nodeid) { | ||
481 | log_error(ls, "bad rcom nodeid %d from %d", | ||
482 | rc->rc_header.h_nodeid, nodeid); | ||
483 | goto out; | ||
484 | } | ||
485 | |||
486 | switch (rc->rc_type) { | 466 | switch (rc->rc_type) { |
487 | case DLM_RCOM_STATUS: | 467 | case DLM_RCOM_STATUS: |
488 | receive_rcom_status(ls, rc); | 468 | receive_rcom_status(ls, rc); |
@@ -520,6 +500,6 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid) | |||
520 | DLM_ASSERT(0, printk("rc_type=%x\n", rc->rc_type);); | 500 | DLM_ASSERT(0, printk("rc_type=%x\n", rc->rc_type);); |
521 | } | 501 | } |
522 | out: | 502 | out: |
523 | dlm_put_lockspace(ls); | 503 | return; |
524 | } | 504 | } |
525 | 505 | ||