aboutsummaryrefslogtreecommitdiffstats
path: root/fs/dlm/rcom.c
diff options
context:
space:
mode:
author David Teigland <teigland@redhat.com> 2012-04-26 16:54:29 -0400
committer David Teigland <teigland@redhat.com> 2012-05-02 15:15:27 -0400
commit 4875647a08e35f77274838d97ca8fa44158d50e2 (patch)
tree bf8a39eaf3219af5d661ed3e347545306fd84bda /fs/dlm/rcom.c
parent 6d40c4a708e0e996fd9c60d4093aebba5fe1f749 (diff)
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead of using the resource directory) has always been highly experimental, and never seriously used. This commit fixes a number of problems, making nodir much more usable.

- Major change to recovery: recover all locks and restart all in-progress operations after recovery. In some cases it's not possible to know which in-progress locks to recover, so recover all. (Most require recovery in nodir mode anyway since rehashing changes most master nodes.)
- Change the way nodir mode is enabled, from a command line mount arg passed through gfs2, into a sysfs file managed by dlm_controld, consistent with the other config settings.
- Allow recovering MSTCPY locks on an rsb that has not yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages from a previous, aborted recovery cycle. Base this on the local recovery status not being in the state where any nodes should be sending LOCK messages for the current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to the master as is usual), because the lkb can switch back and forth between being a master and being a process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have non-empty convert or waiting queues for granting at the end of recovery. (Rename flag from LOCKS_PURGED to RECOVER_GRANT and similar for the recovery function, because it's not only resources with purged locks that need a grant attempt.)
- Replace a couple of unnecessary assertion panics with error messages.

Signed-off-by: David Teigland <teigland@redhat.com>
Diffstat (limited to 'fs/dlm/rcom.c')
-rw-r--r--fs/dlm/rcom.c23
1 files changed, 17 insertions, 6 deletions
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 6565fd5e28ef..64d3e2b958c7 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -492,30 +492,41 @@ int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
492void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) 492void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
493{ 493{
494 int lock_size = sizeof(struct dlm_rcom) + sizeof(struct rcom_lock); 494 int lock_size = sizeof(struct dlm_rcom) + sizeof(struct rcom_lock);
495 int stop, reply = 0; 495 int stop, reply = 0, lock = 0;
496 uint32_t status;
496 uint64_t seq; 497 uint64_t seq;
497 498
498 switch (rc->rc_type) { 499 switch (rc->rc_type) {
500 case DLM_RCOM_LOCK:
501 lock = 1;
502 break;
503 case DLM_RCOM_LOCK_REPLY:
504 lock = 1;
505 reply = 1;
506 break;
499 case DLM_RCOM_STATUS_REPLY: 507 case DLM_RCOM_STATUS_REPLY:
500 case DLM_RCOM_NAMES_REPLY: 508 case DLM_RCOM_NAMES_REPLY:
501 case DLM_RCOM_LOOKUP_REPLY: 509 case DLM_RCOM_LOOKUP_REPLY:
502 case DLM_RCOM_LOCK_REPLY:
503 reply = 1; 510 reply = 1;
504 }; 511 };
505 512
506 spin_lock(&ls->ls_recover_lock); 513 spin_lock(&ls->ls_recover_lock);
514 status = ls->ls_recover_status;
507 stop = test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); 515 stop = test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
508 seq = ls->ls_recover_seq; 516 seq = ls->ls_recover_seq;
509 spin_unlock(&ls->ls_recover_lock); 517 spin_unlock(&ls->ls_recover_lock);
510 518
511 if ((stop && (rc->rc_type != DLM_RCOM_STATUS)) || 519 if ((stop && (rc->rc_type != DLM_RCOM_STATUS)) ||
512 (reply && (rc->rc_seq_reply != seq))) { 520 (reply && (rc->rc_seq_reply != seq)) ||
521 (lock && !(status & DLM_RS_DIR))) {
513 log_limit(ls, "dlm_receive_rcom ignore msg %d " 522 log_limit(ls, "dlm_receive_rcom ignore msg %d "
514 "from %d %llu %llu seq %llu", 523 "from %d %llu %llu recover seq %llu sts %x gen %u",
515 rc->rc_type, nodeid, 524 rc->rc_type,
525 nodeid,
516 (unsigned long long)rc->rc_seq, 526 (unsigned long long)rc->rc_seq,
517 (unsigned long long)rc->rc_seq_reply, 527 (unsigned long long)rc->rc_seq_reply,
518 (unsigned long long)seq); 528 (unsigned long long)seq,
529 status, ls->ls_generation);
519 goto out; 530 goto out;
520 } 531 }
521 532