diff options
author | David Teigland <teigland@redhat.com> | 2012-04-26 16:54:29 -0400 |
---|---|---|
committer | David Teigland <teigland@redhat.com> | 2012-05-02 15:15:27 -0400 |
commit | 4875647a08e35f77274838d97ca8fa44158d50e2 (patch) | |
tree | bf8a39eaf3219af5d661ed3e347545306fd84bda /fs/dlm/rcom.c | |
parent | 6d40c4a708e0e996fd9c60d4093aebba5fe1f749 (diff) |
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progress locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
Diffstat (limited to 'fs/dlm/rcom.c')
-rw-r--r-- | fs/dlm/rcom.c | 23 |
1 files changed, 17 insertions, 6 deletions
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index 6565fd5e28ef..64d3e2b958c7 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c | |||
@@ -492,30 +492,41 @@ int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) | |||
492 | void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) | 492 | void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) |
493 | { | 493 | { |
494 | int lock_size = sizeof(struct dlm_rcom) + sizeof(struct rcom_lock); | 494 | int lock_size = sizeof(struct dlm_rcom) + sizeof(struct rcom_lock); |
495 | int stop, reply = 0; | 495 | int stop, reply = 0, lock = 0; |
496 | uint32_t status; | ||
496 | uint64_t seq; | 497 | uint64_t seq; |
497 | 498 | ||
498 | switch (rc->rc_type) { | 499 | switch (rc->rc_type) { |
500 | case DLM_RCOM_LOCK: | ||
501 | lock = 1; | ||
502 | break; | ||
503 | case DLM_RCOM_LOCK_REPLY: | ||
504 | lock = 1; | ||
505 | reply = 1; | ||
506 | break; | ||
499 | case DLM_RCOM_STATUS_REPLY: | 507 | case DLM_RCOM_STATUS_REPLY: |
500 | case DLM_RCOM_NAMES_REPLY: | 508 | case DLM_RCOM_NAMES_REPLY: |
501 | case DLM_RCOM_LOOKUP_REPLY: | 509 | case DLM_RCOM_LOOKUP_REPLY: |
502 | case DLM_RCOM_LOCK_REPLY: | ||
503 | reply = 1; | 510 | reply = 1; |
504 | }; | 511 | }; |
505 | 512 | ||
506 | spin_lock(&ls->ls_recover_lock); | 513 | spin_lock(&ls->ls_recover_lock); |
514 | status = ls->ls_recover_status; | ||
507 | stop = test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); | 515 | stop = test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); |
508 | seq = ls->ls_recover_seq; | 516 | seq = ls->ls_recover_seq; |
509 | spin_unlock(&ls->ls_recover_lock); | 517 | spin_unlock(&ls->ls_recover_lock); |
510 | 518 | ||
511 | if ((stop && (rc->rc_type != DLM_RCOM_STATUS)) || | 519 | if ((stop && (rc->rc_type != DLM_RCOM_STATUS)) || |
512 | (reply && (rc->rc_seq_reply != seq))) { | 520 | (reply && (rc->rc_seq_reply != seq)) || |
521 | (lock && !(status & DLM_RS_DIR))) { | ||
513 | log_limit(ls, "dlm_receive_rcom ignore msg %d " | 522 | log_limit(ls, "dlm_receive_rcom ignore msg %d " |
514 | "from %d %llu %llu seq %llu", | 523 | "from %d %llu %llu recover seq %llu sts %x gen %u", |
515 | rc->rc_type, nodeid, | 524 | rc->rc_type, |
525 | nodeid, | ||
516 | (unsigned long long)rc->rc_seq, | 526 | (unsigned long long)rc->rc_seq, |
517 | (unsigned long long)rc->rc_seq_reply, | 527 | (unsigned long long)rc->rc_seq_reply, |
518 | (unsigned long long)seq); | 528 | (unsigned long long)seq, |
529 | status, ls->ls_generation); | ||
519 | goto out; | 530 | goto out; |
520 | } | 531 | } |
521 | 532 | ||