diff options
author | David Teigland <teigland@redhat.com> | 2012-04-26 16:54:29 -0400 |
---|---|---|
committer | David Teigland <teigland@redhat.com> | 2012-05-02 15:15:27 -0400 |
commit | 4875647a08e35f77274838d97ca8fa44158d50e2 (patch) | |
tree | bf8a39eaf3219af5d661ed3e347545306fd84bda /fs/dlm/recover.c | |
parent | 6d40c4a708e0e996fd9c60d4093aebba5fe1f749 (diff) |
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progress locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
Diffstat (limited to 'fs/dlm/recover.c')
-rw-r--r-- | fs/dlm/recover.c | 73 |
1 files changed, 42 insertions, 31 deletions
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index 34d5adf1fce7..7554e4dac6bb 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c | |||
@@ -339,9 +339,12 @@ static void set_lock_master(struct list_head *queue, int nodeid) | |||
339 | { | 339 | { |
340 | struct dlm_lkb *lkb; | 340 | struct dlm_lkb *lkb; |
341 | 341 | ||
342 | list_for_each_entry(lkb, queue, lkb_statequeue) | 342 | list_for_each_entry(lkb, queue, lkb_statequeue) { |
343 | if (!(lkb->lkb_flags & DLM_IFL_MSTCPY)) | 343 | if (!(lkb->lkb_flags & DLM_IFL_MSTCPY)) { |
344 | lkb->lkb_nodeid = nodeid; | 344 | lkb->lkb_nodeid = nodeid; |
345 | lkb->lkb_remid = 0; | ||
346 | } | ||
347 | } | ||
345 | } | 348 | } |
346 | 349 | ||
347 | static void set_master_lkbs(struct dlm_rsb *r) | 350 | static void set_master_lkbs(struct dlm_rsb *r) |
@@ -354,18 +357,16 @@ static void set_master_lkbs(struct dlm_rsb *r) | |||
354 | /* | 357 | /* |
355 | * Propagate the new master nodeid to locks | 358 | * Propagate the new master nodeid to locks |
356 | * The NEW_MASTER flag tells dlm_recover_locks() which rsb's to consider. | 359 | * The NEW_MASTER flag tells dlm_recover_locks() which rsb's to consider. |
357 | * The NEW_MASTER2 flag tells recover_lvb() and set_locks_purged() which | 360 | * The NEW_MASTER2 flag tells recover_lvb() and recover_grant() which |
358 | * rsb's to consider. | 361 | * rsb's to consider. |
359 | */ | 362 | */ |
360 | 363 | ||
361 | static void set_new_master(struct dlm_rsb *r, int nodeid) | 364 | static void set_new_master(struct dlm_rsb *r, int nodeid) |
362 | { | 365 | { |
363 | lock_rsb(r); | ||
364 | r->res_nodeid = nodeid; | 366 | r->res_nodeid = nodeid; |
365 | set_master_lkbs(r); | 367 | set_master_lkbs(r); |
366 | rsb_set_flag(r, RSB_NEW_MASTER); | 368 | rsb_set_flag(r, RSB_NEW_MASTER); |
367 | rsb_set_flag(r, RSB_NEW_MASTER2); | 369 | rsb_set_flag(r, RSB_NEW_MASTER2); |
368 | unlock_rsb(r); | ||
369 | } | 370 | } |
370 | 371 | ||
371 | /* | 372 | /* |
@@ -376,9 +377,9 @@ static void set_new_master(struct dlm_rsb *r, int nodeid) | |||
376 | static int recover_master(struct dlm_rsb *r) | 377 | static int recover_master(struct dlm_rsb *r) |
377 | { | 378 | { |
378 | struct dlm_ls *ls = r->res_ls; | 379 | struct dlm_ls *ls = r->res_ls; |
379 | int error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid(); | 380 | int error, ret_nodeid; |
380 | 381 | int our_nodeid = dlm_our_nodeid(); | |
381 | dir_nodeid = dlm_dir_nodeid(r); | 382 | int dir_nodeid = dlm_dir_nodeid(r); |
382 | 383 | ||
383 | if (dir_nodeid == our_nodeid) { | 384 | if (dir_nodeid == our_nodeid) { |
384 | error = dlm_dir_lookup(ls, our_nodeid, r->res_name, | 385 | error = dlm_dir_lookup(ls, our_nodeid, r->res_name, |
@@ -388,7 +389,9 @@ static int recover_master(struct dlm_rsb *r) | |||
388 | 389 | ||
389 | if (ret_nodeid == our_nodeid) | 390 | if (ret_nodeid == our_nodeid) |
390 | ret_nodeid = 0; | 391 | ret_nodeid = 0; |
392 | lock_rsb(r); | ||
391 | set_new_master(r, ret_nodeid); | 393 | set_new_master(r, ret_nodeid); |
394 | unlock_rsb(r); | ||
392 | } else { | 395 | } else { |
393 | recover_list_add(r); | 396 | recover_list_add(r); |
394 | error = dlm_send_rcom_lookup(r, dir_nodeid); | 397 | error = dlm_send_rcom_lookup(r, dir_nodeid); |
@@ -398,24 +401,33 @@ static int recover_master(struct dlm_rsb *r) | |||
398 | } | 401 | } |
399 | 402 | ||
400 | /* | 403 | /* |
401 | * When not using a directory, most resource names will hash to a new static | 404 | * All MSTCPY locks are purged and rebuilt, even if the master stayed the same. |
402 | * master nodeid and the resource will need to be remastered. | 405 | * This is necessary because recovery can be started, aborted and restarted, |
406 | * causing the master nodeid to briefly change during the aborted recovery, and | ||
407 | * change back to the original value in the second recovery. The MSTCPY locks | ||
408 | * may or may not have been purged during the aborted recovery. Another node | ||
409 | * with an outstanding request in waiters list and a request reply saved in the | ||
410 | * requestqueue, cannot know whether it should ignore the reply and resend the | ||
411 | * request, or accept the reply and complete the request. It must do the | ||
412 | * former if the remote node purged MSTCPY locks, and it must do the later if | ||
413 | * the remote node did not. This is solved by always purging MSTCPY locks, in | ||
414 | * which case, the request reply would always be ignored and the request | ||
415 | * resent. | ||
403 | */ | 416 | */ |
404 | 417 | ||
405 | static int recover_master_static(struct dlm_rsb *r) | 418 | static int recover_master_static(struct dlm_rsb *r) |
406 | { | 419 | { |
407 | int master = dlm_dir_nodeid(r); | 420 | int dir_nodeid = dlm_dir_nodeid(r); |
421 | int new_master = dir_nodeid; | ||
408 | 422 | ||
409 | if (master == dlm_our_nodeid()) | 423 | if (dir_nodeid == dlm_our_nodeid()) |
410 | master = 0; | 424 | new_master = 0; |
411 | 425 | ||
412 | if (r->res_nodeid != master) { | 426 | lock_rsb(r); |
413 | if (is_master(r)) | 427 | dlm_purge_mstcpy_locks(r); |
414 | dlm_purge_mstcpy_locks(r); | 428 | set_new_master(r, new_master); |
415 | set_new_master(r, master); | 429 | unlock_rsb(r); |
416 | return 1; | 430 | return 1; |
417 | } | ||
418 | return 0; | ||
419 | } | 431 | } |
420 | 432 | ||
421 | /* | 433 | /* |
@@ -481,7 +493,9 @@ int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc) | |||
481 | if (nodeid == dlm_our_nodeid()) | 493 | if (nodeid == dlm_our_nodeid()) |
482 | nodeid = 0; | 494 | nodeid = 0; |
483 | 495 | ||
496 | lock_rsb(r); | ||
484 | set_new_master(r, nodeid); | 497 | set_new_master(r, nodeid); |
498 | unlock_rsb(r); | ||
485 | recover_list_del(r); | 499 | recover_list_del(r); |
486 | 500 | ||
487 | if (recover_list_empty(ls)) | 501 | if (recover_list_empty(ls)) |
@@ -556,8 +570,6 @@ int dlm_recover_locks(struct dlm_ls *ls) | |||
556 | struct dlm_rsb *r; | 570 | struct dlm_rsb *r; |
557 | int error, count = 0; | 571 | int error, count = 0; |
558 | 572 | ||
559 | log_debug(ls, "dlm_recover_locks"); | ||
560 | |||
561 | down_read(&ls->ls_root_sem); | 573 | down_read(&ls->ls_root_sem); |
562 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { | 574 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { |
563 | if (is_master(r)) { | 575 | if (is_master(r)) { |
@@ -584,7 +596,7 @@ int dlm_recover_locks(struct dlm_ls *ls) | |||
584 | } | 596 | } |
585 | up_read(&ls->ls_root_sem); | 597 | up_read(&ls->ls_root_sem); |
586 | 598 | ||
587 | log_debug(ls, "dlm_recover_locks %d locks", count); | 599 | log_debug(ls, "dlm_recover_locks %d out", count); |
588 | 600 | ||
589 | error = dlm_wait_function(ls, &recover_list_empty); | 601 | error = dlm_wait_function(ls, &recover_list_empty); |
590 | out: | 602 | out: |
@@ -721,21 +733,19 @@ static void recover_conversion(struct dlm_rsb *r) | |||
721 | } | 733 | } |
722 | 734 | ||
723 | /* We've become the new master for this rsb and waiting/converting locks may | 735 | /* We've become the new master for this rsb and waiting/converting locks may |
724 | need to be granted in dlm_grant_after_purge() due to locks that may have | 736 | need to be granted in dlm_recover_grant() due to locks that may have |
725 | existed from a removed node. */ | 737 | existed from a removed node. */ |
726 | 738 | ||
727 | static void set_locks_purged(struct dlm_rsb *r) | 739 | static void recover_grant(struct dlm_rsb *r) |
728 | { | 740 | { |
729 | if (!list_empty(&r->res_waitqueue) || !list_empty(&r->res_convertqueue)) | 741 | if (!list_empty(&r->res_waitqueue) || !list_empty(&r->res_convertqueue)) |
730 | rsb_set_flag(r, RSB_LOCKS_PURGED); | 742 | rsb_set_flag(r, RSB_RECOVER_GRANT); |
731 | } | 743 | } |
732 | 744 | ||
733 | void dlm_recover_rsbs(struct dlm_ls *ls) | 745 | void dlm_recover_rsbs(struct dlm_ls *ls) |
734 | { | 746 | { |
735 | struct dlm_rsb *r; | 747 | struct dlm_rsb *r; |
736 | int count = 0; | 748 | unsigned int count = 0; |
737 | |||
738 | log_debug(ls, "dlm_recover_rsbs"); | ||
739 | 749 | ||
740 | down_read(&ls->ls_root_sem); | 750 | down_read(&ls->ls_root_sem); |
741 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { | 751 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { |
@@ -744,7 +754,7 @@ void dlm_recover_rsbs(struct dlm_ls *ls) | |||
744 | if (rsb_flag(r, RSB_RECOVER_CONVERT)) | 754 | if (rsb_flag(r, RSB_RECOVER_CONVERT)) |
745 | recover_conversion(r); | 755 | recover_conversion(r); |
746 | if (rsb_flag(r, RSB_NEW_MASTER2)) | 756 | if (rsb_flag(r, RSB_NEW_MASTER2)) |
747 | set_locks_purged(r); | 757 | recover_grant(r); |
748 | recover_lvb(r); | 758 | recover_lvb(r); |
749 | count++; | 759 | count++; |
750 | } | 760 | } |
@@ -754,7 +764,8 @@ void dlm_recover_rsbs(struct dlm_ls *ls) | |||
754 | } | 764 | } |
755 | up_read(&ls->ls_root_sem); | 765 | up_read(&ls->ls_root_sem); |
756 | 766 | ||
757 | log_debug(ls, "dlm_recover_rsbs %d rsbs", count); | 767 | if (count) |
768 | log_debug(ls, "dlm_recover_rsbs %d done", count); | ||
758 | } | 769 | } |
759 | 770 | ||
760 | /* Create a single list of all root rsb's to be used during recovery */ | 771 | /* Create a single list of all root rsb's to be used during recovery */ |