diff options
author | Goldwyn Rodrigues <rgoldwyn@suse.com> | 2015-09-30 14:20:35 -0400 |
---|---|---|
committer | Goldwyn Rodrigues <rgoldwyn@suse.com> | 2015-10-12 04:32:44 -0400 |
commit | c186b128cda5a246da25f474e4689cb2bfacfcac (patch) | |
tree | 69f77dba5456eee35afab5e8f5f7ebc02b50910f /drivers/md/md-cluster.c | |
parent | 2aa82191ac36cd2f2a41aa25697db30ed7c619ef (diff) |
md-cluster: Perform resync/recovery under a DLM lock
Resync or recovery must be performed by only one node at a time.
A DLM lock resource, resync_lockres, provides the mutual exclusion
so that only one node performs the recovery/resync at a time.
If a node is unable to get the resync_lockres because recovery is
being performed by another node, it sets MD_RECOVERY_NEEDED so as
to schedule recovery in the future.
Remove the debug message in resync_info_update()
used during development.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Diffstat (limited to 'drivers/md/md-cluster.c')
-rw-r--r-- | drivers/md/md-cluster.c | 29 |
1 files changed, 26 insertions, 3 deletions
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 2eb3a5019a63..e1ce9c9a0473 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c | |||
@@ -55,6 +55,7 @@ struct md_cluster_info { | |||
55 | struct completion completion; | 55 | struct completion completion; |
56 | struct mutex sb_mutex; | 56 | struct mutex sb_mutex; |
57 | struct dlm_lock_resource *bitmap_lockres; | 57 | struct dlm_lock_resource *bitmap_lockres; |
58 | struct dlm_lock_resource *resync_lockres; | ||
58 | struct list_head suspend_list; | 59 | struct list_head suspend_list; |
59 | spinlock_t suspend_lock; | 60 | spinlock_t suspend_lock; |
60 | struct md_thread *recovery_thread; | 61 | struct md_thread *recovery_thread; |
@@ -384,6 +385,8 @@ static void process_suspend_info(struct mddev *mddev, | |||
384 | 385 | ||
385 | if (!hi) { | 386 | if (!hi) { |
386 | remove_suspend_info(mddev, slot); | 387 | remove_suspend_info(mddev, slot); |
388 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | ||
389 | md_wakeup_thread(mddev->thread); | ||
387 | return; | 390 | return; |
388 | } | 391 | } |
389 | s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL); | 392 | s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL); |
@@ -758,6 +761,10 @@ static int join(struct mddev *mddev, int nodes) | |||
758 | goto err; | 761 | goto err; |
759 | } | 762 | } |
760 | 763 | ||
764 | cinfo->resync_lockres = lockres_init(mddev, "resync", NULL, 0); | ||
765 | if (!cinfo->resync_lockres) | ||
766 | goto err; | ||
767 | |||
761 | ret = gather_all_resync_info(mddev, nodes); | 768 | ret = gather_all_resync_info(mddev, nodes); |
762 | if (ret) | 769 | if (ret) |
763 | goto err; | 770 | goto err; |
@@ -768,6 +775,7 @@ err: | |||
768 | lockres_free(cinfo->token_lockres); | 775 | lockres_free(cinfo->token_lockres); |
769 | lockres_free(cinfo->ack_lockres); | 776 | lockres_free(cinfo->ack_lockres); |
770 | lockres_free(cinfo->no_new_dev_lockres); | 777 | lockres_free(cinfo->no_new_dev_lockres); |
778 | lockres_free(cinfo->resync_lockres); | ||
771 | lockres_free(cinfo->bitmap_lockres); | 779 | lockres_free(cinfo->bitmap_lockres); |
772 | if (cinfo->lockspace) | 780 | if (cinfo->lockspace) |
773 | dlm_release_lockspace(cinfo->lockspace, 2); | 781 | dlm_release_lockspace(cinfo->lockspace, 2); |
@@ -861,6 +869,13 @@ static int metadata_update_cancel(struct mddev *mddev) | |||
861 | return dlm_unlock_sync(cinfo->token_lockres); | 869 | return dlm_unlock_sync(cinfo->token_lockres); |
862 | } | 870 | } |
863 | 871 | ||
872 | static int resync_start(struct mddev *mddev) | ||
873 | { | ||
874 | struct md_cluster_info *cinfo = mddev->cluster_info; | ||
875 | cinfo->resync_lockres->flags |= DLM_LKF_NOQUEUE; | ||
876 | return dlm_lock_sync(cinfo->resync_lockres, DLM_LOCK_EX); | ||
877 | } | ||
878 | |||
864 | static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi) | 879 | static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi) |
865 | { | 880 | { |
866 | struct md_cluster_info *cinfo = mddev->cluster_info; | 881 | struct md_cluster_info *cinfo = mddev->cluster_info; |
@@ -870,16 +885,22 @@ static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi) | |||
870 | add_resync_info(mddev, cinfo->bitmap_lockres, lo, hi); | 885 | add_resync_info(mddev, cinfo->bitmap_lockres, lo, hi); |
871 | /* Re-acquire the lock to refresh LVB */ | 886 | /* Re-acquire the lock to refresh LVB */ |
872 | dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW); | 887 | dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW); |
873 | pr_info("%s:%d lo: %llu hi: %llu\n", __func__, __LINE__, | ||
874 | (unsigned long long)lo, | ||
875 | (unsigned long long)hi); | ||
876 | cmsg.type = cpu_to_le32(RESYNCING); | 888 | cmsg.type = cpu_to_le32(RESYNCING); |
877 | cmsg.slot = cpu_to_le32(slot); | 889 | cmsg.slot = cpu_to_le32(slot); |
878 | cmsg.low = cpu_to_le64(lo); | 890 | cmsg.low = cpu_to_le64(lo); |
879 | cmsg.high = cpu_to_le64(hi); | 891 | cmsg.high = cpu_to_le64(hi); |
892 | |||
880 | return sendmsg(cinfo, &cmsg); | 893 | return sendmsg(cinfo, &cmsg); |
881 | } | 894 | } |
882 | 895 | ||
896 | static int resync_finish(struct mddev *mddev) | ||
897 | { | ||
898 | struct md_cluster_info *cinfo = mddev->cluster_info; | ||
899 | cinfo->resync_lockres->flags &= ~DLM_LKF_NOQUEUE; | ||
900 | dlm_unlock_sync(cinfo->resync_lockres); | ||
901 | return resync_info_update(mddev, 0, 0); | ||
902 | } | ||
903 | |||
883 | static int area_resyncing(struct mddev *mddev, int direction, | 904 | static int area_resyncing(struct mddev *mddev, int direction, |
884 | sector_t lo, sector_t hi) | 905 | sector_t lo, sector_t hi) |
885 | { | 906 | { |
@@ -995,6 +1016,8 @@ static struct md_cluster_operations cluster_ops = { | |||
995 | .join = join, | 1016 | .join = join, |
996 | .leave = leave, | 1017 | .leave = leave, |
997 | .slot_number = slot_number, | 1018 | .slot_number = slot_number, |
1019 | .resync_start = resync_start, | ||
1020 | .resync_finish = resync_finish, | ||
998 | .resync_info_update = resync_info_update, | 1021 | .resync_info_update = resync_info_update, |
999 | .metadata_update_start = metadata_update_start, | 1022 | .metadata_update_start = metadata_update_start, |
1000 | .metadata_update_finish = metadata_update_finish, | 1023 | .metadata_update_finish = metadata_update_finish, |