aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/md-cluster.c
diff options
context:
space:
mode:
author Goldwyn Rodrigues <rgoldwyn@suse.com> 2015-09-30 14:20:35 -0400
committer Goldwyn Rodrigues <rgoldwyn@suse.com> 2015-10-12 04:32:44 -0400
commit c186b128cda5a246da25f474e4689cb2bfacfcac (patch)
tree 69f77dba5456eee35afab5e8f5f7ebc02b50910f /drivers/md/md-cluster.c
parent 2aa82191ac36cd2f2a41aa25697db30ed7c619ef (diff)
md-cluster: Perform resync/recovery under a DLM lock
Resync or recovery must be performed by only one node at a time. A DLM lock resource, resync_lockres, provides the mutual exclusion so that only one node performs the recovery/resync at a time. If a node is unable to get the resync_lockres because recovery is being performed by another node, it sets MD_RECOVERY_NEEDED so as to schedule recovery in the future.

Remove the debug message in resync_info_update() used during development.

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Diffstat (limited to 'drivers/md/md-cluster.c')
-rw-r--r-- drivers/md/md-cluster.c 29
1 files changed, 26 insertions, 3 deletions
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 2eb3a5019a63..e1ce9c9a0473 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -55,6 +55,7 @@ struct md_cluster_info {
55 struct completion completion; 55 struct completion completion;
56 struct mutex sb_mutex; 56 struct mutex sb_mutex;
57 struct dlm_lock_resource *bitmap_lockres; 57 struct dlm_lock_resource *bitmap_lockres;
58 struct dlm_lock_resource *resync_lockres;
58 struct list_head suspend_list; 59 struct list_head suspend_list;
59 spinlock_t suspend_lock; 60 spinlock_t suspend_lock;
60 struct md_thread *recovery_thread; 61 struct md_thread *recovery_thread;
@@ -384,6 +385,8 @@ static void process_suspend_info(struct mddev *mddev,
384 385
385 if (!hi) { 386 if (!hi) {
386 remove_suspend_info(mddev, slot); 387 remove_suspend_info(mddev, slot);
388 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
389 md_wakeup_thread(mddev->thread);
387 return; 390 return;
388 } 391 }
389 s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL); 392 s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
@@ -758,6 +761,10 @@ static int join(struct mddev *mddev, int nodes)
758 goto err; 761 goto err;
759 } 762 }
760 763
764 cinfo->resync_lockres = lockres_init(mddev, "resync", NULL, 0);
765 if (!cinfo->resync_lockres)
766 goto err;
767
761 ret = gather_all_resync_info(mddev, nodes); 768 ret = gather_all_resync_info(mddev, nodes);
762 if (ret) 769 if (ret)
763 goto err; 770 goto err;
@@ -768,6 +775,7 @@ err:
768 lockres_free(cinfo->token_lockres); 775 lockres_free(cinfo->token_lockres);
769 lockres_free(cinfo->ack_lockres); 776 lockres_free(cinfo->ack_lockres);
770 lockres_free(cinfo->no_new_dev_lockres); 777 lockres_free(cinfo->no_new_dev_lockres);
778 lockres_free(cinfo->resync_lockres);
771 lockres_free(cinfo->bitmap_lockres); 779 lockres_free(cinfo->bitmap_lockres);
772 if (cinfo->lockspace) 780 if (cinfo->lockspace)
773 dlm_release_lockspace(cinfo->lockspace, 2); 781 dlm_release_lockspace(cinfo->lockspace, 2);
@@ -861,6 +869,13 @@ static int metadata_update_cancel(struct mddev *mddev)
861 return dlm_unlock_sync(cinfo->token_lockres); 869 return dlm_unlock_sync(cinfo->token_lockres);
862} 870}
863 871
872static int resync_start(struct mddev *mddev)
873{
874 struct md_cluster_info *cinfo = mddev->cluster_info;
875 cinfo->resync_lockres->flags |= DLM_LKF_NOQUEUE;
876 return dlm_lock_sync(cinfo->resync_lockres, DLM_LOCK_EX);
877}
878
864static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi) 879static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
865{ 880{
866 struct md_cluster_info *cinfo = mddev->cluster_info; 881 struct md_cluster_info *cinfo = mddev->cluster_info;
@@ -870,16 +885,22 @@ static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
870 add_resync_info(mddev, cinfo->bitmap_lockres, lo, hi); 885 add_resync_info(mddev, cinfo->bitmap_lockres, lo, hi);
871 /* Re-acquire the lock to refresh LVB */ 886 /* Re-acquire the lock to refresh LVB */
872 dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW); 887 dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW);
873 pr_info("%s:%d lo: %llu hi: %llu\n", __func__, __LINE__,
874 (unsigned long long)lo,
875 (unsigned long long)hi);
876 cmsg.type = cpu_to_le32(RESYNCING); 888 cmsg.type = cpu_to_le32(RESYNCING);
877 cmsg.slot = cpu_to_le32(slot); 889 cmsg.slot = cpu_to_le32(slot);
878 cmsg.low = cpu_to_le64(lo); 890 cmsg.low = cpu_to_le64(lo);
879 cmsg.high = cpu_to_le64(hi); 891 cmsg.high = cpu_to_le64(hi);
892
880 return sendmsg(cinfo, &cmsg); 893 return sendmsg(cinfo, &cmsg);
881} 894}
882 895
896static int resync_finish(struct mddev *mddev)
897{
898 struct md_cluster_info *cinfo = mddev->cluster_info;
899 cinfo->resync_lockres->flags &= ~DLM_LKF_NOQUEUE;
900 dlm_unlock_sync(cinfo->resync_lockres);
901 return resync_info_update(mddev, 0, 0);
902}
903
883static int area_resyncing(struct mddev *mddev, int direction, 904static int area_resyncing(struct mddev *mddev, int direction,
884 sector_t lo, sector_t hi) 905 sector_t lo, sector_t hi)
885{ 906{
@@ -995,6 +1016,8 @@ static struct md_cluster_operations cluster_ops = {
995 .join = join, 1016 .join = join,
996 .leave = leave, 1017 .leave = leave,
997 .slot_number = slot_number, 1018 .slot_number = slot_number,
1019 .resync_start = resync_start,
1020 .resync_finish = resync_finish,
998 .resync_info_update = resync_info_update, 1021 .resync_info_update = resync_info_update,
999 .metadata_update_start = metadata_update_start, 1022 .metadata_update_start = metadata_update_start,
1000 .metadata_update_finish = metadata_update_finish, 1023 .metadata_update_finish = metadata_update_finish,