aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Goldwyn Rodrigues <rgoldwyn@suse.com> 2015-10-01 14:20:27 -0400
committer: Goldwyn Rodrigues <rgoldwyn@suse.com> 2015-10-12 04:35:30 -0400
commit: dbb64f8635f5d68192108b88759a34633a4bd558 (patch)
tree: 1159afed1de7314ce0d412469031832d0295c905
parent: c186b128cda5a246da25f474e4689cb2bfacfcac (diff)
md-cluster: Fix adding of new disk with new reload code
Adding the disk worked incorrectly with the new reload code. Fix it:
- No operation should be performed on an rdev marked as Candidate.
- After a metadata update operation, kick the disk if its role is 0xfffe; otherwise clear the Candidate bit and continue with the regular change check.
- Save the mode of the lock resource to check whether the token lock is already locked, because lock_comm() can be called twice while adding a disk. However, unlock_comm() must be called only once.
- add_new_disk() is called by the node initiating the --add operation. If it needs to be canceled, call add_new_disk_cancel(). The operation is completed by md_update_sb(), which will write the superblock and unlock the communication.

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
-rw-r--r-- drivers/md/md-cluster.c 35
-rw-r--r-- drivers/md/md-cluster.h 6
-rw-r--r-- drivers/md/md.c 52
3 files changed, 58 insertions, 35 deletions
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index e1ce9c9a0473..28494e9f8d02 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -28,6 +28,7 @@ struct dlm_lock_resource {
28 struct completion completion; /* completion for synchronized locking */ 28 struct completion completion; /* completion for synchronized locking */
29 void (*bast)(void *arg, int mode); /* blocking AST function pointer*/ 29 void (*bast)(void *arg, int mode); /* blocking AST function pointer*/
30 struct mddev *mddev; /* pointing back to mddev. */ 30 struct mddev *mddev; /* pointing back to mddev. */
31 int mode;
31}; 32};
32 33
33struct suspend_info { 34struct suspend_info {
@@ -107,6 +108,8 @@ static int dlm_lock_sync(struct dlm_lock_resource *res, int mode)
107 if (ret) 108 if (ret)
108 return ret; 109 return ret;
109 wait_for_completion(&res->completion); 110 wait_for_completion(&res->completion);
111 if (res->lksb.sb_status == 0)
112 res->mode = mode;
110 return res->lksb.sb_status; 113 return res->lksb.sb_status;
111} 114}
112 115
@@ -128,6 +131,7 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
128 init_completion(&res->completion); 131 init_completion(&res->completion);
129 res->ls = cinfo->lockspace; 132 res->ls = cinfo->lockspace;
130 res->mddev = mddev; 133 res->mddev = mddev;
134 res->mode = DLM_LOCK_IV;
131 namelen = strlen(name); 135 namelen = strlen(name);
132 res->name = kzalloc(namelen + 1, GFP_KERNEL); 136 res->name = kzalloc(namelen + 1, GFP_KERNEL);
133 if (!res->name) { 137 if (!res->name) {
@@ -536,11 +540,17 @@ static void recv_daemon(struct md_thread *thread)
536/* lock_comm() 540/* lock_comm()
537 * Takes the lock on the TOKEN lock resource so no other 541 * Takes the lock on the TOKEN lock resource so no other
538 * node can communicate while the operation is underway. 542 * node can communicate while the operation is underway.
543 * If called again, and the TOKEN lock is already in EX mode
544 * return success. However, care must be taken that unlock_comm()
545 * is called only once.
539 */ 546 */
540static int lock_comm(struct md_cluster_info *cinfo) 547static int lock_comm(struct md_cluster_info *cinfo)
541{ 548{
542 int error; 549 int error;
543 550
551 if (cinfo->token_lockres->mode == DLM_LOCK_EX)
552 return 0;
553
544 error = dlm_lock_sync(cinfo->token_lockres, DLM_LOCK_EX); 554 error = dlm_lock_sync(cinfo->token_lockres, DLM_LOCK_EX);
545 if (error) 555 if (error)
546 pr_err("md-cluster(%s:%d): failed to get EX on TOKEN (%d)\n", 556 pr_err("md-cluster(%s:%d): failed to get EX on TOKEN (%d)\n",
@@ -550,6 +560,7 @@ static int lock_comm(struct md_cluster_info *cinfo)
550 560
551static void unlock_comm(struct md_cluster_info *cinfo) 561static void unlock_comm(struct md_cluster_info *cinfo)
552{ 562{
563 WARN_ON(cinfo->token_lockres->mode != DLM_LOCK_EX);
553 dlm_unlock_sync(cinfo->token_lockres); 564 dlm_unlock_sync(cinfo->token_lockres);
554} 565}
555 566
@@ -862,11 +873,10 @@ static int metadata_update_finish(struct mddev *mddev)
862 return ret; 873 return ret;
863} 874}
864 875
865static int metadata_update_cancel(struct mddev *mddev) 876static void metadata_update_cancel(struct mddev *mddev)
866{ 877{
867 struct md_cluster_info *cinfo = mddev->cluster_info; 878 struct md_cluster_info *cinfo = mddev->cluster_info;
868 879 unlock_comm(cinfo);
869 return dlm_unlock_sync(cinfo->token_lockres);
870} 880}
871 881
872static int resync_start(struct mddev *mddev) 882static int resync_start(struct mddev *mddev)
@@ -925,7 +935,11 @@ out:
925 return ret; 935 return ret;
926} 936}
927 937
928static int add_new_disk_start(struct mddev *mddev, struct md_rdev *rdev) 938/* add_new_disk() - initiates a disk add
939 * However, if this fails before writing md_update_sb(),
940 * add_new_disk_cancel() must be called to release token lock
941 */
942static int add_new_disk(struct mddev *mddev, struct md_rdev *rdev)
929{ 943{
930 struct md_cluster_info *cinfo = mddev->cluster_info; 944 struct md_cluster_info *cinfo = mddev->cluster_info;
931 struct cluster_msg cmsg; 945 struct cluster_msg cmsg;
@@ -947,16 +961,17 @@ static int add_new_disk_start(struct mddev *mddev, struct md_rdev *rdev)
947 /* Some node does not "see" the device */ 961 /* Some node does not "see" the device */
948 if (ret == -EAGAIN) 962 if (ret == -EAGAIN)
949 ret = -ENOENT; 963 ret = -ENOENT;
964 if (ret)
965 unlock_comm(cinfo);
950 else 966 else
951 dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR); 967 dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR);
952 return ret; 968 return ret;
953} 969}
954 970
955static int add_new_disk_finish(struct mddev *mddev) 971static void add_new_disk_cancel(struct mddev *mddev)
956{ 972{
957 /* Write sb and inform others */ 973 struct md_cluster_info *cinfo = mddev->cluster_info;
958 md_update_sb(mddev, 1); 974 unlock_comm(cinfo);
959 return metadata_update_finish(mddev);
960} 975}
961 976
962static int new_disk_ack(struct mddev *mddev, bool ack) 977static int new_disk_ack(struct mddev *mddev, bool ack)
@@ -1023,8 +1038,8 @@ static struct md_cluster_operations cluster_ops = {
1023 .metadata_update_finish = metadata_update_finish, 1038 .metadata_update_finish = metadata_update_finish,
1024 .metadata_update_cancel = metadata_update_cancel, 1039 .metadata_update_cancel = metadata_update_cancel,
1025 .area_resyncing = area_resyncing, 1040 .area_resyncing = area_resyncing,
1026 .add_new_disk_start = add_new_disk_start, 1041 .add_new_disk = add_new_disk,
1027 .add_new_disk_finish = add_new_disk_finish, 1042 .add_new_disk_cancel = add_new_disk_cancel,
1028 .new_disk_ack = new_disk_ack, 1043 .new_disk_ack = new_disk_ack,
1029 .remove_disk = remove_disk, 1044 .remove_disk = remove_disk,
1030 .gather_bitmaps = gather_bitmaps, 1045 .gather_bitmaps = gather_bitmaps,
diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h
index c94172673599..e75ea2613184 100644
--- a/drivers/md/md-cluster.h
+++ b/drivers/md/md-cluster.h
@@ -15,12 +15,12 @@ struct md_cluster_operations {
15 int (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi); 15 int (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi);
16 int (*metadata_update_start)(struct mddev *mddev); 16 int (*metadata_update_start)(struct mddev *mddev);
17 int (*metadata_update_finish)(struct mddev *mddev); 17 int (*metadata_update_finish)(struct mddev *mddev);
18 int (*metadata_update_cancel)(struct mddev *mddev); 18 void (*metadata_update_cancel)(struct mddev *mddev);
19 int (*resync_start)(struct mddev *mddev); 19 int (*resync_start)(struct mddev *mddev);
20 int (*resync_finish)(struct mddev *mddev); 20 int (*resync_finish)(struct mddev *mddev);
21 int (*area_resyncing)(struct mddev *mddev, int direction, sector_t lo, sector_t hi); 21 int (*area_resyncing)(struct mddev *mddev, int direction, sector_t lo, sector_t hi);
22 int (*add_new_disk_start)(struct mddev *mddev, struct md_rdev *rdev); 22 int (*add_new_disk)(struct mddev *mddev, struct md_rdev *rdev);
23 int (*add_new_disk_finish)(struct mddev *mddev); 23 void (*add_new_disk_cancel)(struct mddev *mddev);
24 int (*new_disk_ack)(struct mddev *mddev, bool ack); 24 int (*new_disk_ack)(struct mddev *mddev, bool ack);
25 int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev); 25 int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev);
26 int (*gather_bitmaps)(struct md_rdev *rdev); 26 int (*gather_bitmaps)(struct md_rdev *rdev);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 61e897def04f..8a6f67f55d3d 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3246,14 +3246,6 @@ static void analyze_sbs(struct mddev *mddev)
3246 md_kick_rdev_from_array(rdev); 3246 md_kick_rdev_from_array(rdev);
3247 continue; 3247 continue;
3248 } 3248 }
3249 /* No device should have a Candidate flag
3250 * when reading devices
3251 */
3252 if (test_bit(Candidate, &rdev->flags)) {
3253 pr_info("md: kicking Cluster Candidate %s from array!\n",
3254 bdevname(rdev->bdev, b));
3255 md_kick_rdev_from_array(rdev);
3256 }
3257 } 3249 }
3258 if (mddev->level == LEVEL_MULTIPATH) { 3250 if (mddev->level == LEVEL_MULTIPATH) {
3259 rdev->desc_nr = i++; 3251 rdev->desc_nr = i++;
@@ -5950,19 +5942,12 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
5950 * check whether the device shows up in other nodes 5942 * check whether the device shows up in other nodes
5951 */ 5943 */
5952 if (mddev_is_clustered(mddev)) { 5944 if (mddev_is_clustered(mddev)) {
5953 if (info->state & (1 << MD_DISK_CANDIDATE)) { 5945 if (info->state & (1 << MD_DISK_CANDIDATE))
5954 /* Through --cluster-confirm */
5955 set_bit(Candidate, &rdev->flags); 5946 set_bit(Candidate, &rdev->flags);
5956 err = md_cluster_ops->new_disk_ack(mddev, true); 5947 else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
5957 if (err) {
5958 export_rdev(rdev);
5959 return err;
5960 }
5961 } else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
5962 /* --add initiated by this node */ 5948 /* --add initiated by this node */
5963 err = md_cluster_ops->add_new_disk_start(mddev, rdev); 5949 err = md_cluster_ops->add_new_disk(mddev, rdev);
5964 if (err) { 5950 if (err) {
5965 md_cluster_ops->add_new_disk_finish(mddev);
5966 export_rdev(rdev); 5951 export_rdev(rdev);
5967 return err; 5952 return err;
5968 } 5953 }
@@ -5971,13 +5956,23 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
5971 5956
5972 rdev->raid_disk = -1; 5957 rdev->raid_disk = -1;
5973 err = bind_rdev_to_array(rdev, mddev); 5958 err = bind_rdev_to_array(rdev, mddev);
5959
5974 if (err) 5960 if (err)
5975 export_rdev(rdev); 5961 export_rdev(rdev);
5976 else 5962
5963 if (mddev_is_clustered(mddev)) {
5964 if (info->state & (1 << MD_DISK_CANDIDATE))
5965 md_cluster_ops->new_disk_ack(mddev, (err == 0));
5966 else {
5967 if (err)
5968 md_cluster_ops->add_new_disk_cancel(mddev);
5969 else
5970 err = add_bound_rdev(rdev);
5971 }
5972
5973 } else if (!err)
5977 err = add_bound_rdev(rdev); 5974 err = add_bound_rdev(rdev);
5978 if (mddev_is_clustered(mddev) && 5975
5979 (info->state & (1 << MD_DISK_CLUSTER_ADD)))
5980 md_cluster_ops->add_new_disk_finish(mddev);
5981 return err; 5976 return err;
5982 } 5977 }
5983 5978
@@ -8055,6 +8050,8 @@ static int remove_and_add_spares(struct mddev *mddev,
8055 rdev_for_each(rdev, mddev) { 8050 rdev_for_each(rdev, mddev) {
8056 if (this && this != rdev) 8051 if (this && this != rdev)
8057 continue; 8052 continue;
8053 if (test_bit(Candidate, &rdev->flags))
8054 continue;
8058 if (rdev->raid_disk >= 0 && 8055 if (rdev->raid_disk >= 0 &&
8059 !test_bit(In_sync, &rdev->flags) && 8056 !test_bit(In_sync, &rdev->flags) &&
8060 !test_bit(Faulty, &rdev->flags)) 8057 !test_bit(Faulty, &rdev->flags))
@@ -8972,6 +8969,17 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
8972 8969
8973 /* Check if the roles changed */ 8970 /* Check if the roles changed */
8974 role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]); 8971 role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);
8972
8973 if (test_bit(Candidate, &rdev2->flags)) {
8974 if (role == 0xfffe) {
8975 pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev,b));
8976 md_kick_rdev_from_array(rdev2);
8977 continue;
8978 }
8979 else
8980 clear_bit(Candidate, &rdev2->flags);
8981 }
8982
8975 if (role != rdev2->raid_disk) { 8983 if (role != rdev2->raid_disk) {
8976 /* got activated */ 8984 /* got activated */
8977 if (rdev2->raid_disk == -1 && role != 0xffff) { 8985 if (rdev2->raid_disk == -1 && role != 0xffff) {