diff options
author | Goldwyn Rodrigues <rgoldwyn@suse.com> | 2015-10-01 14:20:27 -0400 |
---|---|---|
committer | Goldwyn Rodrigues <rgoldwyn@suse.com> | 2015-10-12 04:35:30 -0400 |
commit | dbb64f8635f5d68192108b88759a34633a4bd558 (patch) | |
tree | 1159afed1de7314ce0d412469031832d0295c905 | |
parent | c186b128cda5a246da25f474e4689cb2bfacfcac (diff) |
md-cluster: Fix adding of new disk with new reload code
Adding the disk worked incorrectly with the new reload code. Fix it:
- No operation should be performed on rdev marked as Candidate
- After a metadata update operation, kick a Candidate disk if its role is
0xfffe; otherwise clear the Candidate bit and continue with the regular change check.
- Save the mode of the lock resource so that lock_comm() can check whether the
token lock is already held, because lock_comm() can be called twice while
adding a disk. However, unlock_comm() must be called only once.
- add_new_disk() is called by the node initiating the --add operation.
If it needs to be canceled, call add_new_disk_cancel(). The operation
is completed by md_update_sb() which will write and unlock the
communication.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
-rw-r--r-- | drivers/md/md-cluster.c | 35 | ||||
-rw-r--r-- | drivers/md/md-cluster.h | 6 | ||||
-rw-r--r-- | drivers/md/md.c | 52 |
3 files changed, 58 insertions, 35 deletions
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index e1ce9c9a0473..28494e9f8d02 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c | |||
@@ -28,6 +28,7 @@ struct dlm_lock_resource { | |||
28 | struct completion completion; /* completion for synchronized locking */ | 28 | struct completion completion; /* completion for synchronized locking */ |
29 | void (*bast)(void *arg, int mode); /* blocking AST function pointer*/ | 29 | void (*bast)(void *arg, int mode); /* blocking AST function pointer*/ |
30 | struct mddev *mddev; /* pointing back to mddev. */ | 30 | struct mddev *mddev; /* pointing back to mddev. */ |
31 | int mode; | ||
31 | }; | 32 | }; |
32 | 33 | ||
33 | struct suspend_info { | 34 | struct suspend_info { |
@@ -107,6 +108,8 @@ static int dlm_lock_sync(struct dlm_lock_resource *res, int mode) | |||
107 | if (ret) | 108 | if (ret) |
108 | return ret; | 109 | return ret; |
109 | wait_for_completion(&res->completion); | 110 | wait_for_completion(&res->completion); |
111 | if (res->lksb.sb_status == 0) | ||
112 | res->mode = mode; | ||
110 | return res->lksb.sb_status; | 113 | return res->lksb.sb_status; |
111 | } | 114 | } |
112 | 115 | ||
@@ -128,6 +131,7 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev, | |||
128 | init_completion(&res->completion); | 131 | init_completion(&res->completion); |
129 | res->ls = cinfo->lockspace; | 132 | res->ls = cinfo->lockspace; |
130 | res->mddev = mddev; | 133 | res->mddev = mddev; |
134 | res->mode = DLM_LOCK_IV; | ||
131 | namelen = strlen(name); | 135 | namelen = strlen(name); |
132 | res->name = kzalloc(namelen + 1, GFP_KERNEL); | 136 | res->name = kzalloc(namelen + 1, GFP_KERNEL); |
133 | if (!res->name) { | 137 | if (!res->name) { |
@@ -536,11 +540,17 @@ static void recv_daemon(struct md_thread *thread) | |||
536 | /* lock_comm() | 540 | /* lock_comm() |
537 | * Takes the lock on the TOKEN lock resource so no other | 541 | * Takes the lock on the TOKEN lock resource so no other |
538 | * node can communicate while the operation is underway. | 542 | * node can communicate while the operation is underway. |
543 | * If called again, and the TOKEN lock is already in EX mode | ||
544 | * return success. However, care must be taken that unlock_comm() | ||
545 | * is called only once. | ||
539 | */ | 546 | */ |
540 | static int lock_comm(struct md_cluster_info *cinfo) | 547 | static int lock_comm(struct md_cluster_info *cinfo) |
541 | { | 548 | { |
542 | int error; | 549 | int error; |
543 | 550 | ||
551 | if (cinfo->token_lockres->mode == DLM_LOCK_EX) | ||
552 | return 0; | ||
553 | |||
544 | error = dlm_lock_sync(cinfo->token_lockres, DLM_LOCK_EX); | 554 | error = dlm_lock_sync(cinfo->token_lockres, DLM_LOCK_EX); |
545 | if (error) | 555 | if (error) |
546 | pr_err("md-cluster(%s:%d): failed to get EX on TOKEN (%d)\n", | 556 | pr_err("md-cluster(%s:%d): failed to get EX on TOKEN (%d)\n", |
@@ -550,6 +560,7 @@ static int lock_comm(struct md_cluster_info *cinfo) | |||
550 | 560 | ||
551 | static void unlock_comm(struct md_cluster_info *cinfo) | 561 | static void unlock_comm(struct md_cluster_info *cinfo) |
552 | { | 562 | { |
563 | WARN_ON(cinfo->token_lockres->mode != DLM_LOCK_EX); | ||
553 | dlm_unlock_sync(cinfo->token_lockres); | 564 | dlm_unlock_sync(cinfo->token_lockres); |
554 | } | 565 | } |
555 | 566 | ||
@@ -862,11 +873,10 @@ static int metadata_update_finish(struct mddev *mddev) | |||
862 | return ret; | 873 | return ret; |
863 | } | 874 | } |
864 | 875 | ||
865 | static int metadata_update_cancel(struct mddev *mddev) | 876 | static void metadata_update_cancel(struct mddev *mddev) |
866 | { | 877 | { |
867 | struct md_cluster_info *cinfo = mddev->cluster_info; | 878 | struct md_cluster_info *cinfo = mddev->cluster_info; |
868 | 879 | unlock_comm(cinfo); | |
869 | return dlm_unlock_sync(cinfo->token_lockres); | ||
870 | } | 880 | } |
871 | 881 | ||
872 | static int resync_start(struct mddev *mddev) | 882 | static int resync_start(struct mddev *mddev) |
@@ -925,7 +935,11 @@ out: | |||
925 | return ret; | 935 | return ret; |
926 | } | 936 | } |
927 | 937 | ||
928 | static int add_new_disk_start(struct mddev *mddev, struct md_rdev *rdev) | 938 | /* add_new_disk() - initiates a disk add |
939 | * However, if this fails before writing md_update_sb(), | ||
940 | * add_new_disk_cancel() must be called to release token lock | ||
941 | */ | ||
942 | static int add_new_disk(struct mddev *mddev, struct md_rdev *rdev) | ||
929 | { | 943 | { |
930 | struct md_cluster_info *cinfo = mddev->cluster_info; | 944 | struct md_cluster_info *cinfo = mddev->cluster_info; |
931 | struct cluster_msg cmsg; | 945 | struct cluster_msg cmsg; |
@@ -947,16 +961,17 @@ static int add_new_disk_start(struct mddev *mddev, struct md_rdev *rdev) | |||
947 | /* Some node does not "see" the device */ | 961 | /* Some node does not "see" the device */ |
948 | if (ret == -EAGAIN) | 962 | if (ret == -EAGAIN) |
949 | ret = -ENOENT; | 963 | ret = -ENOENT; |
964 | if (ret) | ||
965 | unlock_comm(cinfo); | ||
950 | else | 966 | else |
951 | dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR); | 967 | dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR); |
952 | return ret; | 968 | return ret; |
953 | } | 969 | } |
954 | 970 | ||
955 | static int add_new_disk_finish(struct mddev *mddev) | 971 | static void add_new_disk_cancel(struct mddev *mddev) |
956 | { | 972 | { |
957 | /* Write sb and inform others */ | 973 | struct md_cluster_info *cinfo = mddev->cluster_info; |
958 | md_update_sb(mddev, 1); | 974 | unlock_comm(cinfo); |
959 | return metadata_update_finish(mddev); | ||
960 | } | 975 | } |
961 | 976 | ||
962 | static int new_disk_ack(struct mddev *mddev, bool ack) | 977 | static int new_disk_ack(struct mddev *mddev, bool ack) |
@@ -1023,8 +1038,8 @@ static struct md_cluster_operations cluster_ops = { | |||
1023 | .metadata_update_finish = metadata_update_finish, | 1038 | .metadata_update_finish = metadata_update_finish, |
1024 | .metadata_update_cancel = metadata_update_cancel, | 1039 | .metadata_update_cancel = metadata_update_cancel, |
1025 | .area_resyncing = area_resyncing, | 1040 | .area_resyncing = area_resyncing, |
1026 | .add_new_disk_start = add_new_disk_start, | 1041 | .add_new_disk = add_new_disk, |
1027 | .add_new_disk_finish = add_new_disk_finish, | 1042 | .add_new_disk_cancel = add_new_disk_cancel, |
1028 | .new_disk_ack = new_disk_ack, | 1043 | .new_disk_ack = new_disk_ack, |
1029 | .remove_disk = remove_disk, | 1044 | .remove_disk = remove_disk, |
1030 | .gather_bitmaps = gather_bitmaps, | 1045 | .gather_bitmaps = gather_bitmaps, |
diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h index c94172673599..e75ea2613184 100644 --- a/drivers/md/md-cluster.h +++ b/drivers/md/md-cluster.h | |||
@@ -15,12 +15,12 @@ struct md_cluster_operations { | |||
15 | int (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi); | 15 | int (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi); |
16 | int (*metadata_update_start)(struct mddev *mddev); | 16 | int (*metadata_update_start)(struct mddev *mddev); |
17 | int (*metadata_update_finish)(struct mddev *mddev); | 17 | int (*metadata_update_finish)(struct mddev *mddev); |
18 | int (*metadata_update_cancel)(struct mddev *mddev); | 18 | void (*metadata_update_cancel)(struct mddev *mddev); |
19 | int (*resync_start)(struct mddev *mddev); | 19 | int (*resync_start)(struct mddev *mddev); |
20 | int (*resync_finish)(struct mddev *mddev); | 20 | int (*resync_finish)(struct mddev *mddev); |
21 | int (*area_resyncing)(struct mddev *mddev, int direction, sector_t lo, sector_t hi); | 21 | int (*area_resyncing)(struct mddev *mddev, int direction, sector_t lo, sector_t hi); |
22 | int (*add_new_disk_start)(struct mddev *mddev, struct md_rdev *rdev); | 22 | int (*add_new_disk)(struct mddev *mddev, struct md_rdev *rdev); |
23 | int (*add_new_disk_finish)(struct mddev *mddev); | 23 | void (*add_new_disk_cancel)(struct mddev *mddev); |
24 | int (*new_disk_ack)(struct mddev *mddev, bool ack); | 24 | int (*new_disk_ack)(struct mddev *mddev, bool ack); |
25 | int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev); | 25 | int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev); |
26 | int (*gather_bitmaps)(struct md_rdev *rdev); | 26 | int (*gather_bitmaps)(struct md_rdev *rdev); |
diff --git a/drivers/md/md.c b/drivers/md/md.c index 61e897def04f..8a6f67f55d3d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -3246,14 +3246,6 @@ static void analyze_sbs(struct mddev *mddev) | |||
3246 | md_kick_rdev_from_array(rdev); | 3246 | md_kick_rdev_from_array(rdev); |
3247 | continue; | 3247 | continue; |
3248 | } | 3248 | } |
3249 | /* No device should have a Candidate flag | ||
3250 | * when reading devices | ||
3251 | */ | ||
3252 | if (test_bit(Candidate, &rdev->flags)) { | ||
3253 | pr_info("md: kicking Cluster Candidate %s from array!\n", | ||
3254 | bdevname(rdev->bdev, b)); | ||
3255 | md_kick_rdev_from_array(rdev); | ||
3256 | } | ||
3257 | } | 3249 | } |
3258 | if (mddev->level == LEVEL_MULTIPATH) { | 3250 | if (mddev->level == LEVEL_MULTIPATH) { |
3259 | rdev->desc_nr = i++; | 3251 | rdev->desc_nr = i++; |
@@ -5950,19 +5942,12 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info) | |||
5950 | * check whether the device shows up in other nodes | 5942 | * check whether the device shows up in other nodes |
5951 | */ | 5943 | */ |
5952 | if (mddev_is_clustered(mddev)) { | 5944 | if (mddev_is_clustered(mddev)) { |
5953 | if (info->state & (1 << MD_DISK_CANDIDATE)) { | 5945 | if (info->state & (1 << MD_DISK_CANDIDATE)) |
5954 | /* Through --cluster-confirm */ | ||
5955 | set_bit(Candidate, &rdev->flags); | 5946 | set_bit(Candidate, &rdev->flags); |
5956 | err = md_cluster_ops->new_disk_ack(mddev, true); | 5947 | else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) { |
5957 | if (err) { | ||
5958 | export_rdev(rdev); | ||
5959 | return err; | ||
5960 | } | ||
5961 | } else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) { | ||
5962 | /* --add initiated by this node */ | 5948 | /* --add initiated by this node */ |
5963 | err = md_cluster_ops->add_new_disk_start(mddev, rdev); | 5949 | err = md_cluster_ops->add_new_disk(mddev, rdev); |
5964 | if (err) { | 5950 | if (err) { |
5965 | md_cluster_ops->add_new_disk_finish(mddev); | ||
5966 | export_rdev(rdev); | 5951 | export_rdev(rdev); |
5967 | return err; | 5952 | return err; |
5968 | } | 5953 | } |
@@ -5971,13 +5956,23 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info) | |||
5971 | 5956 | ||
5972 | rdev->raid_disk = -1; | 5957 | rdev->raid_disk = -1; |
5973 | err = bind_rdev_to_array(rdev, mddev); | 5958 | err = bind_rdev_to_array(rdev, mddev); |
5959 | |||
5974 | if (err) | 5960 | if (err) |
5975 | export_rdev(rdev); | 5961 | export_rdev(rdev); |
5976 | else | 5962 | |
5963 | if (mddev_is_clustered(mddev)) { | ||
5964 | if (info->state & (1 << MD_DISK_CANDIDATE)) | ||
5965 | md_cluster_ops->new_disk_ack(mddev, (err == 0)); | ||
5966 | else { | ||
5967 | if (err) | ||
5968 | md_cluster_ops->add_new_disk_cancel(mddev); | ||
5969 | else | ||
5970 | err = add_bound_rdev(rdev); | ||
5971 | } | ||
5972 | |||
5973 | } else if (!err) | ||
5977 | err = add_bound_rdev(rdev); | 5974 | err = add_bound_rdev(rdev); |
5978 | if (mddev_is_clustered(mddev) && | 5975 | |
5979 | (info->state & (1 << MD_DISK_CLUSTER_ADD))) | ||
5980 | md_cluster_ops->add_new_disk_finish(mddev); | ||
5981 | return err; | 5976 | return err; |
5982 | } | 5977 | } |
5983 | 5978 | ||
@@ -8055,6 +8050,8 @@ static int remove_and_add_spares(struct mddev *mddev, | |||
8055 | rdev_for_each(rdev, mddev) { | 8050 | rdev_for_each(rdev, mddev) { |
8056 | if (this && this != rdev) | 8051 | if (this && this != rdev) |
8057 | continue; | 8052 | continue; |
8053 | if (test_bit(Candidate, &rdev->flags)) | ||
8054 | continue; | ||
8058 | if (rdev->raid_disk >= 0 && | 8055 | if (rdev->raid_disk >= 0 && |
8059 | !test_bit(In_sync, &rdev->flags) && | 8056 | !test_bit(In_sync, &rdev->flags) && |
8060 | !test_bit(Faulty, &rdev->flags)) | 8057 | !test_bit(Faulty, &rdev->flags)) |
@@ -8972,6 +8969,17 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev) | |||
8972 | 8969 | ||
8973 | /* Check if the roles changed */ | 8970 | /* Check if the roles changed */ |
8974 | role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]); | 8971 | role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]); |
8972 | |||
8973 | if (test_bit(Candidate, &rdev2->flags)) { | ||
8974 | if (role == 0xfffe) { | ||
8975 | pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev,b)); | ||
8976 | md_kick_rdev_from_array(rdev2); | ||
8977 | continue; | ||
8978 | } | ||
8979 | else | ||
8980 | clear_bit(Candidate, &rdev2->flags); | ||
8981 | } | ||
8982 | |||
8975 | if (role != rdev2->raid_disk) { | 8983 | if (role != rdev2->raid_disk) { |
8976 | /* got activated */ | 8984 | /* got activated */ |
8977 | if (rdev2->raid_disk == -1 && role != 0xffff) { | 8985 | if (rdev2->raid_disk == -1 && role != 0xffff) { |