diff options
author | Goldwyn Rodrigues <rgoldwyn@suse.com> | 2014-10-29 19:51:31 -0400 |
---|---|---|
committer | Goldwyn Rodrigues <rgoldwyn@suse.com> | 2015-02-23 10:59:07 -0500 |
commit | 1aee41f637694d4bbf91c24195f2b63e3f6badd2 (patch) | |
tree | b6f532e5a4265193dd5ebc547a0ab9f772143762 | |
parent | 7d49ffcfa3cc08aa2301bf3fdb1e423a3fd33ee7 (diff) |
Add new disk to clustered array
Algorithm:
1. Node 1 issues mdadm --manage /dev/mdX --add /dev/sdYY, which issues
ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CLUSTER_ADD)
2. Node 1 sends NEWDISK with uuid and slot number
3. Other nodes issue kobject_uevent_env with uuid and slot number
(Steps 4,5 could be a udev rule)
4. In userspace, the node searches for the disk, perhaps
using blkid -t SUB_UUID=""
5. Other nodes issue one of the following, depending on whether the disk
was found:
ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CANDIDATE and
disc.number set to slot number), if the disk was found
ioctl(CLUSTERED_DISK_NACK), if the disk was not found
6. Other nodes drop lock on no-new-dev (CR) if device is found
7. Node 1 attempts EX lock on no-new-dev
8. If node 1 gets the lock, it sends METADATA_UPDATED after unmarking the disk
as SpareLocal
9. If it cannot get the no-new-dev lock, it fails the operation and sends METADATA_UPDATED
10. Other nodes learn whether the device was added by re-reading the superblock after receiving the METADATA_UPDATED message.
Signed-off-by: Lidong Zhong <lzhong@suse.com>
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
-rw-r--r-- | drivers/md/md-cluster.c | 104 | ||||
-rw-r--r-- | drivers/md/md-cluster.h | 4 | ||||
-rw-r--r-- | drivers/md/md.c | 52 | ||||
-rw-r--r-- | drivers/md/md.h | 5 | ||||
-rw-r--r-- | drivers/md/raid1.c | 1 | ||||
-rw-r--r-- | include/uapi/linux/raid/md_p.h | 6 | ||||
-rw-r--r-- | include/uapi/linux/raid/md_u.h | 1 |
7 files changed, 169 insertions, 4 deletions
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index d85a6ca4443e..03e521a9ca7d 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c | |||
@@ -12,11 +12,13 @@ | |||
12 | #include <linux/module.h> | 12 | #include <linux/module.h> |
13 | #include <linux/dlm.h> | 13 | #include <linux/dlm.h> |
14 | #include <linux/sched.h> | 14 | #include <linux/sched.h> |
15 | #include <linux/raid/md_p.h> | ||
15 | #include "md.h" | 16 | #include "md.h" |
16 | #include "bitmap.h" | 17 | #include "bitmap.h" |
17 | #include "md-cluster.h" | 18 | #include "md-cluster.h" |
18 | 19 | ||
19 | #define LVB_SIZE 64 | 20 | #define LVB_SIZE 64 |
21 | #define NEW_DEV_TIMEOUT 5000 | ||
20 | 22 | ||
21 | struct dlm_lock_resource { | 23 | struct dlm_lock_resource { |
22 | dlm_lockspace_t *ls; | 24 | dlm_lockspace_t *ls; |
@@ -56,19 +58,25 @@ struct md_cluster_info { | |||
56 | struct dlm_lock_resource *ack_lockres; | 58 | struct dlm_lock_resource *ack_lockres; |
57 | struct dlm_lock_resource *message_lockres; | 59 | struct dlm_lock_resource *message_lockres; |
58 | struct dlm_lock_resource *token_lockres; | 60 | struct dlm_lock_resource *token_lockres; |
61 | struct dlm_lock_resource *no_new_dev_lockres; | ||
59 | struct md_thread *recv_thread; | 62 | struct md_thread *recv_thread; |
63 | struct completion newdisk_completion; | ||
60 | }; | 64 | }; |
61 | 65 | ||
62 | enum msg_type { | 66 | enum msg_type { |
63 | METADATA_UPDATED = 0, | 67 | METADATA_UPDATED = 0, |
64 | RESYNCING, | 68 | RESYNCING, |
69 | NEWDISK, | ||
65 | }; | 70 | }; |
66 | 71 | ||
67 | struct cluster_msg { | 72 | struct cluster_msg { |
68 | int type; | 73 | int type; |
69 | int slot; | 74 | int slot; |
75 | /* TODO: Unionize this for smaller footprint */ | ||
70 | sector_t low; | 76 | sector_t low; |
71 | sector_t high; | 77 | sector_t high; |
78 | char uuid[16]; | ||
79 | int raid_slot; | ||
72 | }; | 80 | }; |
73 | 81 | ||
74 | static void sync_ast(void *arg) | 82 | static void sync_ast(void *arg) |
@@ -358,13 +366,41 @@ static void process_suspend_info(struct md_cluster_info *cinfo, | |||
358 | spin_unlock_irq(&cinfo->suspend_lock); | 366 | spin_unlock_irq(&cinfo->suspend_lock); |
359 | } | 367 | } |
360 | 368 | ||
369 | static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg) | ||
370 | { | ||
371 | char disk_uuid[64]; | ||
372 | struct md_cluster_info *cinfo = mddev->cluster_info; | ||
373 | char event_name[] = "EVENT=ADD_DEVICE"; | ||
374 | char raid_slot[16]; | ||
375 | char *envp[] = {event_name, disk_uuid, raid_slot, NULL}; | ||
376 | int len; | ||
377 | |||
378 | len = snprintf(disk_uuid, 64, "DEVICE_UUID="); | ||
379 | pretty_uuid(disk_uuid + len, cmsg->uuid); | ||
380 | snprintf(raid_slot, 16, "RAID_DISK=%d", cmsg->raid_slot); | ||
381 | pr_info("%s:%d Sending kobject change with %s and %s\n", __func__, __LINE__, disk_uuid, raid_slot); | ||
382 | init_completion(&cinfo->newdisk_completion); | ||
383 | kobject_uevent_env(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE, envp); | ||
384 | wait_for_completion_timeout(&cinfo->newdisk_completion, | ||
385 | NEW_DEV_TIMEOUT); | ||
386 | } | ||
387 | |||
388 | |||
389 | static void process_metadata_update(struct mddev *mddev, struct cluster_msg *msg) | ||
390 | { | ||
391 | struct md_cluster_info *cinfo = mddev->cluster_info; | ||
392 | |||
393 | md_reload_sb(mddev); | ||
394 | dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR); | ||
395 | } | ||
396 | |||
361 | static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg) | 397 | static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg) |
362 | { | 398 | { |
363 | switch (msg->type) { | 399 | switch (msg->type) { |
364 | case METADATA_UPDATED: | 400 | case METADATA_UPDATED: |
365 | pr_info("%s: %d Received message: METADATA_UPDATE from %d\n", | 401 | pr_info("%s: %d Received message: METADATA_UPDATE from %d\n", |
366 | __func__, __LINE__, msg->slot); | 402 | __func__, __LINE__, msg->slot); |
367 | md_reload_sb(mddev); | 403 | process_metadata_update(mddev, msg); |
368 | break; | 404 | break; |
369 | case RESYNCING: | 405 | case RESYNCING: |
370 | pr_info("%s: %d Received message: RESYNCING from %d\n", | 406 | pr_info("%s: %d Received message: RESYNCING from %d\n", |
@@ -372,6 +408,10 @@ static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg) | |||
372 | process_suspend_info(mddev->cluster_info, msg->slot, | 408 | process_suspend_info(mddev->cluster_info, msg->slot, |
373 | msg->low, msg->high); | 409 | msg->low, msg->high); |
374 | break; | 410 | break; |
411 | case NEWDISK: | ||
412 | pr_info("%s: %d Received message: NEWDISK from %d\n", | ||
413 | __func__, __LINE__, msg->slot); | ||
414 | process_add_new_disk(mddev, msg); | ||
375 | }; | 415 | }; |
376 | } | 416 | } |
377 | 417 | ||
@@ -593,10 +633,18 @@ static int join(struct mddev *mddev, int nodes) | |||
593 | cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0); | 633 | cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0); |
594 | if (!cinfo->ack_lockres) | 634 | if (!cinfo->ack_lockres) |
595 | goto err; | 635 | goto err; |
636 | cinfo->no_new_dev_lockres = lockres_init(mddev, "no-new-dev", NULL, 0); | ||
637 | if (!cinfo->no_new_dev_lockres) | ||
638 | goto err; | ||
639 | |||
596 | /* get sync CR lock on ACK. */ | 640 | /* get sync CR lock on ACK. */ |
597 | if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR)) | 641 | if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR)) |
598 | pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n", | 642 | pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n", |
599 | ret); | 643 | ret); |
644 | /* get sync CR lock on no-new-dev. */ | ||
645 | if (dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR)) | ||
646 | pr_err("md-cluster: failed to get a sync CR lock on no-new-dev!(%d)\n", ret); | ||
647 | |||
600 | 648 | ||
601 | pr_info("md-cluster: Joined cluster %s slot %d\n", str, cinfo->slot_number); | 649 | pr_info("md-cluster: Joined cluster %s slot %d\n", str, cinfo->slot_number); |
602 | snprintf(str, 64, "bitmap%04d", cinfo->slot_number - 1); | 650 | snprintf(str, 64, "bitmap%04d", cinfo->slot_number - 1); |
@@ -621,6 +669,7 @@ err: | |||
621 | lockres_free(cinfo->message_lockres); | 669 | lockres_free(cinfo->message_lockres); |
622 | lockres_free(cinfo->token_lockres); | 670 | lockres_free(cinfo->token_lockres); |
623 | lockres_free(cinfo->ack_lockres); | 671 | lockres_free(cinfo->ack_lockres); |
672 | lockres_free(cinfo->no_new_dev_lockres); | ||
624 | lockres_free(cinfo->bitmap_lockres); | 673 | lockres_free(cinfo->bitmap_lockres); |
625 | lockres_free(cinfo->sb_lock); | 674 | lockres_free(cinfo->sb_lock); |
626 | if (cinfo->lockspace) | 675 | if (cinfo->lockspace) |
@@ -642,6 +691,7 @@ static int leave(struct mddev *mddev) | |||
642 | lockres_free(cinfo->message_lockres); | 691 | lockres_free(cinfo->message_lockres); |
643 | lockres_free(cinfo->token_lockres); | 692 | lockres_free(cinfo->token_lockres); |
644 | lockres_free(cinfo->ack_lockres); | 693 | lockres_free(cinfo->ack_lockres); |
694 | lockres_free(cinfo->no_new_dev_lockres); | ||
645 | lockres_free(cinfo->sb_lock); | 695 | lockres_free(cinfo->sb_lock); |
646 | lockres_free(cinfo->bitmap_lockres); | 696 | lockres_free(cinfo->bitmap_lockres); |
647 | dlm_release_lockspace(cinfo->lockspace, 2); | 697 | dlm_release_lockspace(cinfo->lockspace, 2); |
@@ -742,6 +792,55 @@ out: | |||
742 | return ret; | 792 | return ret; |
743 | } | 793 | } |
744 | 794 | ||
795 | static int add_new_disk_start(struct mddev *mddev, struct md_rdev *rdev) | ||
796 | { | ||
797 | struct md_cluster_info *cinfo = mddev->cluster_info; | ||
798 | struct cluster_msg cmsg; | ||
799 | int ret = 0; | ||
800 | struct mdp_superblock_1 *sb = page_address(rdev->sb_page); | ||
801 | char *uuid = sb->device_uuid; | ||
802 | |||
803 | memset(&cmsg, 0, sizeof(cmsg)); | ||
804 | cmsg.type = cpu_to_le32(NEWDISK); | ||
805 | memcpy(cmsg.uuid, uuid, 16); | ||
806 | cmsg.raid_slot = rdev->desc_nr; | ||
807 | lock_comm(cinfo); | ||
808 | ret = __sendmsg(cinfo, &cmsg); | ||
809 | if (ret) | ||
810 | return ret; | ||
811 | cinfo->no_new_dev_lockres->flags |= DLM_LKF_NOQUEUE; | ||
812 | ret = dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_EX); | ||
813 | cinfo->no_new_dev_lockres->flags &= ~DLM_LKF_NOQUEUE; | ||
814 | /* Some node does not "see" the device */ | ||
815 | if (ret == -EAGAIN) | ||
816 | ret = -ENOENT; | ||
817 | else | ||
818 | dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR); | ||
819 | return ret; | ||
820 | } | ||
821 | |||
822 | static int add_new_disk_finish(struct mddev *mddev) | ||
823 | { | ||
824 | struct cluster_msg cmsg; | ||
825 | struct md_cluster_info *cinfo = mddev->cluster_info; | ||
826 | int ret; | ||
827 | /* Write sb and inform others */ | ||
828 | md_update_sb(mddev, 1); | ||
829 | cmsg.type = METADATA_UPDATED; | ||
830 | ret = __sendmsg(cinfo, &cmsg); | ||
831 | unlock_comm(cinfo); | ||
832 | return ret; | ||
833 | } | ||
834 | |||
835 | static void new_disk_ack(struct mddev *mddev, bool ack) | ||
836 | { | ||
837 | struct md_cluster_info *cinfo = mddev->cluster_info; | ||
838 | |||
839 | if (ack) | ||
840 | dlm_unlock_sync(cinfo->no_new_dev_lockres); | ||
841 | complete(&cinfo->newdisk_completion); | ||
842 | } | ||
843 | |||
745 | static struct md_cluster_operations cluster_ops = { | 844 | static struct md_cluster_operations cluster_ops = { |
746 | .join = join, | 845 | .join = join, |
747 | .leave = leave, | 846 | .leave = leave, |
@@ -753,6 +852,9 @@ static struct md_cluster_operations cluster_ops = { | |||
753 | .metadata_update_finish = metadata_update_finish, | 852 | .metadata_update_finish = metadata_update_finish, |
754 | .metadata_update_cancel = metadata_update_cancel, | 853 | .metadata_update_cancel = metadata_update_cancel, |
755 | .area_resyncing = area_resyncing, | 854 | .area_resyncing = area_resyncing, |
855 | .add_new_disk_start = add_new_disk_start, | ||
856 | .add_new_disk_finish = add_new_disk_finish, | ||
857 | .new_disk_ack = new_disk_ack, | ||
756 | }; | 858 | }; |
757 | 859 | ||
758 | static int __init cluster_init(void) | 860 | static int __init cluster_init(void) |
diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h index 03785402afaa..60d7e58964f5 100644 --- a/drivers/md/md-cluster.h +++ b/drivers/md/md-cluster.h | |||
@@ -6,6 +6,7 @@ | |||
6 | #include "md.h" | 6 | #include "md.h" |
7 | 7 | ||
8 | struct mddev; | 8 | struct mddev; |
9 | struct md_rdev; | ||
9 | 10 | ||
10 | struct md_cluster_operations { | 11 | struct md_cluster_operations { |
11 | int (*join)(struct mddev *mddev, int nodes); | 12 | int (*join)(struct mddev *mddev, int nodes); |
@@ -18,6 +19,9 @@ struct md_cluster_operations { | |||
18 | int (*metadata_update_finish)(struct mddev *mddev); | 19 | int (*metadata_update_finish)(struct mddev *mddev); |
19 | int (*metadata_update_cancel)(struct mddev *mddev); | 20 | int (*metadata_update_cancel)(struct mddev *mddev); |
20 | int (*area_resyncing)(struct mddev *mddev, sector_t lo, sector_t hi); | 21 | int (*area_resyncing)(struct mddev *mddev, sector_t lo, sector_t hi); |
22 | int (*add_new_disk_start)(struct mddev *mddev, struct md_rdev *rdev); | ||
23 | int (*add_new_disk_finish)(struct mddev *mddev); | ||
24 | void (*new_disk_ack)(struct mddev *mddev, bool ack); | ||
21 | }; | 25 | }; |
22 | 26 | ||
23 | #endif /* _MD_CLUSTER_H */ | 27 | #endif /* _MD_CLUSTER_H */ |
diff --git a/drivers/md/md.c b/drivers/md/md.c index fe0484648de4..5703c2e89f3a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -2210,7 +2210,7 @@ static void sync_sbs(struct mddev *mddev, int nospares) | |||
2210 | } | 2210 | } |
2211 | } | 2211 | } |
2212 | 2212 | ||
2213 | static void md_update_sb(struct mddev *mddev, int force_change) | 2213 | void md_update_sb(struct mddev *mddev, int force_change) |
2214 | { | 2214 | { |
2215 | struct md_rdev *rdev; | 2215 | struct md_rdev *rdev; |
2216 | int sync_req; | 2216 | int sync_req; |
@@ -2371,6 +2371,7 @@ repeat: | |||
2371 | wake_up(&rdev->blocked_wait); | 2371 | wake_up(&rdev->blocked_wait); |
2372 | } | 2372 | } |
2373 | } | 2373 | } |
2374 | EXPORT_SYMBOL(md_update_sb); | ||
2374 | 2375 | ||
2375 | /* words written to sysfs files may, or may not, be \n terminated. | 2376 | /* words written to sysfs files may, or may not, be \n terminated. |
2376 | * We want to accept with case. For this we use cmd_match. | 2377 | * We want to accept with case. For this we use cmd_match. |
@@ -3151,7 +3152,7 @@ static void analyze_sbs(struct mddev *mddev) | |||
3151 | kick_rdev_from_array(rdev); | 3152 | kick_rdev_from_array(rdev); |
3152 | continue; | 3153 | continue; |
3153 | } | 3154 | } |
3154 | if (rdev != freshest) | 3155 | if (rdev != freshest) { |
3155 | if (super_types[mddev->major_version]. | 3156 | if (super_types[mddev->major_version]. |
3156 | validate_super(mddev, rdev)) { | 3157 | validate_super(mddev, rdev)) { |
3157 | printk(KERN_WARNING "md: kicking non-fresh %s" | 3158 | printk(KERN_WARNING "md: kicking non-fresh %s" |
@@ -3160,6 +3161,15 @@ static void analyze_sbs(struct mddev *mddev) | |||
3160 | kick_rdev_from_array(rdev); | 3161 | kick_rdev_from_array(rdev); |
3161 | continue; | 3162 | continue; |
3162 | } | 3163 | } |
3164 | /* No device should have a Candidate flag | ||
3165 | * when reading devices | ||
3166 | */ | ||
3167 | if (test_bit(Candidate, &rdev->flags)) { | ||
3168 | pr_info("md: kicking Cluster Candidate %s from array!\n", | ||
3169 | bdevname(rdev->bdev, b)); | ||
3170 | kick_rdev_from_array(rdev); | ||
3171 | } | ||
3172 | } | ||
3163 | if (mddev->level == LEVEL_MULTIPATH) { | 3173 | if (mddev->level == LEVEL_MULTIPATH) { |
3164 | rdev->desc_nr = i++; | 3174 | rdev->desc_nr = i++; |
3165 | rdev->raid_disk = rdev->desc_nr; | 3175 | rdev->raid_disk = rdev->desc_nr; |
@@ -5655,7 +5665,6 @@ static int get_array_info(struct mddev *mddev, void __user *arg) | |||
5655 | info.state |= (1<<MD_SB_BITMAP_PRESENT); | 5665 | info.state |= (1<<MD_SB_BITMAP_PRESENT); |
5656 | if (mddev_is_clustered(mddev)) | 5666 | if (mddev_is_clustered(mddev)) |
5657 | info.state |= (1<<MD_SB_CLUSTERED); | 5667 | info.state |= (1<<MD_SB_CLUSTERED); |
5658 | |||
5659 | info.active_disks = insync; | 5668 | info.active_disks = insync; |
5660 | info.working_disks = working; | 5669 | info.working_disks = working; |
5661 | info.failed_disks = failed; | 5670 | info.failed_disks = failed; |
@@ -5744,6 +5753,13 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info) | |||
5744 | struct md_rdev *rdev; | 5753 | struct md_rdev *rdev; |
5745 | dev_t dev = MKDEV(info->major,info->minor); | 5754 | dev_t dev = MKDEV(info->major,info->minor); |
5746 | 5755 | ||
5756 | if (mddev_is_clustered(mddev) && | ||
5757 | !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) { | ||
5758 | pr_err("%s: Cannot add to clustered mddev. Try --cluster-add\n", | ||
5759 | mdname(mddev)); | ||
5760 | return -EINVAL; | ||
5761 | } | ||
5762 | |||
5747 | if (info->major != MAJOR(dev) || info->minor != MINOR(dev)) | 5763 | if (info->major != MAJOR(dev) || info->minor != MINOR(dev)) |
5748 | return -EOVERFLOW; | 5764 | return -EOVERFLOW; |
5749 | 5765 | ||
@@ -5830,6 +5846,25 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info) | |||
5830 | else | 5846 | else |
5831 | clear_bit(WriteMostly, &rdev->flags); | 5847 | clear_bit(WriteMostly, &rdev->flags); |
5832 | 5848 | ||
5849 | /* | ||
5850 | * check whether the device shows up in other nodes | ||
5851 | */ | ||
5852 | if (mddev_is_clustered(mddev)) { | ||
5853 | if (info->state & (1 << MD_DISK_CANDIDATE)) { | ||
5854 | /* Through --cluster-confirm */ | ||
5855 | set_bit(Candidate, &rdev->flags); | ||
5856 | md_cluster_ops->new_disk_ack(mddev, true); | ||
5857 | } else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) { | ||
5858 | /* --add initiated by this node */ | ||
5859 | err = md_cluster_ops->add_new_disk_start(mddev, rdev); | ||
5860 | if (err) { | ||
5861 | md_cluster_ops->add_new_disk_finish(mddev); | ||
5862 | export_rdev(rdev); | ||
5863 | return err; | ||
5864 | } | ||
5865 | } | ||
5866 | } | ||
5867 | |||
5833 | rdev->raid_disk = -1; | 5868 | rdev->raid_disk = -1; |
5834 | err = bind_rdev_to_array(rdev, mddev); | 5869 | err = bind_rdev_to_array(rdev, mddev); |
5835 | if (!err && !mddev->pers->hot_remove_disk) { | 5870 | if (!err && !mddev->pers->hot_remove_disk) { |
@@ -5855,6 +5890,9 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info) | |||
5855 | if (!err) | 5890 | if (!err) |
5856 | md_new_event(mddev); | 5891 | md_new_event(mddev); |
5857 | md_wakeup_thread(mddev->thread); | 5892 | md_wakeup_thread(mddev->thread); |
5893 | if (mddev_is_clustered(mddev) && | ||
5894 | (info->state & (1 << MD_DISK_CLUSTER_ADD))) | ||
5895 | md_cluster_ops->add_new_disk_finish(mddev); | ||
5858 | return err; | 5896 | return err; |
5859 | } | 5897 | } |
5860 | 5898 | ||
@@ -6456,6 +6494,7 @@ static inline bool md_ioctl_valid(unsigned int cmd) | |||
6456 | case SET_DISK_FAULTY: | 6494 | case SET_DISK_FAULTY: |
6457 | case STOP_ARRAY: | 6495 | case STOP_ARRAY: |
6458 | case STOP_ARRAY_RO: | 6496 | case STOP_ARRAY_RO: |
6497 | case CLUSTERED_DISK_NACK: | ||
6459 | return true; | 6498 | return true; |
6460 | default: | 6499 | default: |
6461 | return false; | 6500 | return false; |
@@ -6728,6 +6767,13 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, | |||
6728 | goto unlock; | 6767 | goto unlock; |
6729 | } | 6768 | } |
6730 | 6769 | ||
6770 | case CLUSTERED_DISK_NACK: | ||
6771 | if (mddev_is_clustered(mddev)) | ||
6772 | md_cluster_ops->new_disk_ack(mddev, false); | ||
6773 | else | ||
6774 | err = -EINVAL; | ||
6775 | goto unlock; | ||
6776 | |||
6731 | case HOT_ADD_DISK: | 6777 | case HOT_ADD_DISK: |
6732 | err = hot_add_disk(mddev, new_decode_dev(arg)); | 6778 | err = hot_add_disk(mddev, new_decode_dev(arg)); |
6733 | goto unlock; | 6779 | goto unlock; |
diff --git a/drivers/md/md.h b/drivers/md/md.h index bfebcfdf54e6..6dc0ce09f50c 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
@@ -171,6 +171,10 @@ enum flag_bits { | |||
171 | * a want_replacement device with same | 171 | * a want_replacement device with same |
172 | * raid_disk number. | 172 | * raid_disk number. |
173 | */ | 173 | */ |
174 | Candidate, /* For clustered environments only: | ||
175 | * This device is seen locally but not | ||
176 | * by the whole cluster | ||
177 | */ | ||
174 | }; | 178 | }; |
175 | 179 | ||
176 | #define BB_LEN_MASK (0x00000000000001FFULL) | 180 | #define BB_LEN_MASK (0x00000000000001FFULL) |
@@ -666,6 +670,7 @@ extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, | |||
666 | 670 | ||
667 | extern void md_unplug(struct blk_plug_cb *cb, bool from_schedule); | 671 | extern void md_unplug(struct blk_plug_cb *cb, bool from_schedule); |
668 | extern void md_reload_sb(struct mddev *mddev); | 672 | extern void md_reload_sb(struct mddev *mddev); |
673 | extern void md_update_sb(struct mddev *mddev, int force); | ||
669 | static inline int mddev_check_plugged(struct mddev *mddev) | 674 | static inline int mddev_check_plugged(struct mddev *mddev) |
670 | { | 675 | { |
671 | return !!blk_check_plugged(md_unplug, mddev, | 676 | return !!blk_check_plugged(md_unplug, mddev, |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index f70d74189d16..53ed5d48308f 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -1571,6 +1571,7 @@ static int raid1_spare_active(struct mddev *mddev) | |||
1571 | struct md_rdev *rdev = conf->mirrors[i].rdev; | 1571 | struct md_rdev *rdev = conf->mirrors[i].rdev; |
1572 | struct md_rdev *repl = conf->mirrors[conf->raid_disks + i].rdev; | 1572 | struct md_rdev *repl = conf->mirrors[conf->raid_disks + i].rdev; |
1573 | if (repl | 1573 | if (repl |
1574 | && !test_bit(Candidate, &repl->flags) | ||
1574 | && repl->recovery_offset == MaxSector | 1575 | && repl->recovery_offset == MaxSector |
1575 | && !test_bit(Faulty, &repl->flags) | 1576 | && !test_bit(Faulty, &repl->flags) |
1576 | && !test_and_set_bit(In_sync, &repl->flags)) { | 1577 | && !test_and_set_bit(In_sync, &repl->flags)) { |
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index 643489d33e68..2ae6131e69a5 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h | |||
@@ -78,6 +78,12 @@ | |||
78 | #define MD_DISK_ACTIVE 1 /* disk is running or spare disk */ | 78 | #define MD_DISK_ACTIVE 1 /* disk is running or spare disk */ |
79 | #define MD_DISK_SYNC 2 /* disk is in sync with the raid set */ | 79 | #define MD_DISK_SYNC 2 /* disk is in sync with the raid set */ |
80 | #define MD_DISK_REMOVED 3 /* disk is in sync with the raid set */ | 80 | #define MD_DISK_REMOVED 3 /* disk is in sync with the raid set */ |
81 | #define MD_DISK_CLUSTER_ADD 4 /* Initiate a disk add across the cluster | ||
82 | * For clustered environments only. | ||
83 | */ | ||
84 | #define MD_DISK_CANDIDATE 5 /* disk is added as spare (local) until confirmed | ||
85 | * For clustered environments only. | ||
86 | */ | ||
81 | 87 | ||
82 | #define MD_DISK_WRITEMOSTLY 9 /* disk is "write-mostly" is RAID1 config. | 88 | #define MD_DISK_WRITEMOSTLY 9 /* disk is "write-mostly" is RAID1 config. |
83 | * read requests will only be sent here in | 89 | * read requests will only be sent here in |
diff --git a/include/uapi/linux/raid/md_u.h b/include/uapi/linux/raid/md_u.h index 74e7c60c4716..1cb8aa6850b5 100644 --- a/include/uapi/linux/raid/md_u.h +++ b/include/uapi/linux/raid/md_u.h | |||
@@ -62,6 +62,7 @@ | |||
62 | #define STOP_ARRAY _IO (MD_MAJOR, 0x32) | 62 | #define STOP_ARRAY _IO (MD_MAJOR, 0x32) |
63 | #define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33) | 63 | #define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33) |
64 | #define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34) | 64 | #define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34) |
65 | #define CLUSTERED_DISK_NACK _IO (MD_MAJOR, 0x35) | ||
65 | 66 | ||
66 | /* 63 partitions with the alternate major number (mdp) */ | 67 | /* 63 partitions with the alternate major number (mdp) */ |
67 | #define MdpMinorShift 6 | 68 | #define MdpMinorShift 6 |