diff options
| author | Goldwyn Rodrigues <rgoldwyn@suse.com> | 2014-10-29 19:51:31 -0400 |
|---|---|---|
| committer | Goldwyn Rodrigues <rgoldwyn@suse.com> | 2015-02-23 10:59:07 -0500 |
| commit | 1aee41f637694d4bbf91c24195f2b63e3f6badd2 (patch) | |
| tree | b6f532e5a4265193dd5ebc547a0ab9f772143762 | |
| parent | 7d49ffcfa3cc08aa2301bf3fdb1e423a3fd33ee7 (diff) | |
Add new disk to clustered array
Algorithm:
1. Node 1 issues mdadm --manage /dev/mdX --add /dev/sdYY which issues
ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CLUSTER_ADD)
2. Node 1 sends NEWDISK with uuid and slot number
3. Other nodes issue kobject_uevent_env with uuid and slot number
(Steps 4,5 could be a udev rule)
4. In userspace, the node searches for the disk, perhaps
using blkid -t UUID_SUB=""
5. Other nodes issue either of the following depending on whether the disk
was found:
ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CANDIDATE and
disc.number set to slot number)
ioctl(CLUSTERED_DISK_NACK)
6. Other nodes drop their CR lock on no-new-dev if the device is found
7. Node 1 attempts to take an EX lock on no-new-dev
8. If node 1 gets the lock, it sends METADATA_UPDATED after unmarking the disk
as SpareLocal
9. If node 1 cannot get the no-new-dev lock, it fails the operation and sends METADATA_UPDATED
10. Other nodes learn whether the device was added by re-reading the superblock after receiving the METADATA_UPDATED message.
Signed-off-by: Lidong Zhong <lzhong@suse.com>
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
| -rw-r--r-- | drivers/md/md-cluster.c | 104 | ||||
| -rw-r--r-- | drivers/md/md-cluster.h | 4 | ||||
| -rw-r--r-- | drivers/md/md.c | 52 | ||||
| -rw-r--r-- | drivers/md/md.h | 5 | ||||
| -rw-r--r-- | drivers/md/raid1.c | 1 | ||||
| -rw-r--r-- | include/uapi/linux/raid/md_p.h | 6 | ||||
| -rw-r--r-- | include/uapi/linux/raid/md_u.h | 1 |
7 files changed, 169 insertions, 4 deletions
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index d85a6ca4443e..03e521a9ca7d 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c | |||
| @@ -12,11 +12,13 @@ | |||
| 12 | #include <linux/module.h> | 12 | #include <linux/module.h> |
| 13 | #include <linux/dlm.h> | 13 | #include <linux/dlm.h> |
| 14 | #include <linux/sched.h> | 14 | #include <linux/sched.h> |
| 15 | #include <linux/raid/md_p.h> | ||
| 15 | #include "md.h" | 16 | #include "md.h" |
| 16 | #include "bitmap.h" | 17 | #include "bitmap.h" |
| 17 | #include "md-cluster.h" | 18 | #include "md-cluster.h" |
| 18 | 19 | ||
| 19 | #define LVB_SIZE 64 | 20 | #define LVB_SIZE 64 |
| 21 | #define NEW_DEV_TIMEOUT 5000 | ||
| 20 | 22 | ||
| 21 | struct dlm_lock_resource { | 23 | struct dlm_lock_resource { |
| 22 | dlm_lockspace_t *ls; | 24 | dlm_lockspace_t *ls; |
| @@ -56,19 +58,25 @@ struct md_cluster_info { | |||
| 56 | struct dlm_lock_resource *ack_lockres; | 58 | struct dlm_lock_resource *ack_lockres; |
| 57 | struct dlm_lock_resource *message_lockres; | 59 | struct dlm_lock_resource *message_lockres; |
| 58 | struct dlm_lock_resource *token_lockres; | 60 | struct dlm_lock_resource *token_lockres; |
| 61 | struct dlm_lock_resource *no_new_dev_lockres; | ||
| 59 | struct md_thread *recv_thread; | 62 | struct md_thread *recv_thread; |
| 63 | struct completion newdisk_completion; | ||
| 60 | }; | 64 | }; |
| 61 | 65 | ||
| 62 | enum msg_type { | 66 | enum msg_type { |
| 63 | METADATA_UPDATED = 0, | 67 | METADATA_UPDATED = 0, |
| 64 | RESYNCING, | 68 | RESYNCING, |
| 69 | NEWDISK, | ||
| 65 | }; | 70 | }; |
| 66 | 71 | ||
| 67 | struct cluster_msg { | 72 | struct cluster_msg { |
| 68 | int type; | 73 | int type; |
| 69 | int slot; | 74 | int slot; |
| 75 | /* TODO: Unionize this for smaller footprint */ | ||
| 70 | sector_t low; | 76 | sector_t low; |
| 71 | sector_t high; | 77 | sector_t high; |
| 78 | char uuid[16]; | ||
| 79 | int raid_slot; | ||
| 72 | }; | 80 | }; |
| 73 | 81 | ||
| 74 | static void sync_ast(void *arg) | 82 | static void sync_ast(void *arg) |
| @@ -358,13 +366,41 @@ static void process_suspend_info(struct md_cluster_info *cinfo, | |||
| 358 | spin_unlock_irq(&cinfo->suspend_lock); | 366 | spin_unlock_irq(&cinfo->suspend_lock); |
| 359 | } | 367 | } |
| 360 | 368 | ||
| 369 | static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg) | ||
| 370 | { | ||
| 371 | char disk_uuid[64]; | ||
| 372 | struct md_cluster_info *cinfo = mddev->cluster_info; | ||
| 373 | char event_name[] = "EVENT=ADD_DEVICE"; | ||
| 374 | char raid_slot[16]; | ||
| 375 | char *envp[] = {event_name, disk_uuid, raid_slot, NULL}; | ||
| 376 | int len; | ||
| 377 | |||
| 378 | len = snprintf(disk_uuid, 64, "DEVICE_UUID="); | ||
| 379 | pretty_uuid(disk_uuid + len, cmsg->uuid); | ||
| 380 | snprintf(raid_slot, 16, "RAID_DISK=%d", cmsg->raid_slot); | ||
| 381 | pr_info("%s:%d Sending kobject change with %s and %s\n", __func__, __LINE__, disk_uuid, raid_slot); | ||
| 382 | init_completion(&cinfo->newdisk_completion); | ||
| 383 | kobject_uevent_env(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE, envp); | ||
| 384 | wait_for_completion_timeout(&cinfo->newdisk_completion, | ||
| 385 | NEW_DEV_TIMEOUT); | ||
| 386 | } | ||
| 387 | |||
| 388 | |||
| 389 | static void process_metadata_update(struct mddev *mddev, struct cluster_msg *msg) | ||
| 390 | { | ||
| 391 | struct md_cluster_info *cinfo = mddev->cluster_info; | ||
| 392 | |||
| 393 | md_reload_sb(mddev); | ||
| 394 | dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR); | ||
| 395 | } | ||
| 396 | |||
| 361 | static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg) | 397 | static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg) |
| 362 | { | 398 | { |
| 363 | switch (msg->type) { | 399 | switch (msg->type) { |
| 364 | case METADATA_UPDATED: | 400 | case METADATA_UPDATED: |
| 365 | pr_info("%s: %d Received message: METADATA_UPDATE from %d\n", | 401 | pr_info("%s: %d Received message: METADATA_UPDATE from %d\n", |
| 366 | __func__, __LINE__, msg->slot); | 402 | __func__, __LINE__, msg->slot); |
| 367 | md_reload_sb(mddev); | 403 | process_metadata_update(mddev, msg); |
| 368 | break; | 404 | break; |
| 369 | case RESYNCING: | 405 | case RESYNCING: |
| 370 | pr_info("%s: %d Received message: RESYNCING from %d\n", | 406 | pr_info("%s: %d Received message: RESYNCING from %d\n", |
| @@ -372,6 +408,10 @@ static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg) | |||
| 372 | process_suspend_info(mddev->cluster_info, msg->slot, | 408 | process_suspend_info(mddev->cluster_info, msg->slot, |
| 373 | msg->low, msg->high); | 409 | msg->low, msg->high); |
| 374 | break; | 410 | break; |
| 411 | case NEWDISK: | ||
| 412 | pr_info("%s: %d Received message: NEWDISK from %d\n", | ||
| 413 | __func__, __LINE__, msg->slot); | ||
| 414 | process_add_new_disk(mddev, msg); | ||
| 375 | }; | 415 | }; |
| 376 | } | 416 | } |
| 377 | 417 | ||
| @@ -593,10 +633,18 @@ static int join(struct mddev *mddev, int nodes) | |||
| 593 | cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0); | 633 | cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0); |
| 594 | if (!cinfo->ack_lockres) | 634 | if (!cinfo->ack_lockres) |
| 595 | goto err; | 635 | goto err; |
| 636 | cinfo->no_new_dev_lockres = lockres_init(mddev, "no-new-dev", NULL, 0); | ||
| 637 | if (!cinfo->no_new_dev_lockres) | ||
| 638 | goto err; | ||
| 639 | |||
| 596 | /* get sync CR lock on ACK. */ | 640 | /* get sync CR lock on ACK. */ |
| 597 | if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR)) | 641 | if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR)) |
| 598 | pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n", | 642 | pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n", |
| 599 | ret); | 643 | ret); |
| 644 | /* get sync CR lock on no-new-dev. */ | ||
| 645 | if (dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR)) | ||
| 646 | pr_err("md-cluster: failed to get a sync CR lock on no-new-dev!(%d)\n", ret); | ||
| 647 | |||
| 600 | 648 | ||
| 601 | pr_info("md-cluster: Joined cluster %s slot %d\n", str, cinfo->slot_number); | 649 | pr_info("md-cluster: Joined cluster %s slot %d\n", str, cinfo->slot_number); |
| 602 | snprintf(str, 64, "bitmap%04d", cinfo->slot_number - 1); | 650 | snprintf(str, 64, "bitmap%04d", cinfo->slot_number - 1); |
| @@ -621,6 +669,7 @@ err: | |||
| 621 | lockres_free(cinfo->message_lockres); | 669 | lockres_free(cinfo->message_lockres); |
| 622 | lockres_free(cinfo->token_lockres); | 670 | lockres_free(cinfo->token_lockres); |
| 623 | lockres_free(cinfo->ack_lockres); | 671 | lockres_free(cinfo->ack_lockres); |
| 672 | lockres_free(cinfo->no_new_dev_lockres); | ||
| 624 | lockres_free(cinfo->bitmap_lockres); | 673 | lockres_free(cinfo->bitmap_lockres); |
| 625 | lockres_free(cinfo->sb_lock); | 674 | lockres_free(cinfo->sb_lock); |
| 626 | if (cinfo->lockspace) | 675 | if (cinfo->lockspace) |
| @@ -642,6 +691,7 @@ static int leave(struct mddev *mddev) | |||
| 642 | lockres_free(cinfo->message_lockres); | 691 | lockres_free(cinfo->message_lockres); |
| 643 | lockres_free(cinfo->token_lockres); | 692 | lockres_free(cinfo->token_lockres); |
| 644 | lockres_free(cinfo->ack_lockres); | 693 | lockres_free(cinfo->ack_lockres); |
| 694 | lockres_free(cinfo->no_new_dev_lockres); | ||
| 645 | lockres_free(cinfo->sb_lock); | 695 | lockres_free(cinfo->sb_lock); |
| 646 | lockres_free(cinfo->bitmap_lockres); | 696 | lockres_free(cinfo->bitmap_lockres); |
| 647 | dlm_release_lockspace(cinfo->lockspace, 2); | 697 | dlm_release_lockspace(cinfo->lockspace, 2); |
| @@ -742,6 +792,55 @@ out: | |||
| 742 | return ret; | 792 | return ret; |
| 743 | } | 793 | } |
| 744 | 794 | ||
| 795 | static int add_new_disk_start(struct mddev *mddev, struct md_rdev *rdev) | ||
| 796 | { | ||
| 797 | struct md_cluster_info *cinfo = mddev->cluster_info; | ||
| 798 | struct cluster_msg cmsg; | ||
| 799 | int ret = 0; | ||
| 800 | struct mdp_superblock_1 *sb = page_address(rdev->sb_page); | ||
| 801 | char *uuid = sb->device_uuid; | ||
| 802 | |||
| 803 | memset(&cmsg, 0, sizeof(cmsg)); | ||
| 804 | cmsg.type = cpu_to_le32(NEWDISK); | ||
| 805 | memcpy(cmsg.uuid, uuid, 16); | ||
| 806 | cmsg.raid_slot = rdev->desc_nr; | ||
| 807 | lock_comm(cinfo); | ||
| 808 | ret = __sendmsg(cinfo, &cmsg); | ||
| 809 | if (ret) | ||
| 810 | return ret; | ||
| 811 | cinfo->no_new_dev_lockres->flags |= DLM_LKF_NOQUEUE; | ||
| 812 | ret = dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_EX); | ||
| 813 | cinfo->no_new_dev_lockres->flags &= ~DLM_LKF_NOQUEUE; | ||
| 814 | /* Some node does not "see" the device */ | ||
| 815 | if (ret == -EAGAIN) | ||
| 816 | ret = -ENOENT; | ||
| 817 | else | ||
| 818 | dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR); | ||
| 819 | return ret; | ||
| 820 | } | ||
| 821 | |||
| 822 | static int add_new_disk_finish(struct mddev *mddev) | ||
| 823 | { | ||
| 824 | struct cluster_msg cmsg; | ||
| 825 | struct md_cluster_info *cinfo = mddev->cluster_info; | ||
| 826 | int ret; | ||
| 827 | /* Write sb and inform others */ | ||
| 828 | md_update_sb(mddev, 1); | ||
| 829 | cmsg.type = METADATA_UPDATED; | ||
| 830 | ret = __sendmsg(cinfo, &cmsg); | ||
| 831 | unlock_comm(cinfo); | ||
| 832 | return ret; | ||
| 833 | } | ||
| 834 | |||
| 835 | static void new_disk_ack(struct mddev *mddev, bool ack) | ||
| 836 | { | ||
| 837 | struct md_cluster_info *cinfo = mddev->cluster_info; | ||
| 838 | |||
| 839 | if (ack) | ||
| 840 | dlm_unlock_sync(cinfo->no_new_dev_lockres); | ||
| 841 | complete(&cinfo->newdisk_completion); | ||
| 842 | } | ||
| 843 | |||
| 745 | static struct md_cluster_operations cluster_ops = { | 844 | static struct md_cluster_operations cluster_ops = { |
| 746 | .join = join, | 845 | .join = join, |
| 747 | .leave = leave, | 846 | .leave = leave, |
| @@ -753,6 +852,9 @@ static struct md_cluster_operations cluster_ops = { | |||
| 753 | .metadata_update_finish = metadata_update_finish, | 852 | .metadata_update_finish = metadata_update_finish, |
| 754 | .metadata_update_cancel = metadata_update_cancel, | 853 | .metadata_update_cancel = metadata_update_cancel, |
| 755 | .area_resyncing = area_resyncing, | 854 | .area_resyncing = area_resyncing, |
| 855 | .add_new_disk_start = add_new_disk_start, | ||
| 856 | .add_new_disk_finish = add_new_disk_finish, | ||
| 857 | .new_disk_ack = new_disk_ack, | ||
| 756 | }; | 858 | }; |
| 757 | 859 | ||
| 758 | static int __init cluster_init(void) | 860 | static int __init cluster_init(void) |
diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h index 03785402afaa..60d7e58964f5 100644 --- a/drivers/md/md-cluster.h +++ b/drivers/md/md-cluster.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include "md.h" | 6 | #include "md.h" |
| 7 | 7 | ||
| 8 | struct mddev; | 8 | struct mddev; |
| 9 | struct md_rdev; | ||
| 9 | 10 | ||
| 10 | struct md_cluster_operations { | 11 | struct md_cluster_operations { |
| 11 | int (*join)(struct mddev *mddev, int nodes); | 12 | int (*join)(struct mddev *mddev, int nodes); |
| @@ -18,6 +19,9 @@ struct md_cluster_operations { | |||
| 18 | int (*metadata_update_finish)(struct mddev *mddev); | 19 | int (*metadata_update_finish)(struct mddev *mddev); |
| 19 | int (*metadata_update_cancel)(struct mddev *mddev); | 20 | int (*metadata_update_cancel)(struct mddev *mddev); |
| 20 | int (*area_resyncing)(struct mddev *mddev, sector_t lo, sector_t hi); | 21 | int (*area_resyncing)(struct mddev *mddev, sector_t lo, sector_t hi); |
| 22 | int (*add_new_disk_start)(struct mddev *mddev, struct md_rdev *rdev); | ||
| 23 | int (*add_new_disk_finish)(struct mddev *mddev); | ||
| 24 | void (*new_disk_ack)(struct mddev *mddev, bool ack); | ||
| 21 | }; | 25 | }; |
| 22 | 26 | ||
| 23 | #endif /* _MD_CLUSTER_H */ | 27 | #endif /* _MD_CLUSTER_H */ |
diff --git a/drivers/md/md.c b/drivers/md/md.c index fe0484648de4..5703c2e89f3a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
| @@ -2210,7 +2210,7 @@ static void sync_sbs(struct mddev *mddev, int nospares) | |||
| 2210 | } | 2210 | } |
| 2211 | } | 2211 | } |
| 2212 | 2212 | ||
| 2213 | static void md_update_sb(struct mddev *mddev, int force_change) | 2213 | void md_update_sb(struct mddev *mddev, int force_change) |
| 2214 | { | 2214 | { |
| 2215 | struct md_rdev *rdev; | 2215 | struct md_rdev *rdev; |
| 2216 | int sync_req; | 2216 | int sync_req; |
| @@ -2371,6 +2371,7 @@ repeat: | |||
| 2371 | wake_up(&rdev->blocked_wait); | 2371 | wake_up(&rdev->blocked_wait); |
| 2372 | } | 2372 | } |
| 2373 | } | 2373 | } |
| 2374 | EXPORT_SYMBOL(md_update_sb); | ||
| 2374 | 2375 | ||
| 2375 | /* words written to sysfs files may, or may not, be \n terminated. | 2376 | /* words written to sysfs files may, or may not, be \n terminated. |
| 2376 | * We want to accept with case. For this we use cmd_match. | 2377 | * We want to accept with case. For this we use cmd_match. |
| @@ -3151,7 +3152,7 @@ static void analyze_sbs(struct mddev *mddev) | |||
| 3151 | kick_rdev_from_array(rdev); | 3152 | kick_rdev_from_array(rdev); |
| 3152 | continue; | 3153 | continue; |
| 3153 | } | 3154 | } |
| 3154 | if (rdev != freshest) | 3155 | if (rdev != freshest) { |
| 3155 | if (super_types[mddev->major_version]. | 3156 | if (super_types[mddev->major_version]. |
| 3156 | validate_super(mddev, rdev)) { | 3157 | validate_super(mddev, rdev)) { |
| 3157 | printk(KERN_WARNING "md: kicking non-fresh %s" | 3158 | printk(KERN_WARNING "md: kicking non-fresh %s" |
| @@ -3160,6 +3161,15 @@ static void analyze_sbs(struct mddev *mddev) | |||
| 3160 | kick_rdev_from_array(rdev); | 3161 | kick_rdev_from_array(rdev); |
| 3161 | continue; | 3162 | continue; |
| 3162 | } | 3163 | } |
| 3164 | /* No device should have a Candidate flag | ||
| 3165 | * when reading devices | ||
| 3166 | */ | ||
| 3167 | if (test_bit(Candidate, &rdev->flags)) { | ||
| 3168 | pr_info("md: kicking Cluster Candidate %s from array!\n", | ||
| 3169 | bdevname(rdev->bdev, b)); | ||
| 3170 | kick_rdev_from_array(rdev); | ||
| 3171 | } | ||
| 3172 | } | ||
| 3163 | if (mddev->level == LEVEL_MULTIPATH) { | 3173 | if (mddev->level == LEVEL_MULTIPATH) { |
| 3164 | rdev->desc_nr = i++; | 3174 | rdev->desc_nr = i++; |
| 3165 | rdev->raid_disk = rdev->desc_nr; | 3175 | rdev->raid_disk = rdev->desc_nr; |
| @@ -5655,7 +5665,6 @@ static int get_array_info(struct mddev *mddev, void __user *arg) | |||
| 5655 | info.state |= (1<<MD_SB_BITMAP_PRESENT); | 5665 | info.state |= (1<<MD_SB_BITMAP_PRESENT); |
| 5656 | if (mddev_is_clustered(mddev)) | 5666 | if (mddev_is_clustered(mddev)) |
| 5657 | info.state |= (1<<MD_SB_CLUSTERED); | 5667 | info.state |= (1<<MD_SB_CLUSTERED); |
| 5658 | |||
| 5659 | info.active_disks = insync; | 5668 | info.active_disks = insync; |
| 5660 | info.working_disks = working; | 5669 | info.working_disks = working; |
| 5661 | info.failed_disks = failed; | 5670 | info.failed_disks = failed; |
| @@ -5744,6 +5753,13 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info) | |||
| 5744 | struct md_rdev *rdev; | 5753 | struct md_rdev *rdev; |
| 5745 | dev_t dev = MKDEV(info->major,info->minor); | 5754 | dev_t dev = MKDEV(info->major,info->minor); |
| 5746 | 5755 | ||
| 5756 | if (mddev_is_clustered(mddev) && | ||
| 5757 | !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) { | ||
| 5758 | pr_err("%s: Cannot add to clustered mddev. Try --cluster-add\n", | ||
| 5759 | mdname(mddev)); | ||
| 5760 | return -EINVAL; | ||
| 5761 | } | ||
| 5762 | |||
| 5747 | if (info->major != MAJOR(dev) || info->minor != MINOR(dev)) | 5763 | if (info->major != MAJOR(dev) || info->minor != MINOR(dev)) |
| 5748 | return -EOVERFLOW; | 5764 | return -EOVERFLOW; |
| 5749 | 5765 | ||
| @@ -5830,6 +5846,25 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info) | |||
| 5830 | else | 5846 | else |
| 5831 | clear_bit(WriteMostly, &rdev->flags); | 5847 | clear_bit(WriteMostly, &rdev->flags); |
| 5832 | 5848 | ||
| 5849 | /* | ||
| 5850 | * check whether the device shows up in other nodes | ||
| 5851 | */ | ||
| 5852 | if (mddev_is_clustered(mddev)) { | ||
| 5853 | if (info->state & (1 << MD_DISK_CANDIDATE)) { | ||
| 5854 | /* Through --cluster-confirm */ | ||
| 5855 | set_bit(Candidate, &rdev->flags); | ||
| 5856 | md_cluster_ops->new_disk_ack(mddev, true); | ||
| 5857 | } else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) { | ||
| 5858 | /* --add initiated by this node */ | ||
| 5859 | err = md_cluster_ops->add_new_disk_start(mddev, rdev); | ||
| 5860 | if (err) { | ||
| 5861 | md_cluster_ops->add_new_disk_finish(mddev); | ||
| 5862 | export_rdev(rdev); | ||
| 5863 | return err; | ||
| 5864 | } | ||
| 5865 | } | ||
| 5866 | } | ||
| 5867 | |||
| 5833 | rdev->raid_disk = -1; | 5868 | rdev->raid_disk = -1; |
| 5834 | err = bind_rdev_to_array(rdev, mddev); | 5869 | err = bind_rdev_to_array(rdev, mddev); |
| 5835 | if (!err && !mddev->pers->hot_remove_disk) { | 5870 | if (!err && !mddev->pers->hot_remove_disk) { |
| @@ -5855,6 +5890,9 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info) | |||
| 5855 | if (!err) | 5890 | if (!err) |
| 5856 | md_new_event(mddev); | 5891 | md_new_event(mddev); |
| 5857 | md_wakeup_thread(mddev->thread); | 5892 | md_wakeup_thread(mddev->thread); |
| 5893 | if (mddev_is_clustered(mddev) && | ||
| 5894 | (info->state & (1 << MD_DISK_CLUSTER_ADD))) | ||
| 5895 | md_cluster_ops->add_new_disk_finish(mddev); | ||
| 5858 | return err; | 5896 | return err; |
| 5859 | } | 5897 | } |
| 5860 | 5898 | ||
| @@ -6456,6 +6494,7 @@ static inline bool md_ioctl_valid(unsigned int cmd) | |||
| 6456 | case SET_DISK_FAULTY: | 6494 | case SET_DISK_FAULTY: |
| 6457 | case STOP_ARRAY: | 6495 | case STOP_ARRAY: |
| 6458 | case STOP_ARRAY_RO: | 6496 | case STOP_ARRAY_RO: |
| 6497 | case CLUSTERED_DISK_NACK: | ||
| 6459 | return true; | 6498 | return true; |
| 6460 | default: | 6499 | default: |
| 6461 | return false; | 6500 | return false; |
| @@ -6728,6 +6767,13 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, | |||
| 6728 | goto unlock; | 6767 | goto unlock; |
| 6729 | } | 6768 | } |
| 6730 | 6769 | ||
| 6770 | case CLUSTERED_DISK_NACK: | ||
| 6771 | if (mddev_is_clustered(mddev)) | ||
| 6772 | md_cluster_ops->new_disk_ack(mddev, false); | ||
| 6773 | else | ||
| 6774 | err = -EINVAL; | ||
| 6775 | goto unlock; | ||
| 6776 | |||
| 6731 | case HOT_ADD_DISK: | 6777 | case HOT_ADD_DISK: |
| 6732 | err = hot_add_disk(mddev, new_decode_dev(arg)); | 6778 | err = hot_add_disk(mddev, new_decode_dev(arg)); |
| 6733 | goto unlock; | 6779 | goto unlock; |
diff --git a/drivers/md/md.h b/drivers/md/md.h index bfebcfdf54e6..6dc0ce09f50c 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
| @@ -171,6 +171,10 @@ enum flag_bits { | |||
| 171 | * a want_replacement device with same | 171 | * a want_replacement device with same |
| 172 | * raid_disk number. | 172 | * raid_disk number. |
| 173 | */ | 173 | */ |
| 174 | Candidate, /* For clustered environments only: | ||
| 175 | * This device is seen locally but not | ||
| 176 | * by the whole cluster | ||
| 177 | */ | ||
| 174 | }; | 178 | }; |
| 175 | 179 | ||
| 176 | #define BB_LEN_MASK (0x00000000000001FFULL) | 180 | #define BB_LEN_MASK (0x00000000000001FFULL) |
| @@ -666,6 +670,7 @@ extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, | |||
| 666 | 670 | ||
| 667 | extern void md_unplug(struct blk_plug_cb *cb, bool from_schedule); | 671 | extern void md_unplug(struct blk_plug_cb *cb, bool from_schedule); |
| 668 | extern void md_reload_sb(struct mddev *mddev); | 672 | extern void md_reload_sb(struct mddev *mddev); |
| 673 | extern void md_update_sb(struct mddev *mddev, int force); | ||
| 669 | static inline int mddev_check_plugged(struct mddev *mddev) | 674 | static inline int mddev_check_plugged(struct mddev *mddev) |
| 670 | { | 675 | { |
| 671 | return !!blk_check_plugged(md_unplug, mddev, | 676 | return !!blk_check_plugged(md_unplug, mddev, |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index f70d74189d16..53ed5d48308f 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
| @@ -1571,6 +1571,7 @@ static int raid1_spare_active(struct mddev *mddev) | |||
| 1571 | struct md_rdev *rdev = conf->mirrors[i].rdev; | 1571 | struct md_rdev *rdev = conf->mirrors[i].rdev; |
| 1572 | struct md_rdev *repl = conf->mirrors[conf->raid_disks + i].rdev; | 1572 | struct md_rdev *repl = conf->mirrors[conf->raid_disks + i].rdev; |
| 1573 | if (repl | 1573 | if (repl |
| 1574 | && !test_bit(Candidate, &repl->flags) | ||
| 1574 | && repl->recovery_offset == MaxSector | 1575 | && repl->recovery_offset == MaxSector |
| 1575 | && !test_bit(Faulty, &repl->flags) | 1576 | && !test_bit(Faulty, &repl->flags) |
| 1576 | && !test_and_set_bit(In_sync, &repl->flags)) { | 1577 | && !test_and_set_bit(In_sync, &repl->flags)) { |
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index 643489d33e68..2ae6131e69a5 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h | |||
| @@ -78,6 +78,12 @@ | |||
| 78 | #define MD_DISK_ACTIVE 1 /* disk is running or spare disk */ | 78 | #define MD_DISK_ACTIVE 1 /* disk is running or spare disk */ |
| 79 | #define MD_DISK_SYNC 2 /* disk is in sync with the raid set */ | 79 | #define MD_DISK_SYNC 2 /* disk is in sync with the raid set */ |
| 80 | #define MD_DISK_REMOVED 3 /* disk is in sync with the raid set */ | 80 | #define MD_DISK_REMOVED 3 /* disk is in sync with the raid set */ |
| 81 | #define MD_DISK_CLUSTER_ADD 4 /* Initiate a disk add across the cluster | ||
| 82 | * For clustered environments only. | ||
| 83 | */ | ||
| 84 | #define MD_DISK_CANDIDATE 5 /* disk is added as spare (local) until confirmed | ||
| 85 | * For clustered environments only. | ||
| 86 | */ | ||
| 81 | 87 | ||
| 82 | #define MD_DISK_WRITEMOSTLY 9 /* disk is "write-mostly" is RAID1 config. | 88 | #define MD_DISK_WRITEMOSTLY 9 /* disk is "write-mostly" is RAID1 config. |
| 83 | * read requests will only be sent here in | 89 | * read requests will only be sent here in |
diff --git a/include/uapi/linux/raid/md_u.h b/include/uapi/linux/raid/md_u.h index 74e7c60c4716..1cb8aa6850b5 100644 --- a/include/uapi/linux/raid/md_u.h +++ b/include/uapi/linux/raid/md_u.h | |||
| @@ -62,6 +62,7 @@ | |||
| 62 | #define STOP_ARRAY _IO (MD_MAJOR, 0x32) | 62 | #define STOP_ARRAY _IO (MD_MAJOR, 0x32) |
| 63 | #define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33) | 63 | #define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33) |
| 64 | #define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34) | 64 | #define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34) |
| 65 | #define CLUSTERED_DISK_NACK _IO (MD_MAJOR, 0x35) | ||
| 65 | 66 | ||
| 66 | /* 63 partitions with the alternate major number (mdp) */ | 67 | /* 63 partitions with the alternate major number (mdp) */ |
| 67 | #define MdpMinorShift 6 | 68 | #define MdpMinorShift 6 |
