author    NeilBrown <neilb@suse.com>  2015-10-13 16:09:52 -0400
committer NeilBrown <neilb@suse.com>  2015-10-13 16:09:52 -0400
commit    c2a06c38d92d044a69a3eae0138ab95ff0788030 (patch)
tree      e193af1aaf9ea876dedf6db38ded53f8d7a7f9b4 /drivers/md/raid1.c
parent    25cb62b76430a91cc6195f902e61c2cb84ade622 (diff)
parent    23b63f9fa82eed128b5c585cbfe10ced82d73e91 (diff)
Merge branch 'md-next' of git://github.com/goldwynr/linux into for-next
md-cluster: A better way for METADATA_UPDATED processing

The processing of the METADATA_UPDATED message is too simple and prone to
errors. Besides, it would not update the internal data structures as
required. This set of patches reads the superblock from one of the devices
of the MD and checks for changes in the in-memory data structures. If there
is a change, it performs the necessary actions to keep the internal data
structures as they would be on the primary node.

An example is if a device turns faulty. The algorithm is:

1. The initiator node marks the device as faulty and updates the superblock.
2. The initiator node sends METADATA_UPDATED with an advisory device number
   to the rest of the nodes.
3. The receiving node, on receiving the METADATA_UPDATED message:
   3.1 Reads the superblock.
   3.2 Detects that a device has failed by comparing with the in-memory
       structure.
   3.3 Calls the necessary functions to record the failure and get the
       device out of the active array.
   3.4 Acknowledges the message.

The patch series also fixes adding the disk, which was impacted because of
the changes.

Patches can also be found at https://github.com/goldwynr/linux, branch
md-next.

Changes since V2:
- Fix status synchronization after --add and --re-add operations
- Included Guoqing's patches on endian correctness, zeroing cmsg, etc.
- Restructured add_new_disk() and cancel()
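The receiver-side flow in step 3 is compact enough to sketch. Below is a
minimal, self-contained userspace simulation of it; all types and helpers
here (node_sb, read_remote_superblock) are hypothetical stand-ins for
illustration, not the kernel's md structures.

#include <stdbool.h>
#include <stdio.h>

#define MAX_DISKS 8

struct node_sb {
        bool faulty[MAX_DISKS]; /* per-device faulty flag in the superblock */
};

/* 3.1: read the superblock as updated by the initiator (stubbed here) */
static void read_remote_superblock(struct node_sb *sb)
{
        sb->faulty[2] = true;   /* pretend the initiator failed disk 2 */
}

int main(void)
{
        struct node_sb on_disk = { { false } };
        struct node_sb in_mem  = { { false } };

        read_remote_superblock(&on_disk);

        /* 3.2: compare against the in-memory state */
        for (int d = 0; d < MAX_DISKS; d++) {
                if (on_disk.faulty[d] && !in_mem.faulty[d]) {
                        /* 3.3: record the failure locally */
                        in_mem.faulty[d] = true;
                        printf("disk %d marked faulty to match initiator\n", d);
                }
        }
        /* 3.4: acknowledge the message (omitted) */
        return 0;
}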
Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r--  drivers/md/raid1.c  33
1 file changed, 32 insertions(+), 1 deletion(-)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index ddd8a5f572aa..ce2d797f8787 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -90,6 +90,8 @@ static void r1bio_pool_free(void *r1_bio, void *data)
 #define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
 #define RESYNC_WINDOW (RESYNC_BLOCK_SIZE * RESYNC_DEPTH)
 #define RESYNC_WINDOW_SECTORS (RESYNC_WINDOW >> 9)
+#define CLUSTER_RESYNC_WINDOW (16 * RESYNC_WINDOW)
+#define CLUSTER_RESYNC_WINDOW_SECTORS (CLUSTER_RESYNC_WINDOW >> 9)
 #define NEXT_NORMALIO_DISTANCE (3 * RESYNC_WINDOW_SECTORS)
 
 static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
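This hunk defines the cluster resync window as 16 normal resync windows and
converts it to 512-byte sectors with a >> 9 shift. A quick standalone check
of the arithmetic, with RESYNC_BLOCK_SIZE and RESYNC_DEPTH copied from the
values raid1.c used at the time (64 KiB and 32):

#include <stdio.h>

#define RESYNC_BLOCK_SIZE (64 * 1024)
#define RESYNC_DEPTH 32
#define RESYNC_WINDOW (RESYNC_BLOCK_SIZE * RESYNC_DEPTH)
#define CLUSTER_RESYNC_WINDOW (16 * RESYNC_WINDOW)
#define CLUSTER_RESYNC_WINDOW_SECTORS (CLUSTER_RESYNC_WINDOW >> 9)

int main(void)
{
        /* 16 * 2 MiB = 32 MiB => 65536 sectors */
        printf("window: %d bytes = %d sectors\n",
               CLUSTER_RESYNC_WINDOW, CLUSTER_RESYNC_WINDOW_SECTORS);
        return 0;
}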
@@ -1590,6 +1592,15 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 	if (rdev->raid_disk >= 0)
 		first = last = rdev->raid_disk;
 
+	/*
+	 * find the disk ... but prefer rdev->saved_raid_disk
+	 * if possible.
+	 */
+	if (rdev->saved_raid_disk >= 0 &&
+	    rdev->saved_raid_disk >= first &&
+	    conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
+		first = last = rdev->saved_raid_disk;
+
 	for (mirror = first; mirror <= last; mirror++) {
 		p = conf->mirrors+mirror;
 		if (!p->rdev) {
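The raid1_add_disk() hunk narrows the slot search to the device's remembered
slot when that slot is still vacant, so a re-added device returns to its old
position. A simplified standalone sketch of the same narrowing rule (the
occupied array here is a stand-in for checking conf->mirrors[i].rdev):

#include <stdio.h>

#define RAID_DISKS 4

int main(void)
{
        int occupied[RAID_DISKS] = { 1, 0, 1, 1 };      /* slot 1 is vacant */
        int saved_raid_disk = 1;        /* slot this device used to occupy */
        int first = 0, last = RAID_DISKS - 1;

        /* same narrowing rule as the hunk above */
        if (saved_raid_disk >= 0 &&
            saved_raid_disk >= first &&
            !occupied[saved_raid_disk])
                first = last = saved_raid_disk;

        for (int mirror = first; mirror <= last; mirror++) {
                if (!occupied[mirror]) {
                        printf("adding device at slot %d\n", mirror);
                        break;
                }
        }
        return 0;
}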
@@ -2488,6 +2499,11 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
 
 	bitmap_close_sync(mddev->bitmap);
 	close_sync(conf);
+
+	if (mddev_is_clustered(mddev)) {
+		conf->cluster_sync_low = 0;
+		conf->cluster_sync_high = 0;
+	}
 	return 0;
 }
 
@@ -2508,7 +2524,12 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
 		return sync_blocks;
 	}
 
-	bitmap_cond_end_sync(mddev->bitmap, sector_nr);
+	/* we are incrementing sector_nr below. To be safe, we check against
+	 * sector_nr + two times RESYNC_SECTORS
+	 */
+
+	bitmap_cond_end_sync(mddev->bitmap, sector_nr,
+		mddev_is_clustered(mddev) && (sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high));
 	r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
 
 	raise_barrier(conf, sector_nr);
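The new third argument to bitmap_cond_end_sync() acts as a force flag: in
clustered mode the bitmap sync is closed out early once the cursor comes
within two resync blocks of cluster_sync_high, since sector_nr is
incremented further below. A standalone sketch of just that condition
(RESYNC_SECTORS is assumed here to be one 64 KiB resync block in sectors):

#include <stdbool.h>
#include <stdio.h>

#define RESYNC_SECTORS ((64 * 1024) >> 9)       /* one resync block, in sectors */

static bool must_force_end_sync(bool clustered,
                                unsigned long long sector_nr,
                                unsigned long long cluster_sync_high)
{
        /* check two blocks ahead, since sector_nr is incremented below */
        return clustered && sector_nr + 2 * RESYNC_SECTORS > cluster_sync_high;
}

int main(void)
{
        printf("%d\n", must_force_end_sync(true, 65300, 65536));        /* 1 */
        printf("%d\n", must_force_end_sync(true, 60000, 65536));        /* 0 */
        return 0;
}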
@@ -2699,6 +2720,16 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
 bio_full:
 	r1_bio->sectors = nr_sectors;
 
+	if (mddev_is_clustered(mddev) &&
+	    conf->cluster_sync_high < sector_nr + nr_sectors) {
+		conf->cluster_sync_low = mddev->curr_resync_completed;
+		conf->cluster_sync_high = conf->cluster_sync_low + CLUSTER_RESYNC_WINDOW_SECTORS;
+		/* Send resync message */
+		md_cluster_ops->resync_info_update(mddev,
+				conf->cluster_sync_low,
+				conf->cluster_sync_high);
+	}
+
 	/* For a user-requested sync, we read all readable devices and do a
 	 * compare
 	 */
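Taken together with the reset in the earlier hunk, the window logic forms a
simple lifecycle: whenever the local resync runs past cluster_sync_high,
slide the [low, high) window forward from curr_resync_completed and
broadcast it via md_cluster_ops->resync_info_update(), then zero both bounds
when the resync completes. A standalone simulation of that lifecycle
(resync_info_update() is stubbed here; in the kernel it is an md_cluster_ops
callback, and the struct is a stand-in for the r1conf fields):

#include <stdio.h>

#define CLUSTER_RESYNC_WINDOW_SECTORS ((16 * 64 * 1024 * 32ULL) >> 9)

struct win {
        unsigned long long cluster_sync_low;
        unsigned long long cluster_sync_high;
};

static void resync_info_update(unsigned long long lo, unsigned long long hi)
{
        printf("broadcast window [%llu, %llu)\n", lo, hi);      /* stub */
}

int main(void)
{
        struct win conf = { 0, 0 };
        unsigned long long curr_resync_completed = 0;

        /* the resync loop writes nr_sectors starting at sector_nr */
        for (unsigned long long sector_nr = 0; sector_nr < 200000; sector_nr += 4096) {
                unsigned long long nr_sectors = 4096;

                if (conf.cluster_sync_high < sector_nr + nr_sectors) {
                        conf.cluster_sync_low = curr_resync_completed;
                        conf.cluster_sync_high =
                                conf.cluster_sync_low + CLUSTER_RESYNC_WINDOW_SECTORS;
                        resync_info_update(conf.cluster_sync_low,
                                           conf.cluster_sync_high);
                }
                curr_resync_completed = sector_nr + nr_sectors;
        }

        /* on completion, the earlier hunk resets the window */
        conf.cluster_sync_low = conf.cluster_sync_high = 0;
        return 0;
}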