aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/linear.c2
-rw-r--r--drivers/md/md.c176
-rw-r--r--drivers/md/md.h12
-rw-r--r--drivers/md/multipath.c5
-rw-r--r--drivers/md/raid0.c1
-rw-r--r--drivers/md/raid1.c7
-rw-r--r--drivers/md/raid10.c4
-rw-r--r--drivers/md/raid5.c85
8 files changed, 188 insertions, 104 deletions
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 5810fa906af0..5fe39c2a3d2b 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -220,6 +220,7 @@ static int linear_run (mddev_t *mddev)
220 mddev->queue->unplug_fn = linear_unplug; 220 mddev->queue->unplug_fn = linear_unplug;
221 mddev->queue->backing_dev_info.congested_fn = linear_congested; 221 mddev->queue->backing_dev_info.congested_fn = linear_congested;
222 mddev->queue->backing_dev_info.congested_data = mddev; 222 mddev->queue->backing_dev_info.congested_data = mddev;
223 md_integrity_register(mddev);
223 return 0; 224 return 0;
224} 225}
225 226
@@ -256,6 +257,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
256 rcu_assign_pointer(mddev->private, newconf); 257 rcu_assign_pointer(mddev->private, newconf);
257 md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); 258 md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
258 set_capacity(mddev->gendisk, mddev->array_sectors); 259 set_capacity(mddev->gendisk, mddev->array_sectors);
260 revalidate_disk(mddev->gendisk);
259 call_rcu(&oldconf->rcu, free_conf); 261 call_rcu(&oldconf->rcu, free_conf);
260 return 0; 262 return 0;
261} 263}
diff --git a/drivers/md/md.c b/drivers/md/md.c
index d4351ff0849f..103f2d33fa89 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit)
359 else 359 else
360 new->md_minor = MINOR(unit) >> MdpMinorShift; 360 new->md_minor = MINOR(unit) >> MdpMinorShift;
361 361
362 mutex_init(&new->open_mutex);
362 mutex_init(&new->reconfig_mutex); 363 mutex_init(&new->reconfig_mutex);
363 INIT_LIST_HEAD(&new->disks); 364 INIT_LIST_HEAD(&new->disks);
364 INIT_LIST_HEAD(&new->all_mddevs); 365 INIT_LIST_HEAD(&new->all_mddevs);
@@ -1308,7 +1309,12 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
1308 } 1309 }
1309 if (mddev->level != LEVEL_MULTIPATH) { 1310 if (mddev->level != LEVEL_MULTIPATH) {
1310 int role; 1311 int role;
1311 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); 1312 if (rdev->desc_nr < 0 ||
1313 rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
1314 role = 0xffff;
1315 rdev->desc_nr = -1;
1316 } else
1317 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1312 switch(role) { 1318 switch(role) {
1313 case 0xffff: /* spare */ 1319 case 0xffff: /* spare */
1314 break; 1320 break;
@@ -1394,8 +1400,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
1394 if (rdev2->desc_nr+1 > max_dev) 1400 if (rdev2->desc_nr+1 > max_dev)
1395 max_dev = rdev2->desc_nr+1; 1401 max_dev = rdev2->desc_nr+1;
1396 1402
1397 if (max_dev > le32_to_cpu(sb->max_dev)) 1403 if (max_dev > le32_to_cpu(sb->max_dev)) {
1404 int bmask;
1398 sb->max_dev = cpu_to_le32(max_dev); 1405 sb->max_dev = cpu_to_le32(max_dev);
1406 rdev->sb_size = max_dev * 2 + 256;
1407 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1408 if (rdev->sb_size & bmask)
1409 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1410 }
1399 for (i=0; i<max_dev;i++) 1411 for (i=0; i<max_dev;i++)
1400 sb->dev_roles[i] = cpu_to_le16(0xfffe); 1412 sb->dev_roles[i] = cpu_to_le16(0xfffe);
1401 1413
@@ -1487,37 +1499,76 @@ static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
1487 1499
1488static LIST_HEAD(pending_raid_disks); 1500static LIST_HEAD(pending_raid_disks);
1489 1501
1490static void md_integrity_check(mdk_rdev_t *rdev, mddev_t *mddev) 1502/*
1503 * Try to register data integrity profile for an mddev
1504 *
1505 * This is called when an array is started and after a disk has been kicked
1506 * from the array. It only succeeds if all working and active component devices
1507 * are integrity capable with matching profiles.
1508 */
1509int md_integrity_register(mddev_t *mddev)
1510{
1511 mdk_rdev_t *rdev, *reference = NULL;
1512
1513 if (list_empty(&mddev->disks))
1514 return 0; /* nothing to do */
1515 if (blk_get_integrity(mddev->gendisk))
1516 return 0; /* already registered */
1517 list_for_each_entry(rdev, &mddev->disks, same_set) {
1518 /* skip spares and non-functional disks */
1519 if (test_bit(Faulty, &rdev->flags))
1520 continue;
1521 if (rdev->raid_disk < 0)
1522 continue;
1523 /*
1524 * If at least one rdev is not integrity capable, we can not
1525 * enable data integrity for the md device.
1526 */
1527 if (!bdev_get_integrity(rdev->bdev))
1528 return -EINVAL;
1529 if (!reference) {
1530 /* Use the first rdev as the reference */
1531 reference = rdev;
1532 continue;
1533 }
1534 /* does this rdev's profile match the reference profile? */
1535 if (blk_integrity_compare(reference->bdev->bd_disk,
1536 rdev->bdev->bd_disk) < 0)
1537 return -EINVAL;
1538 }
1539 /*
1540 * All component devices are integrity capable and have matching
1541 * profiles, register the common profile for the md device.
1542 */
1543 if (blk_integrity_register(mddev->gendisk,
1544 bdev_get_integrity(reference->bdev)) != 0) {
1545 printk(KERN_ERR "md: failed to register integrity for %s\n",
1546 mdname(mddev));
1547 return -EINVAL;
1548 }
1549 printk(KERN_NOTICE "md: data integrity on %s enabled\n",
1550 mdname(mddev));
1551 return 0;
1552}
1553EXPORT_SYMBOL(md_integrity_register);
1554
1555/* Disable data integrity if non-capable/non-matching disk is being added */
1556void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
1491{ 1557{
1492 struct mdk_personality *pers = mddev->pers;
1493 struct gendisk *disk = mddev->gendisk;
1494 struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev); 1558 struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev);
1495 struct blk_integrity *bi_mddev = blk_get_integrity(disk); 1559 struct blk_integrity *bi_mddev = blk_get_integrity(mddev->gendisk);
1496 1560
1497 /* Data integrity passthrough not supported on RAID 4, 5 and 6 */ 1561 if (!bi_mddev) /* nothing to do */
1498 if (pers && pers->level >= 4 && pers->level <= 6)
1499 return; 1562 return;
1500 1563 if (rdev->raid_disk < 0) /* skip spares */
1501 /* If rdev is integrity capable, register profile for mddev */
1502 if (!bi_mddev && bi_rdev) {
1503 if (blk_integrity_register(disk, bi_rdev))
1504 printk(KERN_ERR "%s: %s Could not register integrity!\n",
1505 __func__, disk->disk_name);
1506 else
1507 printk(KERN_NOTICE "Enabling data integrity on %s\n",
1508 disk->disk_name);
1509 return; 1564 return;
1510 } 1565 if (bi_rdev && blk_integrity_compare(mddev->gendisk,
1511 1566 rdev->bdev->bd_disk) >= 0)
1512 /* Check that mddev and rdev have matching profiles */ 1567 return;
1513 if (blk_integrity_compare(disk, rdev->bdev->bd_disk) < 0) { 1568 printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev));
1514 printk(KERN_ERR "%s: %s/%s integrity mismatch!\n", __func__, 1569 blk_integrity_unregister(mddev->gendisk);
1515 disk->disk_name, rdev->bdev->bd_disk->disk_name);
1516 printk(KERN_NOTICE "Disabling data integrity on %s\n",
1517 disk->disk_name);
1518 blk_integrity_unregister(disk);
1519 }
1520} 1570}
1571EXPORT_SYMBOL(md_integrity_add_rdev);
1521 1572
1522static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) 1573static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
1523{ 1574{
@@ -1591,7 +1642,6 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
1591 /* May as well allow recovery to be retried once */ 1642 /* May as well allow recovery to be retried once */
1592 mddev->recovery_disabled = 0; 1643 mddev->recovery_disabled = 0;
1593 1644
1594 md_integrity_check(rdev, mddev);
1595 return 0; 1645 return 0;
1596 1646
1597 fail: 1647 fail:
@@ -1925,17 +1975,14 @@ repeat:
1925 /* otherwise we have to go forward and ... */ 1975 /* otherwise we have to go forward and ... */
1926 mddev->events ++; 1976 mddev->events ++;
1927 if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */ 1977 if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */
1928 /* .. if the array isn't clean, insist on an odd 'events' */ 1978 /* .. if the array isn't clean, an 'even' event must also go
1929 if ((mddev->events&1)==0) { 1979 * to spares. */
1930 mddev->events++; 1980 if ((mddev->events&1)==0)
1931 nospares = 0; 1981 nospares = 0;
1932 }
1933 } else { 1982 } else {
1934 /* otherwise insist on an even 'events' (for clean states) */ 1983 /* otherwise an 'odd' event must go to spares */
1935 if ((mddev->events&1)) { 1984 if ((mddev->events&1))
1936 mddev->events++;
1937 nospares = 0; 1985 nospares = 0;
1938 }
1939 } 1986 }
1940 } 1987 }
1941 1988
@@ -2657,6 +2704,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
2657 ssize_t rv = len; 2704 ssize_t rv = len;
2658 struct mdk_personality *pers; 2705 struct mdk_personality *pers;
2659 void *priv; 2706 void *priv;
2707 mdk_rdev_t *rdev;
2660 2708
2661 if (mddev->pers == NULL) { 2709 if (mddev->pers == NULL) {
2662 if (len == 0) 2710 if (len == 0)
@@ -2736,6 +2784,12 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
2736 mddev_suspend(mddev); 2784 mddev_suspend(mddev);
2737 mddev->pers->stop(mddev); 2785 mddev->pers->stop(mddev);
2738 module_put(mddev->pers->owner); 2786 module_put(mddev->pers->owner);
2787 /* Invalidate devices that are now superfluous */
2788 list_for_each_entry(rdev, &mddev->disks, same_set)
2789 if (rdev->raid_disk >= mddev->raid_disks) {
2790 rdev->raid_disk = -1;
2791 clear_bit(In_sync, &rdev->flags);
2792 }
2739 mddev->pers = pers; 2793 mddev->pers = pers;
2740 mddev->private = priv; 2794 mddev->private = priv;
2741 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); 2795 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
@@ -3545,6 +3599,7 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len)
3545 if (max < mddev->resync_min) 3599 if (max < mddev->resync_min)
3546 return -EINVAL; 3600 return -EINVAL;
3547 if (max < mddev->resync_max && 3601 if (max < mddev->resync_max &&
3602 mddev->ro == 0 &&
3548 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) 3603 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
3549 return -EBUSY; 3604 return -EBUSY;
3550 3605
@@ -3685,17 +3740,8 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len)
3685 3740
3686 mddev->array_sectors = sectors; 3741 mddev->array_sectors = sectors;
3687 set_capacity(mddev->gendisk, mddev->array_sectors); 3742 set_capacity(mddev->gendisk, mddev->array_sectors);
3688 if (mddev->pers) { 3743 if (mddev->pers)
3689 struct block_device *bdev = bdget_disk(mddev->gendisk, 0); 3744 revalidate_disk(mddev->gendisk);
3690
3691 if (bdev) {
3692 mutex_lock(&bdev->bd_inode->i_mutex);
3693 i_size_write(bdev->bd_inode,
3694 (loff_t)mddev->array_sectors << 9);
3695 mutex_unlock(&bdev->bd_inode->i_mutex);
3696 bdput(bdev);
3697 }
3698 }
3699 3745
3700 return len; 3746 return len;
3701} 3747}
@@ -4048,10 +4094,6 @@ static int do_md_run(mddev_t * mddev)
4048 } 4094 }
4049 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); 4095 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
4050 4096
4051 if (pers->level >= 4 && pers->level <= 6)
4052 /* Cannot support integrity (yet) */
4053 blk_integrity_unregister(mddev->gendisk);
4054
4055 if (mddev->reshape_position != MaxSector && 4097 if (mddev->reshape_position != MaxSector &&
4056 pers->start_reshape == NULL) { 4098 pers->start_reshape == NULL) {
4057 /* This personality cannot handle reshaping... */ 4099 /* This personality cannot handle reshaping... */
@@ -4189,6 +4231,7 @@ static int do_md_run(mddev_t * mddev)
4189 md_wakeup_thread(mddev->thread); 4231 md_wakeup_thread(mddev->thread);
4190 md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ 4232 md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
4191 4233
4234 revalidate_disk(mddev->gendisk);
4192 mddev->changed = 1; 4235 mddev->changed = 1;
4193 md_new_event(mddev); 4236 md_new_event(mddev);
4194 sysfs_notify_dirent(mddev->sysfs_state); 4237 sysfs_notify_dirent(mddev->sysfs_state);
@@ -4260,12 +4303,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4260 struct gendisk *disk = mddev->gendisk; 4303 struct gendisk *disk = mddev->gendisk;
4261 mdk_rdev_t *rdev; 4304 mdk_rdev_t *rdev;
4262 4305
4306 mutex_lock(&mddev->open_mutex);
4263 if (atomic_read(&mddev->openers) > is_open) { 4307 if (atomic_read(&mddev->openers) > is_open) {
4264 printk("md: %s still in use.\n",mdname(mddev)); 4308 printk("md: %s still in use.\n",mdname(mddev));
4265 return -EBUSY; 4309 err = -EBUSY;
4266 } 4310 } else if (mddev->pers) {
4267
4268 if (mddev->pers) {
4269 4311
4270 if (mddev->sync_thread) { 4312 if (mddev->sync_thread) {
4271 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 4313 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -4323,7 +4365,10 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4323 set_disk_ro(disk, 1); 4365 set_disk_ro(disk, 1);
4324 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 4366 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4325 } 4367 }
4326 4368out:
4369 mutex_unlock(&mddev->open_mutex);
4370 if (err)
4371 return err;
4327 /* 4372 /*
4328 * Free resources if final stop 4373 * Free resources if final stop
4329 */ 4374 */
@@ -4389,7 +4434,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4389 blk_integrity_unregister(disk); 4434 blk_integrity_unregister(disk);
4390 md_new_event(mddev); 4435 md_new_event(mddev);
4391 sysfs_notify_dirent(mddev->sysfs_state); 4436 sysfs_notify_dirent(mddev->sysfs_state);
4392out:
4393 return err; 4437 return err;
4394} 4438}
4395 4439
@@ -5087,18 +5131,8 @@ static int update_size(mddev_t *mddev, sector_t num_sectors)
5087 return -ENOSPC; 5131 return -ENOSPC;
5088 } 5132 }
5089 rv = mddev->pers->resize(mddev, num_sectors); 5133 rv = mddev->pers->resize(mddev, num_sectors);
5090 if (!rv) { 5134 if (!rv)
5091 struct block_device *bdev; 5135 revalidate_disk(mddev->gendisk);
5092
5093 bdev = bdget_disk(mddev->gendisk, 0);
5094 if (bdev) {
5095 mutex_lock(&bdev->bd_inode->i_mutex);
5096 i_size_write(bdev->bd_inode,
5097 (loff_t)mddev->array_sectors << 9);
5098 mutex_unlock(&bdev->bd_inode->i_mutex);
5099 bdput(bdev);
5100 }
5101 }
5102 return rv; 5136 return rv;
5103} 5137}
5104 5138
@@ -5484,12 +5518,12 @@ static int md_open(struct block_device *bdev, fmode_t mode)
5484 } 5518 }
5485 BUG_ON(mddev != bdev->bd_disk->private_data); 5519 BUG_ON(mddev != bdev->bd_disk->private_data);
5486 5520
5487 if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1))) 5521 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
5488 goto out; 5522 goto out;
5489 5523
5490 err = 0; 5524 err = 0;
5491 atomic_inc(&mddev->openers); 5525 atomic_inc(&mddev->openers);
5492 mddev_unlock(mddev); 5526 mutex_unlock(&mddev->open_mutex);
5493 5527
5494 check_disk_change(bdev); 5528 check_disk_change(bdev);
5495 out: 5529 out:
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 9430a110db93..f8fc188bc762 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -223,6 +223,16 @@ struct mddev_s
223 * so we don't loop trying */ 223 * so we don't loop trying */
224 224
225 int in_sync; /* know to not need resync */ 225 int in_sync; /* know to not need resync */
226 /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
227 * that we are never stopping an array while it is open.
228 * 'reconfig_mutex' protects all other reconfiguration.
229 * These locks are separate due to conflicting interactions
230 * with bdev->bd_mutex.
231 * Lock ordering is:
232 * reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk
233 * bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open
234 */
235 struct mutex open_mutex;
226 struct mutex reconfig_mutex; 236 struct mutex reconfig_mutex;
227 atomic_t active; /* general refcount */ 237 atomic_t active; /* general refcount */
228 atomic_t openers; /* number of active opens */ 238 atomic_t openers; /* number of active opens */
@@ -431,5 +441,7 @@ extern int md_allow_write(mddev_t *mddev);
431extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev); 441extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
432extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors); 442extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors);
433extern int md_check_no_bitmap(mddev_t *mddev); 443extern int md_check_no_bitmap(mddev_t *mddev);
444extern int md_integrity_register(mddev_t *mddev);
445void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
434 446
435#endif /* _MD_MD_H */ 447#endif /* _MD_MD_H */
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 237fe3fd235c..7140909f6662 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -313,6 +313,7 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
313 set_bit(In_sync, &rdev->flags); 313 set_bit(In_sync, &rdev->flags);
314 rcu_assign_pointer(p->rdev, rdev); 314 rcu_assign_pointer(p->rdev, rdev);
315 err = 0; 315 err = 0;
316 md_integrity_add_rdev(rdev, mddev);
316 break; 317 break;
317 } 318 }
318 319
@@ -345,7 +346,9 @@ static int multipath_remove_disk(mddev_t *mddev, int number)
345 /* lost the race, try later */ 346 /* lost the race, try later */
346 err = -EBUSY; 347 err = -EBUSY;
347 p->rdev = rdev; 348 p->rdev = rdev;
349 goto abort;
348 } 350 }
351 md_integrity_register(mddev);
349 } 352 }
350abort: 353abort:
351 354
@@ -519,7 +522,7 @@ static int multipath_run (mddev_t *mddev)
519 mddev->queue->unplug_fn = multipath_unplug; 522 mddev->queue->unplug_fn = multipath_unplug;
520 mddev->queue->backing_dev_info.congested_fn = multipath_congested; 523 mddev->queue->backing_dev_info.congested_fn = multipath_congested;
521 mddev->queue->backing_dev_info.congested_data = mddev; 524 mddev->queue->backing_dev_info.congested_data = mddev;
522 525 md_integrity_register(mddev);
523 return 0; 526 return 0;
524 527
525out_free_conf: 528out_free_conf:
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 335f490dcad6..898e2bdfee47 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -351,6 +351,7 @@ static int raid0_run(mddev_t *mddev)
351 351
352 blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec); 352 blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec);
353 dump_zones(mddev); 353 dump_zones(mddev);
354 md_integrity_register(mddev);
354 return 0; 355 return 0;
355} 356}
356 357
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 0569efba0c02..8726fd7ebce5 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1144,7 +1144,7 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
1144 rcu_assign_pointer(p->rdev, rdev); 1144 rcu_assign_pointer(p->rdev, rdev);
1145 break; 1145 break;
1146 } 1146 }
1147 1147 md_integrity_add_rdev(rdev, mddev);
1148 print_conf(conf); 1148 print_conf(conf);
1149 return err; 1149 return err;
1150} 1150}
@@ -1178,7 +1178,9 @@ static int raid1_remove_disk(mddev_t *mddev, int number)
1178 /* lost the race, try later */ 1178 /* lost the race, try later */
1179 err = -EBUSY; 1179 err = -EBUSY;
1180 p->rdev = rdev; 1180 p->rdev = rdev;
1181 goto abort;
1181 } 1182 }
1183 md_integrity_register(mddev);
1182 } 1184 }
1183abort: 1185abort:
1184 1186
@@ -2067,7 +2069,7 @@ static int run(mddev_t *mddev)
2067 mddev->queue->unplug_fn = raid1_unplug; 2069 mddev->queue->unplug_fn = raid1_unplug;
2068 mddev->queue->backing_dev_info.congested_fn = raid1_congested; 2070 mddev->queue->backing_dev_info.congested_fn = raid1_congested;
2069 mddev->queue->backing_dev_info.congested_data = mddev; 2071 mddev->queue->backing_dev_info.congested_data = mddev;
2070 2072 md_integrity_register(mddev);
2071 return 0; 2073 return 0;
2072 2074
2073out_no_mem: 2075out_no_mem:
@@ -2132,6 +2134,7 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors)
2132 return -EINVAL; 2134 return -EINVAL;
2133 set_capacity(mddev->gendisk, mddev->array_sectors); 2135 set_capacity(mddev->gendisk, mddev->array_sectors);
2134 mddev->changed = 1; 2136 mddev->changed = 1;
2137 revalidate_disk(mddev->gendisk);
2135 if (sectors > mddev->dev_sectors && 2138 if (sectors > mddev->dev_sectors &&
2136 mddev->recovery_cp == MaxSector) { 2139 mddev->recovery_cp == MaxSector) {
2137 mddev->recovery_cp = mddev->dev_sectors; 2140 mddev->recovery_cp = mddev->dev_sectors;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 7298a5e5a183..3d9020cf6f6e 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1170,6 +1170,7 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
1170 break; 1170 break;
1171 } 1171 }
1172 1172
1173 md_integrity_add_rdev(rdev, mddev);
1173 print_conf(conf); 1174 print_conf(conf);
1174 return err; 1175 return err;
1175} 1176}
@@ -1203,7 +1204,9 @@ static int raid10_remove_disk(mddev_t *mddev, int number)
1203 /* lost the race, try later */ 1204 /* lost the race, try later */
1204 err = -EBUSY; 1205 err = -EBUSY;
1205 p->rdev = rdev; 1206 p->rdev = rdev;
1207 goto abort;
1206 } 1208 }
1209 md_integrity_register(mddev);
1207 } 1210 }
1208abort: 1211abort:
1209 1212
@@ -2225,6 +2228,7 @@ static int run(mddev_t *mddev)
2225 2228
2226 if (conf->near_copies < mddev->raid_disks) 2229 if (conf->near_copies < mddev->raid_disks)
2227 blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); 2230 blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec);
2231 md_integrity_register(mddev);
2228 return 0; 2232 return 0;
2229 2233
2230out_free_conf: 2234out_free_conf:
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 37835538b58e..b8a2c5dc67ba 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3785,7 +3785,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
3785 conf->reshape_progress < raid5_size(mddev, 0, 0)) { 3785 conf->reshape_progress < raid5_size(mddev, 0, 0)) {
3786 sector_nr = raid5_size(mddev, 0, 0) 3786 sector_nr = raid5_size(mddev, 0, 0)
3787 - conf->reshape_progress; 3787 - conf->reshape_progress;
3788 } else if (mddev->delta_disks > 0 && 3788 } else if (mddev->delta_disks >= 0 &&
3789 conf->reshape_progress > 0) 3789 conf->reshape_progress > 0)
3790 sector_nr = conf->reshape_progress; 3790 sector_nr = conf->reshape_progress;
3791 sector_div(sector_nr, new_data_disks); 3791 sector_div(sector_nr, new_data_disks);
@@ -3999,6 +3999,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
3999 return 0; 3999 return 0;
4000 } 4000 }
4001 4001
4002 /* Allow raid5_quiesce to complete */
4003 wait_event(conf->wait_for_overlap, conf->quiesce != 2);
4004
4002 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) 4005 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4003 return reshape_request(mddev, sector_nr, skipped); 4006 return reshape_request(mddev, sector_nr, skipped);
4004 4007
@@ -4316,6 +4319,15 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks)
4316 return sectors * (raid_disks - conf->max_degraded); 4319 return sectors * (raid_disks - conf->max_degraded);
4317} 4320}
4318 4321
4322static void free_conf(raid5_conf_t *conf)
4323{
4324 shrink_stripes(conf);
4325 safe_put_page(conf->spare_page);
4326 kfree(conf->disks);
4327 kfree(conf->stripe_hashtbl);
4328 kfree(conf);
4329}
4330
4319static raid5_conf_t *setup_conf(mddev_t *mddev) 4331static raid5_conf_t *setup_conf(mddev_t *mddev)
4320{ 4332{
4321 raid5_conf_t *conf; 4333 raid5_conf_t *conf;
@@ -4447,11 +4459,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
4447 4459
4448 abort: 4460 abort:
4449 if (conf) { 4461 if (conf) {
4450 shrink_stripes(conf); 4462 free_conf(conf);
4451 safe_put_page(conf->spare_page);
4452 kfree(conf->disks);
4453 kfree(conf->stripe_hashtbl);
4454 kfree(conf);
4455 return ERR_PTR(-EIO); 4463 return ERR_PTR(-EIO);
4456 } else 4464 } else
4457 return ERR_PTR(-ENOMEM); 4465 return ERR_PTR(-ENOMEM);
@@ -4501,7 +4509,26 @@ static int run(mddev_t *mddev)
4501 (old_disks-max_degraded)); 4509 (old_disks-max_degraded));
4502 /* here_old is the first stripe that we might need to read 4510 /* here_old is the first stripe that we might need to read
4503 * from */ 4511 * from */
4504 if (here_new >= here_old) { 4512 if (mddev->delta_disks == 0) {
4513 /* We cannot be sure it is safe to start an in-place
4514 * reshape. It is only safe if user-space if monitoring
4515 * and taking constant backups.
4516 * mdadm always starts a situation like this in
4517 * readonly mode so it can take control before
4518 * allowing any writes. So just check for that.
4519 */
4520 if ((here_new * mddev->new_chunk_sectors !=
4521 here_old * mddev->chunk_sectors) ||
4522 mddev->ro == 0) {
4523 printk(KERN_ERR "raid5: in-place reshape must be started"
4524 " in read-only mode - aborting\n");
4525 return -EINVAL;
4526 }
4527 } else if (mddev->delta_disks < 0
4528 ? (here_new * mddev->new_chunk_sectors <=
4529 here_old * mddev->chunk_sectors)
4530 : (here_new * mddev->new_chunk_sectors >=
4531 here_old * mddev->chunk_sectors)) {
4505 /* Reading from the same stripe as writing to - bad */ 4532 /* Reading from the same stripe as writing to - bad */
4506 printk(KERN_ERR "raid5: reshape_position too early for " 4533 printk(KERN_ERR "raid5: reshape_position too early for "
4507 "auto-recovery - aborting.\n"); 4534 "auto-recovery - aborting.\n");
@@ -4629,12 +4656,8 @@ abort:
4629 md_unregister_thread(mddev->thread); 4656 md_unregister_thread(mddev->thread);
4630 mddev->thread = NULL; 4657 mddev->thread = NULL;
4631 if (conf) { 4658 if (conf) {
4632 shrink_stripes(conf);
4633 print_raid5_conf(conf); 4659 print_raid5_conf(conf);
4634 safe_put_page(conf->spare_page); 4660 free_conf(conf);
4635 kfree(conf->disks);
4636 kfree(conf->stripe_hashtbl);
4637 kfree(conf);
4638 } 4661 }
4639 mddev->private = NULL; 4662 mddev->private = NULL;
4640 printk(KERN_ALERT "raid5: failed to run raid set %s\n", mdname(mddev)); 4663 printk(KERN_ALERT "raid5: failed to run raid set %s\n", mdname(mddev));
@@ -4649,13 +4672,10 @@ static int stop(mddev_t *mddev)
4649 4672
4650 md_unregister_thread(mddev->thread); 4673 md_unregister_thread(mddev->thread);
4651 mddev->thread = NULL; 4674 mddev->thread = NULL;
4652 shrink_stripes(conf);
4653 kfree(conf->stripe_hashtbl);
4654 mddev->queue->backing_dev_info.congested_fn = NULL; 4675 mddev->queue->backing_dev_info.congested_fn = NULL;
4655 blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ 4676 blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
4656 sysfs_remove_group(&mddev->kobj, &raid5_attrs_group); 4677 sysfs_remove_group(&mddev->kobj, &raid5_attrs_group);
4657 kfree(conf->disks); 4678 free_conf(conf);
4658 kfree(conf);
4659 mddev->private = NULL; 4679 mddev->private = NULL;
4660 return 0; 4680 return 0;
4661} 4681}
@@ -4857,6 +4877,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
4857 return -EINVAL; 4877 return -EINVAL;
4858 set_capacity(mddev->gendisk, mddev->array_sectors); 4878 set_capacity(mddev->gendisk, mddev->array_sectors);
4859 mddev->changed = 1; 4879 mddev->changed = 1;
4880 revalidate_disk(mddev->gendisk);
4860 if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) { 4881 if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) {
4861 mddev->recovery_cp = mddev->dev_sectors; 4882 mddev->recovery_cp = mddev->dev_sectors;
4862 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 4883 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -5002,7 +5023,7 @@ static int raid5_start_reshape(mddev_t *mddev)
5002 spin_unlock_irqrestore(&conf->device_lock, flags); 5023 spin_unlock_irqrestore(&conf->device_lock, flags);
5003 } 5024 }
5004 mddev->raid_disks = conf->raid_disks; 5025 mddev->raid_disks = conf->raid_disks;
5005 mddev->reshape_position = 0; 5026 mddev->reshape_position = conf->reshape_progress;
5006 set_bit(MD_CHANGE_DEVS, &mddev->flags); 5027 set_bit(MD_CHANGE_DEVS, &mddev->flags);
5007 5028
5008 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); 5029 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
@@ -5057,7 +5078,6 @@ static void end_reshape(raid5_conf_t *conf)
5057 */ 5078 */
5058static void raid5_finish_reshape(mddev_t *mddev) 5079static void raid5_finish_reshape(mddev_t *mddev)
5059{ 5080{
5060 struct block_device *bdev;
5061 raid5_conf_t *conf = mddev->private; 5081 raid5_conf_t *conf = mddev->private;
5062 5082
5063 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { 5083 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
@@ -5066,15 +5086,7 @@ static void raid5_finish_reshape(mddev_t *mddev)
5066 md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); 5086 md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
5067 set_capacity(mddev->gendisk, mddev->array_sectors); 5087 set_capacity(mddev->gendisk, mddev->array_sectors);
5068 mddev->changed = 1; 5088 mddev->changed = 1;
5069 5089 revalidate_disk(mddev->gendisk);
5070 bdev = bdget_disk(mddev->gendisk, 0);
5071 if (bdev) {
5072 mutex_lock(&bdev->bd_inode->i_mutex);
5073 i_size_write(bdev->bd_inode,
5074 (loff_t)mddev->array_sectors << 9);
5075 mutex_unlock(&bdev->bd_inode->i_mutex);
5076 bdput(bdev);
5077 }
5078 } else { 5090 } else {
5079 int d; 5091 int d;
5080 mddev->degraded = conf->raid_disks; 5092 mddev->degraded = conf->raid_disks;
@@ -5085,8 +5097,15 @@ static void raid5_finish_reshape(mddev_t *mddev)
5085 mddev->degraded--; 5097 mddev->degraded--;
5086 for (d = conf->raid_disks ; 5098 for (d = conf->raid_disks ;
5087 d < conf->raid_disks - mddev->delta_disks; 5099 d < conf->raid_disks - mddev->delta_disks;
5088 d++) 5100 d++) {
5089 raid5_remove_disk(mddev, d); 5101 mdk_rdev_t *rdev = conf->disks[d].rdev;
5102 if (rdev && raid5_remove_disk(mddev, d) == 0) {
5103 char nm[20];
5104 sprintf(nm, "rd%d", rdev->raid_disk);
5105 sysfs_remove_link(&mddev->kobj, nm);
5106 rdev->raid_disk = -1;
5107 }
5108 }
5090 } 5109 }
5091 mddev->layout = conf->algorithm; 5110 mddev->layout = conf->algorithm;
5092 mddev->chunk_sectors = conf->chunk_sectors; 5111 mddev->chunk_sectors = conf->chunk_sectors;
@@ -5106,12 +5125,18 @@ static void raid5_quiesce(mddev_t *mddev, int state)
5106 5125
5107 case 1: /* stop all writes */ 5126 case 1: /* stop all writes */
5108 spin_lock_irq(&conf->device_lock); 5127 spin_lock_irq(&conf->device_lock);
5109 conf->quiesce = 1; 5128 /* '2' tells resync/reshape to pause so that all
5129 * active stripes can drain
5130 */
5131 conf->quiesce = 2;
5110 wait_event_lock_irq(conf->wait_for_stripe, 5132 wait_event_lock_irq(conf->wait_for_stripe,
5111 atomic_read(&conf->active_stripes) == 0 && 5133 atomic_read(&conf->active_stripes) == 0 &&
5112 atomic_read(&conf->active_aligned_reads) == 0, 5134 atomic_read(&conf->active_aligned_reads) == 0,
5113 conf->device_lock, /* nothing */); 5135 conf->device_lock, /* nothing */);
5136 conf->quiesce = 1;
5114 spin_unlock_irq(&conf->device_lock); 5137 spin_unlock_irq(&conf->device_lock);
5138 /* allow reshape to continue */
5139 wake_up(&conf->wait_for_overlap);
5115 break; 5140 break;
5116 5141
5117 case 0: /* re-enable writes */ 5142 case 0: /* re-enable writes */