diff options
author | Paul Mundt <lethal@linux-sh.org> | 2009-08-15 00:00:02 -0400 |
---|---|---|
committer | Paul Mundt <lethal@linux-sh.org> | 2009-08-15 00:00:02 -0400 |
commit | 4b6b987969b076298485697bfb0d0e35502642a3 (patch) | |
tree | a8f5ebd6a0b9efbe30272012d759669b0c5ddc13 /drivers/md | |
parent | df47cd096c8f54a5242e3a2ffb4525c804567eda (diff) | |
parent | 60e0a4c7adc700f2d2929cdb2d0055e519a3eb3d (diff) |
Merge branch 'master' into sh/hwblk
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/linear.c | 2 | ||||
-rw-r--r-- | drivers/md/md.c | 176 | ||||
-rw-r--r-- | drivers/md/md.h | 12 | ||||
-rw-r--r-- | drivers/md/multipath.c | 5 | ||||
-rw-r--r-- | drivers/md/raid0.c | 1 | ||||
-rw-r--r-- | drivers/md/raid1.c | 7 | ||||
-rw-r--r-- | drivers/md/raid10.c | 4 | ||||
-rw-r--r-- | drivers/md/raid5.c | 85 |
8 files changed, 188 insertions, 104 deletions
diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 5810fa906af0..5fe39c2a3d2b 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c | |||
@@ -220,6 +220,7 @@ static int linear_run (mddev_t *mddev) | |||
220 | mddev->queue->unplug_fn = linear_unplug; | 220 | mddev->queue->unplug_fn = linear_unplug; |
221 | mddev->queue->backing_dev_info.congested_fn = linear_congested; | 221 | mddev->queue->backing_dev_info.congested_fn = linear_congested; |
222 | mddev->queue->backing_dev_info.congested_data = mddev; | 222 | mddev->queue->backing_dev_info.congested_data = mddev; |
223 | md_integrity_register(mddev); | ||
223 | return 0; | 224 | return 0; |
224 | } | 225 | } |
225 | 226 | ||
@@ -256,6 +257,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev) | |||
256 | rcu_assign_pointer(mddev->private, newconf); | 257 | rcu_assign_pointer(mddev->private, newconf); |
257 | md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); | 258 | md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); |
258 | set_capacity(mddev->gendisk, mddev->array_sectors); | 259 | set_capacity(mddev->gendisk, mddev->array_sectors); |
260 | revalidate_disk(mddev->gendisk); | ||
259 | call_rcu(&oldconf->rcu, free_conf); | 261 | call_rcu(&oldconf->rcu, free_conf); |
260 | return 0; | 262 | return 0; |
261 | } | 263 | } |
diff --git a/drivers/md/md.c b/drivers/md/md.c index d4351ff0849f..103f2d33fa89 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit) | |||
359 | else | 359 | else |
360 | new->md_minor = MINOR(unit) >> MdpMinorShift; | 360 | new->md_minor = MINOR(unit) >> MdpMinorShift; |
361 | 361 | ||
362 | mutex_init(&new->open_mutex); | ||
362 | mutex_init(&new->reconfig_mutex); | 363 | mutex_init(&new->reconfig_mutex); |
363 | INIT_LIST_HEAD(&new->disks); | 364 | INIT_LIST_HEAD(&new->disks); |
364 | INIT_LIST_HEAD(&new->all_mddevs); | 365 | INIT_LIST_HEAD(&new->all_mddevs); |
@@ -1308,7 +1309,12 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1308 | } | 1309 | } |
1309 | if (mddev->level != LEVEL_MULTIPATH) { | 1310 | if (mddev->level != LEVEL_MULTIPATH) { |
1310 | int role; | 1311 | int role; |
1311 | role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); | 1312 | if (rdev->desc_nr < 0 || |
1313 | rdev->desc_nr >= le32_to_cpu(sb->max_dev)) { | ||
1314 | role = 0xffff; | ||
1315 | rdev->desc_nr = -1; | ||
1316 | } else | ||
1317 | role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); | ||
1312 | switch(role) { | 1318 | switch(role) { |
1313 | case 0xffff: /* spare */ | 1319 | case 0xffff: /* spare */ |
1314 | break; | 1320 | break; |
@@ -1394,8 +1400,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1394 | if (rdev2->desc_nr+1 > max_dev) | 1400 | if (rdev2->desc_nr+1 > max_dev) |
1395 | max_dev = rdev2->desc_nr+1; | 1401 | max_dev = rdev2->desc_nr+1; |
1396 | 1402 | ||
1397 | if (max_dev > le32_to_cpu(sb->max_dev)) | 1403 | if (max_dev > le32_to_cpu(sb->max_dev)) { |
1404 | int bmask; | ||
1398 | sb->max_dev = cpu_to_le32(max_dev); | 1405 | sb->max_dev = cpu_to_le32(max_dev); |
1406 | rdev->sb_size = max_dev * 2 + 256; | ||
1407 | bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1; | ||
1408 | if (rdev->sb_size & bmask) | ||
1409 | rdev->sb_size = (rdev->sb_size | bmask) + 1; | ||
1410 | } | ||
1399 | for (i=0; i<max_dev;i++) | 1411 | for (i=0; i<max_dev;i++) |
1400 | sb->dev_roles[i] = cpu_to_le16(0xfffe); | 1412 | sb->dev_roles[i] = cpu_to_le16(0xfffe); |
1401 | 1413 | ||
@@ -1487,37 +1499,76 @@ static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2) | |||
1487 | 1499 | ||
1488 | static LIST_HEAD(pending_raid_disks); | 1500 | static LIST_HEAD(pending_raid_disks); |
1489 | 1501 | ||
1490 | static void md_integrity_check(mdk_rdev_t *rdev, mddev_t *mddev) | 1502 | /* |
1503 | * Try to register data integrity profile for an mddev | ||
1504 | * | ||
1505 | * This is called when an array is started and after a disk has been kicked | ||
1506 | * from the array. It only succeeds if all working and active component devices | ||
1507 | * are integrity capable with matching profiles. | ||
1508 | */ | ||
1509 | int md_integrity_register(mddev_t *mddev) | ||
1510 | { | ||
1511 | mdk_rdev_t *rdev, *reference = NULL; | ||
1512 | |||
1513 | if (list_empty(&mddev->disks)) | ||
1514 | return 0; /* nothing to do */ | ||
1515 | if (blk_get_integrity(mddev->gendisk)) | ||
1516 | return 0; /* already registered */ | ||
1517 | list_for_each_entry(rdev, &mddev->disks, same_set) { | ||
1518 | /* skip spares and non-functional disks */ | ||
1519 | if (test_bit(Faulty, &rdev->flags)) | ||
1520 | continue; | ||
1521 | if (rdev->raid_disk < 0) | ||
1522 | continue; | ||
1523 | /* | ||
1524 | * If at least one rdev is not integrity capable, we can not | ||
1525 | * enable data integrity for the md device. | ||
1526 | */ | ||
1527 | if (!bdev_get_integrity(rdev->bdev)) | ||
1528 | return -EINVAL; | ||
1529 | if (!reference) { | ||
1530 | /* Use the first rdev as the reference */ | ||
1531 | reference = rdev; | ||
1532 | continue; | ||
1533 | } | ||
1534 | /* does this rdev's profile match the reference profile? */ | ||
1535 | if (blk_integrity_compare(reference->bdev->bd_disk, | ||
1536 | rdev->bdev->bd_disk) < 0) | ||
1537 | return -EINVAL; | ||
1538 | } | ||
1539 | /* | ||
1540 | * All component devices are integrity capable and have matching | ||
1541 | * profiles, register the common profile for the md device. | ||
1542 | */ | ||
1543 | if (blk_integrity_register(mddev->gendisk, | ||
1544 | bdev_get_integrity(reference->bdev)) != 0) { | ||
1545 | printk(KERN_ERR "md: failed to register integrity for %s\n", | ||
1546 | mdname(mddev)); | ||
1547 | return -EINVAL; | ||
1548 | } | ||
1549 | printk(KERN_NOTICE "md: data integrity on %s enabled\n", | ||
1550 | mdname(mddev)); | ||
1551 | return 0; | ||
1552 | } | ||
1553 | EXPORT_SYMBOL(md_integrity_register); | ||
1554 | |||
1555 | /* Disable data integrity if non-capable/non-matching disk is being added */ | ||
1556 | void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev) | ||
1491 | { | 1557 | { |
1492 | struct mdk_personality *pers = mddev->pers; | ||
1493 | struct gendisk *disk = mddev->gendisk; | ||
1494 | struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev); | 1558 | struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev); |
1495 | struct blk_integrity *bi_mddev = blk_get_integrity(disk); | 1559 | struct blk_integrity *bi_mddev = blk_get_integrity(mddev->gendisk); |
1496 | 1560 | ||
1497 | /* Data integrity passthrough not supported on RAID 4, 5 and 6 */ | 1561 | if (!bi_mddev) /* nothing to do */ |
1498 | if (pers && pers->level >= 4 && pers->level <= 6) | ||
1499 | return; | 1562 | return; |
1500 | 1563 | if (rdev->raid_disk < 0) /* skip spares */ | |
1501 | /* If rdev is integrity capable, register profile for mddev */ | ||
1502 | if (!bi_mddev && bi_rdev) { | ||
1503 | if (blk_integrity_register(disk, bi_rdev)) | ||
1504 | printk(KERN_ERR "%s: %s Could not register integrity!\n", | ||
1505 | __func__, disk->disk_name); | ||
1506 | else | ||
1507 | printk(KERN_NOTICE "Enabling data integrity on %s\n", | ||
1508 | disk->disk_name); | ||
1509 | return; | 1564 | return; |
1510 | } | 1565 | if (bi_rdev && blk_integrity_compare(mddev->gendisk, |
1511 | 1566 | rdev->bdev->bd_disk) >= 0) | |
1512 | /* Check that mddev and rdev have matching profiles */ | 1567 | return; |
1513 | if (blk_integrity_compare(disk, rdev->bdev->bd_disk) < 0) { | 1568 | printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev)); |
1514 | printk(KERN_ERR "%s: %s/%s integrity mismatch!\n", __func__, | 1569 | blk_integrity_unregister(mddev->gendisk); |
1515 | disk->disk_name, rdev->bdev->bd_disk->disk_name); | ||
1516 | printk(KERN_NOTICE "Disabling data integrity on %s\n", | ||
1517 | disk->disk_name); | ||
1518 | blk_integrity_unregister(disk); | ||
1519 | } | ||
1520 | } | 1570 | } |
1571 | EXPORT_SYMBOL(md_integrity_add_rdev); | ||
1521 | 1572 | ||
1522 | static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) | 1573 | static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) |
1523 | { | 1574 | { |
@@ -1591,7 +1642,6 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) | |||
1591 | /* May as well allow recovery to be retried once */ | 1642 | /* May as well allow recovery to be retried once */ |
1592 | mddev->recovery_disabled = 0; | 1643 | mddev->recovery_disabled = 0; |
1593 | 1644 | ||
1594 | md_integrity_check(rdev, mddev); | ||
1595 | return 0; | 1645 | return 0; |
1596 | 1646 | ||
1597 | fail: | 1647 | fail: |
@@ -1925,17 +1975,14 @@ repeat: | |||
1925 | /* otherwise we have to go forward and ... */ | 1975 | /* otherwise we have to go forward and ... */ |
1926 | mddev->events ++; | 1976 | mddev->events ++; |
1927 | if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */ | 1977 | if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */ |
1928 | /* .. if the array isn't clean, insist on an odd 'events' */ | 1978 | /* .. if the array isn't clean, an 'even' event must also go |
1929 | if ((mddev->events&1)==0) { | 1979 | * to spares. */ |
1930 | mddev->events++; | 1980 | if ((mddev->events&1)==0) |
1931 | nospares = 0; | 1981 | nospares = 0; |
1932 | } | ||
1933 | } else { | 1982 | } else { |
1934 | /* otherwise insist on an even 'events' (for clean states) */ | 1983 | /* otherwise an 'odd' event must go to spares */ |
1935 | if ((mddev->events&1)) { | 1984 | if ((mddev->events&1)) |
1936 | mddev->events++; | ||
1937 | nospares = 0; | 1985 | nospares = 0; |
1938 | } | ||
1939 | } | 1986 | } |
1940 | } | 1987 | } |
1941 | 1988 | ||
@@ -2657,6 +2704,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len) | |||
2657 | ssize_t rv = len; | 2704 | ssize_t rv = len; |
2658 | struct mdk_personality *pers; | 2705 | struct mdk_personality *pers; |
2659 | void *priv; | 2706 | void *priv; |
2707 | mdk_rdev_t *rdev; | ||
2660 | 2708 | ||
2661 | if (mddev->pers == NULL) { | 2709 | if (mddev->pers == NULL) { |
2662 | if (len == 0) | 2710 | if (len == 0) |
@@ -2736,6 +2784,12 @@ level_store(mddev_t *mddev, const char *buf, size_t len) | |||
2736 | mddev_suspend(mddev); | 2784 | mddev_suspend(mddev); |
2737 | mddev->pers->stop(mddev); | 2785 | mddev->pers->stop(mddev); |
2738 | module_put(mddev->pers->owner); | 2786 | module_put(mddev->pers->owner); |
2787 | /* Invalidate devices that are now superfluous */ | ||
2788 | list_for_each_entry(rdev, &mddev->disks, same_set) | ||
2789 | if (rdev->raid_disk >= mddev->raid_disks) { | ||
2790 | rdev->raid_disk = -1; | ||
2791 | clear_bit(In_sync, &rdev->flags); | ||
2792 | } | ||
2739 | mddev->pers = pers; | 2793 | mddev->pers = pers; |
2740 | mddev->private = priv; | 2794 | mddev->private = priv; |
2741 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); | 2795 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); |
@@ -3545,6 +3599,7 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len) | |||
3545 | if (max < mddev->resync_min) | 3599 | if (max < mddev->resync_min) |
3546 | return -EINVAL; | 3600 | return -EINVAL; |
3547 | if (max < mddev->resync_max && | 3601 | if (max < mddev->resync_max && |
3602 | mddev->ro == 0 && | ||
3548 | test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) | 3603 | test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) |
3549 | return -EBUSY; | 3604 | return -EBUSY; |
3550 | 3605 | ||
@@ -3685,17 +3740,8 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len) | |||
3685 | 3740 | ||
3686 | mddev->array_sectors = sectors; | 3741 | mddev->array_sectors = sectors; |
3687 | set_capacity(mddev->gendisk, mddev->array_sectors); | 3742 | set_capacity(mddev->gendisk, mddev->array_sectors); |
3688 | if (mddev->pers) { | 3743 | if (mddev->pers) |
3689 | struct block_device *bdev = bdget_disk(mddev->gendisk, 0); | 3744 | revalidate_disk(mddev->gendisk); |
3690 | |||
3691 | if (bdev) { | ||
3692 | mutex_lock(&bdev->bd_inode->i_mutex); | ||
3693 | i_size_write(bdev->bd_inode, | ||
3694 | (loff_t)mddev->array_sectors << 9); | ||
3695 | mutex_unlock(&bdev->bd_inode->i_mutex); | ||
3696 | bdput(bdev); | ||
3697 | } | ||
3698 | } | ||
3699 | 3745 | ||
3700 | return len; | 3746 | return len; |
3701 | } | 3747 | } |
@@ -4048,10 +4094,6 @@ static int do_md_run(mddev_t * mddev) | |||
4048 | } | 4094 | } |
4049 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); | 4095 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); |
4050 | 4096 | ||
4051 | if (pers->level >= 4 && pers->level <= 6) | ||
4052 | /* Cannot support integrity (yet) */ | ||
4053 | blk_integrity_unregister(mddev->gendisk); | ||
4054 | |||
4055 | if (mddev->reshape_position != MaxSector && | 4097 | if (mddev->reshape_position != MaxSector && |
4056 | pers->start_reshape == NULL) { | 4098 | pers->start_reshape == NULL) { |
4057 | /* This personality cannot handle reshaping... */ | 4099 | /* This personality cannot handle reshaping... */ |
@@ -4189,6 +4231,7 @@ static int do_md_run(mddev_t * mddev) | |||
4189 | md_wakeup_thread(mddev->thread); | 4231 | md_wakeup_thread(mddev->thread); |
4190 | md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ | 4232 | md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ |
4191 | 4233 | ||
4234 | revalidate_disk(mddev->gendisk); | ||
4192 | mddev->changed = 1; | 4235 | mddev->changed = 1; |
4193 | md_new_event(mddev); | 4236 | md_new_event(mddev); |
4194 | sysfs_notify_dirent(mddev->sysfs_state); | 4237 | sysfs_notify_dirent(mddev->sysfs_state); |
@@ -4260,12 +4303,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
4260 | struct gendisk *disk = mddev->gendisk; | 4303 | struct gendisk *disk = mddev->gendisk; |
4261 | mdk_rdev_t *rdev; | 4304 | mdk_rdev_t *rdev; |
4262 | 4305 | ||
4306 | mutex_lock(&mddev->open_mutex); | ||
4263 | if (atomic_read(&mddev->openers) > is_open) { | 4307 | if (atomic_read(&mddev->openers) > is_open) { |
4264 | printk("md: %s still in use.\n",mdname(mddev)); | 4308 | printk("md: %s still in use.\n",mdname(mddev)); |
4265 | return -EBUSY; | 4309 | err = -EBUSY; |
4266 | } | 4310 | } else if (mddev->pers) { |
4267 | |||
4268 | if (mddev->pers) { | ||
4269 | 4311 | ||
4270 | if (mddev->sync_thread) { | 4312 | if (mddev->sync_thread) { |
4271 | set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | 4313 | set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); |
@@ -4323,7 +4365,10 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
4323 | set_disk_ro(disk, 1); | 4365 | set_disk_ro(disk, 1); |
4324 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | 4366 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); |
4325 | } | 4367 | } |
4326 | 4368 | out: | |
4369 | mutex_unlock(&mddev->open_mutex); | ||
4370 | if (err) | ||
4371 | return err; | ||
4327 | /* | 4372 | /* |
4328 | * Free resources if final stop | 4373 | * Free resources if final stop |
4329 | */ | 4374 | */ |
@@ -4389,7 +4434,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
4389 | blk_integrity_unregister(disk); | 4434 | blk_integrity_unregister(disk); |
4390 | md_new_event(mddev); | 4435 | md_new_event(mddev); |
4391 | sysfs_notify_dirent(mddev->sysfs_state); | 4436 | sysfs_notify_dirent(mddev->sysfs_state); |
4392 | out: | ||
4393 | return err; | 4437 | return err; |
4394 | } | 4438 | } |
4395 | 4439 | ||
@@ -5087,18 +5131,8 @@ static int update_size(mddev_t *mddev, sector_t num_sectors) | |||
5087 | return -ENOSPC; | 5131 | return -ENOSPC; |
5088 | } | 5132 | } |
5089 | rv = mddev->pers->resize(mddev, num_sectors); | 5133 | rv = mddev->pers->resize(mddev, num_sectors); |
5090 | if (!rv) { | 5134 | if (!rv) |
5091 | struct block_device *bdev; | 5135 | revalidate_disk(mddev->gendisk); |
5092 | |||
5093 | bdev = bdget_disk(mddev->gendisk, 0); | ||
5094 | if (bdev) { | ||
5095 | mutex_lock(&bdev->bd_inode->i_mutex); | ||
5096 | i_size_write(bdev->bd_inode, | ||
5097 | (loff_t)mddev->array_sectors << 9); | ||
5098 | mutex_unlock(&bdev->bd_inode->i_mutex); | ||
5099 | bdput(bdev); | ||
5100 | } | ||
5101 | } | ||
5102 | return rv; | 5136 | return rv; |
5103 | } | 5137 | } |
5104 | 5138 | ||
@@ -5484,12 +5518,12 @@ static int md_open(struct block_device *bdev, fmode_t mode) | |||
5484 | } | 5518 | } |
5485 | BUG_ON(mddev != bdev->bd_disk->private_data); | 5519 | BUG_ON(mddev != bdev->bd_disk->private_data); |
5486 | 5520 | ||
5487 | if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1))) | 5521 | if ((err = mutex_lock_interruptible(&mddev->open_mutex))) |
5488 | goto out; | 5522 | goto out; |
5489 | 5523 | ||
5490 | err = 0; | 5524 | err = 0; |
5491 | atomic_inc(&mddev->openers); | 5525 | atomic_inc(&mddev->openers); |
5492 | mddev_unlock(mddev); | 5526 | mutex_unlock(&mddev->open_mutex); |
5493 | 5527 | ||
5494 | check_disk_change(bdev); | 5528 | check_disk_change(bdev); |
5495 | out: | 5529 | out: |
diff --git a/drivers/md/md.h b/drivers/md/md.h index 9430a110db93..f8fc188bc762 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
@@ -223,6 +223,16 @@ struct mddev_s | |||
223 | * so we don't loop trying */ | 223 | * so we don't loop trying */ |
224 | 224 | ||
225 | int in_sync; /* know to not need resync */ | 225 | int in_sync; /* know to not need resync */ |
226 | /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so | ||
227 | * that we are never stopping an array while it is open. | ||
228 | * 'reconfig_mutex' protects all other reconfiguration. | ||
229 | * These locks are separate due to conflicting interactions | ||
230 | * with bdev->bd_mutex. | ||
231 | * Lock ordering is: | ||
232 | * reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk | ||
233 | * bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open | ||
234 | */ | ||
235 | struct mutex open_mutex; | ||
226 | struct mutex reconfig_mutex; | 236 | struct mutex reconfig_mutex; |
227 | atomic_t active; /* general refcount */ | 237 | atomic_t active; /* general refcount */ |
228 | atomic_t openers; /* number of active opens */ | 238 | atomic_t openers; /* number of active opens */ |
@@ -431,5 +441,7 @@ extern int md_allow_write(mddev_t *mddev); | |||
431 | extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev); | 441 | extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev); |
432 | extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors); | 442 | extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors); |
433 | extern int md_check_no_bitmap(mddev_t *mddev); | 443 | extern int md_check_no_bitmap(mddev_t *mddev); |
444 | extern int md_integrity_register(mddev_t *mddev); | ||
445 | void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev); | ||
434 | 446 | ||
435 | #endif /* _MD_MD_H */ | 447 | #endif /* _MD_MD_H */ |
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 237fe3fd235c..7140909f6662 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c | |||
@@ -313,6 +313,7 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
313 | set_bit(In_sync, &rdev->flags); | 313 | set_bit(In_sync, &rdev->flags); |
314 | rcu_assign_pointer(p->rdev, rdev); | 314 | rcu_assign_pointer(p->rdev, rdev); |
315 | err = 0; | 315 | err = 0; |
316 | md_integrity_add_rdev(rdev, mddev); | ||
316 | break; | 317 | break; |
317 | } | 318 | } |
318 | 319 | ||
@@ -345,7 +346,9 @@ static int multipath_remove_disk(mddev_t *mddev, int number) | |||
345 | /* lost the race, try later */ | 346 | /* lost the race, try later */ |
346 | err = -EBUSY; | 347 | err = -EBUSY; |
347 | p->rdev = rdev; | 348 | p->rdev = rdev; |
349 | goto abort; | ||
348 | } | 350 | } |
351 | md_integrity_register(mddev); | ||
349 | } | 352 | } |
350 | abort: | 353 | abort: |
351 | 354 | ||
@@ -519,7 +522,7 @@ static int multipath_run (mddev_t *mddev) | |||
519 | mddev->queue->unplug_fn = multipath_unplug; | 522 | mddev->queue->unplug_fn = multipath_unplug; |
520 | mddev->queue->backing_dev_info.congested_fn = multipath_congested; | 523 | mddev->queue->backing_dev_info.congested_fn = multipath_congested; |
521 | mddev->queue->backing_dev_info.congested_data = mddev; | 524 | mddev->queue->backing_dev_info.congested_data = mddev; |
522 | 525 | md_integrity_register(mddev); | |
523 | return 0; | 526 | return 0; |
524 | 527 | ||
525 | out_free_conf: | 528 | out_free_conf: |
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 335f490dcad6..898e2bdfee47 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c | |||
@@ -351,6 +351,7 @@ static int raid0_run(mddev_t *mddev) | |||
351 | 351 | ||
352 | blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec); | 352 | blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec); |
353 | dump_zones(mddev); | 353 | dump_zones(mddev); |
354 | md_integrity_register(mddev); | ||
354 | return 0; | 355 | return 0; |
355 | } | 356 | } |
356 | 357 | ||
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 0569efba0c02..8726fd7ebce5 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -1144,7 +1144,7 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1144 | rcu_assign_pointer(p->rdev, rdev); | 1144 | rcu_assign_pointer(p->rdev, rdev); |
1145 | break; | 1145 | break; |
1146 | } | 1146 | } |
1147 | 1147 | md_integrity_add_rdev(rdev, mddev); | |
1148 | print_conf(conf); | 1148 | print_conf(conf); |
1149 | return err; | 1149 | return err; |
1150 | } | 1150 | } |
@@ -1178,7 +1178,9 @@ static int raid1_remove_disk(mddev_t *mddev, int number) | |||
1178 | /* lost the race, try later */ | 1178 | /* lost the race, try later */ |
1179 | err = -EBUSY; | 1179 | err = -EBUSY; |
1180 | p->rdev = rdev; | 1180 | p->rdev = rdev; |
1181 | goto abort; | ||
1181 | } | 1182 | } |
1183 | md_integrity_register(mddev); | ||
1182 | } | 1184 | } |
1183 | abort: | 1185 | abort: |
1184 | 1186 | ||
@@ -2067,7 +2069,7 @@ static int run(mddev_t *mddev) | |||
2067 | mddev->queue->unplug_fn = raid1_unplug; | 2069 | mddev->queue->unplug_fn = raid1_unplug; |
2068 | mddev->queue->backing_dev_info.congested_fn = raid1_congested; | 2070 | mddev->queue->backing_dev_info.congested_fn = raid1_congested; |
2069 | mddev->queue->backing_dev_info.congested_data = mddev; | 2071 | mddev->queue->backing_dev_info.congested_data = mddev; |
2070 | 2072 | md_integrity_register(mddev); | |
2071 | return 0; | 2073 | return 0; |
2072 | 2074 | ||
2073 | out_no_mem: | 2075 | out_no_mem: |
@@ -2132,6 +2134,7 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors) | |||
2132 | return -EINVAL; | 2134 | return -EINVAL; |
2133 | set_capacity(mddev->gendisk, mddev->array_sectors); | 2135 | set_capacity(mddev->gendisk, mddev->array_sectors); |
2134 | mddev->changed = 1; | 2136 | mddev->changed = 1; |
2137 | revalidate_disk(mddev->gendisk); | ||
2135 | if (sectors > mddev->dev_sectors && | 2138 | if (sectors > mddev->dev_sectors && |
2136 | mddev->recovery_cp == MaxSector) { | 2139 | mddev->recovery_cp == MaxSector) { |
2137 | mddev->recovery_cp = mddev->dev_sectors; | 2140 | mddev->recovery_cp = mddev->dev_sectors; |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 7298a5e5a183..3d9020cf6f6e 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -1170,6 +1170,7 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1170 | break; | 1170 | break; |
1171 | } | 1171 | } |
1172 | 1172 | ||
1173 | md_integrity_add_rdev(rdev, mddev); | ||
1173 | print_conf(conf); | 1174 | print_conf(conf); |
1174 | return err; | 1175 | return err; |
1175 | } | 1176 | } |
@@ -1203,7 +1204,9 @@ static int raid10_remove_disk(mddev_t *mddev, int number) | |||
1203 | /* lost the race, try later */ | 1204 | /* lost the race, try later */ |
1204 | err = -EBUSY; | 1205 | err = -EBUSY; |
1205 | p->rdev = rdev; | 1206 | p->rdev = rdev; |
1207 | goto abort; | ||
1206 | } | 1208 | } |
1209 | md_integrity_register(mddev); | ||
1207 | } | 1210 | } |
1208 | abort: | 1211 | abort: |
1209 | 1212 | ||
@@ -2225,6 +2228,7 @@ static int run(mddev_t *mddev) | |||
2225 | 2228 | ||
2226 | if (conf->near_copies < mddev->raid_disks) | 2229 | if (conf->near_copies < mddev->raid_disks) |
2227 | blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); | 2230 | blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); |
2231 | md_integrity_register(mddev); | ||
2228 | return 0; | 2232 | return 0; |
2229 | 2233 | ||
2230 | out_free_conf: | 2234 | out_free_conf: |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 37835538b58e..b8a2c5dc67ba 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -3785,7 +3785,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
3785 | conf->reshape_progress < raid5_size(mddev, 0, 0)) { | 3785 | conf->reshape_progress < raid5_size(mddev, 0, 0)) { |
3786 | sector_nr = raid5_size(mddev, 0, 0) | 3786 | sector_nr = raid5_size(mddev, 0, 0) |
3787 | - conf->reshape_progress; | 3787 | - conf->reshape_progress; |
3788 | } else if (mddev->delta_disks > 0 && | 3788 | } else if (mddev->delta_disks >= 0 && |
3789 | conf->reshape_progress > 0) | 3789 | conf->reshape_progress > 0) |
3790 | sector_nr = conf->reshape_progress; | 3790 | sector_nr = conf->reshape_progress; |
3791 | sector_div(sector_nr, new_data_disks); | 3791 | sector_div(sector_nr, new_data_disks); |
@@ -3999,6 +3999,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski | |||
3999 | return 0; | 3999 | return 0; |
4000 | } | 4000 | } |
4001 | 4001 | ||
4002 | /* Allow raid5_quiesce to complete */ | ||
4003 | wait_event(conf->wait_for_overlap, conf->quiesce != 2); | ||
4004 | |||
4002 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) | 4005 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) |
4003 | return reshape_request(mddev, sector_nr, skipped); | 4006 | return reshape_request(mddev, sector_nr, skipped); |
4004 | 4007 | ||
@@ -4316,6 +4319,15 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks) | |||
4316 | return sectors * (raid_disks - conf->max_degraded); | 4319 | return sectors * (raid_disks - conf->max_degraded); |
4317 | } | 4320 | } |
4318 | 4321 | ||
4322 | static void free_conf(raid5_conf_t *conf) | ||
4323 | { | ||
4324 | shrink_stripes(conf); | ||
4325 | safe_put_page(conf->spare_page); | ||
4326 | kfree(conf->disks); | ||
4327 | kfree(conf->stripe_hashtbl); | ||
4328 | kfree(conf); | ||
4329 | } | ||
4330 | |||
4319 | static raid5_conf_t *setup_conf(mddev_t *mddev) | 4331 | static raid5_conf_t *setup_conf(mddev_t *mddev) |
4320 | { | 4332 | { |
4321 | raid5_conf_t *conf; | 4333 | raid5_conf_t *conf; |
@@ -4447,11 +4459,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) | |||
4447 | 4459 | ||
4448 | abort: | 4460 | abort: |
4449 | if (conf) { | 4461 | if (conf) { |
4450 | shrink_stripes(conf); | 4462 | free_conf(conf); |
4451 | safe_put_page(conf->spare_page); | ||
4452 | kfree(conf->disks); | ||
4453 | kfree(conf->stripe_hashtbl); | ||
4454 | kfree(conf); | ||
4455 | return ERR_PTR(-EIO); | 4463 | return ERR_PTR(-EIO); |
4456 | } else | 4464 | } else |
4457 | return ERR_PTR(-ENOMEM); | 4465 | return ERR_PTR(-ENOMEM); |
@@ -4501,7 +4509,26 @@ static int run(mddev_t *mddev) | |||
4501 | (old_disks-max_degraded)); | 4509 | (old_disks-max_degraded)); |
4502 | /* here_old is the first stripe that we might need to read | 4510 | /* here_old is the first stripe that we might need to read |
4503 | * from */ | 4511 | * from */ |
4504 | if (here_new >= here_old) { | 4512 | if (mddev->delta_disks == 0) { |
4513 | /* We cannot be sure it is safe to start an in-place | ||
4514 | * reshape. It is only safe if user-space if monitoring | ||
4515 | * and taking constant backups. | ||
4516 | * mdadm always starts a situation like this in | ||
4517 | * readonly mode so it can take control before | ||
4518 | * allowing any writes. So just check for that. | ||
4519 | */ | ||
4520 | if ((here_new * mddev->new_chunk_sectors != | ||
4521 | here_old * mddev->chunk_sectors) || | ||
4522 | mddev->ro == 0) { | ||
4523 | printk(KERN_ERR "raid5: in-place reshape must be started" | ||
4524 | " in read-only mode - aborting\n"); | ||
4525 | return -EINVAL; | ||
4526 | } | ||
4527 | } else if (mddev->delta_disks < 0 | ||
4528 | ? (here_new * mddev->new_chunk_sectors <= | ||
4529 | here_old * mddev->chunk_sectors) | ||
4530 | : (here_new * mddev->new_chunk_sectors >= | ||
4531 | here_old * mddev->chunk_sectors)) { | ||
4505 | /* Reading from the same stripe as writing to - bad */ | 4532 | /* Reading from the same stripe as writing to - bad */ |
4506 | printk(KERN_ERR "raid5: reshape_position too early for " | 4533 | printk(KERN_ERR "raid5: reshape_position too early for " |
4507 | "auto-recovery - aborting.\n"); | 4534 | "auto-recovery - aborting.\n"); |
@@ -4629,12 +4656,8 @@ abort: | |||
4629 | md_unregister_thread(mddev->thread); | 4656 | md_unregister_thread(mddev->thread); |
4630 | mddev->thread = NULL; | 4657 | mddev->thread = NULL; |
4631 | if (conf) { | 4658 | if (conf) { |
4632 | shrink_stripes(conf); | ||
4633 | print_raid5_conf(conf); | 4659 | print_raid5_conf(conf); |
4634 | safe_put_page(conf->spare_page); | 4660 | free_conf(conf); |
4635 | kfree(conf->disks); | ||
4636 | kfree(conf->stripe_hashtbl); | ||
4637 | kfree(conf); | ||
4638 | } | 4661 | } |
4639 | mddev->private = NULL; | 4662 | mddev->private = NULL; |
4640 | printk(KERN_ALERT "raid5: failed to run raid set %s\n", mdname(mddev)); | 4663 | printk(KERN_ALERT "raid5: failed to run raid set %s\n", mdname(mddev)); |
@@ -4649,13 +4672,10 @@ static int stop(mddev_t *mddev) | |||
4649 | 4672 | ||
4650 | md_unregister_thread(mddev->thread); | 4673 | md_unregister_thread(mddev->thread); |
4651 | mddev->thread = NULL; | 4674 | mddev->thread = NULL; |
4652 | shrink_stripes(conf); | ||
4653 | kfree(conf->stripe_hashtbl); | ||
4654 | mddev->queue->backing_dev_info.congested_fn = NULL; | 4675 | mddev->queue->backing_dev_info.congested_fn = NULL; |
4655 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | 4676 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ |
4656 | sysfs_remove_group(&mddev->kobj, &raid5_attrs_group); | 4677 | sysfs_remove_group(&mddev->kobj, &raid5_attrs_group); |
4657 | kfree(conf->disks); | 4678 | free_conf(conf); |
4658 | kfree(conf); | ||
4659 | mddev->private = NULL; | 4679 | mddev->private = NULL; |
4660 | return 0; | 4680 | return 0; |
4661 | } | 4681 | } |
@@ -4857,6 +4877,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) | |||
4857 | return -EINVAL; | 4877 | return -EINVAL; |
4858 | set_capacity(mddev->gendisk, mddev->array_sectors); | 4878 | set_capacity(mddev->gendisk, mddev->array_sectors); |
4859 | mddev->changed = 1; | 4879 | mddev->changed = 1; |
4880 | revalidate_disk(mddev->gendisk); | ||
4860 | if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) { | 4881 | if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) { |
4861 | mddev->recovery_cp = mddev->dev_sectors; | 4882 | mddev->recovery_cp = mddev->dev_sectors; |
4862 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 4883 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
@@ -5002,7 +5023,7 @@ static int raid5_start_reshape(mddev_t *mddev) | |||
5002 | spin_unlock_irqrestore(&conf->device_lock, flags); | 5023 | spin_unlock_irqrestore(&conf->device_lock, flags); |
5003 | } | 5024 | } |
5004 | mddev->raid_disks = conf->raid_disks; | 5025 | mddev->raid_disks = conf->raid_disks; |
5005 | mddev->reshape_position = 0; | 5026 | mddev->reshape_position = conf->reshape_progress; |
5006 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 5027 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
5007 | 5028 | ||
5008 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); | 5029 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); |
@@ -5057,7 +5078,6 @@ static void end_reshape(raid5_conf_t *conf) | |||
5057 | */ | 5078 | */ |
5058 | static void raid5_finish_reshape(mddev_t *mddev) | 5079 | static void raid5_finish_reshape(mddev_t *mddev) |
5059 | { | 5080 | { |
5060 | struct block_device *bdev; | ||
5061 | raid5_conf_t *conf = mddev->private; | 5081 | raid5_conf_t *conf = mddev->private; |
5062 | 5082 | ||
5063 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { | 5083 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { |
@@ -5066,15 +5086,7 @@ static void raid5_finish_reshape(mddev_t *mddev) | |||
5066 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); | 5086 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); |
5067 | set_capacity(mddev->gendisk, mddev->array_sectors); | 5087 | set_capacity(mddev->gendisk, mddev->array_sectors); |
5068 | mddev->changed = 1; | 5088 | mddev->changed = 1; |
5069 | 5089 | revalidate_disk(mddev->gendisk); | |
5070 | bdev = bdget_disk(mddev->gendisk, 0); | ||
5071 | if (bdev) { | ||
5072 | mutex_lock(&bdev->bd_inode->i_mutex); | ||
5073 | i_size_write(bdev->bd_inode, | ||
5074 | (loff_t)mddev->array_sectors << 9); | ||
5075 | mutex_unlock(&bdev->bd_inode->i_mutex); | ||
5076 | bdput(bdev); | ||
5077 | } | ||
5078 | } else { | 5090 | } else { |
5079 | int d; | 5091 | int d; |
5080 | mddev->degraded = conf->raid_disks; | 5092 | mddev->degraded = conf->raid_disks; |
@@ -5085,8 +5097,15 @@ static void raid5_finish_reshape(mddev_t *mddev) | |||
5085 | mddev->degraded--; | 5097 | mddev->degraded--; |
5086 | for (d = conf->raid_disks ; | 5098 | for (d = conf->raid_disks ; |
5087 | d < conf->raid_disks - mddev->delta_disks; | 5099 | d < conf->raid_disks - mddev->delta_disks; |
5088 | d++) | 5100 | d++) { |
5089 | raid5_remove_disk(mddev, d); | 5101 | mdk_rdev_t *rdev = conf->disks[d].rdev; |
5102 | if (rdev && raid5_remove_disk(mddev, d) == 0) { | ||
5103 | char nm[20]; | ||
5104 | sprintf(nm, "rd%d", rdev->raid_disk); | ||
5105 | sysfs_remove_link(&mddev->kobj, nm); | ||
5106 | rdev->raid_disk = -1; | ||
5107 | } | ||
5108 | } | ||
5090 | } | 5109 | } |
5091 | mddev->layout = conf->algorithm; | 5110 | mddev->layout = conf->algorithm; |
5092 | mddev->chunk_sectors = conf->chunk_sectors; | 5111 | mddev->chunk_sectors = conf->chunk_sectors; |
@@ -5106,12 +5125,18 @@ static void raid5_quiesce(mddev_t *mddev, int state) | |||
5106 | 5125 | ||
5107 | case 1: /* stop all writes */ | 5126 | case 1: /* stop all writes */ |
5108 | spin_lock_irq(&conf->device_lock); | 5127 | spin_lock_irq(&conf->device_lock); |
5109 | conf->quiesce = 1; | 5128 | /* '2' tells resync/reshape to pause so that all |
5129 | * active stripes can drain | ||
5130 | */ | ||
5131 | conf->quiesce = 2; | ||
5110 | wait_event_lock_irq(conf->wait_for_stripe, | 5132 | wait_event_lock_irq(conf->wait_for_stripe, |
5111 | atomic_read(&conf->active_stripes) == 0 && | 5133 | atomic_read(&conf->active_stripes) == 0 && |
5112 | atomic_read(&conf->active_aligned_reads) == 0, | 5134 | atomic_read(&conf->active_aligned_reads) == 0, |
5113 | conf->device_lock, /* nothing */); | 5135 | conf->device_lock, /* nothing */); |
5136 | conf->quiesce = 1; | ||
5114 | spin_unlock_irq(&conf->device_lock); | 5137 | spin_unlock_irq(&conf->device_lock); |
5138 | /* allow reshape to continue */ | ||
5139 | wake_up(&conf->wait_for_overlap); | ||
5115 | break; | 5140 | break; |
5116 | 5141 | ||
5117 | case 0: /* re-enable writes */ | 5142 | case 0: /* re-enable writes */ |