Diffstat (limited to 'drivers/md/md.c')
 drivers/md/md.c | 194
 1 file changed, 115 insertions, 79 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 0f4a70c43ffc..103f2d33fa89 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit)
 	else
 		new->md_minor = MINOR(unit) >> MdpMinorShift;
 
+	mutex_init(&new->open_mutex);
 	mutex_init(&new->reconfig_mutex);
 	INIT_LIST_HEAD(&new->disks);
 	INIT_LIST_HEAD(&new->all_mddevs);
@@ -1308,7 +1309,12 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 	}
 	if (mddev->level != LEVEL_MULTIPATH) {
 		int role;
-		role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
+		if (rdev->desc_nr < 0 ||
+		    rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
+			role = 0xffff;
+			rdev->desc_nr = -1;
+		} else
+			role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
 		switch(role) {
 		case 0xffff: /* spare */
 			break;
@@ -1394,8 +1400,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 		if (rdev2->desc_nr+1 > max_dev)
 			max_dev = rdev2->desc_nr+1;
 
-	if (max_dev > le32_to_cpu(sb->max_dev))
+	if (max_dev > le32_to_cpu(sb->max_dev)) {
+		int bmask;
 		sb->max_dev = cpu_to_le32(max_dev);
+		rdev->sb_size = max_dev * 2 + 256;
+		bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
+		if (rdev->sb_size & bmask)
+			rdev->sb_size = (rdev->sb_size | bmask) + 1;
+	}
 	for (i=0; i<max_dev;i++)
 		sb->dev_roles[i] = cpu_to_le16(0xfffe);
 
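The new lines above size the superblock as max_dev * 2 + 256 bytes and then round it up to a whole logical block with the (size | bmask) + 1 idiom. A minimal sketch of that round-up, using a hypothetical helper name and example numbers that do not appear in the patch:

	static inline unsigned int roundup_to_block(unsigned int size, unsigned int lbs)
	{
		unsigned int bmask = lbs - 1;	/* lbs must be a power of two */

		return (size & bmask) ? (size | bmask) + 1 : size;
	}

	/* e.g. max_dev = 384 needs 384 * 2 + 256 = 1024 bytes; on a device with
	 * 4096-byte logical blocks that rounds up to 4096, while an already
	 * aligned 8192 stays 8192. */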
@@ -1487,37 +1499,76 @@ static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
 
 static LIST_HEAD(pending_raid_disks);
 
-static void md_integrity_check(mdk_rdev_t *rdev, mddev_t *mddev)
+/*
+ * Try to register data integrity profile for an mddev
+ *
+ * This is called when an array is started and after a disk has been kicked
+ * from the array. It only succeeds if all working and active component devices
+ * are integrity capable with matching profiles.
+ */
+int md_integrity_register(mddev_t *mddev)
+{
+	mdk_rdev_t *rdev, *reference = NULL;
+
+	if (list_empty(&mddev->disks))
+		return 0; /* nothing to do */
+	if (blk_get_integrity(mddev->gendisk))
+		return 0; /* already registered */
+	list_for_each_entry(rdev, &mddev->disks, same_set) {
+		/* skip spares and non-functional disks */
+		if (test_bit(Faulty, &rdev->flags))
+			continue;
+		if (rdev->raid_disk < 0)
+			continue;
+		/*
+		 * If at least one rdev is not integrity capable, we can not
+		 * enable data integrity for the md device.
+		 */
+		if (!bdev_get_integrity(rdev->bdev))
+			return -EINVAL;
+		if (!reference) {
+			/* Use the first rdev as the reference */
+			reference = rdev;
+			continue;
+		}
+		/* does this rdev's profile match the reference profile? */
+		if (blk_integrity_compare(reference->bdev->bd_disk,
+					  rdev->bdev->bd_disk) < 0)
+			return -EINVAL;
+	}
+	/*
+	 * All component devices are integrity capable and have matching
+	 * profiles, register the common profile for the md device.
+	 */
+	if (blk_integrity_register(mddev->gendisk,
+				   bdev_get_integrity(reference->bdev)) != 0) {
+		printk(KERN_ERR "md: failed to register integrity for %s\n",
+			mdname(mddev));
+		return -EINVAL;
+	}
+	printk(KERN_NOTICE "md: data integrity on %s enabled\n",
+		mdname(mddev));
+	return 0;
+}
+EXPORT_SYMBOL(md_integrity_register);
+
+/* Disable data integrity if non-capable/non-matching disk is being added */
+void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
 {
-	struct mdk_personality *pers = mddev->pers;
-	struct gendisk *disk = mddev->gendisk;
 	struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev);
-	struct blk_integrity *bi_mddev = blk_get_integrity(disk);
+	struct blk_integrity *bi_mddev = blk_get_integrity(mddev->gendisk);
 
-	/* Data integrity passthrough not supported on RAID 4, 5 and 6 */
-	if (pers && pers->level >= 4 && pers->level <= 6)
+	if (!bi_mddev) /* nothing to do */
 		return;
-
-	/* If rdev is integrity capable, register profile for mddev */
-	if (!bi_mddev && bi_rdev) {
-		if (blk_integrity_register(disk, bi_rdev))
-			printk(KERN_ERR "%s: %s Could not register integrity!\n",
-			       __func__, disk->disk_name);
-		else
-			printk(KERN_NOTICE "Enabling data integrity on %s\n",
-			       disk->disk_name);
+	if (rdev->raid_disk < 0) /* skip spares */
 		return;
-	}
-
-	/* Check that mddev and rdev have matching profiles */
-	if (blk_integrity_compare(disk, rdev->bdev->bd_disk) < 0) {
-		printk(KERN_ERR "%s: %s/%s integrity mismatch!\n", __func__,
-		       disk->disk_name, rdev->bdev->bd_disk->disk_name);
-		printk(KERN_NOTICE "Disabling data integrity on %s\n",
-		       disk->disk_name);
-		blk_integrity_unregister(disk);
-	}
+	if (bi_rdev && blk_integrity_compare(mddev->gendisk,
+					     rdev->bdev->bd_disk) >= 0)
+		return;
+	printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev));
+	blk_integrity_unregister(mddev->gendisk);
 }
+EXPORT_SYMBOL(md_integrity_add_rdev);
 
 static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 {
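The two helpers above are exported, and the next hunk drops the old md_integrity_check() call from bind_rdev_to_array(), so registration is evidently left to the callers (per the comment, at array start and after a disk has been kicked out). A minimal usage sketch under that assumption; the example_* function names are invented for illustration and are not part of this patch:

	static int example_run(mddev_t *mddev)
	{
		/* ... assemble the array first ... */

		/* enable integrity passthrough only if every active rdev
		 * carries a matching profile */
		return md_integrity_register(mddev);
	}

	static void example_hot_add(mddev_t *mddev, mdk_rdev_t *rdev)
	{
		/* ... bind the new device into the array ... */

		/* drop the array-wide profile if this rdev cannot match it */
		md_integrity_add_rdev(rdev, mddev);
	}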
@@ -1591,7 +1642,6 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 	/* May as well allow recovery to be retried once */
 	mddev->recovery_disabled = 0;
 
-	md_integrity_check(rdev, mddev);
 	return 0;
 
 fail:
@@ -1756,9 +1806,10 @@ static void print_sb_1(struct mdp_superblock_1 *sb)
 	__u8 *uuid;
 
 	uuid = sb->set_uuid;
-	printk(KERN_INFO "md: SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x"
-	       ":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n"
-	       KERN_INFO "md: Name: \"%s\" CT:%llu\n",
+	printk(KERN_INFO
+	       "md: SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x"
+	       ":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n"
+	       "md: Name: \"%s\" CT:%llu\n",
 	       le32_to_cpu(sb->major_version),
 	       le32_to_cpu(sb->feature_map),
 	       uuid[0], uuid[1], uuid[2], uuid[3],
@@ -1770,12 +1821,13 @@ static void print_sb_1(struct mdp_superblock_1 *sb)
 	       & MD_SUPERBLOCK_1_TIME_SEC_MASK);
 
 	uuid = sb->device_uuid;
-	printk(KERN_INFO "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
+	printk(KERN_INFO
+	       "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
 	       " RO:%llu\n"
-	       KERN_INFO "md: Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x"
+	       "md: Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x"
 	       ":%02x%02x%02x%02x%02x%02x\n"
-	       KERN_INFO "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
-	       KERN_INFO "md: (MaxDev:%u) \n",
+	       "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
+	       "md: (MaxDev:%u) \n",
 	       le32_to_cpu(sb->level),
 	       (unsigned long long)le64_to_cpu(sb->size),
 	       le32_to_cpu(sb->raid_disks),
@@ -1923,17 +1975,14 @@ repeat:
 		/* otherwise we have to go forward and ... */
 		mddev->events ++;
 		if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */
-			/* .. if the array isn't clean, insist on an odd 'events' */
-			if ((mddev->events&1)==0) {
-				mddev->events++;
+			/* .. if the array isn't clean, an 'even' event must also go
+			 * to spares. */
+			if ((mddev->events&1)==0)
 				nospares = 0;
-			}
 		} else {
-			/* otherwise insist on an even 'events' (for clean states) */
-			if ((mddev->events&1)) {
-				mddev->events++;
+			/* otherwise an 'odd' event must go to spares */
+			if ((mddev->events&1))
 				nospares = 0;
-			}
 		}
 	}
 
@@ -2655,6 +2704,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
 	ssize_t rv = len;
 	struct mdk_personality *pers;
 	void *priv;
+	mdk_rdev_t *rdev;
 
 	if (mddev->pers == NULL) {
 		if (len == 0)
@@ -2734,6 +2784,12 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
 	mddev_suspend(mddev);
 	mddev->pers->stop(mddev);
 	module_put(mddev->pers->owner);
+	/* Invalidate devices that are now superfluous */
+	list_for_each_entry(rdev, &mddev->disks, same_set)
+		if (rdev->raid_disk >= mddev->raid_disks) {
+			rdev->raid_disk = -1;
+			clear_bit(In_sync, &rdev->flags);
+		}
 	mddev->pers = pers;
 	mddev->private = priv;
 	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
@@ -3543,6 +3599,7 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len)
 	if (max < mddev->resync_min)
 		return -EINVAL;
 	if (max < mddev->resync_max &&
+	    mddev->ro == 0 &&
 	    test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
 		return -EBUSY;
 
@@ -3683,17 +3740,8 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len)
 
 	mddev->array_sectors = sectors;
 	set_capacity(mddev->gendisk, mddev->array_sectors);
-	if (mddev->pers) {
-		struct block_device *bdev = bdget_disk(mddev->gendisk, 0);
-
-		if (bdev) {
-			mutex_lock(&bdev->bd_inode->i_mutex);
-			i_size_write(bdev->bd_inode,
-				     (loff_t)mddev->array_sectors << 9);
-			mutex_unlock(&bdev->bd_inode->i_mutex);
-			bdput(bdev);
-		}
-	}
+	if (mddev->pers)
+		revalidate_disk(mddev->gendisk);
 
 	return len;
 }
@@ -4046,10 +4094,6 @@ static int do_md_run(mddev_t * mddev)
 	}
 	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
 
-	if (pers->level >= 4 && pers->level <= 6)
-		/* Cannot support integrity (yet) */
-		blk_integrity_unregister(mddev->gendisk);
-
 	if (mddev->reshape_position != MaxSector &&
 	    pers->start_reshape == NULL) {
 		/* This personality cannot handle reshaping... */
@@ -4187,6 +4231,7 @@ static int do_md_run(mddev_t * mddev)
 	md_wakeup_thread(mddev->thread);
 	md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
 
+	revalidate_disk(mddev->gendisk);
 	mddev->changed = 1;
 	md_new_event(mddev);
 	sysfs_notify_dirent(mddev->sysfs_state);
@@ -4258,12 +4303,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
 	struct gendisk *disk = mddev->gendisk;
 	mdk_rdev_t *rdev;
 
+	mutex_lock(&mddev->open_mutex);
 	if (atomic_read(&mddev->openers) > is_open) {
 		printk("md: %s still in use.\n",mdname(mddev));
-		return -EBUSY;
-	}
-
-	if (mddev->pers) {
+		err = -EBUSY;
+	} else if (mddev->pers) {
 
 		if (mddev->sync_thread) {
 			set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -4321,7 +4365,10 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
 			set_disk_ro(disk, 1);
 		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 	}
-
+out:
+	mutex_unlock(&mddev->open_mutex);
+	if (err)
+		return err;
 	/*
 	 * Free resources if final stop
 	 */
@@ -4387,7 +4434,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
 	blk_integrity_unregister(disk);
 	md_new_event(mddev);
 	sysfs_notify_dirent(mddev->sysfs_state);
-out:
 	return err;
 }
 
@@ -5085,18 +5131,8 @@ static int update_size(mddev_t *mddev, sector_t num_sectors)
 		return -ENOSPC;
 	}
 	rv = mddev->pers->resize(mddev, num_sectors);
-	if (!rv) {
-		struct block_device *bdev;
-
-		bdev = bdget_disk(mddev->gendisk, 0);
-		if (bdev) {
-			mutex_lock(&bdev->bd_inode->i_mutex);
-			i_size_write(bdev->bd_inode,
-				     (loff_t)mddev->array_sectors << 9);
-			mutex_unlock(&bdev->bd_inode->i_mutex);
-			bdput(bdev);
-		}
-	}
+	if (!rv)
+		revalidate_disk(mddev->gendisk);
 	return rv;
 }
 
@@ -5482,12 +5518,12 @@ static int md_open(struct block_device *bdev, fmode_t mode)
 	}
 	BUG_ON(mddev != bdev->bd_disk->private_data);
 
-	if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1)))
+	if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
 		goto out;
 
 	err = 0;
 	atomic_inc(&mddev->openers);
-	mddev_unlock(mddev);
+	mutex_unlock(&mddev->open_mutex);
 
 	check_disk_change(bdev);
 out:
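Taken together with the do_md_stop() hunks above, the new open_mutex lets md_open() avoid reconfig_mutex entirely. A condensed sketch of the resulting locking protocol, assembled from this diff for illustration rather than quoted from a single spot in the file:

	/* md_open(): take only the narrow open_mutex */
	mutex_lock(&mddev->open_mutex);
	atomic_inc(&mddev->openers);
	mutex_unlock(&mddev->open_mutex);

	/* do_md_stop(): the openers count is re-checked under the same mutex,
	 * so a racing open cannot slip in between the check and the teardown */
	mutex_lock(&mddev->open_mutex);
	if (atomic_read(&mddev->openers) > is_open)
		err = -EBUSY;	/* still opened elsewhere */
	else if (mddev->pers) {
		/* ... stop the sync thread and tear the array down ... */
	}
	mutex_unlock(&mddev->open_mutex);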