diff options
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 177 |
1 files changed, 106 insertions, 71 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index d4351ff0849f..9dd872000cec 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit) | |||
359 | else | 359 | else |
360 | new->md_minor = MINOR(unit) >> MdpMinorShift; | 360 | new->md_minor = MINOR(unit) >> MdpMinorShift; |
361 | 361 | ||
362 | mutex_init(&new->open_mutex); | ||
362 | mutex_init(&new->reconfig_mutex); | 363 | mutex_init(&new->reconfig_mutex); |
363 | INIT_LIST_HEAD(&new->disks); | 364 | INIT_LIST_HEAD(&new->disks); |
364 | INIT_LIST_HEAD(&new->all_mddevs); | 365 | INIT_LIST_HEAD(&new->all_mddevs); |
@@ -1308,7 +1309,12 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1308 | } | 1309 | } |
1309 | if (mddev->level != LEVEL_MULTIPATH) { | 1310 | if (mddev->level != LEVEL_MULTIPATH) { |
1310 | int role; | 1311 | int role; |
1311 | role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); | 1312 | if (rdev->desc_nr < 0 || |
1313 | rdev->desc_nr >= le32_to_cpu(sb->max_dev)) { | ||
1314 | role = 0xffff; | ||
1315 | rdev->desc_nr = -1; | ||
1316 | } else | ||
1317 | role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); | ||
1312 | switch(role) { | 1318 | switch(role) { |
1313 | case 0xffff: /* spare */ | 1319 | case 0xffff: /* spare */ |
1314 | break; | 1320 | break; |
@@ -1394,8 +1400,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1394 | if (rdev2->desc_nr+1 > max_dev) | 1400 | if (rdev2->desc_nr+1 > max_dev) |
1395 | max_dev = rdev2->desc_nr+1; | 1401 | max_dev = rdev2->desc_nr+1; |
1396 | 1402 | ||
1397 | if (max_dev > le32_to_cpu(sb->max_dev)) | 1403 | if (max_dev > le32_to_cpu(sb->max_dev)) { |
1404 | int bmask; | ||
1398 | sb->max_dev = cpu_to_le32(max_dev); | 1405 | sb->max_dev = cpu_to_le32(max_dev); |
1406 | rdev->sb_size = max_dev * 2 + 256; | ||
1407 | bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1; | ||
1408 | if (rdev->sb_size & bmask) | ||
1409 | rdev->sb_size = (rdev->sb_size | bmask) + 1; | ||
1410 | } | ||
1399 | for (i=0; i<max_dev;i++) | 1411 | for (i=0; i<max_dev;i++) |
1400 | sb->dev_roles[i] = cpu_to_le16(0xfffe); | 1412 | sb->dev_roles[i] = cpu_to_le16(0xfffe); |
1401 | 1413 | ||
@@ -1487,37 +1499,76 @@ static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2) | |||
1487 | 1499 | ||
1488 | static LIST_HEAD(pending_raid_disks); | 1500 | static LIST_HEAD(pending_raid_disks); |
1489 | 1501 | ||
1490 | static void md_integrity_check(mdk_rdev_t *rdev, mddev_t *mddev) | 1502 | /* |
1503 | * Try to register data integrity profile for an mddev | ||
1504 | * | ||
1505 | * This is called when an array is started and after a disk has been kicked | ||
1506 | * from the array. It only succeeds if all working and active component devices | ||
1507 | * are integrity capable with matching profiles. | ||
1508 | */ | ||
1509 | int md_integrity_register(mddev_t *mddev) | ||
1510 | { | ||
1511 | mdk_rdev_t *rdev, *reference = NULL; | ||
1512 | |||
1513 | if (list_empty(&mddev->disks)) | ||
1514 | return 0; /* nothing to do */ | ||
1515 | if (blk_get_integrity(mddev->gendisk)) | ||
1516 | return 0; /* already registered */ | ||
1517 | list_for_each_entry(rdev, &mddev->disks, same_set) { | ||
1518 | /* skip spares and non-functional disks */ | ||
1519 | if (test_bit(Faulty, &rdev->flags)) | ||
1520 | continue; | ||
1521 | if (rdev->raid_disk < 0) | ||
1522 | continue; | ||
1523 | /* | ||
1524 | * If at least one rdev is not integrity capable, we can not | ||
1525 | * enable data integrity for the md device. | ||
1526 | */ | ||
1527 | if (!bdev_get_integrity(rdev->bdev)) | ||
1528 | return -EINVAL; | ||
1529 | if (!reference) { | ||
1530 | /* Use the first rdev as the reference */ | ||
1531 | reference = rdev; | ||
1532 | continue; | ||
1533 | } | ||
1534 | /* does this rdev's profile match the reference profile? */ | ||
1535 | if (blk_integrity_compare(reference->bdev->bd_disk, | ||
1536 | rdev->bdev->bd_disk) < 0) | ||
1537 | return -EINVAL; | ||
1538 | } | ||
1539 | /* | ||
1540 | * All component devices are integrity capable and have matching | ||
1541 | * profiles, register the common profile for the md device. | ||
1542 | */ | ||
1543 | if (blk_integrity_register(mddev->gendisk, | ||
1544 | bdev_get_integrity(reference->bdev)) != 0) { | ||
1545 | printk(KERN_ERR "md: failed to register integrity for %s\n", | ||
1546 | mdname(mddev)); | ||
1547 | return -EINVAL; | ||
1548 | } | ||
1549 | printk(KERN_NOTICE "md: data integrity on %s enabled\n", | ||
1550 | mdname(mddev)); | ||
1551 | return 0; | ||
1552 | } | ||
1553 | EXPORT_SYMBOL(md_integrity_register); | ||
1554 | |||
1555 | /* Disable data integrity if non-capable/non-matching disk is being added */ | ||
1556 | void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev) | ||
1491 | { | 1557 | { |
1492 | struct mdk_personality *pers = mddev->pers; | ||
1493 | struct gendisk *disk = mddev->gendisk; | ||
1494 | struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev); | 1558 | struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev); |
1495 | struct blk_integrity *bi_mddev = blk_get_integrity(disk); | 1559 | struct blk_integrity *bi_mddev = blk_get_integrity(mddev->gendisk); |
1496 | 1560 | ||
1497 | /* Data integrity passthrough not supported on RAID 4, 5 and 6 */ | 1561 | if (!bi_mddev) /* nothing to do */ |
1498 | if (pers && pers->level >= 4 && pers->level <= 6) | ||
1499 | return; | 1562 | return; |
1500 | 1563 | if (rdev->raid_disk < 0) /* skip spares */ | |
1501 | /* If rdev is integrity capable, register profile for mddev */ | ||
1502 | if (!bi_mddev && bi_rdev) { | ||
1503 | if (blk_integrity_register(disk, bi_rdev)) | ||
1504 | printk(KERN_ERR "%s: %s Could not register integrity!\n", | ||
1505 | __func__, disk->disk_name); | ||
1506 | else | ||
1507 | printk(KERN_NOTICE "Enabling data integrity on %s\n", | ||
1508 | disk->disk_name); | ||
1509 | return; | 1564 | return; |
1510 | } | 1565 | if (bi_rdev && blk_integrity_compare(mddev->gendisk, |
1511 | 1566 | rdev->bdev->bd_disk) >= 0) | |
1512 | /* Check that mddev and rdev have matching profiles */ | 1567 | return; |
1513 | if (blk_integrity_compare(disk, rdev->bdev->bd_disk) < 0) { | 1568 | printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev)); |
1514 | printk(KERN_ERR "%s: %s/%s integrity mismatch!\n", __func__, | 1569 | blk_integrity_unregister(mddev->gendisk); |
1515 | disk->disk_name, rdev->bdev->bd_disk->disk_name); | ||
1516 | printk(KERN_NOTICE "Disabling data integrity on %s\n", | ||
1517 | disk->disk_name); | ||
1518 | blk_integrity_unregister(disk); | ||
1519 | } | ||
1520 | } | 1570 | } |
1571 | EXPORT_SYMBOL(md_integrity_add_rdev); | ||
1521 | 1572 | ||
1522 | static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) | 1573 | static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) |
1523 | { | 1574 | { |
@@ -1591,7 +1642,6 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) | |||
1591 | /* May as well allow recovery to be retried once */ | 1642 | /* May as well allow recovery to be retried once */ |
1592 | mddev->recovery_disabled = 0; | 1643 | mddev->recovery_disabled = 0; |
1593 | 1644 | ||
1594 | md_integrity_check(rdev, mddev); | ||
1595 | return 0; | 1645 | return 0; |
1596 | 1646 | ||
1597 | fail: | 1647 | fail: |
@@ -1925,17 +1975,14 @@ repeat: | |||
1925 | /* otherwise we have to go forward and ... */ | 1975 | /* otherwise we have to go forward and ... */ |
1926 | mddev->events ++; | 1976 | mddev->events ++; |
1927 | if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */ | 1977 | if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */ |
1928 | /* .. if the array isn't clean, insist on an odd 'events' */ | 1978 | /* .. if the array isn't clean, an 'even' event must also go |
1929 | if ((mddev->events&1)==0) { | 1979 | * to spares. */ |
1930 | mddev->events++; | 1980 | if ((mddev->events&1)==0) |
1931 | nospares = 0; | 1981 | nospares = 0; |
1932 | } | ||
1933 | } else { | 1982 | } else { |
1934 | /* otherwise insist on an even 'events' (for clean states) */ | 1983 | /* otherwise an 'odd' event must go to spares */ |
1935 | if ((mddev->events&1)) { | 1984 | if ((mddev->events&1)) |
1936 | mddev->events++; | ||
1937 | nospares = 0; | 1985 | nospares = 0; |
1938 | } | ||
1939 | } | 1986 | } |
1940 | } | 1987 | } |
1941 | 1988 | ||
@@ -2657,6 +2704,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len) | |||
2657 | ssize_t rv = len; | 2704 | ssize_t rv = len; |
2658 | struct mdk_personality *pers; | 2705 | struct mdk_personality *pers; |
2659 | void *priv; | 2706 | void *priv; |
2707 | mdk_rdev_t *rdev; | ||
2660 | 2708 | ||
2661 | if (mddev->pers == NULL) { | 2709 | if (mddev->pers == NULL) { |
2662 | if (len == 0) | 2710 | if (len == 0) |
@@ -2736,6 +2784,12 @@ level_store(mddev_t *mddev, const char *buf, size_t len) | |||
2736 | mddev_suspend(mddev); | 2784 | mddev_suspend(mddev); |
2737 | mddev->pers->stop(mddev); | 2785 | mddev->pers->stop(mddev); |
2738 | module_put(mddev->pers->owner); | 2786 | module_put(mddev->pers->owner); |
2787 | /* Invalidate devices that are now superfluous */ | ||
2788 | list_for_each_entry(rdev, &mddev->disks, same_set) | ||
2789 | if (rdev->raid_disk >= mddev->raid_disks) { | ||
2790 | rdev->raid_disk = -1; | ||
2791 | clear_bit(In_sync, &rdev->flags); | ||
2792 | } | ||
2739 | mddev->pers = pers; | 2793 | mddev->pers = pers; |
2740 | mddev->private = priv; | 2794 | mddev->private = priv; |
2741 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); | 2795 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); |
@@ -3545,6 +3599,7 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len) | |||
3545 | if (max < mddev->resync_min) | 3599 | if (max < mddev->resync_min) |
3546 | return -EINVAL; | 3600 | return -EINVAL; |
3547 | if (max < mddev->resync_max && | 3601 | if (max < mddev->resync_max && |
3602 | mddev->ro == 0 && | ||
3548 | test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) | 3603 | test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) |
3549 | return -EBUSY; | 3604 | return -EBUSY; |
3550 | 3605 | ||
@@ -3685,17 +3740,8 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len) | |||
3685 | 3740 | ||
3686 | mddev->array_sectors = sectors; | 3741 | mddev->array_sectors = sectors; |
3687 | set_capacity(mddev->gendisk, mddev->array_sectors); | 3742 | set_capacity(mddev->gendisk, mddev->array_sectors); |
3688 | if (mddev->pers) { | 3743 | if (mddev->pers) |
3689 | struct block_device *bdev = bdget_disk(mddev->gendisk, 0); | 3744 | revalidate_disk(mddev->gendisk); |
3690 | |||
3691 | if (bdev) { | ||
3692 | mutex_lock(&bdev->bd_inode->i_mutex); | ||
3693 | i_size_write(bdev->bd_inode, | ||
3694 | (loff_t)mddev->array_sectors << 9); | ||
3695 | mutex_unlock(&bdev->bd_inode->i_mutex); | ||
3696 | bdput(bdev); | ||
3697 | } | ||
3698 | } | ||
3699 | 3745 | ||
3700 | return len; | 3746 | return len; |
3701 | } | 3747 | } |
@@ -4048,10 +4094,6 @@ static int do_md_run(mddev_t * mddev) | |||
4048 | } | 4094 | } |
4049 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); | 4095 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); |
4050 | 4096 | ||
4051 | if (pers->level >= 4 && pers->level <= 6) | ||
4052 | /* Cannot support integrity (yet) */ | ||
4053 | blk_integrity_unregister(mddev->gendisk); | ||
4054 | |||
4055 | if (mddev->reshape_position != MaxSector && | 4097 | if (mddev->reshape_position != MaxSector && |
4056 | pers->start_reshape == NULL) { | 4098 | pers->start_reshape == NULL) { |
4057 | /* This personality cannot handle reshaping... */ | 4099 | /* This personality cannot handle reshaping... */ |
@@ -4189,6 +4231,7 @@ static int do_md_run(mddev_t * mddev) | |||
4189 | md_wakeup_thread(mddev->thread); | 4231 | md_wakeup_thread(mddev->thread); |
4190 | md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ | 4232 | md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ |
4191 | 4233 | ||
4234 | revalidate_disk(mddev->gendisk); | ||
4192 | mddev->changed = 1; | 4235 | mddev->changed = 1; |
4193 | md_new_event(mddev); | 4236 | md_new_event(mddev); |
4194 | sysfs_notify_dirent(mddev->sysfs_state); | 4237 | sysfs_notify_dirent(mddev->sysfs_state); |
@@ -4260,12 +4303,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
4260 | struct gendisk *disk = mddev->gendisk; | 4303 | struct gendisk *disk = mddev->gendisk; |
4261 | mdk_rdev_t *rdev; | 4304 | mdk_rdev_t *rdev; |
4262 | 4305 | ||
4306 | mutex_lock(&mddev->open_mutex); | ||
4263 | if (atomic_read(&mddev->openers) > is_open) { | 4307 | if (atomic_read(&mddev->openers) > is_open) { |
4264 | printk("md: %s still in use.\n",mdname(mddev)); | 4308 | printk("md: %s still in use.\n",mdname(mddev)); |
4265 | return -EBUSY; | 4309 | err = -EBUSY; |
4266 | } | 4310 | } else if (mddev->pers) { |
4267 | |||
4268 | if (mddev->pers) { | ||
4269 | 4311 | ||
4270 | if (mddev->sync_thread) { | 4312 | if (mddev->sync_thread) { |
4271 | set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | 4313 | set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); |
@@ -4322,8 +4364,12 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
4322 | if (mode == 1) | 4364 | if (mode == 1) |
4323 | set_disk_ro(disk, 1); | 4365 | set_disk_ro(disk, 1); |
4324 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | 4366 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); |
4367 | err = 0; | ||
4325 | } | 4368 | } |
4326 | 4369 | out: | |
4370 | mutex_unlock(&mddev->open_mutex); | ||
4371 | if (err) | ||
4372 | return err; | ||
4327 | /* | 4373 | /* |
4328 | * Free resources if final stop | 4374 | * Free resources if final stop |
4329 | */ | 4375 | */ |
@@ -4389,7 +4435,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
4389 | blk_integrity_unregister(disk); | 4435 | blk_integrity_unregister(disk); |
4390 | md_new_event(mddev); | 4436 | md_new_event(mddev); |
4391 | sysfs_notify_dirent(mddev->sysfs_state); | 4437 | sysfs_notify_dirent(mddev->sysfs_state); |
4392 | out: | ||
4393 | return err; | 4438 | return err; |
4394 | } | 4439 | } |
4395 | 4440 | ||
@@ -5087,18 +5132,8 @@ static int update_size(mddev_t *mddev, sector_t num_sectors) | |||
5087 | return -ENOSPC; | 5132 | return -ENOSPC; |
5088 | } | 5133 | } |
5089 | rv = mddev->pers->resize(mddev, num_sectors); | 5134 | rv = mddev->pers->resize(mddev, num_sectors); |
5090 | if (!rv) { | 5135 | if (!rv) |
5091 | struct block_device *bdev; | 5136 | revalidate_disk(mddev->gendisk); |
5092 | |||
5093 | bdev = bdget_disk(mddev->gendisk, 0); | ||
5094 | if (bdev) { | ||
5095 | mutex_lock(&bdev->bd_inode->i_mutex); | ||
5096 | i_size_write(bdev->bd_inode, | ||
5097 | (loff_t)mddev->array_sectors << 9); | ||
5098 | mutex_unlock(&bdev->bd_inode->i_mutex); | ||
5099 | bdput(bdev); | ||
5100 | } | ||
5101 | } | ||
5102 | return rv; | 5137 | return rv; |
5103 | } | 5138 | } |
5104 | 5139 | ||
@@ -5484,12 +5519,12 @@ static int md_open(struct block_device *bdev, fmode_t mode) | |||
5484 | } | 5519 | } |
5485 | BUG_ON(mddev != bdev->bd_disk->private_data); | 5520 | BUG_ON(mddev != bdev->bd_disk->private_data); |
5486 | 5521 | ||
5487 | if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1))) | 5522 | if ((err = mutex_lock_interruptible(&mddev->open_mutex))) |
5488 | goto out; | 5523 | goto out; |
5489 | 5524 | ||
5490 | err = 0; | 5525 | err = 0; |
5491 | atomic_inc(&mddev->openers); | 5526 | atomic_inc(&mddev->openers); |
5492 | mddev_unlock(mddev); | 5527 | mutex_unlock(&mddev->open_mutex); |
5493 | 5528 | ||
5494 | check_disk_change(bdev); | 5529 | check_disk_change(bdev); |
5495 | out: | 5530 | out: |