diff options
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 395 |
1 files changed, 297 insertions, 98 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index c28a120b4161..5fc326d3970e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -195,7 +195,7 @@ static DEFINE_SPINLOCK(all_mddevs_lock); | |||
195 | * Any code which breaks out of this loop will own | 195 | * Any code which breaks out of this loop will own |
196 | * a reference to the current mddev and must mddev_put it. | 196 | * a reference to the current mddev and must mddev_put it. |
197 | */ | 197 | */ |
198 | #define ITERATE_MDDEV(mddev,tmp) \ | 198 | #define for_each_mddev(mddev,tmp) \ |
199 | \ | 199 | \ |
200 | for (({ spin_lock(&all_mddevs_lock); \ | 200 | for (({ spin_lock(&all_mddevs_lock); \ |
201 | tmp = all_mddevs.next; \ | 201 | tmp = all_mddevs.next; \ |
@@ -275,6 +275,7 @@ static mddev_t * mddev_find(dev_t unit) | |||
275 | spin_lock_init(&new->write_lock); | 275 | spin_lock_init(&new->write_lock); |
276 | init_waitqueue_head(&new->sb_wait); | 276 | init_waitqueue_head(&new->sb_wait); |
277 | new->reshape_position = MaxSector; | 277 | new->reshape_position = MaxSector; |
278 | new->resync_max = MaxSector; | ||
278 | 279 | ||
279 | new->queue = blk_alloc_queue(GFP_KERNEL); | 280 | new->queue = blk_alloc_queue(GFP_KERNEL); |
280 | if (!new->queue) { | 281 | if (!new->queue) { |
@@ -310,7 +311,7 @@ static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr) | |||
310 | mdk_rdev_t * rdev; | 311 | mdk_rdev_t * rdev; |
311 | struct list_head *tmp; | 312 | struct list_head *tmp; |
312 | 313 | ||
313 | ITERATE_RDEV(mddev,rdev,tmp) { | 314 | rdev_for_each(rdev, tmp, mddev) { |
314 | if (rdev->desc_nr == nr) | 315 | if (rdev->desc_nr == nr) |
315 | return rdev; | 316 | return rdev; |
316 | } | 317 | } |
@@ -322,7 +323,7 @@ static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev) | |||
322 | struct list_head *tmp; | 323 | struct list_head *tmp; |
323 | mdk_rdev_t *rdev; | 324 | mdk_rdev_t *rdev; |
324 | 325 | ||
325 | ITERATE_RDEV(mddev,rdev,tmp) { | 326 | rdev_for_each(rdev, tmp, mddev) { |
326 | if (rdev->bdev->bd_dev == dev) | 327 | if (rdev->bdev->bd_dev == dev) |
327 | return rdev; | 328 | return rdev; |
328 | } | 329 | } |
@@ -773,12 +774,16 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
773 | __u64 ev1 = md_event(sb); | 774 | __u64 ev1 = md_event(sb); |
774 | 775 | ||
775 | rdev->raid_disk = -1; | 776 | rdev->raid_disk = -1; |
776 | rdev->flags = 0; | 777 | clear_bit(Faulty, &rdev->flags); |
778 | clear_bit(In_sync, &rdev->flags); | ||
779 | clear_bit(WriteMostly, &rdev->flags); | ||
780 | clear_bit(BarriersNotsupp, &rdev->flags); | ||
781 | |||
777 | if (mddev->raid_disks == 0) { | 782 | if (mddev->raid_disks == 0) { |
778 | mddev->major_version = 0; | 783 | mddev->major_version = 0; |
779 | mddev->minor_version = sb->minor_version; | 784 | mddev->minor_version = sb->minor_version; |
780 | mddev->patch_version = sb->patch_version; | 785 | mddev->patch_version = sb->patch_version; |
781 | mddev->persistent = ! sb->not_persistent; | 786 | mddev->external = 0; |
782 | mddev->chunk_size = sb->chunk_size; | 787 | mddev->chunk_size = sb->chunk_size; |
783 | mddev->ctime = sb->ctime; | 788 | mddev->ctime = sb->ctime; |
784 | mddev->utime = sb->utime; | 789 | mddev->utime = sb->utime; |
@@ -904,7 +909,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
904 | sb->size = mddev->size; | 909 | sb->size = mddev->size; |
905 | sb->raid_disks = mddev->raid_disks; | 910 | sb->raid_disks = mddev->raid_disks; |
906 | sb->md_minor = mddev->md_minor; | 911 | sb->md_minor = mddev->md_minor; |
907 | sb->not_persistent = !mddev->persistent; | 912 | sb->not_persistent = 0; |
908 | sb->utime = mddev->utime; | 913 | sb->utime = mddev->utime; |
909 | sb->state = 0; | 914 | sb->state = 0; |
910 | sb->events_hi = (mddev->events>>32); | 915 | sb->events_hi = (mddev->events>>32); |
@@ -938,7 +943,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
938 | sb->state |= (1<<MD_SB_BITMAP_PRESENT); | 943 | sb->state |= (1<<MD_SB_BITMAP_PRESENT); |
939 | 944 | ||
940 | sb->disks[0].state = (1<<MD_DISK_REMOVED); | 945 | sb->disks[0].state = (1<<MD_DISK_REMOVED); |
941 | ITERATE_RDEV(mddev,rdev2,tmp) { | 946 | rdev_for_each(rdev2, tmp, mddev) { |
942 | mdp_disk_t *d; | 947 | mdp_disk_t *d; |
943 | int desc_nr; | 948 | int desc_nr; |
944 | if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags) | 949 | if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags) |
@@ -1153,11 +1158,15 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1153 | __u64 ev1 = le64_to_cpu(sb->events); | 1158 | __u64 ev1 = le64_to_cpu(sb->events); |
1154 | 1159 | ||
1155 | rdev->raid_disk = -1; | 1160 | rdev->raid_disk = -1; |
1156 | rdev->flags = 0; | 1161 | clear_bit(Faulty, &rdev->flags); |
1162 | clear_bit(In_sync, &rdev->flags); | ||
1163 | clear_bit(WriteMostly, &rdev->flags); | ||
1164 | clear_bit(BarriersNotsupp, &rdev->flags); | ||
1165 | |||
1157 | if (mddev->raid_disks == 0) { | 1166 | if (mddev->raid_disks == 0) { |
1158 | mddev->major_version = 1; | 1167 | mddev->major_version = 1; |
1159 | mddev->patch_version = 0; | 1168 | mddev->patch_version = 0; |
1160 | mddev->persistent = 1; | 1169 | mddev->external = 0; |
1161 | mddev->chunk_size = le32_to_cpu(sb->chunksize) << 9; | 1170 | mddev->chunk_size = le32_to_cpu(sb->chunksize) << 9; |
1162 | mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1); | 1171 | mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1); |
1163 | mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1); | 1172 | mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1); |
@@ -1286,7 +1295,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1286 | } | 1295 | } |
1287 | 1296 | ||
1288 | max_dev = 0; | 1297 | max_dev = 0; |
1289 | ITERATE_RDEV(mddev,rdev2,tmp) | 1298 | rdev_for_each(rdev2, tmp, mddev) |
1290 | if (rdev2->desc_nr+1 > max_dev) | 1299 | if (rdev2->desc_nr+1 > max_dev) |
1291 | max_dev = rdev2->desc_nr+1; | 1300 | max_dev = rdev2->desc_nr+1; |
1292 | 1301 | ||
@@ -1295,7 +1304,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1295 | for (i=0; i<max_dev;i++) | 1304 | for (i=0; i<max_dev;i++) |
1296 | sb->dev_roles[i] = cpu_to_le16(0xfffe); | 1305 | sb->dev_roles[i] = cpu_to_le16(0xfffe); |
1297 | 1306 | ||
1298 | ITERATE_RDEV(mddev,rdev2,tmp) { | 1307 | rdev_for_each(rdev2, tmp, mddev) { |
1299 | i = rdev2->desc_nr; | 1308 | i = rdev2->desc_nr; |
1300 | if (test_bit(Faulty, &rdev2->flags)) | 1309 | if (test_bit(Faulty, &rdev2->flags)) |
1301 | sb->dev_roles[i] = cpu_to_le16(0xfffe); | 1310 | sb->dev_roles[i] = cpu_to_le16(0xfffe); |
@@ -1333,8 +1342,8 @@ static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2) | |||
1333 | struct list_head *tmp, *tmp2; | 1342 | struct list_head *tmp, *tmp2; |
1334 | mdk_rdev_t *rdev, *rdev2; | 1343 | mdk_rdev_t *rdev, *rdev2; |
1335 | 1344 | ||
1336 | ITERATE_RDEV(mddev1,rdev,tmp) | 1345 | rdev_for_each(rdev, tmp, mddev1) |
1337 | ITERATE_RDEV(mddev2, rdev2, tmp2) | 1346 | rdev_for_each(rdev2, tmp2, mddev2) |
1338 | if (rdev->bdev->bd_contains == | 1347 | if (rdev->bdev->bd_contains == |
1339 | rdev2->bdev->bd_contains) | 1348 | rdev2->bdev->bd_contains) |
1340 | return 1; | 1349 | return 1; |
@@ -1401,7 +1410,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) | |||
1401 | goto fail; | 1410 | goto fail; |
1402 | } | 1411 | } |
1403 | list_add(&rdev->same_set, &mddev->disks); | 1412 | list_add(&rdev->same_set, &mddev->disks); |
1404 | bd_claim_by_disk(rdev->bdev, rdev, mddev->gendisk); | 1413 | bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); |
1405 | return 0; | 1414 | return 0; |
1406 | 1415 | ||
1407 | fail: | 1416 | fail: |
@@ -1410,10 +1419,11 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) | |||
1410 | return err; | 1419 | return err; |
1411 | } | 1420 | } |
1412 | 1421 | ||
1413 | static void delayed_delete(struct work_struct *ws) | 1422 | static void md_delayed_delete(struct work_struct *ws) |
1414 | { | 1423 | { |
1415 | mdk_rdev_t *rdev = container_of(ws, mdk_rdev_t, del_work); | 1424 | mdk_rdev_t *rdev = container_of(ws, mdk_rdev_t, del_work); |
1416 | kobject_del(&rdev->kobj); | 1425 | kobject_del(&rdev->kobj); |
1426 | kobject_put(&rdev->kobj); | ||
1417 | } | 1427 | } |
1418 | 1428 | ||
1419 | static void unbind_rdev_from_array(mdk_rdev_t * rdev) | 1429 | static void unbind_rdev_from_array(mdk_rdev_t * rdev) |
@@ -1432,7 +1442,8 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) | |||
1432 | /* We need to delay this, otherwise we can deadlock when | 1442 | /* We need to delay this, otherwise we can deadlock when |
1433 | * writing 'remove' to "dev/state" | 1443 | * writing 'remove' to "dev/state" |
1434 | */ | 1444 | */ |
1435 | INIT_WORK(&rdev->del_work, delayed_delete); | 1445 | INIT_WORK(&rdev->del_work, md_delayed_delete); |
1446 | kobject_get(&rdev->kobj); | ||
1436 | schedule_work(&rdev->del_work); | 1447 | schedule_work(&rdev->del_work); |
1437 | } | 1448 | } |
1438 | 1449 | ||
@@ -1441,7 +1452,7 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) | |||
1441 | * otherwise reused by a RAID array (or any other kernel | 1452 | * otherwise reused by a RAID array (or any other kernel |
1442 | * subsystem), by bd_claiming the device. | 1453 | * subsystem), by bd_claiming the device. |
1443 | */ | 1454 | */ |
1444 | static int lock_rdev(mdk_rdev_t *rdev, dev_t dev) | 1455 | static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared) |
1445 | { | 1456 | { |
1446 | int err = 0; | 1457 | int err = 0; |
1447 | struct block_device *bdev; | 1458 | struct block_device *bdev; |
@@ -1453,13 +1464,15 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev) | |||
1453 | __bdevname(dev, b)); | 1464 | __bdevname(dev, b)); |
1454 | return PTR_ERR(bdev); | 1465 | return PTR_ERR(bdev); |
1455 | } | 1466 | } |
1456 | err = bd_claim(bdev, rdev); | 1467 | err = bd_claim(bdev, shared ? (mdk_rdev_t *)lock_rdev : rdev); |
1457 | if (err) { | 1468 | if (err) { |
1458 | printk(KERN_ERR "md: could not bd_claim %s.\n", | 1469 | printk(KERN_ERR "md: could not bd_claim %s.\n", |
1459 | bdevname(bdev, b)); | 1470 | bdevname(bdev, b)); |
1460 | blkdev_put(bdev); | 1471 | blkdev_put(bdev); |
1461 | return err; | 1472 | return err; |
1462 | } | 1473 | } |
1474 | if (!shared) | ||
1475 | set_bit(AllReserved, &rdev->flags); | ||
1463 | rdev->bdev = bdev; | 1476 | rdev->bdev = bdev; |
1464 | return err; | 1477 | return err; |
1465 | } | 1478 | } |
@@ -1503,7 +1516,7 @@ static void export_array(mddev_t *mddev) | |||
1503 | struct list_head *tmp; | 1516 | struct list_head *tmp; |
1504 | mdk_rdev_t *rdev; | 1517 | mdk_rdev_t *rdev; |
1505 | 1518 | ||
1506 | ITERATE_RDEV(mddev,rdev,tmp) { | 1519 | rdev_for_each(rdev, tmp, mddev) { |
1507 | if (!rdev->mddev) { | 1520 | if (!rdev->mddev) { |
1508 | MD_BUG(); | 1521 | MD_BUG(); |
1509 | continue; | 1522 | continue; |
@@ -1581,17 +1594,17 @@ static void md_print_devices(void) | |||
1581 | printk("md: **********************************\n"); | 1594 | printk("md: **********************************\n"); |
1582 | printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n"); | 1595 | printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n"); |
1583 | printk("md: **********************************\n"); | 1596 | printk("md: **********************************\n"); |
1584 | ITERATE_MDDEV(mddev,tmp) { | 1597 | for_each_mddev(mddev, tmp) { |
1585 | 1598 | ||
1586 | if (mddev->bitmap) | 1599 | if (mddev->bitmap) |
1587 | bitmap_print_sb(mddev->bitmap); | 1600 | bitmap_print_sb(mddev->bitmap); |
1588 | else | 1601 | else |
1589 | printk("%s: ", mdname(mddev)); | 1602 | printk("%s: ", mdname(mddev)); |
1590 | ITERATE_RDEV(mddev,rdev,tmp2) | 1603 | rdev_for_each(rdev, tmp2, mddev) |
1591 | printk("<%s>", bdevname(rdev->bdev,b)); | 1604 | printk("<%s>", bdevname(rdev->bdev,b)); |
1592 | printk("\n"); | 1605 | printk("\n"); |
1593 | 1606 | ||
1594 | ITERATE_RDEV(mddev,rdev,tmp2) | 1607 | rdev_for_each(rdev, tmp2, mddev) |
1595 | print_rdev(rdev); | 1608 | print_rdev(rdev); |
1596 | } | 1609 | } |
1597 | printk("md: **********************************\n"); | 1610 | printk("md: **********************************\n"); |
@@ -1610,7 +1623,7 @@ static void sync_sbs(mddev_t * mddev, int nospares) | |||
1610 | mdk_rdev_t *rdev; | 1623 | mdk_rdev_t *rdev; |
1611 | struct list_head *tmp; | 1624 | struct list_head *tmp; |
1612 | 1625 | ||
1613 | ITERATE_RDEV(mddev,rdev,tmp) { | 1626 | rdev_for_each(rdev, tmp, mddev) { |
1614 | if (rdev->sb_events == mddev->events || | 1627 | if (rdev->sb_events == mddev->events || |
1615 | (nospares && | 1628 | (nospares && |
1616 | rdev->raid_disk < 0 && | 1629 | rdev->raid_disk < 0 && |
@@ -1696,18 +1709,20 @@ repeat: | |||
1696 | MD_BUG(); | 1709 | MD_BUG(); |
1697 | mddev->events --; | 1710 | mddev->events --; |
1698 | } | 1711 | } |
1699 | sync_sbs(mddev, nospares); | ||
1700 | 1712 | ||
1701 | /* | 1713 | /* |
1702 | * do not write anything to disk if using | 1714 | * do not write anything to disk if using |
1703 | * nonpersistent superblocks | 1715 | * nonpersistent superblocks |
1704 | */ | 1716 | */ |
1705 | if (!mddev->persistent) { | 1717 | if (!mddev->persistent) { |
1706 | clear_bit(MD_CHANGE_PENDING, &mddev->flags); | 1718 | if (!mddev->external) |
1719 | clear_bit(MD_CHANGE_PENDING, &mddev->flags); | ||
1720 | |||
1707 | spin_unlock_irq(&mddev->write_lock); | 1721 | spin_unlock_irq(&mddev->write_lock); |
1708 | wake_up(&mddev->sb_wait); | 1722 | wake_up(&mddev->sb_wait); |
1709 | return; | 1723 | return; |
1710 | } | 1724 | } |
1725 | sync_sbs(mddev, nospares); | ||
1711 | spin_unlock_irq(&mddev->write_lock); | 1726 | spin_unlock_irq(&mddev->write_lock); |
1712 | 1727 | ||
1713 | dprintk(KERN_INFO | 1728 | dprintk(KERN_INFO |
@@ -1715,7 +1730,7 @@ repeat: | |||
1715 | mdname(mddev),mddev->in_sync); | 1730 | mdname(mddev),mddev->in_sync); |
1716 | 1731 | ||
1717 | bitmap_update_sb(mddev->bitmap); | 1732 | bitmap_update_sb(mddev->bitmap); |
1718 | ITERATE_RDEV(mddev,rdev,tmp) { | 1733 | rdev_for_each(rdev, tmp, mddev) { |
1719 | char b[BDEVNAME_SIZE]; | 1734 | char b[BDEVNAME_SIZE]; |
1720 | dprintk(KERN_INFO "md: "); | 1735 | dprintk(KERN_INFO "md: "); |
1721 | if (rdev->sb_loaded != 1) | 1736 | if (rdev->sb_loaded != 1) |
@@ -1785,7 +1800,7 @@ static ssize_t | |||
1785 | state_show(mdk_rdev_t *rdev, char *page) | 1800 | state_show(mdk_rdev_t *rdev, char *page) |
1786 | { | 1801 | { |
1787 | char *sep = ""; | 1802 | char *sep = ""; |
1788 | int len=0; | 1803 | size_t len = 0; |
1789 | 1804 | ||
1790 | if (test_bit(Faulty, &rdev->flags)) { | 1805 | if (test_bit(Faulty, &rdev->flags)) { |
1791 | len+= sprintf(page+len, "%sfaulty",sep); | 1806 | len+= sprintf(page+len, "%sfaulty",sep); |
@@ -1887,20 +1902,45 @@ static ssize_t | |||
1887 | slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) | 1902 | slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) |
1888 | { | 1903 | { |
1889 | char *e; | 1904 | char *e; |
1905 | int err; | ||
1906 | char nm[20]; | ||
1890 | int slot = simple_strtoul(buf, &e, 10); | 1907 | int slot = simple_strtoul(buf, &e, 10); |
1891 | if (strncmp(buf, "none", 4)==0) | 1908 | if (strncmp(buf, "none", 4)==0) |
1892 | slot = -1; | 1909 | slot = -1; |
1893 | else if (e==buf || (*e && *e!= '\n')) | 1910 | else if (e==buf || (*e && *e!= '\n')) |
1894 | return -EINVAL; | 1911 | return -EINVAL; |
1895 | if (rdev->mddev->pers) | 1912 | if (rdev->mddev->pers) { |
1896 | /* Cannot set slot in active array (yet) */ | 1913 | /* Setting 'slot' on an active array requires also |
1897 | return -EBUSY; | 1914 | * updating the 'rd%d' link, and communicating |
1898 | if (slot >= rdev->mddev->raid_disks) | 1915 | * with the personality with ->hot_*_disk. |
1899 | return -ENOSPC; | 1916 | * For now we only support removing |
1900 | rdev->raid_disk = slot; | 1917 | * failed/spare devices. This normally happens automatically, |
1901 | /* assume it is working */ | 1918 | * but not when the metadata is externally managed. |
1902 | rdev->flags = 0; | 1919 | */ |
1903 | set_bit(In_sync, &rdev->flags); | 1920 | if (slot != -1) |
1921 | return -EBUSY; | ||
1922 | if (rdev->raid_disk == -1) | ||
1923 | return -EEXIST; | ||
1924 | /* personality does all needed checks */ | ||
1925 | if (rdev->mddev->pers->hot_add_disk == NULL) | ||
1926 | return -EINVAL; | ||
1927 | err = rdev->mddev->pers-> | ||
1928 | hot_remove_disk(rdev->mddev, rdev->raid_disk); | ||
1929 | if (err) | ||
1930 | return err; | ||
1931 | sprintf(nm, "rd%d", rdev->raid_disk); | ||
1932 | sysfs_remove_link(&rdev->mddev->kobj, nm); | ||
1933 | set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); | ||
1934 | md_wakeup_thread(rdev->mddev->thread); | ||
1935 | } else { | ||
1936 | if (slot >= rdev->mddev->raid_disks) | ||
1937 | return -ENOSPC; | ||
1938 | rdev->raid_disk = slot; | ||
1939 | /* assume it is working */ | ||
1940 | clear_bit(Faulty, &rdev->flags); | ||
1941 | clear_bit(WriteMostly, &rdev->flags); | ||
1942 | set_bit(In_sync, &rdev->flags); | ||
1943 | } | ||
1904 | return len; | 1944 | return len; |
1905 | } | 1945 | } |
1906 | 1946 | ||
@@ -1923,6 +1963,10 @@ offset_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
1923 | return -EINVAL; | 1963 | return -EINVAL; |
1924 | if (rdev->mddev->pers) | 1964 | if (rdev->mddev->pers) |
1925 | return -EBUSY; | 1965 | return -EBUSY; |
1966 | if (rdev->size && rdev->mddev->external) | ||
1967 | /* Must set offset before size, so overlap checks | ||
1968 | * can be sane */ | ||
1969 | return -EBUSY; | ||
1926 | rdev->data_offset = offset; | 1970 | rdev->data_offset = offset; |
1927 | return len; | 1971 | return len; |
1928 | } | 1972 | } |
@@ -1936,16 +1980,69 @@ rdev_size_show(mdk_rdev_t *rdev, char *page) | |||
1936 | return sprintf(page, "%llu\n", (unsigned long long)rdev->size); | 1980 | return sprintf(page, "%llu\n", (unsigned long long)rdev->size); |
1937 | } | 1981 | } |
1938 | 1982 | ||
1983 | static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2) | ||
1984 | { | ||
1985 | /* check if two start/length pairs overlap */ | ||
1986 | if (s1+l1 <= s2) | ||
1987 | return 0; | ||
1988 | if (s2+l2 <= s1) | ||
1989 | return 0; | ||
1990 | return 1; | ||
1991 | } | ||
1992 | |||
1939 | static ssize_t | 1993 | static ssize_t |
1940 | rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) | 1994 | rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) |
1941 | { | 1995 | { |
1942 | char *e; | 1996 | char *e; |
1943 | unsigned long long size = simple_strtoull(buf, &e, 10); | 1997 | unsigned long long size = simple_strtoull(buf, &e, 10); |
1998 | unsigned long long oldsize = rdev->size; | ||
1944 | if (e==buf || (*e && *e != '\n')) | 1999 | if (e==buf || (*e && *e != '\n')) |
1945 | return -EINVAL; | 2000 | return -EINVAL; |
1946 | if (rdev->mddev->pers) | 2001 | if (rdev->mddev->pers) |
1947 | return -EBUSY; | 2002 | return -EBUSY; |
1948 | rdev->size = size; | 2003 | rdev->size = size; |
2004 | if (size > oldsize && rdev->mddev->external) { | ||
2005 | /* need to check that all other rdevs with the same ->bdev | ||
2006 | * do not overlap. We need to unlock the mddev to avoid | ||
2007 | * a deadlock. We have already changed rdev->size, and if | ||
2008 | * we have to change it back, we will have the lock again. | ||
2009 | */ | ||
2010 | mddev_t *mddev; | ||
2011 | int overlap = 0; | ||
2012 | struct list_head *tmp, *tmp2; | ||
2013 | |||
2014 | mddev_unlock(rdev->mddev); | ||
2015 | for_each_mddev(mddev, tmp) { | ||
2016 | mdk_rdev_t *rdev2; | ||
2017 | |||
2018 | mddev_lock(mddev); | ||
2019 | rdev_for_each(rdev2, tmp2, mddev) | ||
2020 | if (test_bit(AllReserved, &rdev2->flags) || | ||
2021 | (rdev->bdev == rdev2->bdev && | ||
2022 | rdev != rdev2 && | ||
2023 | overlaps(rdev->data_offset, rdev->size, | ||
2024 | rdev2->data_offset, rdev2->size))) { | ||
2025 | overlap = 1; | ||
2026 | break; | ||
2027 | } | ||
2028 | mddev_unlock(mddev); | ||
2029 | if (overlap) { | ||
2030 | mddev_put(mddev); | ||
2031 | break; | ||
2032 | } | ||
2033 | } | ||
2034 | mddev_lock(rdev->mddev); | ||
2035 | if (overlap) { | ||
2036 | /* Someone else could have slipped in a size | ||
2037 | * change here, but doing so is just silly. | ||
2038 | * We put oldsize back because we *know* it is | ||
2039 | * safe, and trust userspace not to race with | ||
2040 | * itself | ||
2041 | */ | ||
2042 | rdev->size = oldsize; | ||
2043 | return -EBUSY; | ||
2044 | } | ||
2045 | } | ||
1949 | if (size < rdev->mddev->size || rdev->mddev->size == 0) | 2046 | if (size < rdev->mddev->size || rdev->mddev->size == 0) |
1950 | rdev->mddev->size = size; | 2047 | rdev->mddev->size = size; |
1951 | return len; | 2048 | return len; |
@@ -1980,12 +2077,18 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr, | |||
1980 | { | 2077 | { |
1981 | struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr); | 2078 | struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr); |
1982 | mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj); | 2079 | mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj); |
2080 | int rv; | ||
1983 | 2081 | ||
1984 | if (!entry->store) | 2082 | if (!entry->store) |
1985 | return -EIO; | 2083 | return -EIO; |
1986 | if (!capable(CAP_SYS_ADMIN)) | 2084 | if (!capable(CAP_SYS_ADMIN)) |
1987 | return -EACCES; | 2085 | return -EACCES; |
1988 | return entry->store(rdev, page, length); | 2086 | rv = mddev_lock(rdev->mddev); |
2087 | if (!rv) { | ||
2088 | rv = entry->store(rdev, page, length); | ||
2089 | mddev_unlock(rdev->mddev); | ||
2090 | } | ||
2091 | return rv; | ||
1989 | } | 2092 | } |
1990 | 2093 | ||
1991 | static void rdev_free(struct kobject *ko) | 2094 | static void rdev_free(struct kobject *ko) |
@@ -2029,7 +2132,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi | |||
2029 | if ((err = alloc_disk_sb(rdev))) | 2132 | if ((err = alloc_disk_sb(rdev))) |
2030 | goto abort_free; | 2133 | goto abort_free; |
2031 | 2134 | ||
2032 | err = lock_rdev(rdev, newdev); | 2135 | err = lock_rdev(rdev, newdev, super_format == -2); |
2033 | if (err) | 2136 | if (err) |
2034 | goto abort_free; | 2137 | goto abort_free; |
2035 | 2138 | ||
@@ -2099,7 +2202,7 @@ static void analyze_sbs(mddev_t * mddev) | |||
2099 | char b[BDEVNAME_SIZE]; | 2202 | char b[BDEVNAME_SIZE]; |
2100 | 2203 | ||
2101 | freshest = NULL; | 2204 | freshest = NULL; |
2102 | ITERATE_RDEV(mddev,rdev,tmp) | 2205 | rdev_for_each(rdev, tmp, mddev) |
2103 | switch (super_types[mddev->major_version]. | 2206 | switch (super_types[mddev->major_version]. |
2104 | load_super(rdev, freshest, mddev->minor_version)) { | 2207 | load_super(rdev, freshest, mddev->minor_version)) { |
2105 | case 1: | 2208 | case 1: |
@@ -2120,7 +2223,7 @@ static void analyze_sbs(mddev_t * mddev) | |||
2120 | validate_super(mddev, freshest); | 2223 | validate_super(mddev, freshest); |
2121 | 2224 | ||
2122 | i = 0; | 2225 | i = 0; |
2123 | ITERATE_RDEV(mddev,rdev,tmp) { | 2226 | rdev_for_each(rdev, tmp, mddev) { |
2124 | if (rdev != freshest) | 2227 | if (rdev != freshest) |
2125 | if (super_types[mddev->major_version]. | 2228 | if (super_types[mddev->major_version]. |
2126 | validate_super(mddev, rdev)) { | 2229 | validate_super(mddev, rdev)) { |
@@ -2215,7 +2318,7 @@ level_show(mddev_t *mddev, char *page) | |||
2215 | static ssize_t | 2318 | static ssize_t |
2216 | level_store(mddev_t *mddev, const char *buf, size_t len) | 2319 | level_store(mddev_t *mddev, const char *buf, size_t len) |
2217 | { | 2320 | { |
2218 | int rv = len; | 2321 | ssize_t rv = len; |
2219 | if (mddev->pers) | 2322 | if (mddev->pers) |
2220 | return -EBUSY; | 2323 | return -EBUSY; |
2221 | if (len == 0) | 2324 | if (len == 0) |
@@ -2425,6 +2528,8 @@ array_state_show(mddev_t *mddev, char *page) | |||
2425 | case 0: | 2528 | case 0: |
2426 | if (mddev->in_sync) | 2529 | if (mddev->in_sync) |
2427 | st = clean; | 2530 | st = clean; |
2531 | else if (test_bit(MD_CHANGE_CLEAN, &mddev->flags)) | ||
2532 | st = write_pending; | ||
2428 | else if (mddev->safemode) | 2533 | else if (mddev->safemode) |
2429 | st = active_idle; | 2534 | st = active_idle; |
2430 | else | 2535 | else |
@@ -2455,11 +2560,9 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) | |||
2455 | break; | 2560 | break; |
2456 | case clear: | 2561 | case clear: |
2457 | /* stopping an active array */ | 2562 | /* stopping an active array */ |
2458 | if (mddev->pers) { | 2563 | if (atomic_read(&mddev->active) > 1) |
2459 | if (atomic_read(&mddev->active) > 1) | 2564 | return -EBUSY; |
2460 | return -EBUSY; | 2565 | err = do_md_stop(mddev, 0); |
2461 | err = do_md_stop(mddev, 0); | ||
2462 | } | ||
2463 | break; | 2566 | break; |
2464 | case inactive: | 2567 | case inactive: |
2465 | /* stopping an active array */ | 2568 | /* stopping an active array */ |
@@ -2467,7 +2570,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) | |||
2467 | if (atomic_read(&mddev->active) > 1) | 2570 | if (atomic_read(&mddev->active) > 1) |
2468 | return -EBUSY; | 2571 | return -EBUSY; |
2469 | err = do_md_stop(mddev, 2); | 2572 | err = do_md_stop(mddev, 2); |
2470 | } | 2573 | } else |
2574 | err = 0; /* already inactive */ | ||
2471 | break; | 2575 | break; |
2472 | case suspended: | 2576 | case suspended: |
2473 | break; /* not supported yet */ | 2577 | break; /* not supported yet */ |
@@ -2495,9 +2599,15 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) | |||
2495 | restart_array(mddev); | 2599 | restart_array(mddev); |
2496 | spin_lock_irq(&mddev->write_lock); | 2600 | spin_lock_irq(&mddev->write_lock); |
2497 | if (atomic_read(&mddev->writes_pending) == 0) { | 2601 | if (atomic_read(&mddev->writes_pending) == 0) { |
2498 | mddev->in_sync = 1; | 2602 | if (mddev->in_sync == 0) { |
2499 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); | 2603 | mddev->in_sync = 1; |
2500 | } | 2604 | if (mddev->persistent) |
2605 | set_bit(MD_CHANGE_CLEAN, | ||
2606 | &mddev->flags); | ||
2607 | } | ||
2608 | err = 0; | ||
2609 | } else | ||
2610 | err = -EBUSY; | ||
2501 | spin_unlock_irq(&mddev->write_lock); | 2611 | spin_unlock_irq(&mddev->write_lock); |
2502 | } else { | 2612 | } else { |
2503 | mddev->ro = 0; | 2613 | mddev->ro = 0; |
@@ -2508,7 +2618,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) | |||
2508 | case active: | 2618 | case active: |
2509 | if (mddev->pers) { | 2619 | if (mddev->pers) { |
2510 | restart_array(mddev); | 2620 | restart_array(mddev); |
2511 | clear_bit(MD_CHANGE_CLEAN, &mddev->flags); | 2621 | if (mddev->external) |
2622 | clear_bit(MD_CHANGE_CLEAN, &mddev->flags); | ||
2512 | wake_up(&mddev->sb_wait); | 2623 | wake_up(&mddev->sb_wait); |
2513 | err = 0; | 2624 | err = 0; |
2514 | } else { | 2625 | } else { |
@@ -2574,7 +2685,9 @@ new_dev_store(mddev_t *mddev, const char *buf, size_t len) | |||
2574 | if (err < 0) | 2685 | if (err < 0) |
2575 | goto out; | 2686 | goto out; |
2576 | } | 2687 | } |
2577 | } else | 2688 | } else if (mddev->external) |
2689 | rdev = md_import_device(dev, -2, -1); | ||
2690 | else | ||
2578 | rdev = md_import_device(dev, -1, -1); | 2691 | rdev = md_import_device(dev, -1, -1); |
2579 | 2692 | ||
2580 | if (IS_ERR(rdev)) | 2693 | if (IS_ERR(rdev)) |
@@ -2659,7 +2772,9 @@ __ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store); | |||
2659 | 2772 | ||
2660 | 2773 | ||
2661 | /* Metadata version. | 2774 | /* Metadata version. |
2662 | * This is either 'none' for arrays with externally managed metadata, | 2775 | * This is one of |
2776 | * 'none' for arrays with no metadata (good luck...) | ||
2777 | * 'external' for arrays with externally managed metadata, | ||
2663 | * or N.M for internally known formats | 2778 | * or N.M for internally known formats |
2664 | */ | 2779 | */ |
2665 | static ssize_t | 2780 | static ssize_t |
@@ -2668,6 +2783,8 @@ metadata_show(mddev_t *mddev, char *page) | |||
2668 | if (mddev->persistent) | 2783 | if (mddev->persistent) |
2669 | return sprintf(page, "%d.%d\n", | 2784 | return sprintf(page, "%d.%d\n", |
2670 | mddev->major_version, mddev->minor_version); | 2785 | mddev->major_version, mddev->minor_version); |
2786 | else if (mddev->external) | ||
2787 | return sprintf(page, "external:%s\n", mddev->metadata_type); | ||
2671 | else | 2788 | else |
2672 | return sprintf(page, "none\n"); | 2789 | return sprintf(page, "none\n"); |
2673 | } | 2790 | } |
@@ -2682,6 +2799,21 @@ metadata_store(mddev_t *mddev, const char *buf, size_t len) | |||
2682 | 2799 | ||
2683 | if (cmd_match(buf, "none")) { | 2800 | if (cmd_match(buf, "none")) { |
2684 | mddev->persistent = 0; | 2801 | mddev->persistent = 0; |
2802 | mddev->external = 0; | ||
2803 | mddev->major_version = 0; | ||
2804 | mddev->minor_version = 90; | ||
2805 | return len; | ||
2806 | } | ||
2807 | if (strncmp(buf, "external:", 9) == 0) { | ||
2808 | size_t namelen = len-9; | ||
2809 | if (namelen >= sizeof(mddev->metadata_type)) | ||
2810 | namelen = sizeof(mddev->metadata_type)-1; | ||
2811 | strncpy(mddev->metadata_type, buf+9, namelen); | ||
2812 | mddev->metadata_type[namelen] = 0; | ||
2813 | if (namelen && mddev->metadata_type[namelen-1] == '\n') | ||
2814 | mddev->metadata_type[--namelen] = 0; | ||
2815 | mddev->persistent = 0; | ||
2816 | mddev->external = 1; | ||
2685 | mddev->major_version = 0; | 2817 | mddev->major_version = 0; |
2686 | mddev->minor_version = 90; | 2818 | mddev->minor_version = 90; |
2687 | return len; | 2819 | return len; |
@@ -2698,6 +2830,7 @@ metadata_store(mddev_t *mddev, const char *buf, size_t len) | |||
2698 | mddev->major_version = major; | 2830 | mddev->major_version = major; |
2699 | mddev->minor_version = minor; | 2831 | mddev->minor_version = minor; |
2700 | mddev->persistent = 1; | 2832 | mddev->persistent = 1; |
2833 | mddev->external = 0; | ||
2701 | return len; | 2834 | return len; |
2702 | } | 2835 | } |
2703 | 2836 | ||
@@ -2865,6 +2998,43 @@ sync_completed_show(mddev_t *mddev, char *page) | |||
2865 | static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed); | 2998 | static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed); |
2866 | 2999 | ||
2867 | static ssize_t | 3000 | static ssize_t |
3001 | max_sync_show(mddev_t *mddev, char *page) | ||
3002 | { | ||
3003 | if (mddev->resync_max == MaxSector) | ||
3004 | return sprintf(page, "max\n"); | ||
3005 | else | ||
3006 | return sprintf(page, "%llu\n", | ||
3007 | (unsigned long long)mddev->resync_max); | ||
3008 | } | ||
3009 | static ssize_t | ||
3010 | max_sync_store(mddev_t *mddev, const char *buf, size_t len) | ||
3011 | { | ||
3012 | if (strncmp(buf, "max", 3) == 0) | ||
3013 | mddev->resync_max = MaxSector; | ||
3014 | else { | ||
3015 | char *ep; | ||
3016 | unsigned long long max = simple_strtoull(buf, &ep, 10); | ||
3017 | if (ep == buf || (*ep != 0 && *ep != '\n')) | ||
3018 | return -EINVAL; | ||
3019 | if (max < mddev->resync_max && | ||
3020 | test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) | ||
3021 | return -EBUSY; | ||
3022 | |||
3023 | /* Must be a multiple of chunk_size */ | ||
3024 | if (mddev->chunk_size) { | ||
3025 | if (max & (sector_t)((mddev->chunk_size>>9)-1)) | ||
3026 | return -EINVAL; | ||
3027 | } | ||
3028 | mddev->resync_max = max; | ||
3029 | } | ||
3030 | wake_up(&mddev->recovery_wait); | ||
3031 | return len; | ||
3032 | } | ||
3033 | |||
3034 | static struct md_sysfs_entry md_max_sync = | ||
3035 | __ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store); | ||
3036 | |||
3037 | static ssize_t | ||
2868 | suspend_lo_show(mddev_t *mddev, char *page) | 3038 | suspend_lo_show(mddev_t *mddev, char *page) |
2869 | { | 3039 | { |
2870 | return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo); | 3040 | return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo); |
@@ -2974,6 +3144,7 @@ static struct attribute *md_redundancy_attrs[] = { | |||
2974 | &md_sync_max.attr, | 3144 | &md_sync_max.attr, |
2975 | &md_sync_speed.attr, | 3145 | &md_sync_speed.attr, |
2976 | &md_sync_completed.attr, | 3146 | &md_sync_completed.attr, |
3147 | &md_max_sync.attr, | ||
2977 | &md_suspend_lo.attr, | 3148 | &md_suspend_lo.attr, |
2978 | &md_suspend_hi.attr, | 3149 | &md_suspend_hi.attr, |
2979 | &md_bitmap.attr, | 3150 | &md_bitmap.attr, |
@@ -3118,8 +3289,11 @@ static int do_md_run(mddev_t * mddev) | |||
3118 | /* | 3289 | /* |
3119 | * Analyze all RAID superblock(s) | 3290 | * Analyze all RAID superblock(s) |
3120 | */ | 3291 | */ |
3121 | if (!mddev->raid_disks) | 3292 | if (!mddev->raid_disks) { |
3293 | if (!mddev->persistent) | ||
3294 | return -EINVAL; | ||
3122 | analyze_sbs(mddev); | 3295 | analyze_sbs(mddev); |
3296 | } | ||
3123 | 3297 | ||
3124 | chunk_size = mddev->chunk_size; | 3298 | chunk_size = mddev->chunk_size; |
3125 | 3299 | ||
@@ -3143,7 +3317,7 @@ static int do_md_run(mddev_t * mddev) | |||
3143 | } | 3317 | } |
3144 | 3318 | ||
3145 | /* devices must have minimum size of one chunk */ | 3319 | /* devices must have minimum size of one chunk */ |
3146 | ITERATE_RDEV(mddev,rdev,tmp) { | 3320 | rdev_for_each(rdev, tmp, mddev) { |
3147 | if (test_bit(Faulty, &rdev->flags)) | 3321 | if (test_bit(Faulty, &rdev->flags)) |
3148 | continue; | 3322 | continue; |
3149 | if (rdev->size < chunk_size / 1024) { | 3323 | if (rdev->size < chunk_size / 1024) { |
@@ -3170,7 +3344,7 @@ static int do_md_run(mddev_t * mddev) | |||
3170 | * the only valid external interface is through the md | 3344 | * the only valid external interface is through the md |
3171 | * device. | 3345 | * device. |
3172 | */ | 3346 | */ |
3173 | ITERATE_RDEV(mddev,rdev,tmp) { | 3347 | rdev_for_each(rdev, tmp, mddev) { |
3174 | if (test_bit(Faulty, &rdev->flags)) | 3348 | if (test_bit(Faulty, &rdev->flags)) |
3175 | continue; | 3349 | continue; |
3176 | sync_blockdev(rdev->bdev); | 3350 | sync_blockdev(rdev->bdev); |
@@ -3236,8 +3410,8 @@ static int do_md_run(mddev_t * mddev) | |||
3236 | mdk_rdev_t *rdev2; | 3410 | mdk_rdev_t *rdev2; |
3237 | struct list_head *tmp2; | 3411 | struct list_head *tmp2; |
3238 | int warned = 0; | 3412 | int warned = 0; |
3239 | ITERATE_RDEV(mddev, rdev, tmp) { | 3413 | rdev_for_each(rdev, tmp, mddev) { |
3240 | ITERATE_RDEV(mddev, rdev2, tmp2) { | 3414 | rdev_for_each(rdev2, tmp2, mddev) { |
3241 | if (rdev < rdev2 && | 3415 | if (rdev < rdev2 && |
3242 | rdev->bdev->bd_contains == | 3416 | rdev->bdev->bd_contains == |
3243 | rdev2->bdev->bd_contains) { | 3417 | rdev2->bdev->bd_contains) { |
@@ -3297,7 +3471,7 @@ static int do_md_run(mddev_t * mddev) | |||
3297 | mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */ | 3471 | mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */ |
3298 | mddev->in_sync = 1; | 3472 | mddev->in_sync = 1; |
3299 | 3473 | ||
3300 | ITERATE_RDEV(mddev,rdev,tmp) | 3474 | rdev_for_each(rdev, tmp, mddev) |
3301 | if (rdev->raid_disk >= 0) { | 3475 | if (rdev->raid_disk >= 0) { |
3302 | char nm[20]; | 3476 | char nm[20]; |
3303 | sprintf(nm, "rd%d", rdev->raid_disk); | 3477 | sprintf(nm, "rd%d", rdev->raid_disk); |
@@ -3330,7 +3504,7 @@ static int do_md_run(mddev_t * mddev) | |||
3330 | if (mddev->degraded && !mddev->sync_thread) { | 3504 | if (mddev->degraded && !mddev->sync_thread) { |
3331 | struct list_head *rtmp; | 3505 | struct list_head *rtmp; |
3332 | int spares = 0; | 3506 | int spares = 0; |
3333 | ITERATE_RDEV(mddev,rdev,rtmp) | 3507 | rdev_for_each(rdev, rtmp, mddev) |
3334 | if (rdev->raid_disk >= 0 && | 3508 | if (rdev->raid_disk >= 0 && |
3335 | !test_bit(In_sync, &rdev->flags) && | 3509 | !test_bit(In_sync, &rdev->flags) && |
3336 | !test_bit(Faulty, &rdev->flags)) | 3510 | !test_bit(Faulty, &rdev->flags)) |
@@ -3507,14 +3681,14 @@ static int do_md_stop(mddev_t * mddev, int mode) | |||
3507 | } | 3681 | } |
3508 | mddev->bitmap_offset = 0; | 3682 | mddev->bitmap_offset = 0; |
3509 | 3683 | ||
3510 | ITERATE_RDEV(mddev,rdev,tmp) | 3684 | rdev_for_each(rdev, tmp, mddev) |
3511 | if (rdev->raid_disk >= 0) { | 3685 | if (rdev->raid_disk >= 0) { |
3512 | char nm[20]; | 3686 | char nm[20]; |
3513 | sprintf(nm, "rd%d", rdev->raid_disk); | 3687 | sprintf(nm, "rd%d", rdev->raid_disk); |
3514 | sysfs_remove_link(&mddev->kobj, nm); | 3688 | sysfs_remove_link(&mddev->kobj, nm); |
3515 | } | 3689 | } |
3516 | 3690 | ||
3517 | /* make sure all delayed_delete calls have finished */ | 3691 | /* make sure all md_delayed_delete calls have finished */ |
3518 | flush_scheduled_work(); | 3692 | flush_scheduled_work(); |
3519 | 3693 | ||
3520 | export_array(mddev); | 3694 | export_array(mddev); |
@@ -3523,7 +3697,10 @@ static int do_md_stop(mddev_t * mddev, int mode) | |||
3523 | mddev->size = 0; | 3697 | mddev->size = 0; |
3524 | mddev->raid_disks = 0; | 3698 | mddev->raid_disks = 0; |
3525 | mddev->recovery_cp = 0; | 3699 | mddev->recovery_cp = 0; |
3700 | mddev->resync_max = MaxSector; | ||
3526 | mddev->reshape_position = MaxSector; | 3701 | mddev->reshape_position = MaxSector; |
3702 | mddev->external = 0; | ||
3703 | mddev->persistent = 0; | ||
3527 | 3704 | ||
3528 | } else if (mddev->pers) | 3705 | } else if (mddev->pers) |
3529 | printk(KERN_INFO "md: %s switched to read-only mode.\n", | 3706 | printk(KERN_INFO "md: %s switched to read-only mode.\n", |
@@ -3546,7 +3723,7 @@ static void autorun_array(mddev_t *mddev) | |||
3546 | 3723 | ||
3547 | printk(KERN_INFO "md: running: "); | 3724 | printk(KERN_INFO "md: running: "); |
3548 | 3725 | ||
3549 | ITERATE_RDEV(mddev,rdev,tmp) { | 3726 | rdev_for_each(rdev, tmp, mddev) { |
3550 | char b[BDEVNAME_SIZE]; | 3727 | char b[BDEVNAME_SIZE]; |
3551 | printk("<%s>", bdevname(rdev->bdev,b)); | 3728 | printk("<%s>", bdevname(rdev->bdev,b)); |
3552 | } | 3729 | } |
@@ -3589,7 +3766,7 @@ static void autorun_devices(int part) | |||
3589 | printk(KERN_INFO "md: considering %s ...\n", | 3766 | printk(KERN_INFO "md: considering %s ...\n", |
3590 | bdevname(rdev0->bdev,b)); | 3767 | bdevname(rdev0->bdev,b)); |
3591 | INIT_LIST_HEAD(&candidates); | 3768 | INIT_LIST_HEAD(&candidates); |
3592 | ITERATE_RDEV_PENDING(rdev,tmp) | 3769 | rdev_for_each_list(rdev, tmp, pending_raid_disks) |
3593 | if (super_90_load(rdev, rdev0, 0) >= 0) { | 3770 | if (super_90_load(rdev, rdev0, 0) >= 0) { |
3594 | printk(KERN_INFO "md: adding %s ...\n", | 3771 | printk(KERN_INFO "md: adding %s ...\n", |
3595 | bdevname(rdev->bdev,b)); | 3772 | bdevname(rdev->bdev,b)); |
@@ -3632,7 +3809,8 @@ static void autorun_devices(int part) | |||
3632 | mddev_unlock(mddev); | 3809 | mddev_unlock(mddev); |
3633 | } else { | 3810 | } else { |
3634 | printk(KERN_INFO "md: created %s\n", mdname(mddev)); | 3811 | printk(KERN_INFO "md: created %s\n", mdname(mddev)); |
3635 | ITERATE_RDEV_GENERIC(candidates,rdev,tmp) { | 3812 | mddev->persistent = 1; |
3813 | rdev_for_each_list(rdev, tmp, candidates) { | ||
3636 | list_del_init(&rdev->same_set); | 3814 | list_del_init(&rdev->same_set); |
3637 | if (bind_rdev_to_array(rdev, mddev)) | 3815 | if (bind_rdev_to_array(rdev, mddev)) |
3638 | export_rdev(rdev); | 3816 | export_rdev(rdev); |
@@ -3643,7 +3821,7 @@ static void autorun_devices(int part) | |||
3643 | /* on success, candidates will be empty, on error | 3821 | /* on success, candidates will be empty, on error |
3644 | * it won't... | 3822 | * it won't... |
3645 | */ | 3823 | */ |
3646 | ITERATE_RDEV_GENERIC(candidates,rdev,tmp) | 3824 | rdev_for_each_list(rdev, tmp, candidates) |
3647 | export_rdev(rdev); | 3825 | export_rdev(rdev); |
3648 | mddev_put(mddev); | 3826 | mddev_put(mddev); |
3649 | } | 3827 | } |
@@ -3673,7 +3851,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg) | |||
3673 | struct list_head *tmp; | 3851 | struct list_head *tmp; |
3674 | 3852 | ||
3675 | nr=working=active=failed=spare=0; | 3853 | nr=working=active=failed=spare=0; |
3676 | ITERATE_RDEV(mddev,rdev,tmp) { | 3854 | rdev_for_each(rdev, tmp, mddev) { |
3677 | nr++; | 3855 | nr++; |
3678 | if (test_bit(Faulty, &rdev->flags)) | 3856 | if (test_bit(Faulty, &rdev->flags)) |
3679 | failed++; | 3857 | failed++; |
@@ -3919,8 +4097,6 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) | |||
3919 | else | 4097 | else |
3920 | rdev->raid_disk = -1; | 4098 | rdev->raid_disk = -1; |
3921 | 4099 | ||
3922 | rdev->flags = 0; | ||
3923 | |||
3924 | if (rdev->raid_disk < mddev->raid_disks) | 4100 | if (rdev->raid_disk < mddev->raid_disks) |
3925 | if (info->state & (1<<MD_DISK_SYNC)) | 4101 | if (info->state & (1<<MD_DISK_SYNC)) |
3926 | set_bit(In_sync, &rdev->flags); | 4102 | set_bit(In_sync, &rdev->flags); |
@@ -4165,13 +4341,15 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) | |||
4165 | else | 4341 | else |
4166 | mddev->recovery_cp = 0; | 4342 | mddev->recovery_cp = 0; |
4167 | mddev->persistent = ! info->not_persistent; | 4343 | mddev->persistent = ! info->not_persistent; |
4344 | mddev->external = 0; | ||
4168 | 4345 | ||
4169 | mddev->layout = info->layout; | 4346 | mddev->layout = info->layout; |
4170 | mddev->chunk_size = info->chunk_size; | 4347 | mddev->chunk_size = info->chunk_size; |
4171 | 4348 | ||
4172 | mddev->max_disks = MD_SB_DISKS; | 4349 | mddev->max_disks = MD_SB_DISKS; |
4173 | 4350 | ||
4174 | mddev->flags = 0; | 4351 | if (mddev->persistent) |
4352 | mddev->flags = 0; | ||
4175 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 4353 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
4176 | 4354 | ||
4177 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; | 4355 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; |
@@ -4213,7 +4391,7 @@ static int update_size(mddev_t *mddev, unsigned long size) | |||
4213 | */ | 4391 | */ |
4214 | if (mddev->sync_thread) | 4392 | if (mddev->sync_thread) |
4215 | return -EBUSY; | 4393 | return -EBUSY; |
4216 | ITERATE_RDEV(mddev,rdev,tmp) { | 4394 | rdev_for_each(rdev, tmp, mddev) { |
4217 | sector_t avail; | 4395 | sector_t avail; |
4218 | avail = rdev->size * 2; | 4396 | avail = rdev->size * 2; |
4219 | 4397 | ||
@@ -4471,9 +4649,10 @@ static int md_ioctl(struct inode *inode, struct file *file, | |||
4471 | */ | 4649 | */ |
4472 | /* if we are not initialised yet, only ADD_NEW_DISK, STOP_ARRAY, | 4650 | /* if we are not initialised yet, only ADD_NEW_DISK, STOP_ARRAY, |
4473 | * RUN_ARRAY, and GET_ and SET_BITMAP_FILE are allowed */ | 4651 | * RUN_ARRAY, and GET_ and SET_BITMAP_FILE are allowed */ |
4474 | if (!mddev->raid_disks && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY | 4652 | if ((!mddev->raid_disks && !mddev->external) |
4475 | && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE | 4653 | && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY |
4476 | && cmd != GET_BITMAP_FILE) { | 4654 | && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE |
4655 | && cmd != GET_BITMAP_FILE) { | ||
4477 | err = -ENODEV; | 4656 | err = -ENODEV; |
4478 | goto abort_unlock; | 4657 | goto abort_unlock; |
4479 | } | 4658 | } |
@@ -4757,7 +4936,7 @@ static void status_unused(struct seq_file *seq) | |||
4757 | 4936 | ||
4758 | seq_printf(seq, "unused devices: "); | 4937 | seq_printf(seq, "unused devices: "); |
4759 | 4938 | ||
4760 | ITERATE_RDEV_PENDING(rdev,tmp) { | 4939 | rdev_for_each_list(rdev, tmp, pending_raid_disks) { |
4761 | char b[BDEVNAME_SIZE]; | 4940 | char b[BDEVNAME_SIZE]; |
4762 | i++; | 4941 | i++; |
4763 | seq_printf(seq, "%s ", | 4942 | seq_printf(seq, "%s ", |
@@ -4953,7 +5132,7 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
4953 | } | 5132 | } |
4954 | 5133 | ||
4955 | size = 0; | 5134 | size = 0; |
4956 | ITERATE_RDEV(mddev,rdev,tmp2) { | 5135 | rdev_for_each(rdev, tmp2, mddev) { |
4957 | char b[BDEVNAME_SIZE]; | 5136 | char b[BDEVNAME_SIZE]; |
4958 | seq_printf(seq, " %s[%d]", | 5137 | seq_printf(seq, " %s[%d]", |
4959 | bdevname(rdev->bdev,b), rdev->desc_nr); | 5138 | bdevname(rdev->bdev,b), rdev->desc_nr); |
@@ -4982,7 +5161,10 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
4982 | mddev->major_version, | 5161 | mddev->major_version, |
4983 | mddev->minor_version); | 5162 | mddev->minor_version); |
4984 | } | 5163 | } |
4985 | } else | 5164 | } else if (mddev->external) |
5165 | seq_printf(seq, " super external:%s", | ||
5166 | mddev->metadata_type); | ||
5167 | else | ||
4986 | seq_printf(seq, " super non-persistent"); | 5168 | seq_printf(seq, " super non-persistent"); |
4987 | 5169 | ||
4988 | if (mddev->pers) { | 5170 | if (mddev->pers) { |
@@ -5106,7 +5288,7 @@ static int is_mddev_idle(mddev_t *mddev) | |||
5106 | long curr_events; | 5288 | long curr_events; |
5107 | 5289 | ||
5108 | idle = 1; | 5290 | idle = 1; |
5109 | ITERATE_RDEV(mddev,rdev,tmp) { | 5291 | rdev_for_each(rdev, tmp, mddev) { |
5110 | struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; | 5292 | struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; |
5111 | curr_events = disk_stat_read(disk, sectors[0]) + | 5293 | curr_events = disk_stat_read(disk, sectors[0]) + |
5112 | disk_stat_read(disk, sectors[1]) - | 5294 | disk_stat_read(disk, sectors[1]) - |
@@ -5283,7 +5465,7 @@ void md_do_sync(mddev_t *mddev) | |||
5283 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | 5465 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
5284 | goto skip; | 5466 | goto skip; |
5285 | } | 5467 | } |
5286 | ITERATE_MDDEV(mddev2,tmp) { | 5468 | for_each_mddev(mddev2, tmp) { |
5287 | if (mddev2 == mddev) | 5469 | if (mddev2 == mddev) |
5288 | continue; | 5470 | continue; |
5289 | if (mddev2->curr_resync && | 5471 | if (mddev2->curr_resync && |
@@ -5333,7 +5515,7 @@ void md_do_sync(mddev_t *mddev) | |||
5333 | /* recovery follows the physical size of devices */ | 5515 | /* recovery follows the physical size of devices */ |
5334 | max_sectors = mddev->size << 1; | 5516 | max_sectors = mddev->size << 1; |
5335 | j = MaxSector; | 5517 | j = MaxSector; |
5336 | ITERATE_RDEV(mddev,rdev,rtmp) | 5518 | rdev_for_each(rdev, rtmp, mddev) |
5337 | if (rdev->raid_disk >= 0 && | 5519 | if (rdev->raid_disk >= 0 && |
5338 | !test_bit(Faulty, &rdev->flags) && | 5520 | !test_bit(Faulty, &rdev->flags) && |
5339 | !test_bit(In_sync, &rdev->flags) && | 5521 | !test_bit(In_sync, &rdev->flags) && |
@@ -5381,8 +5563,16 @@ void md_do_sync(mddev_t *mddev) | |||
5381 | sector_t sectors; | 5563 | sector_t sectors; |
5382 | 5564 | ||
5383 | skipped = 0; | 5565 | skipped = 0; |
5566 | if (j >= mddev->resync_max) { | ||
5567 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); | ||
5568 | wait_event(mddev->recovery_wait, | ||
5569 | mddev->resync_max > j | ||
5570 | || kthread_should_stop()); | ||
5571 | } | ||
5572 | if (kthread_should_stop()) | ||
5573 | goto interrupted; | ||
5384 | sectors = mddev->pers->sync_request(mddev, j, &skipped, | 5574 | sectors = mddev->pers->sync_request(mddev, j, &skipped, |
5385 | currspeed < speed_min(mddev)); | 5575 | currspeed < speed_min(mddev)); |
5386 | if (sectors == 0) { | 5576 | if (sectors == 0) { |
5387 | set_bit(MD_RECOVERY_ERR, &mddev->recovery); | 5577 | set_bit(MD_RECOVERY_ERR, &mddev->recovery); |
5388 | goto out; | 5578 | goto out; |
@@ -5424,15 +5614,9 @@ void md_do_sync(mddev_t *mddev) | |||
5424 | } | 5614 | } |
5425 | 5615 | ||
5426 | 5616 | ||
5427 | if (kthread_should_stop()) { | 5617 | if (kthread_should_stop()) |
5428 | /* | 5618 | goto interrupted; |
5429 | * got a signal, exit. | 5619 | |
5430 | */ | ||
5431 | printk(KERN_INFO | ||
5432 | "md: md_do_sync() got signal ... exiting\n"); | ||
5433 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | ||
5434 | goto out; | ||
5435 | } | ||
5436 | 5620 | ||
5437 | /* | 5621 | /* |
5438 | * this loop exits only if either when we are slower than | 5622 | * this loop exits only if either when we are slower than |
@@ -5484,7 +5668,7 @@ void md_do_sync(mddev_t *mddev) | |||
5484 | } else { | 5668 | } else { |
5485 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) | 5669 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) |
5486 | mddev->curr_resync = MaxSector; | 5670 | mddev->curr_resync = MaxSector; |
5487 | ITERATE_RDEV(mddev,rdev,rtmp) | 5671 | rdev_for_each(rdev, rtmp, mddev) |
5488 | if (rdev->raid_disk >= 0 && | 5672 | if (rdev->raid_disk >= 0 && |
5489 | !test_bit(Faulty, &rdev->flags) && | 5673 | !test_bit(Faulty, &rdev->flags) && |
5490 | !test_bit(In_sync, &rdev->flags) && | 5674 | !test_bit(In_sync, &rdev->flags) && |
@@ -5496,9 +5680,22 @@ void md_do_sync(mddev_t *mddev) | |||
5496 | 5680 | ||
5497 | skip: | 5681 | skip: |
5498 | mddev->curr_resync = 0; | 5682 | mddev->curr_resync = 0; |
5683 | mddev->resync_max = MaxSector; | ||
5684 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); | ||
5499 | wake_up(&resync_wait); | 5685 | wake_up(&resync_wait); |
5500 | set_bit(MD_RECOVERY_DONE, &mddev->recovery); | 5686 | set_bit(MD_RECOVERY_DONE, &mddev->recovery); |
5501 | md_wakeup_thread(mddev->thread); | 5687 | md_wakeup_thread(mddev->thread); |
5688 | return; | ||
5689 | |||
5690 | interrupted: | ||
5691 | /* | ||
5692 | * got a signal, exit. | ||
5693 | */ | ||
5694 | printk(KERN_INFO | ||
5695 | "md: md_do_sync() got signal ... exiting\n"); | ||
5696 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | ||
5697 | goto out; | ||
5698 | |||
5502 | } | 5699 | } |
5503 | EXPORT_SYMBOL_GPL(md_do_sync); | 5700 | EXPORT_SYMBOL_GPL(md_do_sync); |
5504 | 5701 | ||
@@ -5509,8 +5706,9 @@ static int remove_and_add_spares(mddev_t *mddev) | |||
5509 | struct list_head *rtmp; | 5706 | struct list_head *rtmp; |
5510 | int spares = 0; | 5707 | int spares = 0; |
5511 | 5708 | ||
5512 | ITERATE_RDEV(mddev,rdev,rtmp) | 5709 | rdev_for_each(rdev, rtmp, mddev) |
5513 | if (rdev->raid_disk >= 0 && | 5710 | if (rdev->raid_disk >= 0 && |
5711 | !mddev->external && | ||
5514 | (test_bit(Faulty, &rdev->flags) || | 5712 | (test_bit(Faulty, &rdev->flags) || |
5515 | ! test_bit(In_sync, &rdev->flags)) && | 5713 | ! test_bit(In_sync, &rdev->flags)) && |
5516 | atomic_read(&rdev->nr_pending)==0) { | 5714 | atomic_read(&rdev->nr_pending)==0) { |
@@ -5524,7 +5722,7 @@ static int remove_and_add_spares(mddev_t *mddev) | |||
5524 | } | 5722 | } |
5525 | 5723 | ||
5526 | if (mddev->degraded) { | 5724 | if (mddev->degraded) { |
5527 | ITERATE_RDEV(mddev,rdev,rtmp) | 5725 | rdev_for_each(rdev, rtmp, mddev) |
5528 | if (rdev->raid_disk < 0 | 5726 | if (rdev->raid_disk < 0 |
5529 | && !test_bit(Faulty, &rdev->flags)) { | 5727 | && !test_bit(Faulty, &rdev->flags)) { |
5530 | rdev->recovery_offset = 0; | 5728 | rdev->recovery_offset = 0; |
@@ -5589,7 +5787,7 @@ void md_check_recovery(mddev_t *mddev) | |||
5589 | } | 5787 | } |
5590 | 5788 | ||
5591 | if ( ! ( | 5789 | if ( ! ( |
5592 | mddev->flags || | 5790 | (mddev->flags && !mddev->external) || |
5593 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || | 5791 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || |
5594 | test_bit(MD_RECOVERY_DONE, &mddev->recovery) || | 5792 | test_bit(MD_RECOVERY_DONE, &mddev->recovery) || |
5595 | (mddev->safemode == 1) || | 5793 | (mddev->safemode == 1) || |
@@ -5605,7 +5803,8 @@ void md_check_recovery(mddev_t *mddev) | |||
5605 | if (mddev->safemode && !atomic_read(&mddev->writes_pending) && | 5803 | if (mddev->safemode && !atomic_read(&mddev->writes_pending) && |
5606 | !mddev->in_sync && mddev->recovery_cp == MaxSector) { | 5804 | !mddev->in_sync && mddev->recovery_cp == MaxSector) { |
5607 | mddev->in_sync = 1; | 5805 | mddev->in_sync = 1; |
5608 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); | 5806 | if (mddev->persistent) |
5807 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); | ||
5609 | } | 5808 | } |
5610 | if (mddev->safemode == 1) | 5809 | if (mddev->safemode == 1) |
5611 | mddev->safemode = 0; | 5810 | mddev->safemode = 0; |
@@ -5637,7 +5836,7 @@ void md_check_recovery(mddev_t *mddev) | |||
5637 | * information must be scrapped | 5836 | * information must be scrapped |
5638 | */ | 5837 | */ |
5639 | if (!mddev->degraded) | 5838 | if (!mddev->degraded) |
5640 | ITERATE_RDEV(mddev,rdev,rtmp) | 5839 | rdev_for_each(rdev, rtmp, mddev) |
5641 | rdev->saved_raid_disk = -1; | 5840 | rdev->saved_raid_disk = -1; |
5642 | 5841 | ||
5643 | mddev->recovery = 0; | 5842 | mddev->recovery = 0; |
@@ -5714,7 +5913,7 @@ static int md_notify_reboot(struct notifier_block *this, | |||
5714 | 5913 | ||
5715 | printk(KERN_INFO "md: stopping all md devices.\n"); | 5914 | printk(KERN_INFO "md: stopping all md devices.\n"); |
5716 | 5915 | ||
5717 | ITERATE_MDDEV(mddev,tmp) | 5916 | for_each_mddev(mddev, tmp) |
5718 | if (mddev_trylock(mddev)) { | 5917 | if (mddev_trylock(mddev)) { |
5719 | do_md_stop (mddev, 1); | 5918 | do_md_stop (mddev, 1); |
5720 | mddev_unlock(mddev); | 5919 | mddev_unlock(mddev); |
@@ -5848,7 +6047,7 @@ static __exit void md_exit(void) | |||
5848 | unregister_reboot_notifier(&md_notifier); | 6047 | unregister_reboot_notifier(&md_notifier); |
5849 | unregister_sysctl_table(raid_table_header); | 6048 | unregister_sysctl_table(raid_table_header); |
5850 | remove_proc_entry("mdstat", NULL); | 6049 | remove_proc_entry("mdstat", NULL); |
5851 | ITERATE_MDDEV(mddev,tmp) { | 6050 | for_each_mddev(mddev, tmp) { |
5852 | struct gendisk *disk = mddev->gendisk; | 6051 | struct gendisk *disk = mddev->gendisk; |
5853 | if (!disk) | 6052 | if (!disk) |
5854 | continue; | 6053 | continue; |