Diffstat (limited to 'drivers/md/md.c')
 -rw-r--r--  drivers/md/md.c | 395
 1 file changed, 297 insertions(+), 98 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index c28a120b4161..5fc326d3970e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -195,7 +195,7 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
  * Any code which breaks out of this loop while own
  * a reference to the current mddev and must mddev_put it.
  */
-#define ITERATE_MDDEV(mddev,tmp)					\
+#define for_each_mddev(mddev,tmp)					\
 									\
 	for (({ spin_lock(&all_mddevs_lock);				\
 		tmp = all_mddevs.next;					\
@@ -275,6 +275,7 @@ static mddev_t * mddev_find(dev_t unit)
 	spin_lock_init(&new->write_lock);
 	init_waitqueue_head(&new->sb_wait);
 	new->reshape_position = MaxSector;
+	new->resync_max = MaxSector;
 
 	new->queue = blk_alloc_queue(GFP_KERNEL);
 	if (!new->queue) {
@@ -310,7 +311,7 @@ static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
 	mdk_rdev_t * rdev;
 	struct list_head *tmp;
 
-	ITERATE_RDEV(mddev,rdev,tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		if (rdev->desc_nr == nr)
 			return rdev;
 	}
@@ -322,7 +323,7 @@ static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev)
 	struct list_head *tmp;
 	mdk_rdev_t *rdev;
 
-	ITERATE_RDEV(mddev,rdev,tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		if (rdev->bdev->bd_dev == dev)
 			return rdev;
 	}
@@ -773,12 +774,16 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 	__u64 ev1 = md_event(sb);
 
 	rdev->raid_disk = -1;
-	rdev->flags = 0;
+	clear_bit(Faulty, &rdev->flags);
+	clear_bit(In_sync, &rdev->flags);
+	clear_bit(WriteMostly, &rdev->flags);
+	clear_bit(BarriersNotsupp, &rdev->flags);
+
 	if (mddev->raid_disks == 0) {
 		mddev->major_version = 0;
 		mddev->minor_version = sb->minor_version;
 		mddev->patch_version = sb->patch_version;
-		mddev->persistent = ! sb->not_persistent;
+		mddev->external = 0;
 		mddev->chunk_size = sb->chunk_size;
 		mddev->ctime = sb->ctime;
 		mddev->utime = sb->utime;
@@ -904,7 +909,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 	sb->size = mddev->size;
 	sb->raid_disks = mddev->raid_disks;
 	sb->md_minor = mddev->md_minor;
-	sb->not_persistent = !mddev->persistent;
+	sb->not_persistent = 0;
 	sb->utime = mddev->utime;
 	sb->state = 0;
 	sb->events_hi = (mddev->events>>32);
@@ -938,7 +943,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 		sb->state |= (1<<MD_SB_BITMAP_PRESENT);
 
 	sb->disks[0].state = (1<<MD_DISK_REMOVED);
-	ITERATE_RDEV(mddev,rdev2,tmp) {
+	rdev_for_each(rdev2, tmp, mddev) {
 		mdp_disk_t *d;
 		int desc_nr;
 		if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
@@ -1153,11 +1158,15 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 	__u64 ev1 = le64_to_cpu(sb->events);
 
 	rdev->raid_disk = -1;
-	rdev->flags = 0;
+	clear_bit(Faulty, &rdev->flags);
+	clear_bit(In_sync, &rdev->flags);
+	clear_bit(WriteMostly, &rdev->flags);
+	clear_bit(BarriersNotsupp, &rdev->flags);
+
 	if (mddev->raid_disks == 0) {
 		mddev->major_version = 1;
 		mddev->patch_version = 0;
-		mddev->persistent = 1;
+		mddev->external = 0;
 		mddev->chunk_size = le32_to_cpu(sb->chunksize) << 9;
 		mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
 		mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
@@ -1286,7 +1295,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 	}
 
 	max_dev = 0;
-	ITERATE_RDEV(mddev,rdev2,tmp)
+	rdev_for_each(rdev2, tmp, mddev)
 		if (rdev2->desc_nr+1 > max_dev)
 			max_dev = rdev2->desc_nr+1;
 
@@ -1295,7 +1304,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 	for (i=0; i<max_dev;i++)
 		sb->dev_roles[i] = cpu_to_le16(0xfffe);
 
-	ITERATE_RDEV(mddev,rdev2,tmp) {
+	rdev_for_each(rdev2, tmp, mddev) {
 		i = rdev2->desc_nr;
 		if (test_bit(Faulty, &rdev2->flags))
 			sb->dev_roles[i] = cpu_to_le16(0xfffe);
@@ -1333,8 +1342,8 @@ static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
 	struct list_head *tmp, *tmp2;
 	mdk_rdev_t *rdev, *rdev2;
 
-	ITERATE_RDEV(mddev1,rdev,tmp)
-		ITERATE_RDEV(mddev2, rdev2, tmp2)
+	rdev_for_each(rdev, tmp, mddev1)
+		rdev_for_each(rdev2, tmp2, mddev2)
 			if (rdev->bdev->bd_contains ==
 			    rdev2->bdev->bd_contains)
 				return 1;
@@ -1401,7 +1410,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 		goto fail;
 	}
 	list_add(&rdev->same_set, &mddev->disks);
-	bd_claim_by_disk(rdev->bdev, rdev, mddev->gendisk);
+	bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
 	return 0;
 
  fail:
@@ -1410,10 +1419,11 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 	return err;
 }
 
-static void delayed_delete(struct work_struct *ws)
+static void md_delayed_delete(struct work_struct *ws)
 {
 	mdk_rdev_t *rdev = container_of(ws, mdk_rdev_t, del_work);
 	kobject_del(&rdev->kobj);
+	kobject_put(&rdev->kobj);
 }
 
 static void unbind_rdev_from_array(mdk_rdev_t * rdev)
@@ -1432,7 +1442,8 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
 	/* We need to delay this, otherwise we can deadlock when
 	 * writing to 'remove' to "dev/state"
 	 */
-	INIT_WORK(&rdev->del_work, delayed_delete);
+	INIT_WORK(&rdev->del_work, md_delayed_delete);
+	kobject_get(&rdev->kobj);
 	schedule_work(&rdev->del_work);
 }
 
@@ -1441,7 +1452,7 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
 * otherwise reused by a RAID array (or any other kernel
 * subsystem), by bd_claiming the device.
 */
-static int lock_rdev(mdk_rdev_t *rdev, dev_t dev)
+static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared)
 {
 	int err = 0;
 	struct block_device *bdev;
@@ -1453,13 +1464,15 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev)
 			__bdevname(dev, b));
 		return PTR_ERR(bdev);
 	}
-	err = bd_claim(bdev, rdev);
+	err = bd_claim(bdev, shared ? (mdk_rdev_t *)lock_rdev : rdev);
 	if (err) {
 		printk(KERN_ERR "md: could not bd_claim %s.\n",
 			bdevname(bdev, b));
 		blkdev_put(bdev);
 		return err;
 	}
+	if (!shared)
+		set_bit(AllReserved, &rdev->flags);
 	rdev->bdev = bdev;
 	return err;
 }
@@ -1503,7 +1516,7 @@ static void export_array(mddev_t *mddev)
 	struct list_head *tmp;
 	mdk_rdev_t *rdev;
 
-	ITERATE_RDEV(mddev,rdev,tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		if (!rdev->mddev) {
 			MD_BUG();
 			continue;
@@ -1581,17 +1594,17 @@ static void md_print_devices(void)
 	printk("md: **********************************\n");
 	printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n");
 	printk("md: **********************************\n");
-	ITERATE_MDDEV(mddev,tmp) {
+	for_each_mddev(mddev, tmp) {
 
 		if (mddev->bitmap)
 			bitmap_print_sb(mddev->bitmap);
 		else
 			printk("%s: ", mdname(mddev));
-		ITERATE_RDEV(mddev,rdev,tmp2)
+		rdev_for_each(rdev, tmp2, mddev)
 			printk("<%s>", bdevname(rdev->bdev,b));
 		printk("\n");
 
-		ITERATE_RDEV(mddev,rdev,tmp2)
+		rdev_for_each(rdev, tmp2, mddev)
 			print_rdev(rdev);
 	}
 	printk("md: **********************************\n");
@@ -1610,7 +1623,7 @@ static void sync_sbs(mddev_t * mddev, int nospares)
 	mdk_rdev_t *rdev;
 	struct list_head *tmp;
 
-	ITERATE_RDEV(mddev,rdev,tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		if (rdev->sb_events == mddev->events ||
 		    (nospares &&
 		     rdev->raid_disk < 0 &&
@@ -1696,18 +1709,20 @@ repeat:
 			MD_BUG();
 		mddev->events --;
 	}
-	sync_sbs(mddev, nospares);
 
 	/*
 	 * do not write anything to disk if using
 	 * nonpersistent superblocks
 	 */
 	if (!mddev->persistent) {
-		clear_bit(MD_CHANGE_PENDING, &mddev->flags);
+		if (!mddev->external)
+			clear_bit(MD_CHANGE_PENDING, &mddev->flags);
+
 		spin_unlock_irq(&mddev->write_lock);
 		wake_up(&mddev->sb_wait);
 		return;
 	}
+	sync_sbs(mddev, nospares);
 	spin_unlock_irq(&mddev->write_lock);
 
 	dprintk(KERN_INFO
@@ -1715,7 +1730,7 @@ repeat:
 		mdname(mddev),mddev->in_sync);
 
 	bitmap_update_sb(mddev->bitmap);
-	ITERATE_RDEV(mddev,rdev,tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		char b[BDEVNAME_SIZE];
 		dprintk(KERN_INFO "md: ");
 		if (rdev->sb_loaded != 1)
@@ -1785,7 +1800,7 @@ static ssize_t
 state_show(mdk_rdev_t *rdev, char *page)
 {
 	char *sep = "";
-	int len=0;
+	size_t len = 0;
 
 	if (test_bit(Faulty, &rdev->flags)) {
 		len+= sprintf(page+len, "%sfaulty",sep);
@@ -1887,20 +1902,45 @@ static ssize_t
 slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 {
 	char *e;
+	int err;
+	char nm[20];
 	int slot = simple_strtoul(buf, &e, 10);
 	if (strncmp(buf, "none", 4)==0)
 		slot = -1;
 	else if (e==buf || (*e && *e!= '\n'))
 		return -EINVAL;
-	if (rdev->mddev->pers)
-		/* Cannot set slot in active array (yet) */
-		return -EBUSY;
-	if (slot >= rdev->mddev->raid_disks)
-		return -ENOSPC;
-	rdev->raid_disk = slot;
-	/* assume it is working */
-	rdev->flags = 0;
-	set_bit(In_sync, &rdev->flags);
+	if (rdev->mddev->pers) {
+		/* Setting 'slot' on an active array requires also
+		 * updating the 'rd%d' link, and communicating
+		 * with the personality with ->hot_*_disk.
+		 * For now we only support removing
+		 * failed/spare devices. This normally happens automatically,
+		 * but not when the metadata is externally managed.
+		 */
+		if (slot != -1)
+			return -EBUSY;
+		if (rdev->raid_disk == -1)
+			return -EEXIST;
+		/* personality does all needed checks */
+		if (rdev->mddev->pers->hot_add_disk == NULL)
+			return -EINVAL;
+		err = rdev->mddev->pers->
+			hot_remove_disk(rdev->mddev, rdev->raid_disk);
+		if (err)
+			return err;
+		sprintf(nm, "rd%d", rdev->raid_disk);
+		sysfs_remove_link(&rdev->mddev->kobj, nm);
+		set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
+		md_wakeup_thread(rdev->mddev->thread);
+	} else {
+		if (slot >= rdev->mddev->raid_disks)
+			return -ENOSPC;
+		rdev->raid_disk = slot;
+		/* assume it is working */
+		clear_bit(Faulty, &rdev->flags);
+		clear_bit(WriteMostly, &rdev->flags);
+		set_bit(In_sync, &rdev->flags);
+	}
 	return len;
 }
 
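The rewritten slot_store() keeps the old input convention: "none" means detach (slot -1), otherwise a decimal slot number with an optional trailing newline, and anything else is rejected with -EINVAL. A minimal userspace sketch of just that parsing step (illustrative only; the kernel path uses simple_strtoul() and, for an active array, then routes through ->hot_remove_disk()):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Illustrative stand-in for slot_store()'s input parsing. */
static int parse_slot(const char *buf, int *slot)
{
	char *e;
	long v = strtol(buf, &e, 10);

	if (strncmp(buf, "none", 4) == 0) {
		*slot = -1;		/* detach from any slot */
		return 0;
	}
	if (e == buf || (*e && *e != '\n'))
		return -EINVAL;		/* not a number, or trailing junk */
	*slot = (int)v;
	return 0;
}

int main(void)
{
	int slot;
	const char *tests[] = { "none", "3\n", "7", "abc" };

	for (int i = 0; i < 4; i++) {
		if (parse_slot(tests[i], &slot) == 0)
			printf("slot %d\n", slot);
		else
			printf("EINVAL\n");
	}
	return 0;
}

The tolerated trailing '\n' matters because "echo 3 > slot" writes "3\n", not "3".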
@@ -1923,6 +1963,10 @@ offset_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 		return -EINVAL;
 	if (rdev->mddev->pers)
 		return -EBUSY;
+	if (rdev->size && rdev->mddev->external)
+		/* Must set offset before size, so overlap checks
+		 * can be sane */
+		return -EBUSY;
 	rdev->data_offset = offset;
 	return len;
 }
@@ -1936,16 +1980,69 @@ rdev_size_show(mdk_rdev_t *rdev, char *page)
 	return sprintf(page, "%llu\n", (unsigned long long)rdev->size);
 }
 
+static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
+{
+	/* check if two start/length pairs overlap */
+	if (s1+l1 <= s2)
+		return 0;
+	if (s2+l2 <= s1)
+		return 0;
+	return 1;
+}
+
 static ssize_t
 rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 {
 	char *e;
 	unsigned long long size = simple_strtoull(buf, &e, 10);
+	unsigned long long oldsize = rdev->size;
 	if (e==buf || (*e && *e != '\n'))
 		return -EINVAL;
 	if (rdev->mddev->pers)
 		return -EBUSY;
 	rdev->size = size;
+	if (size > oldsize && rdev->mddev->external) {
+		/* need to check that all other rdevs with the same ->bdev
+		 * do not overlap. We need to unlock the mddev to avoid
+		 * a deadlock. We have already changed rdev->size, and if
+		 * we have to change it back, we will have the lock again.
+		 */
+		mddev_t *mddev;
+		int overlap = 0;
+		struct list_head *tmp, *tmp2;
+
+		mddev_unlock(rdev->mddev);
+		for_each_mddev(mddev, tmp) {
+			mdk_rdev_t *rdev2;
+
+			mddev_lock(mddev);
+			rdev_for_each(rdev2, tmp2, mddev)
+				if (test_bit(AllReserved, &rdev2->flags) ||
+				    (rdev->bdev == rdev2->bdev &&
+				     rdev != rdev2 &&
+				     overlaps(rdev->data_offset, rdev->size,
+					      rdev2->data_offset, rdev2->size))) {
+					overlap = 1;
+					break;
+				}
+			mddev_unlock(mddev);
+			if (overlap) {
+				mddev_put(mddev);
+				break;
+			}
+		}
+		mddev_lock(rdev->mddev);
+		if (overlap) {
+			/* Someone else could have slipped in a size
+			 * change here, but doing so is just silly.
+			 * We put oldsize back because we *know* it is
+			 * safe, and trust userspace not to race with
+			 * itself
+			 */
+			rdev->size = oldsize;
+			return -EBUSY;
+		}
+	}
 	if (size < rdev->mddev->size || rdev->mddev->size == 0)
 		rdev->mddev->size = size;
 	return len;
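The overlaps() helper introduced here treats each (start, length) pair as a half-open extent, so two extents that merely touch do not count as overlapping; rdev_size_store() uses it to make sure rdevs sharing one block device never claim intersecting ranges. A runnable userspace rendering of the same predicate (sector_t swapped for uint64_t; illustrative, not kernel code):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t sector_t;	/* userspace stand-in for the kernel type */

/* Same logic as the overlaps() added above: [s, s+l) half-open ranges. */
static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
{
	if (s1 + l1 <= s2)
		return 0;
	if (s2 + l2 <= s1)
		return 0;
	return 1;
}

int main(void)
{
	assert(!overlaps(0, 100, 100, 50));	/* touching: no overlap */
	assert( overlaps(0, 101, 100, 50));	/* one sector shared */
	assert( overlaps(10, 5, 0, 100));	/* containment overlaps too */
	printf("overlap checks pass\n");
	return 0;
}

Putting oldsize back on failure is safe precisely because that extent had already been accepted before the store began.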
@@ -1980,12 +2077,18 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr,
 {
 	struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
 	mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
+	int rv;
 
 	if (!entry->store)
 		return -EIO;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
-	return entry->store(rdev, page, length);
+	rv = mddev_lock(rdev->mddev);
+	if (!rv) {
+		rv = entry->store(rdev, page, length);
+		mddev_unlock(rdev->mddev);
+	}
+	return rv;
 }
 
 static void rdev_free(struct kobject *ko)
@@ -2029,7 +2132,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
 	if ((err = alloc_disk_sb(rdev)))
 		goto abort_free;
 
-	err = lock_rdev(rdev, newdev);
+	err = lock_rdev(rdev, newdev, super_format == -2);
 	if (err)
 		goto abort_free;
 
@@ -2099,7 +2202,7 @@ static void analyze_sbs(mddev_t * mddev)
 	char b[BDEVNAME_SIZE];
 
 	freshest = NULL;
-	ITERATE_RDEV(mddev,rdev,tmp)
+	rdev_for_each(rdev, tmp, mddev)
 		switch (super_types[mddev->major_version].
 			load_super(rdev, freshest, mddev->minor_version)) {
 		case 1:
@@ -2120,7 +2223,7 @@ static void analyze_sbs(mddev_t * mddev)
 	validate_super(mddev, freshest);
 
 	i = 0;
-	ITERATE_RDEV(mddev,rdev,tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		if (rdev != freshest)
 			if (super_types[mddev->major_version].
 			    validate_super(mddev, rdev)) {
@@ -2215,7 +2318,7 @@ level_show(mddev_t *mddev, char *page)
 static ssize_t
 level_store(mddev_t *mddev, const char *buf, size_t len)
 {
-	int rv = len;
+	ssize_t rv = len;
 	if (mddev->pers)
 		return -EBUSY;
 	if (len == 0)
@@ -2425,6 +2528,8 @@ array_state_show(mddev_t *mddev, char *page)
 		case 0:
 			if (mddev->in_sync)
 				st = clean;
+			else if (test_bit(MD_CHANGE_CLEAN, &mddev->flags))
+				st = write_pending;
 			else if (mddev->safemode)
 				st = active_idle;
 			else
@@ -2455,11 +2560,9 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
 		break;
 	case clear:
 		/* stopping an active array */
-		if (mddev->pers) {
-			if (atomic_read(&mddev->active) > 1)
-				return -EBUSY;
-			err = do_md_stop(mddev, 0);
-		}
+		if (atomic_read(&mddev->active) > 1)
+			return -EBUSY;
+		err = do_md_stop(mddev, 0);
 		break;
 	case inactive:
 		/* stopping an active array */
@@ -2467,7 +2570,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
 			if (atomic_read(&mddev->active) > 1)
 				return -EBUSY;
 			err = do_md_stop(mddev, 2);
-		}
+		} else
+			err = 0; /* already inactive */
 		break;
 	case suspended:
 		break; /* not supported yet */
@@ -2495,9 +2599,15 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
 			restart_array(mddev);
 			spin_lock_irq(&mddev->write_lock);
 			if (atomic_read(&mddev->writes_pending) == 0) {
-				mddev->in_sync = 1;
-				set_bit(MD_CHANGE_CLEAN, &mddev->flags);
-			}
+				if (mddev->in_sync == 0) {
+					mddev->in_sync = 1;
+					if (mddev->persistent)
+						set_bit(MD_CHANGE_CLEAN,
+							&mddev->flags);
+				}
+				err = 0;
+			} else
+				err = -EBUSY;
 			spin_unlock_irq(&mddev->write_lock);
 		} else {
 			mddev->ro = 0;
@@ -2508,7 +2618,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
 	case active:
 		if (mddev->pers) {
 			restart_array(mddev);
-			clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
+			if (mddev->external)
+				clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
 			wake_up(&mddev->sb_wait);
 			err = 0;
 		} else {
@@ -2574,7 +2685,9 @@ new_dev_store(mddev_t *mddev, const char *buf, size_t len)
 			if (err < 0)
 				goto out;
 		}
-	} else
+	} else if (mddev->external)
+		rdev = md_import_device(dev, -2, -1);
+	else
 		rdev = md_import_device(dev, -1, -1);
 
 	if (IS_ERR(rdev))
@@ -2659,7 +2772,9 @@ __ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
 
 
 /* Metdata version.
- * This is either 'none' for arrays with externally managed metadata,
+ * This is one of
+ *   'none' for arrays with no metadata (good luck...)
+ *   'external' for arrays with externally managed metadata,
 * or N.M for internally known formats
 */
 static ssize_t
@@ -2668,6 +2783,8 @@ metadata_show(mddev_t *mddev, char *page)
 	if (mddev->persistent)
 		return sprintf(page, "%d.%d\n",
 			       mddev->major_version, mddev->minor_version);
+	else if (mddev->external)
+		return sprintf(page, "external:%s\n", mddev->metadata_type);
 	else
 		return sprintf(page, "none\n");
 }
@@ -2682,6 +2799,21 @@ metadata_store(mddev_t *mddev, const char *buf, size_t len)
 
 	if (cmd_match(buf, "none")) {
 		mddev->persistent = 0;
+		mddev->external = 0;
+		mddev->major_version = 0;
+		mddev->minor_version = 90;
+		return len;
+	}
+	if (strncmp(buf, "external:", 9) == 0) {
+		size_t namelen = len-9;
+		if (namelen >= sizeof(mddev->metadata_type))
+			namelen = sizeof(mddev->metadata_type)-1;
+		strncpy(mddev->metadata_type, buf+9, namelen);
+		mddev->metadata_type[namelen] = 0;
+		if (namelen && mddev->metadata_type[namelen-1] == '\n')
+			mddev->metadata_type[--namelen] = 0;
+		mddev->persistent = 0;
+		mddev->external = 1;
 		mddev->major_version = 0;
 		mddev->minor_version = 90;
 		return len;
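metadata_store() now accepts "external:NAME" alongside "none" and "N.M": the name is copied into mddev->metadata_type, truncated to the field size, and a single trailing newline (which sysfs writes usually carry) is stripped. A userspace sketch of that parsing, with an assumed buffer size and "imsm" as an arbitrary example name:

#include <stdio.h>
#include <string.h>

static char metadata_type[17];	/* assumed size; the real field lives in mddev */

/* Illustrative re-run of the "external:NAME" branch added above. */
static int parse_metadata(const char *buf, size_t len)
{
	size_t namelen;

	if (strncmp(buf, "external:", 9) != 0)
		return -1;
	namelen = len - 9;
	if (namelen >= sizeof(metadata_type))
		namelen = sizeof(metadata_type) - 1;	/* truncate, keep NUL room */
	strncpy(metadata_type, buf + 9, namelen);
	metadata_type[namelen] = 0;
	if (namelen && metadata_type[namelen - 1] == '\n')
		metadata_type[--namelen] = 0;		/* drop trailing newline */
	return 0;
}

int main(void)
{
	const char *in = "external:imsm\n";

	if (parse_metadata(in, strlen(in)) == 0)
		printf("metadata_type = \"%s\"\n", metadata_type);	/* imsm */
	return 0;
}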
@@ -2698,6 +2830,7 @@ metadata_store(mddev_t *mddev, const char *buf, size_t len)
 	mddev->major_version = major;
 	mddev->minor_version = minor;
 	mddev->persistent = 1;
+	mddev->external = 0;
 	return len;
 }
 
@@ -2865,6 +2998,43 @@ sync_completed_show(mddev_t *mddev, char *page)
 static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
 
 static ssize_t
+max_sync_show(mddev_t *mddev, char *page)
+{
+	if (mddev->resync_max == MaxSector)
+		return sprintf(page, "max\n");
+	else
+		return sprintf(page, "%llu\n",
+			       (unsigned long long)mddev->resync_max);
+}
+static ssize_t
+max_sync_store(mddev_t *mddev, const char *buf, size_t len)
+{
+	if (strncmp(buf, "max", 3) == 0)
+		mddev->resync_max = MaxSector;
+	else {
+		char *ep;
+		unsigned long long max = simple_strtoull(buf, &ep, 10);
+		if (ep == buf || (*ep != 0 && *ep != '\n'))
+			return -EINVAL;
+		if (max < mddev->resync_max &&
+		    test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+			return -EBUSY;
+
+		/* Must be a multiple of chunk_size */
+		if (mddev->chunk_size) {
+			if (max & (sector_t)((mddev->chunk_size>>9)-1))
+				return -EINVAL;
+		}
+		mddev->resync_max = max;
+	}
+	wake_up(&mddev->recovery_wait);
+	return len;
+}
+
+static struct md_sysfs_entry md_max_sync =
+__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
+
+static ssize_t
 suspend_lo_show(mddev_t *mddev, char *page)
 {
 	return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
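The chunk-alignment test in max_sync_store() relies on chunk_size being a power of two: (chunk_size>>9) is the chunk size in 512-byte sectors, so masking with (chunk_size>>9)-1 is nonzero exactly when the requested sync_max is not a whole number of chunks. Illustrative arithmetic (values are examples only):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t chunk_size = 64 * 1024;		/* 64 KiB chunk */
	uint64_t mask = (chunk_size >> 9) - 1;		/* 128 sectors - 1 = 127 */
	uint64_t aligned = 1280;			/* 10 chunks: accepted */
	uint64_t unaligned = 1000;			/* rejected with -EINVAL */

	printf("%llu -> %s\n", (unsigned long long)aligned,
	       (aligned & mask) ? "EINVAL" : "ok");
	printf("%llu -> %s\n", (unsigned long long)unaligned,
	       (unaligned & mask) ? "EINVAL" : "ok");
	return 0;
}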
@@ -2974,6 +3144,7 @@ static struct attribute *md_redundancy_attrs[] = {
 	&md_sync_max.attr,
 	&md_sync_speed.attr,
 	&md_sync_completed.attr,
+	&md_max_sync.attr,
 	&md_suspend_lo.attr,
 	&md_suspend_hi.attr,
 	&md_bitmap.attr,
@@ -3118,8 +3289,11 @@ static int do_md_run(mddev_t * mddev)
 	/*
 	 * Analyze all RAID superblock(s)
 	 */
-	if (!mddev->raid_disks)
+	if (!mddev->raid_disks) {
+		if (!mddev->persistent)
+			return -EINVAL;
 		analyze_sbs(mddev);
+	}
 
 	chunk_size = mddev->chunk_size;
 
@@ -3143,7 +3317,7 @@ static int do_md_run(mddev_t * mddev)
 	}
 
 	/* devices must have minimum size of one chunk */
-	ITERATE_RDEV(mddev,rdev,tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		if (test_bit(Faulty, &rdev->flags))
 			continue;
 		if (rdev->size < chunk_size / 1024) {
@@ -3170,7 +3344,7 @@ static int do_md_run(mddev_t * mddev)
 	 * the only valid external interface is through the md
 	 * device.
 	 */
-	ITERATE_RDEV(mddev,rdev,tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		if (test_bit(Faulty, &rdev->flags))
 			continue;
 		sync_blockdev(rdev->bdev);
@@ -3236,8 +3410,8 @@ static int do_md_run(mddev_t * mddev)
 		mdk_rdev_t *rdev2;
 		struct list_head *tmp2;
 		int warned = 0;
-		ITERATE_RDEV(mddev, rdev, tmp) {
-			ITERATE_RDEV(mddev, rdev2, tmp2) {
+		rdev_for_each(rdev, tmp, mddev) {
+			rdev_for_each(rdev2, tmp2, mddev) {
 				if (rdev < rdev2 &&
 				    rdev->bdev->bd_contains ==
 				    rdev2->bdev->bd_contains) {
@@ -3297,7 +3471,7 @@ static int do_md_run(mddev_t * mddev)
 	mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */
 	mddev->in_sync = 1;
 
-	ITERATE_RDEV(mddev,rdev,tmp)
+	rdev_for_each(rdev, tmp, mddev)
 		if (rdev->raid_disk >= 0) {
 			char nm[20];
 			sprintf(nm, "rd%d", rdev->raid_disk);
@@ -3330,7 +3504,7 @@ static int do_md_run(mddev_t * mddev)
 	if (mddev->degraded && !mddev->sync_thread) {
 		struct list_head *rtmp;
 		int spares = 0;
-		ITERATE_RDEV(mddev,rdev,rtmp)
+		rdev_for_each(rdev, rtmp, mddev)
 			if (rdev->raid_disk >= 0 &&
 			    !test_bit(In_sync, &rdev->flags) &&
 			    !test_bit(Faulty, &rdev->flags))
@@ -3507,14 +3681,14 @@ static int do_md_stop(mddev_t * mddev, int mode)
 		}
 		mddev->bitmap_offset = 0;
 
-		ITERATE_RDEV(mddev,rdev,tmp)
+		rdev_for_each(rdev, tmp, mddev)
 			if (rdev->raid_disk >= 0) {
 				char nm[20];
 				sprintf(nm, "rd%d", rdev->raid_disk);
 				sysfs_remove_link(&mddev->kobj, nm);
 			}
 
-		/* make sure all delayed_delete calls have finished */
+		/* make sure all md_delayed_delete calls have finished */
 		flush_scheduled_work();
 
 		export_array(mddev);
@@ -3523,7 +3697,10 @@ static int do_md_stop(mddev_t * mddev, int mode)
 		mddev->size = 0;
 		mddev->raid_disks = 0;
 		mddev->recovery_cp = 0;
+		mddev->resync_max = MaxSector;
 		mddev->reshape_position = MaxSector;
+		mddev->external = 0;
+		mddev->persistent = 0;
 
 	} else if (mddev->pers)
 		printk(KERN_INFO "md: %s switched to read-only mode.\n",
@@ -3546,7 +3723,7 @@ static void autorun_array(mddev_t *mddev)
 
 	printk(KERN_INFO "md: running: ");
 
-	ITERATE_RDEV(mddev,rdev,tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		char b[BDEVNAME_SIZE];
 		printk("<%s>", bdevname(rdev->bdev,b));
 	}
@@ -3589,7 +3766,7 @@ static void autorun_devices(int part)
 		printk(KERN_INFO "md: considering %s ...\n",
 			bdevname(rdev0->bdev,b));
 		INIT_LIST_HEAD(&candidates);
-		ITERATE_RDEV_PENDING(rdev,tmp)
+		rdev_for_each_list(rdev, tmp, pending_raid_disks)
 			if (super_90_load(rdev, rdev0, 0) >= 0) {
 				printk(KERN_INFO "md: adding %s ...\n",
 					bdevname(rdev->bdev,b));
@@ -3632,7 +3809,8 @@ static void autorun_devices(int part)
 			mddev_unlock(mddev);
 		} else {
 			printk(KERN_INFO "md: created %s\n", mdname(mddev));
-			ITERATE_RDEV_GENERIC(candidates,rdev,tmp) {
+			mddev->persistent = 1;
+			rdev_for_each_list(rdev, tmp, candidates) {
 				list_del_init(&rdev->same_set);
 				if (bind_rdev_to_array(rdev, mddev))
 					export_rdev(rdev);
@@ -3643,7 +3821,7 @@ static void autorun_devices(int part)
 		/* on success, candidates will be empty, on error
 		 * it won't...
 		 */
-		ITERATE_RDEV_GENERIC(candidates,rdev,tmp)
+		rdev_for_each_list(rdev, tmp, candidates)
 			export_rdev(rdev);
 		mddev_put(mddev);
 	}
@@ -3673,7 +3851,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
 	struct list_head *tmp;
 
 	nr=working=active=failed=spare=0;
-	ITERATE_RDEV(mddev,rdev,tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		nr++;
 		if (test_bit(Faulty, &rdev->flags))
 			failed++;
@@ -3919,8 +4097,6 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
 		else
 			rdev->raid_disk = -1;
 
-		rdev->flags = 0;
-
 		if (rdev->raid_disk < mddev->raid_disks)
 			if (info->state & (1<<MD_DISK_SYNC))
 				set_bit(In_sync, &rdev->flags);
@@ -4165,13 +4341,15 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
 	else
 		mddev->recovery_cp = 0;
 	mddev->persistent    = ! info->not_persistent;
+	mddev->external      = 0;
 
 	mddev->layout        = info->layout;
 	mddev->chunk_size    = info->chunk_size;
 
 	mddev->max_disks     = MD_SB_DISKS;
 
-	mddev->flags         = 0;
+	if (mddev->persistent)
+		mddev->flags         = 0;
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
 
 	mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
@@ -4213,7 +4391,7 @@ static int update_size(mddev_t *mddev, unsigned long size)
 	 */
 	if (mddev->sync_thread)
 		return -EBUSY;
-	ITERATE_RDEV(mddev,rdev,tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		sector_t avail;
 		avail = rdev->size * 2;
 
@@ -4471,9 +4649,10 @@ static int md_ioctl(struct inode *inode, struct file *file,
 	 */
 	/* if we are not initialised yet, only ADD_NEW_DISK, STOP_ARRAY,
 	 * RUN_ARRAY, and GET_ and SET_BITMAP_FILE are allowed */
-	if (!mddev->raid_disks && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
-	    && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
-	    && cmd != GET_BITMAP_FILE) {
+	if ((!mddev->raid_disks && !mddev->external)
+	    && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
+	    && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
+	    && cmd != GET_BITMAP_FILE) {
 		err = -ENODEV;
 		goto abort_unlock;
 	}
@@ -4757,7 +4936,7 @@ static void status_unused(struct seq_file *seq)
 
 	seq_printf(seq, "unused devices: ");
 
-	ITERATE_RDEV_PENDING(rdev,tmp) {
+	rdev_for_each_list(rdev, tmp, pending_raid_disks) {
 		char b[BDEVNAME_SIZE];
 		i++;
 		seq_printf(seq, "%s ",
@@ -4953,7 +5132,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
 	}
 
 	size = 0;
-	ITERATE_RDEV(mddev,rdev,tmp2) {
+	rdev_for_each(rdev, tmp2, mddev) {
 		char b[BDEVNAME_SIZE];
 		seq_printf(seq, " %s[%d]",
 			bdevname(rdev->bdev,b), rdev->desc_nr);
@@ -4982,7 +5161,10 @@ static int md_seq_show(struct seq_file *seq, void *v)
 				mddev->major_version,
 				mddev->minor_version);
 			}
-		} else
+		} else if (mddev->external)
+			seq_printf(seq, " super external:%s",
+				   mddev->metadata_type);
+		else
 			seq_printf(seq, " super non-persistent");
 
 		if (mddev->pers) {
@@ -5106,7 +5288,7 @@ static int is_mddev_idle(mddev_t *mddev)
 	long curr_events;
 
 	idle = 1;
-	ITERATE_RDEV(mddev,rdev,tmp) {
+	rdev_for_each(rdev, tmp, mddev) {
 		struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
 		curr_events = disk_stat_read(disk, sectors[0]) +
 				disk_stat_read(disk, sectors[1]) -
@@ -5283,7 +5465,7 @@ void md_do_sync(mddev_t *mddev)
 		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 		goto skip;
 	}
-	ITERATE_MDDEV(mddev2,tmp) {
+	for_each_mddev(mddev2, tmp) {
 		if (mddev2 == mddev)
 			continue;
 		if (mddev2->curr_resync &&
@@ -5333,7 +5515,7 @@ void md_do_sync(mddev_t *mddev)
 		/* recovery follows the physical size of devices */
 		max_sectors = mddev->size << 1;
 		j = MaxSector;
-		ITERATE_RDEV(mddev,rdev,rtmp)
+		rdev_for_each(rdev, rtmp, mddev)
 			if (rdev->raid_disk >= 0 &&
 			    !test_bit(Faulty, &rdev->flags) &&
 			    !test_bit(In_sync, &rdev->flags) &&
@@ -5381,8 +5563,16 @@ void md_do_sync(mddev_t *mddev)
 		sector_t sectors;
 
 		skipped = 0;
+		if (j >= mddev->resync_max) {
+			sysfs_notify(&mddev->kobj, NULL, "sync_completed");
+			wait_event(mddev->recovery_wait,
+				   mddev->resync_max > j
+				   || kthread_should_stop());
+		}
+		if (kthread_should_stop())
+			goto interrupted;
 		sectors = mddev->pers->sync_request(mddev, j, &skipped,
 					currspeed < speed_min(mddev));
 		if (sectors == 0) {
 			set_bit(MD_RECOVERY_ERR, &mddev->recovery);
 			goto out;
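The new block at the top of the resync loop makes md_do_sync() honour sync_max: once j reaches resync_max the thread sleeps on recovery_wait until max_sync_store() raises the limit and calls wake_up(), or until the kthread is asked to stop. A userspace analogue of that handshake using pthreads (names and values are illustrative; this is not kernel code):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t recovery_wait = PTHREAD_COND_INITIALIZER;
static unsigned long long resync_max = 1024;	/* sectors, like mddev->resync_max */

static void *resync_thread(void *unused)
{
	unsigned long long j = 0, max_sectors = 4096;

	while (j < max_sectors) {
		pthread_mutex_lock(&lock);
		while (j >= resync_max)			/* wait_event() analogue */
			pthread_cond_wait(&recovery_wait, &lock);
		pthread_mutex_unlock(&lock);
		j += 64;		/* ->sync_request() would do real work here */
	}
	printf("resync reached %llu\n", j);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, resync_thread, NULL);

	pthread_mutex_lock(&lock);
	resync_max = 4096;			/* "echo 4096 > md/sync_max" */
	pthread_cond_broadcast(&recovery_wait);	/* wake_up(&mddev->recovery_wait) */
	pthread_mutex_unlock(&lock);

	pthread_join(t, NULL);
	return 0;
}

Compile with -lpthread; the condition is rechecked under the lock, matching the wait_event() semantics the kernel code relies on.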
@@ -5424,15 +5614,9 @@ void md_do_sync(mddev_t *mddev)
 		}
 
 
-		if (kthread_should_stop()) {
-			/*
-			 * got a signal, exit.
-			 */
-			printk(KERN_INFO
-				"md: md_do_sync() got signal ... exiting\n");
-			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-			goto out;
-		}
+		if (kthread_should_stop())
+			goto interrupted;
+
 
 		/*
 		 * this loop exits only if either when we are slower than
@@ -5484,7 +5668,7 @@ void md_do_sync(mddev_t *mddev)
 	} else {
 		if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
 			mddev->curr_resync = MaxSector;
-		ITERATE_RDEV(mddev,rdev,rtmp)
+		rdev_for_each(rdev, rtmp, mddev)
 			if (rdev->raid_disk >= 0 &&
 			    !test_bit(Faulty, &rdev->flags) &&
 			    !test_bit(In_sync, &rdev->flags) &&
@@ -5496,9 +5680,22 @@ void md_do_sync(mddev_t *mddev)
 
  skip:
 	mddev->curr_resync = 0;
+	mddev->resync_max = MaxSector;
+	sysfs_notify(&mddev->kobj, NULL, "sync_completed");
 	wake_up(&resync_wait);
 	set_bit(MD_RECOVERY_DONE, &mddev->recovery);
 	md_wakeup_thread(mddev->thread);
+	return;
+
+ interrupted:
+	/*
+	 * got a signal, exit.
+	 */
+	printk(KERN_INFO
+	       "md: md_do_sync() got signal ... exiting\n");
+	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+	goto out;
+
 }
 EXPORT_SYMBOL_GPL(md_do_sync);
 
@@ -5509,8 +5706,9 @@ static int remove_and_add_spares(mddev_t *mddev)
 	struct list_head *rtmp;
 	int spares = 0;
 
-	ITERATE_RDEV(mddev,rdev,rtmp)
+	rdev_for_each(rdev, rtmp, mddev)
 		if (rdev->raid_disk >= 0 &&
+		    !mddev->external &&
 		    (test_bit(Faulty, &rdev->flags) ||
 		     ! test_bit(In_sync, &rdev->flags)) &&
 		    atomic_read(&rdev->nr_pending)==0) {
@@ -5524,7 +5722,7 @@ static int remove_and_add_spares(mddev_t *mddev)
 		}
 
 	if (mddev->degraded) {
-		ITERATE_RDEV(mddev,rdev,rtmp)
+		rdev_for_each(rdev, rtmp, mddev)
 			if (rdev->raid_disk < 0
 			    && !test_bit(Faulty, &rdev->flags)) {
 				rdev->recovery_offset = 0;
@@ -5589,7 +5787,7 @@ void md_check_recovery(mddev_t *mddev)
 	}
 
 	if ( ! (
-		mddev->flags ||
+		(mddev->flags && !mddev->external) ||
 		test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
 		test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
 		(mddev->safemode == 1) ||
@@ -5605,7 +5803,8 @@ void md_check_recovery(mddev_t *mddev)
 		if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
 		    !mddev->in_sync && mddev->recovery_cp == MaxSector) {
 			mddev->in_sync = 1;
-			set_bit(MD_CHANGE_CLEAN, &mddev->flags);
+			if (mddev->persistent)
+				set_bit(MD_CHANGE_CLEAN, &mddev->flags);
 		}
 		if (mddev->safemode == 1)
 			mddev->safemode = 0;
@@ -5637,7 +5836,7 @@ void md_check_recovery(mddev_t *mddev)
 		 * information must be scrapped
 		 */
 		if (!mddev->degraded)
-			ITERATE_RDEV(mddev,rdev,rtmp)
+			rdev_for_each(rdev, rtmp, mddev)
 				rdev->saved_raid_disk = -1;
 
 		mddev->recovery = 0;
@@ -5714,7 +5913,7 @@ static int md_notify_reboot(struct notifier_block *this,
 
 	printk(KERN_INFO "md: stopping all md devices.\n");
 
-	ITERATE_MDDEV(mddev,tmp)
+	for_each_mddev(mddev, tmp)
 		if (mddev_trylock(mddev)) {
 			do_md_stop (mddev, 1);
 			mddev_unlock(mddev);
@@ -5848,7 +6047,7 @@ static __exit void md_exit(void)
 	unregister_reboot_notifier(&md_notifier);
 	unregister_sysctl_table(raid_table_header);
 	remove_proc_entry("mdstat", NULL);
-	ITERATE_MDDEV(mddev,tmp) {
+	for_each_mddev(mddev, tmp) {
 		struct gendisk *disk = mddev->gendisk;
 		if (!disk)
 			continue;