diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/bitmap.c | 39 | ||||
-rw-r--r-- | drivers/md/faulty.c | 2 | ||||
-rw-r--r-- | drivers/md/linear.c | 2 | ||||
-rw-r--r-- | drivers/md/md.c | 395 | ||||
-rw-r--r-- | drivers/md/mktables.c | 187 | ||||
-rw-r--r-- | drivers/md/multipath.c | 2 | ||||
-rw-r--r-- | drivers/md/raid0.c | 8 | ||||
-rw-r--r-- | drivers/md/raid1.c | 5 | ||||
-rw-r--r-- | drivers/md/raid10.c | 7 | ||||
-rw-r--r-- | drivers/md/raid5.c | 48 | ||||
-rw-r--r-- | drivers/md/raid6test/test.c | 117 |
11 files changed, 542 insertions, 270 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 1b1ef3130e6e..a0585fb6da94 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c | |||
@@ -237,7 +237,7 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long inde | |||
237 | if (!page) | 237 | if (!page) |
238 | return ERR_PTR(-ENOMEM); | 238 | return ERR_PTR(-ENOMEM); |
239 | 239 | ||
240 | ITERATE_RDEV(mddev, rdev, tmp) { | 240 | rdev_for_each(rdev, tmp, mddev) { |
241 | if (! test_bit(In_sync, &rdev->flags) | 241 | if (! test_bit(In_sync, &rdev->flags) |
242 | || test_bit(Faulty, &rdev->flags)) | 242 | || test_bit(Faulty, &rdev->flags)) |
243 | continue; | 243 | continue; |
@@ -261,7 +261,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) | |||
261 | struct list_head *tmp; | 261 | struct list_head *tmp; |
262 | mddev_t *mddev = bitmap->mddev; | 262 | mddev_t *mddev = bitmap->mddev; |
263 | 263 | ||
264 | ITERATE_RDEV(mddev, rdev, tmp) | 264 | rdev_for_each(rdev, tmp, mddev) |
265 | if (test_bit(In_sync, &rdev->flags) | 265 | if (test_bit(In_sync, &rdev->flags) |
266 | && !test_bit(Faulty, &rdev->flags)) { | 266 | && !test_bit(Faulty, &rdev->flags)) { |
267 | int size = PAGE_SIZE; | 267 | int size = PAGE_SIZE; |
@@ -1348,14 +1348,38 @@ void bitmap_close_sync(struct bitmap *bitmap) | |||
1348 | */ | 1348 | */ |
1349 | sector_t sector = 0; | 1349 | sector_t sector = 0; |
1350 | int blocks; | 1350 | int blocks; |
1351 | if (!bitmap) return; | 1351 | if (!bitmap) |
1352 | return; | ||
1352 | while (sector < bitmap->mddev->resync_max_sectors) { | 1353 | while (sector < bitmap->mddev->resync_max_sectors) { |
1353 | bitmap_end_sync(bitmap, sector, &blocks, 0); | 1354 | bitmap_end_sync(bitmap, sector, &blocks, 0); |
1354 | /* | 1355 | sector += blocks; |
1355 | if (sector < 500) printk("bitmap_close_sync: sec %llu blks %d\n", | 1356 | } |
1356 | (unsigned long long)sector, blocks); | 1357 | } |
1357 | */ sector += blocks; | 1358 | |
1359 | void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) | ||
1360 | { | ||
1361 | sector_t s = 0; | ||
1362 | int blocks; | ||
1363 | |||
1364 | if (!bitmap) | ||
1365 | return; | ||
1366 | if (sector == 0) { | ||
1367 | bitmap->last_end_sync = jiffies; | ||
1368 | return; | ||
1369 | } | ||
1370 | if (time_before(jiffies, (bitmap->last_end_sync | ||
1371 | + bitmap->daemon_sleep * HZ))) | ||
1372 | return; | ||
1373 | wait_event(bitmap->mddev->recovery_wait, | ||
1374 | atomic_read(&bitmap->mddev->recovery_active) == 0); | ||
1375 | |||
1376 | sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1); | ||
1377 | s = 0; | ||
1378 | while (s < sector && s < bitmap->mddev->resync_max_sectors) { | ||
1379 | bitmap_end_sync(bitmap, s, &blocks, 0); | ||
1380 | s += blocks; | ||
1358 | } | 1381 | } |
1382 | bitmap->last_end_sync = jiffies; | ||
1359 | } | 1383 | } |
1360 | 1384 | ||
1361 | static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) | 1385 | static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) |
@@ -1565,3 +1589,4 @@ EXPORT_SYMBOL(bitmap_start_sync); | |||
1565 | EXPORT_SYMBOL(bitmap_end_sync); | 1589 | EXPORT_SYMBOL(bitmap_end_sync); |
1566 | EXPORT_SYMBOL(bitmap_unplug); | 1590 | EXPORT_SYMBOL(bitmap_unplug); |
1567 | EXPORT_SYMBOL(bitmap_close_sync); | 1591 | EXPORT_SYMBOL(bitmap_close_sync); |
1592 | EXPORT_SYMBOL(bitmap_cond_end_sync); | ||
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c index cf2ddce34118..d107ddceefcd 100644 --- a/drivers/md/faulty.c +++ b/drivers/md/faulty.c | |||
@@ -294,7 +294,7 @@ static int run(mddev_t *mddev) | |||
294 | } | 294 | } |
295 | conf->nfaults = 0; | 295 | conf->nfaults = 0; |
296 | 296 | ||
297 | ITERATE_RDEV(mddev, rdev, tmp) | 297 | rdev_for_each(rdev, tmp, mddev) |
298 | conf->rdev = rdev; | 298 | conf->rdev = rdev; |
299 | 299 | ||
300 | mddev->array_size = mddev->size; | 300 | mddev->array_size = mddev->size; |
diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 3dac1cfb8189..0b8511776b3e 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c | |||
@@ -122,7 +122,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) | |||
122 | cnt = 0; | 122 | cnt = 0; |
123 | conf->array_size = 0; | 123 | conf->array_size = 0; |
124 | 124 | ||
125 | ITERATE_RDEV(mddev,rdev,tmp) { | 125 | rdev_for_each(rdev, tmp, mddev) { |
126 | int j = rdev->raid_disk; | 126 | int j = rdev->raid_disk; |
127 | dev_info_t *disk = conf->disks + j; | 127 | dev_info_t *disk = conf->disks + j; |
128 | 128 | ||
diff --git a/drivers/md/md.c b/drivers/md/md.c index c28a120b4161..5fc326d3970e 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -195,7 +195,7 @@ static DEFINE_SPINLOCK(all_mddevs_lock); | |||
195 | * Any code which breaks out of this loop while own | 195 | * Any code which breaks out of this loop while own |
196 | * a reference to the current mddev and must mddev_put it. | 196 | * a reference to the current mddev and must mddev_put it. |
197 | */ | 197 | */ |
198 | #define ITERATE_MDDEV(mddev,tmp) \ | 198 | #define for_each_mddev(mddev,tmp) \ |
199 | \ | 199 | \ |
200 | for (({ spin_lock(&all_mddevs_lock); \ | 200 | for (({ spin_lock(&all_mddevs_lock); \ |
201 | tmp = all_mddevs.next; \ | 201 | tmp = all_mddevs.next; \ |
@@ -275,6 +275,7 @@ static mddev_t * mddev_find(dev_t unit) | |||
275 | spin_lock_init(&new->write_lock); | 275 | spin_lock_init(&new->write_lock); |
276 | init_waitqueue_head(&new->sb_wait); | 276 | init_waitqueue_head(&new->sb_wait); |
277 | new->reshape_position = MaxSector; | 277 | new->reshape_position = MaxSector; |
278 | new->resync_max = MaxSector; | ||
278 | 279 | ||
279 | new->queue = blk_alloc_queue(GFP_KERNEL); | 280 | new->queue = blk_alloc_queue(GFP_KERNEL); |
280 | if (!new->queue) { | 281 | if (!new->queue) { |
@@ -310,7 +311,7 @@ static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr) | |||
310 | mdk_rdev_t * rdev; | 311 | mdk_rdev_t * rdev; |
311 | struct list_head *tmp; | 312 | struct list_head *tmp; |
312 | 313 | ||
313 | ITERATE_RDEV(mddev,rdev,tmp) { | 314 | rdev_for_each(rdev, tmp, mddev) { |
314 | if (rdev->desc_nr == nr) | 315 | if (rdev->desc_nr == nr) |
315 | return rdev; | 316 | return rdev; |
316 | } | 317 | } |
@@ -322,7 +323,7 @@ static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev) | |||
322 | struct list_head *tmp; | 323 | struct list_head *tmp; |
323 | mdk_rdev_t *rdev; | 324 | mdk_rdev_t *rdev; |
324 | 325 | ||
325 | ITERATE_RDEV(mddev,rdev,tmp) { | 326 | rdev_for_each(rdev, tmp, mddev) { |
326 | if (rdev->bdev->bd_dev == dev) | 327 | if (rdev->bdev->bd_dev == dev) |
327 | return rdev; | 328 | return rdev; |
328 | } | 329 | } |
@@ -773,12 +774,16 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
773 | __u64 ev1 = md_event(sb); | 774 | __u64 ev1 = md_event(sb); |
774 | 775 | ||
775 | rdev->raid_disk = -1; | 776 | rdev->raid_disk = -1; |
776 | rdev->flags = 0; | 777 | clear_bit(Faulty, &rdev->flags); |
778 | clear_bit(In_sync, &rdev->flags); | ||
779 | clear_bit(WriteMostly, &rdev->flags); | ||
780 | clear_bit(BarriersNotsupp, &rdev->flags); | ||
781 | |||
777 | if (mddev->raid_disks == 0) { | 782 | if (mddev->raid_disks == 0) { |
778 | mddev->major_version = 0; | 783 | mddev->major_version = 0; |
779 | mddev->minor_version = sb->minor_version; | 784 | mddev->minor_version = sb->minor_version; |
780 | mddev->patch_version = sb->patch_version; | 785 | mddev->patch_version = sb->patch_version; |
781 | mddev->persistent = ! sb->not_persistent; | 786 | mddev->external = 0; |
782 | mddev->chunk_size = sb->chunk_size; | 787 | mddev->chunk_size = sb->chunk_size; |
783 | mddev->ctime = sb->ctime; | 788 | mddev->ctime = sb->ctime; |
784 | mddev->utime = sb->utime; | 789 | mddev->utime = sb->utime; |
@@ -904,7 +909,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
904 | sb->size = mddev->size; | 909 | sb->size = mddev->size; |
905 | sb->raid_disks = mddev->raid_disks; | 910 | sb->raid_disks = mddev->raid_disks; |
906 | sb->md_minor = mddev->md_minor; | 911 | sb->md_minor = mddev->md_minor; |
907 | sb->not_persistent = !mddev->persistent; | 912 | sb->not_persistent = 0; |
908 | sb->utime = mddev->utime; | 913 | sb->utime = mddev->utime; |
909 | sb->state = 0; | 914 | sb->state = 0; |
910 | sb->events_hi = (mddev->events>>32); | 915 | sb->events_hi = (mddev->events>>32); |
@@ -938,7 +943,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
938 | sb->state |= (1<<MD_SB_BITMAP_PRESENT); | 943 | sb->state |= (1<<MD_SB_BITMAP_PRESENT); |
939 | 944 | ||
940 | sb->disks[0].state = (1<<MD_DISK_REMOVED); | 945 | sb->disks[0].state = (1<<MD_DISK_REMOVED); |
941 | ITERATE_RDEV(mddev,rdev2,tmp) { | 946 | rdev_for_each(rdev2, tmp, mddev) { |
942 | mdp_disk_t *d; | 947 | mdp_disk_t *d; |
943 | int desc_nr; | 948 | int desc_nr; |
944 | if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags) | 949 | if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags) |
@@ -1153,11 +1158,15 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1153 | __u64 ev1 = le64_to_cpu(sb->events); | 1158 | __u64 ev1 = le64_to_cpu(sb->events); |
1154 | 1159 | ||
1155 | rdev->raid_disk = -1; | 1160 | rdev->raid_disk = -1; |
1156 | rdev->flags = 0; | 1161 | clear_bit(Faulty, &rdev->flags); |
1162 | clear_bit(In_sync, &rdev->flags); | ||
1163 | clear_bit(WriteMostly, &rdev->flags); | ||
1164 | clear_bit(BarriersNotsupp, &rdev->flags); | ||
1165 | |||
1157 | if (mddev->raid_disks == 0) { | 1166 | if (mddev->raid_disks == 0) { |
1158 | mddev->major_version = 1; | 1167 | mddev->major_version = 1; |
1159 | mddev->patch_version = 0; | 1168 | mddev->patch_version = 0; |
1160 | mddev->persistent = 1; | 1169 | mddev->external = 0; |
1161 | mddev->chunk_size = le32_to_cpu(sb->chunksize) << 9; | 1170 | mddev->chunk_size = le32_to_cpu(sb->chunksize) << 9; |
1162 | mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1); | 1171 | mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1); |
1163 | mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1); | 1172 | mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1); |
@@ -1286,7 +1295,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1286 | } | 1295 | } |
1287 | 1296 | ||
1288 | max_dev = 0; | 1297 | max_dev = 0; |
1289 | ITERATE_RDEV(mddev,rdev2,tmp) | 1298 | rdev_for_each(rdev2, tmp, mddev) |
1290 | if (rdev2->desc_nr+1 > max_dev) | 1299 | if (rdev2->desc_nr+1 > max_dev) |
1291 | max_dev = rdev2->desc_nr+1; | 1300 | max_dev = rdev2->desc_nr+1; |
1292 | 1301 | ||
@@ -1295,7 +1304,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1295 | for (i=0; i<max_dev;i++) | 1304 | for (i=0; i<max_dev;i++) |
1296 | sb->dev_roles[i] = cpu_to_le16(0xfffe); | 1305 | sb->dev_roles[i] = cpu_to_le16(0xfffe); |
1297 | 1306 | ||
1298 | ITERATE_RDEV(mddev,rdev2,tmp) { | 1307 | rdev_for_each(rdev2, tmp, mddev) { |
1299 | i = rdev2->desc_nr; | 1308 | i = rdev2->desc_nr; |
1300 | if (test_bit(Faulty, &rdev2->flags)) | 1309 | if (test_bit(Faulty, &rdev2->flags)) |
1301 | sb->dev_roles[i] = cpu_to_le16(0xfffe); | 1310 | sb->dev_roles[i] = cpu_to_le16(0xfffe); |
@@ -1333,8 +1342,8 @@ static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2) | |||
1333 | struct list_head *tmp, *tmp2; | 1342 | struct list_head *tmp, *tmp2; |
1334 | mdk_rdev_t *rdev, *rdev2; | 1343 | mdk_rdev_t *rdev, *rdev2; |
1335 | 1344 | ||
1336 | ITERATE_RDEV(mddev1,rdev,tmp) | 1345 | rdev_for_each(rdev, tmp, mddev1) |
1337 | ITERATE_RDEV(mddev2, rdev2, tmp2) | 1346 | rdev_for_each(rdev2, tmp2, mddev2) |
1338 | if (rdev->bdev->bd_contains == | 1347 | if (rdev->bdev->bd_contains == |
1339 | rdev2->bdev->bd_contains) | 1348 | rdev2->bdev->bd_contains) |
1340 | return 1; | 1349 | return 1; |
@@ -1401,7 +1410,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) | |||
1401 | goto fail; | 1410 | goto fail; |
1402 | } | 1411 | } |
1403 | list_add(&rdev->same_set, &mddev->disks); | 1412 | list_add(&rdev->same_set, &mddev->disks); |
1404 | bd_claim_by_disk(rdev->bdev, rdev, mddev->gendisk); | 1413 | bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); |
1405 | return 0; | 1414 | return 0; |
1406 | 1415 | ||
1407 | fail: | 1416 | fail: |
@@ -1410,10 +1419,11 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) | |||
1410 | return err; | 1419 | return err; |
1411 | } | 1420 | } |
1412 | 1421 | ||
1413 | static void delayed_delete(struct work_struct *ws) | 1422 | static void md_delayed_delete(struct work_struct *ws) |
1414 | { | 1423 | { |
1415 | mdk_rdev_t *rdev = container_of(ws, mdk_rdev_t, del_work); | 1424 | mdk_rdev_t *rdev = container_of(ws, mdk_rdev_t, del_work); |
1416 | kobject_del(&rdev->kobj); | 1425 | kobject_del(&rdev->kobj); |
1426 | kobject_put(&rdev->kobj); | ||
1417 | } | 1427 | } |
1418 | 1428 | ||
1419 | static void unbind_rdev_from_array(mdk_rdev_t * rdev) | 1429 | static void unbind_rdev_from_array(mdk_rdev_t * rdev) |
@@ -1432,7 +1442,8 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) | |||
1432 | /* We need to delay this, otherwise we can deadlock when | 1442 | /* We need to delay this, otherwise we can deadlock when |
1433 | * writing to 'remove' to "dev/state" | 1443 | * writing to 'remove' to "dev/state" |
1434 | */ | 1444 | */ |
1435 | INIT_WORK(&rdev->del_work, delayed_delete); | 1445 | INIT_WORK(&rdev->del_work, md_delayed_delete); |
1446 | kobject_get(&rdev->kobj); | ||
1436 | schedule_work(&rdev->del_work); | 1447 | schedule_work(&rdev->del_work); |
1437 | } | 1448 | } |
1438 | 1449 | ||
@@ -1441,7 +1452,7 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) | |||
1441 | * otherwise reused by a RAID array (or any other kernel | 1452 | * otherwise reused by a RAID array (or any other kernel |
1442 | * subsystem), by bd_claiming the device. | 1453 | * subsystem), by bd_claiming the device. |
1443 | */ | 1454 | */ |
1444 | static int lock_rdev(mdk_rdev_t *rdev, dev_t dev) | 1455 | static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared) |
1445 | { | 1456 | { |
1446 | int err = 0; | 1457 | int err = 0; |
1447 | struct block_device *bdev; | 1458 | struct block_device *bdev; |
@@ -1453,13 +1464,15 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev) | |||
1453 | __bdevname(dev, b)); | 1464 | __bdevname(dev, b)); |
1454 | return PTR_ERR(bdev); | 1465 | return PTR_ERR(bdev); |
1455 | } | 1466 | } |
1456 | err = bd_claim(bdev, rdev); | 1467 | err = bd_claim(bdev, shared ? (mdk_rdev_t *)lock_rdev : rdev); |
1457 | if (err) { | 1468 | if (err) { |
1458 | printk(KERN_ERR "md: could not bd_claim %s.\n", | 1469 | printk(KERN_ERR "md: could not bd_claim %s.\n", |
1459 | bdevname(bdev, b)); | 1470 | bdevname(bdev, b)); |
1460 | blkdev_put(bdev); | 1471 | blkdev_put(bdev); |
1461 | return err; | 1472 | return err; |
1462 | } | 1473 | } |
1474 | if (!shared) | ||
1475 | set_bit(AllReserved, &rdev->flags); | ||
1463 | rdev->bdev = bdev; | 1476 | rdev->bdev = bdev; |
1464 | return err; | 1477 | return err; |
1465 | } | 1478 | } |
@@ -1503,7 +1516,7 @@ static void export_array(mddev_t *mddev) | |||
1503 | struct list_head *tmp; | 1516 | struct list_head *tmp; |
1504 | mdk_rdev_t *rdev; | 1517 | mdk_rdev_t *rdev; |
1505 | 1518 | ||
1506 | ITERATE_RDEV(mddev,rdev,tmp) { | 1519 | rdev_for_each(rdev, tmp, mddev) { |
1507 | if (!rdev->mddev) { | 1520 | if (!rdev->mddev) { |
1508 | MD_BUG(); | 1521 | MD_BUG(); |
1509 | continue; | 1522 | continue; |
@@ -1581,17 +1594,17 @@ static void md_print_devices(void) | |||
1581 | printk("md: **********************************\n"); | 1594 | printk("md: **********************************\n"); |
1582 | printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n"); | 1595 | printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n"); |
1583 | printk("md: **********************************\n"); | 1596 | printk("md: **********************************\n"); |
1584 | ITERATE_MDDEV(mddev,tmp) { | 1597 | for_each_mddev(mddev, tmp) { |
1585 | 1598 | ||
1586 | if (mddev->bitmap) | 1599 | if (mddev->bitmap) |
1587 | bitmap_print_sb(mddev->bitmap); | 1600 | bitmap_print_sb(mddev->bitmap); |
1588 | else | 1601 | else |
1589 | printk("%s: ", mdname(mddev)); | 1602 | printk("%s: ", mdname(mddev)); |
1590 | ITERATE_RDEV(mddev,rdev,tmp2) | 1603 | rdev_for_each(rdev, tmp2, mddev) |
1591 | printk("<%s>", bdevname(rdev->bdev,b)); | 1604 | printk("<%s>", bdevname(rdev->bdev,b)); |
1592 | printk("\n"); | 1605 | printk("\n"); |
1593 | 1606 | ||
1594 | ITERATE_RDEV(mddev,rdev,tmp2) | 1607 | rdev_for_each(rdev, tmp2, mddev) |
1595 | print_rdev(rdev); | 1608 | print_rdev(rdev); |
1596 | } | 1609 | } |
1597 | printk("md: **********************************\n"); | 1610 | printk("md: **********************************\n"); |
@@ -1610,7 +1623,7 @@ static void sync_sbs(mddev_t * mddev, int nospares) | |||
1610 | mdk_rdev_t *rdev; | 1623 | mdk_rdev_t *rdev; |
1611 | struct list_head *tmp; | 1624 | struct list_head *tmp; |
1612 | 1625 | ||
1613 | ITERATE_RDEV(mddev,rdev,tmp) { | 1626 | rdev_for_each(rdev, tmp, mddev) { |
1614 | if (rdev->sb_events == mddev->events || | 1627 | if (rdev->sb_events == mddev->events || |
1615 | (nospares && | 1628 | (nospares && |
1616 | rdev->raid_disk < 0 && | 1629 | rdev->raid_disk < 0 && |
@@ -1696,18 +1709,20 @@ repeat: | |||
1696 | MD_BUG(); | 1709 | MD_BUG(); |
1697 | mddev->events --; | 1710 | mddev->events --; |
1698 | } | 1711 | } |
1699 | sync_sbs(mddev, nospares); | ||
1700 | 1712 | ||
1701 | /* | 1713 | /* |
1702 | * do not write anything to disk if using | 1714 | * do not write anything to disk if using |
1703 | * nonpersistent superblocks | 1715 | * nonpersistent superblocks |
1704 | */ | 1716 | */ |
1705 | if (!mddev->persistent) { | 1717 | if (!mddev->persistent) { |
1706 | clear_bit(MD_CHANGE_PENDING, &mddev->flags); | 1718 | if (!mddev->external) |
1719 | clear_bit(MD_CHANGE_PENDING, &mddev->flags); | ||
1720 | |||
1707 | spin_unlock_irq(&mddev->write_lock); | 1721 | spin_unlock_irq(&mddev->write_lock); |
1708 | wake_up(&mddev->sb_wait); | 1722 | wake_up(&mddev->sb_wait); |
1709 | return; | 1723 | return; |
1710 | } | 1724 | } |
1725 | sync_sbs(mddev, nospares); | ||
1711 | spin_unlock_irq(&mddev->write_lock); | 1726 | spin_unlock_irq(&mddev->write_lock); |
1712 | 1727 | ||
1713 | dprintk(KERN_INFO | 1728 | dprintk(KERN_INFO |
@@ -1715,7 +1730,7 @@ repeat: | |||
1715 | mdname(mddev),mddev->in_sync); | 1730 | mdname(mddev),mddev->in_sync); |
1716 | 1731 | ||
1717 | bitmap_update_sb(mddev->bitmap); | 1732 | bitmap_update_sb(mddev->bitmap); |
1718 | ITERATE_RDEV(mddev,rdev,tmp) { | 1733 | rdev_for_each(rdev, tmp, mddev) { |
1719 | char b[BDEVNAME_SIZE]; | 1734 | char b[BDEVNAME_SIZE]; |
1720 | dprintk(KERN_INFO "md: "); | 1735 | dprintk(KERN_INFO "md: "); |
1721 | if (rdev->sb_loaded != 1) | 1736 | if (rdev->sb_loaded != 1) |
@@ -1785,7 +1800,7 @@ static ssize_t | |||
1785 | state_show(mdk_rdev_t *rdev, char *page) | 1800 | state_show(mdk_rdev_t *rdev, char *page) |
1786 | { | 1801 | { |
1787 | char *sep = ""; | 1802 | char *sep = ""; |
1788 | int len=0; | 1803 | size_t len = 0; |
1789 | 1804 | ||
1790 | if (test_bit(Faulty, &rdev->flags)) { | 1805 | if (test_bit(Faulty, &rdev->flags)) { |
1791 | len+= sprintf(page+len, "%sfaulty",sep); | 1806 | len+= sprintf(page+len, "%sfaulty",sep); |
@@ -1887,20 +1902,45 @@ static ssize_t | |||
1887 | slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) | 1902 | slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) |
1888 | { | 1903 | { |
1889 | char *e; | 1904 | char *e; |
1905 | int err; | ||
1906 | char nm[20]; | ||
1890 | int slot = simple_strtoul(buf, &e, 10); | 1907 | int slot = simple_strtoul(buf, &e, 10); |
1891 | if (strncmp(buf, "none", 4)==0) | 1908 | if (strncmp(buf, "none", 4)==0) |
1892 | slot = -1; | 1909 | slot = -1; |
1893 | else if (e==buf || (*e && *e!= '\n')) | 1910 | else if (e==buf || (*e && *e!= '\n')) |
1894 | return -EINVAL; | 1911 | return -EINVAL; |
1895 | if (rdev->mddev->pers) | 1912 | if (rdev->mddev->pers) { |
1896 | /* Cannot set slot in active array (yet) */ | 1913 | /* Setting 'slot' on an active array requires also |
1897 | return -EBUSY; | 1914 | * updating the 'rd%d' link, and communicating |
1898 | if (slot >= rdev->mddev->raid_disks) | 1915 | * with the personality with ->hot_*_disk. |
1899 | return -ENOSPC; | 1916 | * For now we only support removing |
1900 | rdev->raid_disk = slot; | 1917 | * failed/spare devices. This normally happens automatically, |
1901 | /* assume it is working */ | 1918 | * but not when the metadata is externally managed. |
1902 | rdev->flags = 0; | 1919 | */ |
1903 | set_bit(In_sync, &rdev->flags); | 1920 | if (slot != -1) |
1921 | return -EBUSY; | ||
1922 | if (rdev->raid_disk == -1) | ||
1923 | return -EEXIST; | ||
1924 | /* personality does all needed checks */ | ||
1925 | if (rdev->mddev->pers->hot_add_disk == NULL) | ||
1926 | return -EINVAL; | ||
1927 | err = rdev->mddev->pers-> | ||
1928 | hot_remove_disk(rdev->mddev, rdev->raid_disk); | ||
1929 | if (err) | ||
1930 | return err; | ||
1931 | sprintf(nm, "rd%d", rdev->raid_disk); | ||
1932 | sysfs_remove_link(&rdev->mddev->kobj, nm); | ||
1933 | set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); | ||
1934 | md_wakeup_thread(rdev->mddev->thread); | ||
1935 | } else { | ||
1936 | if (slot >= rdev->mddev->raid_disks) | ||
1937 | return -ENOSPC; | ||
1938 | rdev->raid_disk = slot; | ||
1939 | /* assume it is working */ | ||
1940 | clear_bit(Faulty, &rdev->flags); | ||
1941 | clear_bit(WriteMostly, &rdev->flags); | ||
1942 | set_bit(In_sync, &rdev->flags); | ||
1943 | } | ||
1904 | return len; | 1944 | return len; |
1905 | } | 1945 | } |
1906 | 1946 | ||
@@ -1923,6 +1963,10 @@ offset_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
1923 | return -EINVAL; | 1963 | return -EINVAL; |
1924 | if (rdev->mddev->pers) | 1964 | if (rdev->mddev->pers) |
1925 | return -EBUSY; | 1965 | return -EBUSY; |
1966 | if (rdev->size && rdev->mddev->external) | ||
1967 | /* Must set offset before size, so overlap checks | ||
1968 | * can be sane */ | ||
1969 | return -EBUSY; | ||
1926 | rdev->data_offset = offset; | 1970 | rdev->data_offset = offset; |
1927 | return len; | 1971 | return len; |
1928 | } | 1972 | } |
@@ -1936,16 +1980,69 @@ rdev_size_show(mdk_rdev_t *rdev, char *page) | |||
1936 | return sprintf(page, "%llu\n", (unsigned long long)rdev->size); | 1980 | return sprintf(page, "%llu\n", (unsigned long long)rdev->size); |
1937 | } | 1981 | } |
1938 | 1982 | ||
1983 | static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2) | ||
1984 | { | ||
1985 | /* check if two start/length pairs overlap */ | ||
1986 | if (s1+l1 <= s2) | ||
1987 | return 0; | ||
1988 | if (s2+l2 <= s1) | ||
1989 | return 0; | ||
1990 | return 1; | ||
1991 | } | ||
1992 | |||
1939 | static ssize_t | 1993 | static ssize_t |
1940 | rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) | 1994 | rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) |
1941 | { | 1995 | { |
1942 | char *e; | 1996 | char *e; |
1943 | unsigned long long size = simple_strtoull(buf, &e, 10); | 1997 | unsigned long long size = simple_strtoull(buf, &e, 10); |
1998 | unsigned long long oldsize = rdev->size; | ||
1944 | if (e==buf || (*e && *e != '\n')) | 1999 | if (e==buf || (*e && *e != '\n')) |
1945 | return -EINVAL; | 2000 | return -EINVAL; |
1946 | if (rdev->mddev->pers) | 2001 | if (rdev->mddev->pers) |
1947 | return -EBUSY; | 2002 | return -EBUSY; |
1948 | rdev->size = size; | 2003 | rdev->size = size; |
2004 | if (size > oldsize && rdev->mddev->external) { | ||
2005 | /* need to check that all other rdevs with the same ->bdev | ||
2006 | * do not overlap. We need to unlock the mddev to avoid | ||
2007 | * a deadlock. We have already changed rdev->size, and if | ||
2008 | * we have to change it back, we will have the lock again. | ||
2009 | */ | ||
2010 | mddev_t *mddev; | ||
2011 | int overlap = 0; | ||
2012 | struct list_head *tmp, *tmp2; | ||
2013 | |||
2014 | mddev_unlock(rdev->mddev); | ||
2015 | for_each_mddev(mddev, tmp) { | ||
2016 | mdk_rdev_t *rdev2; | ||
2017 | |||
2018 | mddev_lock(mddev); | ||
2019 | rdev_for_each(rdev2, tmp2, mddev) | ||
2020 | if (test_bit(AllReserved, &rdev2->flags) || | ||
2021 | (rdev->bdev == rdev2->bdev && | ||
2022 | rdev != rdev2 && | ||
2023 | overlaps(rdev->data_offset, rdev->size, | ||
2024 | rdev2->data_offset, rdev2->size))) { | ||
2025 | overlap = 1; | ||
2026 | break; | ||
2027 | } | ||
2028 | mddev_unlock(mddev); | ||
2029 | if (overlap) { | ||
2030 | mddev_put(mddev); | ||
2031 | break; | ||
2032 | } | ||
2033 | } | ||
2034 | mddev_lock(rdev->mddev); | ||
2035 | if (overlap) { | ||
2036 | /* Someone else could have slipped in a size | ||
2037 | * change here, but doing so is just silly. | ||
2038 | * We put oldsize back because we *know* it is | ||
2039 | * safe, and trust userspace not to race with | ||
2040 | * itself | ||
2041 | */ | ||
2042 | rdev->size = oldsize; | ||
2043 | return -EBUSY; | ||
2044 | } | ||
2045 | } | ||
1949 | if (size < rdev->mddev->size || rdev->mddev->size == 0) | 2046 | if (size < rdev->mddev->size || rdev->mddev->size == 0) |
1950 | rdev->mddev->size = size; | 2047 | rdev->mddev->size = size; |
1951 | return len; | 2048 | return len; |
@@ -1980,12 +2077,18 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr, | |||
1980 | { | 2077 | { |
1981 | struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr); | 2078 | struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr); |
1982 | mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj); | 2079 | mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj); |
2080 | int rv; | ||
1983 | 2081 | ||
1984 | if (!entry->store) | 2082 | if (!entry->store) |
1985 | return -EIO; | 2083 | return -EIO; |
1986 | if (!capable(CAP_SYS_ADMIN)) | 2084 | if (!capable(CAP_SYS_ADMIN)) |
1987 | return -EACCES; | 2085 | return -EACCES; |
1988 | return entry->store(rdev, page, length); | 2086 | rv = mddev_lock(rdev->mddev); |
2087 | if (!rv) { | ||
2088 | rv = entry->store(rdev, page, length); | ||
2089 | mddev_unlock(rdev->mddev); | ||
2090 | } | ||
2091 | return rv; | ||
1989 | } | 2092 | } |
1990 | 2093 | ||
1991 | static void rdev_free(struct kobject *ko) | 2094 | static void rdev_free(struct kobject *ko) |
@@ -2029,7 +2132,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi | |||
2029 | if ((err = alloc_disk_sb(rdev))) | 2132 | if ((err = alloc_disk_sb(rdev))) |
2030 | goto abort_free; | 2133 | goto abort_free; |
2031 | 2134 | ||
2032 | err = lock_rdev(rdev, newdev); | 2135 | err = lock_rdev(rdev, newdev, super_format == -2); |
2033 | if (err) | 2136 | if (err) |
2034 | goto abort_free; | 2137 | goto abort_free; |
2035 | 2138 | ||
@@ -2099,7 +2202,7 @@ static void analyze_sbs(mddev_t * mddev) | |||
2099 | char b[BDEVNAME_SIZE]; | 2202 | char b[BDEVNAME_SIZE]; |
2100 | 2203 | ||
2101 | freshest = NULL; | 2204 | freshest = NULL; |
2102 | ITERATE_RDEV(mddev,rdev,tmp) | 2205 | rdev_for_each(rdev, tmp, mddev) |
2103 | switch (super_types[mddev->major_version]. | 2206 | switch (super_types[mddev->major_version]. |
2104 | load_super(rdev, freshest, mddev->minor_version)) { | 2207 | load_super(rdev, freshest, mddev->minor_version)) { |
2105 | case 1: | 2208 | case 1: |
@@ -2120,7 +2223,7 @@ static void analyze_sbs(mddev_t * mddev) | |||
2120 | validate_super(mddev, freshest); | 2223 | validate_super(mddev, freshest); |
2121 | 2224 | ||
2122 | i = 0; | 2225 | i = 0; |
2123 | ITERATE_RDEV(mddev,rdev,tmp) { | 2226 | rdev_for_each(rdev, tmp, mddev) { |
2124 | if (rdev != freshest) | 2227 | if (rdev != freshest) |
2125 | if (super_types[mddev->major_version]. | 2228 | if (super_types[mddev->major_version]. |
2126 | validate_super(mddev, rdev)) { | 2229 | validate_super(mddev, rdev)) { |
@@ -2215,7 +2318,7 @@ level_show(mddev_t *mddev, char *page) | |||
2215 | static ssize_t | 2318 | static ssize_t |
2216 | level_store(mddev_t *mddev, const char *buf, size_t len) | 2319 | level_store(mddev_t *mddev, const char *buf, size_t len) |
2217 | { | 2320 | { |
2218 | int rv = len; | 2321 | ssize_t rv = len; |
2219 | if (mddev->pers) | 2322 | if (mddev->pers) |
2220 | return -EBUSY; | 2323 | return -EBUSY; |
2221 | if (len == 0) | 2324 | if (len == 0) |
@@ -2425,6 +2528,8 @@ array_state_show(mddev_t *mddev, char *page) | |||
2425 | case 0: | 2528 | case 0: |
2426 | if (mddev->in_sync) | 2529 | if (mddev->in_sync) |
2427 | st = clean; | 2530 | st = clean; |
2531 | else if (test_bit(MD_CHANGE_CLEAN, &mddev->flags)) | ||
2532 | st = write_pending; | ||
2428 | else if (mddev->safemode) | 2533 | else if (mddev->safemode) |
2429 | st = active_idle; | 2534 | st = active_idle; |
2430 | else | 2535 | else |
@@ -2455,11 +2560,9 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) | |||
2455 | break; | 2560 | break; |
2456 | case clear: | 2561 | case clear: |
2457 | /* stopping an active array */ | 2562 | /* stopping an active array */ |
2458 | if (mddev->pers) { | 2563 | if (atomic_read(&mddev->active) > 1) |
2459 | if (atomic_read(&mddev->active) > 1) | 2564 | return -EBUSY; |
2460 | return -EBUSY; | 2565 | err = do_md_stop(mddev, 0); |
2461 | err = do_md_stop(mddev, 0); | ||
2462 | } | ||
2463 | break; | 2566 | break; |
2464 | case inactive: | 2567 | case inactive: |
2465 | /* stopping an active array */ | 2568 | /* stopping an active array */ |
@@ -2467,7 +2570,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) | |||
2467 | if (atomic_read(&mddev->active) > 1) | 2570 | if (atomic_read(&mddev->active) > 1) |
2468 | return -EBUSY; | 2571 | return -EBUSY; |
2469 | err = do_md_stop(mddev, 2); | 2572 | err = do_md_stop(mddev, 2); |
2470 | } | 2573 | } else |
2574 | err = 0; /* already inactive */ | ||
2471 | break; | 2575 | break; |
2472 | case suspended: | 2576 | case suspended: |
2473 | break; /* not supported yet */ | 2577 | break; /* not supported yet */ |
@@ -2495,9 +2599,15 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) | |||
2495 | restart_array(mddev); | 2599 | restart_array(mddev); |
2496 | spin_lock_irq(&mddev->write_lock); | 2600 | spin_lock_irq(&mddev->write_lock); |
2497 | if (atomic_read(&mddev->writes_pending) == 0) { | 2601 | if (atomic_read(&mddev->writes_pending) == 0) { |
2498 | mddev->in_sync = 1; | 2602 | if (mddev->in_sync == 0) { |
2499 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); | 2603 | mddev->in_sync = 1; |
2500 | } | 2604 | if (mddev->persistent) |
2605 | set_bit(MD_CHANGE_CLEAN, | ||
2606 | &mddev->flags); | ||
2607 | } | ||
2608 | err = 0; | ||
2609 | } else | ||
2610 | err = -EBUSY; | ||
2501 | spin_unlock_irq(&mddev->write_lock); | 2611 | spin_unlock_irq(&mddev->write_lock); |
2502 | } else { | 2612 | } else { |
2503 | mddev->ro = 0; | 2613 | mddev->ro = 0; |
@@ -2508,7 +2618,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) | |||
2508 | case active: | 2618 | case active: |
2509 | if (mddev->pers) { | 2619 | if (mddev->pers) { |
2510 | restart_array(mddev); | 2620 | restart_array(mddev); |
2511 | clear_bit(MD_CHANGE_CLEAN, &mddev->flags); | 2621 | if (mddev->external) |
2622 | clear_bit(MD_CHANGE_CLEAN, &mddev->flags); | ||
2512 | wake_up(&mddev->sb_wait); | 2623 | wake_up(&mddev->sb_wait); |
2513 | err = 0; | 2624 | err = 0; |
2514 | } else { | 2625 | } else { |
@@ -2574,7 +2685,9 @@ new_dev_store(mddev_t *mddev, const char *buf, size_t len) | |||
2574 | if (err < 0) | 2685 | if (err < 0) |
2575 | goto out; | 2686 | goto out; |
2576 | } | 2687 | } |
2577 | } else | 2688 | } else if (mddev->external) |
2689 | rdev = md_import_device(dev, -2, -1); | ||
2690 | else | ||
2578 | rdev = md_import_device(dev, -1, -1); | 2691 | rdev = md_import_device(dev, -1, -1); |
2579 | 2692 | ||
2580 | if (IS_ERR(rdev)) | 2693 | if (IS_ERR(rdev)) |
@@ -2659,7 +2772,9 @@ __ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store); | |||
2659 | 2772 | ||
2660 | 2773 | ||
2661 | /* Metdata version. | 2774 | /* Metadata version. |
2662 | * This is either 'none' for arrays with externally managed metadata, | 2775 | * This is one of |
2776 | * 'none' for arrays with no metadata (good luck...) | ||
2777 | * 'external' for arrays with externally managed metadata, | ||
2663 | * or N.M for internally known formats | 2778 | * or N.M for internally known formats |
2664 | */ | 2779 | */ |
2665 | static ssize_t | 2780 | static ssize_t |
@@ -2668,6 +2783,8 @@ metadata_show(mddev_t *mddev, char *page) | |||
2668 | if (mddev->persistent) | 2783 | if (mddev->persistent) |
2669 | return sprintf(page, "%d.%d\n", | 2784 | return sprintf(page, "%d.%d\n", |
2670 | mddev->major_version, mddev->minor_version); | 2785 | mddev->major_version, mddev->minor_version); |
2786 | else if (mddev->external) | ||
2787 | return sprintf(page, "external:%s\n", mddev->metadata_type); | ||
2671 | else | 2788 | else |
2672 | return sprintf(page, "none\n"); | 2789 | return sprintf(page, "none\n"); |
2673 | } | 2790 | } |
@@ -2682,6 +2799,21 @@ metadata_store(mddev_t *mddev, const char *buf, size_t len) | |||
2682 | 2799 | ||
2683 | if (cmd_match(buf, "none")) { | 2800 | if (cmd_match(buf, "none")) { |
2684 | mddev->persistent = 0; | 2801 | mddev->persistent = 0; |
2802 | mddev->external = 0; | ||
2803 | mddev->major_version = 0; | ||
2804 | mddev->minor_version = 90; | ||
2805 | return len; | ||
2806 | } | ||
2807 | if (strncmp(buf, "external:", 9) == 0) { | ||
2808 | size_t namelen = len-9; | ||
2809 | if (namelen >= sizeof(mddev->metadata_type)) | ||
2810 | namelen = sizeof(mddev->metadata_type)-1; | ||
2811 | strncpy(mddev->metadata_type, buf+9, namelen); | ||
2812 | mddev->metadata_type[namelen] = 0; | ||
2813 | if (namelen && mddev->metadata_type[namelen-1] == '\n') | ||
2814 | mddev->metadata_type[--namelen] = 0; | ||
2815 | mddev->persistent = 0; | ||
2816 | mddev->external = 1; | ||
2685 | mddev->major_version = 0; | 2817 | mddev->major_version = 0; |
2686 | mddev->minor_version = 90; | 2818 | mddev->minor_version = 90; |
2687 | return len; | 2819 | return len; |
@@ -2698,6 +2830,7 @@ metadata_store(mddev_t *mddev, const char *buf, size_t len) | |||
2698 | mddev->major_version = major; | 2830 | mddev->major_version = major; |
2699 | mddev->minor_version = minor; | 2831 | mddev->minor_version = minor; |
2700 | mddev->persistent = 1; | 2832 | mddev->persistent = 1; |
2833 | mddev->external = 0; | ||
2701 | return len; | 2834 | return len; |
2702 | } | 2835 | } |
2703 | 2836 | ||
@@ -2865,6 +2998,43 @@ sync_completed_show(mddev_t *mddev, char *page) | |||
2865 | static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed); | 2998 | static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed); |
2866 | 2999 | ||
2867 | static ssize_t | 3000 | static ssize_t |
3001 | max_sync_show(mddev_t *mddev, char *page) | ||
3002 | { | ||
3003 | if (mddev->resync_max == MaxSector) | ||
3004 | return sprintf(page, "max\n"); | ||
3005 | else | ||
3006 | return sprintf(page, "%llu\n", | ||
3007 | (unsigned long long)mddev->resync_max); | ||
3008 | } | ||
3009 | static ssize_t | ||
3010 | max_sync_store(mddev_t *mddev, const char *buf, size_t len) | ||
3011 | { | ||
3012 | if (strncmp(buf, "max", 3) == 0) | ||
3013 | mddev->resync_max = MaxSector; | ||
3014 | else { | ||
3015 | char *ep; | ||
3016 | unsigned long long max = simple_strtoull(buf, &ep, 10); | ||
3017 | if (ep == buf || (*ep != 0 && *ep != '\n')) | ||
3018 | return -EINVAL; | ||
3019 | if (max < mddev->resync_max && | ||
3020 | test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) | ||
3021 | return -EBUSY; | ||
3022 | |||
3023 | /* Must be a multiple of chunk_size */ | ||
3024 | if (mddev->chunk_size) { | ||
3025 | if (max & (sector_t)((mddev->chunk_size>>9)-1)) | ||
3026 | return -EINVAL; | ||
3027 | } | ||
3028 | mddev->resync_max = max; | ||
3029 | } | ||
3030 | wake_up(&mddev->recovery_wait); | ||
3031 | return len; | ||
3032 | } | ||
3033 | |||
3034 | static struct md_sysfs_entry md_max_sync = | ||
3035 | __ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store); | ||
3036 | |||
3037 | static ssize_t | ||
2868 | suspend_lo_show(mddev_t *mddev, char *page) | 3038 | suspend_lo_show(mddev_t *mddev, char *page) |
2869 | { | 3039 | { |
2870 | return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo); | 3040 | return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo); |
@@ -2974,6 +3144,7 @@ static struct attribute *md_redundancy_attrs[] = { | |||
2974 | &md_sync_max.attr, | 3144 | &md_sync_max.attr, |
2975 | &md_sync_speed.attr, | 3145 | &md_sync_speed.attr, |
2976 | &md_sync_completed.attr, | 3146 | &md_sync_completed.attr, |
3147 | &md_max_sync.attr, | ||
2977 | &md_suspend_lo.attr, | 3148 | &md_suspend_lo.attr, |
2978 | &md_suspend_hi.attr, | 3149 | &md_suspend_hi.attr, |
2979 | &md_bitmap.attr, | 3150 | &md_bitmap.attr, |
@@ -3118,8 +3289,11 @@ static int do_md_run(mddev_t * mddev) | |||
3118 | /* | 3289 | /* |
3119 | * Analyze all RAID superblock(s) | 3290 | * Analyze all RAID superblock(s) |
3120 | */ | 3291 | */ |
3121 | if (!mddev->raid_disks) | 3292 | if (!mddev->raid_disks) { |
3293 | if (!mddev->persistent) | ||
3294 | return -EINVAL; | ||
3122 | analyze_sbs(mddev); | 3295 | analyze_sbs(mddev); |
3296 | } | ||
3123 | 3297 | ||
3124 | chunk_size = mddev->chunk_size; | 3298 | chunk_size = mddev->chunk_size; |
3125 | 3299 | ||
@@ -3143,7 +3317,7 @@ static int do_md_run(mddev_t * mddev) | |||
3143 | } | 3317 | } |
3144 | 3318 | ||
3145 | /* devices must have minimum size of one chunk */ | 3319 | /* devices must have minimum size of one chunk */ |
3146 | ITERATE_RDEV(mddev,rdev,tmp) { | 3320 | rdev_for_each(rdev, tmp, mddev) { |
3147 | if (test_bit(Faulty, &rdev->flags)) | 3321 | if (test_bit(Faulty, &rdev->flags)) |
3148 | continue; | 3322 | continue; |
3149 | if (rdev->size < chunk_size / 1024) { | 3323 | if (rdev->size < chunk_size / 1024) { |
@@ -3170,7 +3344,7 @@ static int do_md_run(mddev_t * mddev) | |||
3170 | * the only valid external interface is through the md | 3344 | * the only valid external interface is through the md |
3171 | * device. | 3345 | * device. |
3172 | */ | 3346 | */ |
3173 | ITERATE_RDEV(mddev,rdev,tmp) { | 3347 | rdev_for_each(rdev, tmp, mddev) { |
3174 | if (test_bit(Faulty, &rdev->flags)) | 3348 | if (test_bit(Faulty, &rdev->flags)) |
3175 | continue; | 3349 | continue; |
3176 | sync_blockdev(rdev->bdev); | 3350 | sync_blockdev(rdev->bdev); |
@@ -3236,8 +3410,8 @@ static int do_md_run(mddev_t * mddev) | |||
3236 | mdk_rdev_t *rdev2; | 3410 | mdk_rdev_t *rdev2; |
3237 | struct list_head *tmp2; | 3411 | struct list_head *tmp2; |
3238 | int warned = 0; | 3412 | int warned = 0; |
3239 | ITERATE_RDEV(mddev, rdev, tmp) { | 3413 | rdev_for_each(rdev, tmp, mddev) { |
3240 | ITERATE_RDEV(mddev, rdev2, tmp2) { | 3414 | rdev_for_each(rdev2, tmp2, mddev) { |
3241 | if (rdev < rdev2 && | 3415 | if (rdev < rdev2 && |
3242 | rdev->bdev->bd_contains == | 3416 | rdev->bdev->bd_contains == |
3243 | rdev2->bdev->bd_contains) { | 3417 | rdev2->bdev->bd_contains) { |
@@ -3297,7 +3471,7 @@ static int do_md_run(mddev_t * mddev) | |||
3297 | mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */ | 3471 | mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */ |
3298 | mddev->in_sync = 1; | 3472 | mddev->in_sync = 1; |
3299 | 3473 | ||
3300 | ITERATE_RDEV(mddev,rdev,tmp) | 3474 | rdev_for_each(rdev, tmp, mddev) |
3301 | if (rdev->raid_disk >= 0) { | 3475 | if (rdev->raid_disk >= 0) { |
3302 | char nm[20]; | 3476 | char nm[20]; |
3303 | sprintf(nm, "rd%d", rdev->raid_disk); | 3477 | sprintf(nm, "rd%d", rdev->raid_disk); |
@@ -3330,7 +3504,7 @@ static int do_md_run(mddev_t * mddev) | |||
3330 | if (mddev->degraded && !mddev->sync_thread) { | 3504 | if (mddev->degraded && !mddev->sync_thread) { |
3331 | struct list_head *rtmp; | 3505 | struct list_head *rtmp; |
3332 | int spares = 0; | 3506 | int spares = 0; |
3333 | ITERATE_RDEV(mddev,rdev,rtmp) | 3507 | rdev_for_each(rdev, rtmp, mddev) |
3334 | if (rdev->raid_disk >= 0 && | 3508 | if (rdev->raid_disk >= 0 && |
3335 | !test_bit(In_sync, &rdev->flags) && | 3509 | !test_bit(In_sync, &rdev->flags) && |
3336 | !test_bit(Faulty, &rdev->flags)) | 3510 | !test_bit(Faulty, &rdev->flags)) |
@@ -3507,14 +3681,14 @@ static int do_md_stop(mddev_t * mddev, int mode) | |||
3507 | } | 3681 | } |
3508 | mddev->bitmap_offset = 0; | 3682 | mddev->bitmap_offset = 0; |
3509 | 3683 | ||
3510 | ITERATE_RDEV(mddev,rdev,tmp) | 3684 | rdev_for_each(rdev, tmp, mddev) |
3511 | if (rdev->raid_disk >= 0) { | 3685 | if (rdev->raid_disk >= 0) { |
3512 | char nm[20]; | 3686 | char nm[20]; |
3513 | sprintf(nm, "rd%d", rdev->raid_disk); | 3687 | sprintf(nm, "rd%d", rdev->raid_disk); |
3514 | sysfs_remove_link(&mddev->kobj, nm); | 3688 | sysfs_remove_link(&mddev->kobj, nm); |
3515 | } | 3689 | } |
3516 | 3690 | ||
3517 | /* make sure all delayed_delete calls have finished */ | 3691 | /* make sure all md_delayed_delete calls have finished */ |
3518 | flush_scheduled_work(); | 3692 | flush_scheduled_work(); |
3519 | 3693 | ||
3520 | export_array(mddev); | 3694 | export_array(mddev); |
@@ -3523,7 +3697,10 @@ static int do_md_stop(mddev_t * mddev, int mode) | |||
3523 | mddev->size = 0; | 3697 | mddev->size = 0; |
3524 | mddev->raid_disks = 0; | 3698 | mddev->raid_disks = 0; |
3525 | mddev->recovery_cp = 0; | 3699 | mddev->recovery_cp = 0; |
3700 | mddev->resync_max = MaxSector; | ||
3526 | mddev->reshape_position = MaxSector; | 3701 | mddev->reshape_position = MaxSector; |
3702 | mddev->external = 0; | ||
3703 | mddev->persistent = 0; | ||
3527 | 3704 | ||
3528 | } else if (mddev->pers) | 3705 | } else if (mddev->pers) |
3529 | printk(KERN_INFO "md: %s switched to read-only mode.\n", | 3706 | printk(KERN_INFO "md: %s switched to read-only mode.\n", |
@@ -3546,7 +3723,7 @@ static void autorun_array(mddev_t *mddev) | |||
3546 | 3723 | ||
3547 | printk(KERN_INFO "md: running: "); | 3724 | printk(KERN_INFO "md: running: "); |
3548 | 3725 | ||
3549 | ITERATE_RDEV(mddev,rdev,tmp) { | 3726 | rdev_for_each(rdev, tmp, mddev) { |
3550 | char b[BDEVNAME_SIZE]; | 3727 | char b[BDEVNAME_SIZE]; |
3551 | printk("<%s>", bdevname(rdev->bdev,b)); | 3728 | printk("<%s>", bdevname(rdev->bdev,b)); |
3552 | } | 3729 | } |
@@ -3589,7 +3766,7 @@ static void autorun_devices(int part) | |||
3589 | printk(KERN_INFO "md: considering %s ...\n", | 3766 | printk(KERN_INFO "md: considering %s ...\n", |
3590 | bdevname(rdev0->bdev,b)); | 3767 | bdevname(rdev0->bdev,b)); |
3591 | INIT_LIST_HEAD(&candidates); | 3768 | INIT_LIST_HEAD(&candidates); |
3592 | ITERATE_RDEV_PENDING(rdev,tmp) | 3769 | rdev_for_each_list(rdev, tmp, pending_raid_disks) |
3593 | if (super_90_load(rdev, rdev0, 0) >= 0) { | 3770 | if (super_90_load(rdev, rdev0, 0) >= 0) { |
3594 | printk(KERN_INFO "md: adding %s ...\n", | 3771 | printk(KERN_INFO "md: adding %s ...\n", |
3595 | bdevname(rdev->bdev,b)); | 3772 | bdevname(rdev->bdev,b)); |
@@ -3632,7 +3809,8 @@ static void autorun_devices(int part) | |||
3632 | mddev_unlock(mddev); | 3809 | mddev_unlock(mddev); |
3633 | } else { | 3810 | } else { |
3634 | printk(KERN_INFO "md: created %s\n", mdname(mddev)); | 3811 | printk(KERN_INFO "md: created %s\n", mdname(mddev)); |
3635 | ITERATE_RDEV_GENERIC(candidates,rdev,tmp) { | 3812 | mddev->persistent = 1; |
3813 | rdev_for_each_list(rdev, tmp, candidates) { | ||
3636 | list_del_init(&rdev->same_set); | 3814 | list_del_init(&rdev->same_set); |
3637 | if (bind_rdev_to_array(rdev, mddev)) | 3815 | if (bind_rdev_to_array(rdev, mddev)) |
3638 | export_rdev(rdev); | 3816 | export_rdev(rdev); |
@@ -3643,7 +3821,7 @@ static void autorun_devices(int part) | |||
3643 | /* on success, candidates will be empty, on error | 3821 | /* on success, candidates will be empty, on error |
3644 | * it won't... | 3822 | * it won't... |
3645 | */ | 3823 | */ |
3646 | ITERATE_RDEV_GENERIC(candidates,rdev,tmp) | 3824 | rdev_for_each_list(rdev, tmp, candidates) |
3647 | export_rdev(rdev); | 3825 | export_rdev(rdev); |
3648 | mddev_put(mddev); | 3826 | mddev_put(mddev); |
3649 | } | 3827 | } |
@@ -3673,7 +3851,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg) | |||
3673 | struct list_head *tmp; | 3851 | struct list_head *tmp; |
3674 | 3852 | ||
3675 | nr=working=active=failed=spare=0; | 3853 | nr=working=active=failed=spare=0; |
3676 | ITERATE_RDEV(mddev,rdev,tmp) { | 3854 | rdev_for_each(rdev, tmp, mddev) { |
3677 | nr++; | 3855 | nr++; |
3678 | if (test_bit(Faulty, &rdev->flags)) | 3856 | if (test_bit(Faulty, &rdev->flags)) |
3679 | failed++; | 3857 | failed++; |
@@ -3919,8 +4097,6 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) | |||
3919 | else | 4097 | else |
3920 | rdev->raid_disk = -1; | 4098 | rdev->raid_disk = -1; |
3921 | 4099 | ||
3922 | rdev->flags = 0; | ||
3923 | |||
3924 | if (rdev->raid_disk < mddev->raid_disks) | 4100 | if (rdev->raid_disk < mddev->raid_disks) |
3925 | if (info->state & (1<<MD_DISK_SYNC)) | 4101 | if (info->state & (1<<MD_DISK_SYNC)) |
3926 | set_bit(In_sync, &rdev->flags); | 4102 | set_bit(In_sync, &rdev->flags); |
@@ -4165,13 +4341,15 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) | |||
4165 | else | 4341 | else |
4166 | mddev->recovery_cp = 0; | 4342 | mddev->recovery_cp = 0; |
4167 | mddev->persistent = ! info->not_persistent; | 4343 | mddev->persistent = ! info->not_persistent; |
4344 | mddev->external = 0; | ||
4168 | 4345 | ||
4169 | mddev->layout = info->layout; | 4346 | mddev->layout = info->layout; |
4170 | mddev->chunk_size = info->chunk_size; | 4347 | mddev->chunk_size = info->chunk_size; |
4171 | 4348 | ||
4172 | mddev->max_disks = MD_SB_DISKS; | 4349 | mddev->max_disks = MD_SB_DISKS; |
4173 | 4350 | ||
4174 | mddev->flags = 0; | 4351 | if (mddev->persistent) |
4352 | mddev->flags = 0; | ||
4175 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 4353 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
4176 | 4354 | ||
4177 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; | 4355 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; |
@@ -4213,7 +4391,7 @@ static int update_size(mddev_t *mddev, unsigned long size) | |||
4213 | */ | 4391 | */ |
4214 | if (mddev->sync_thread) | 4392 | if (mddev->sync_thread) |
4215 | return -EBUSY; | 4393 | return -EBUSY; |
4216 | ITERATE_RDEV(mddev,rdev,tmp) { | 4394 | rdev_for_each(rdev, tmp, mddev) { |
4217 | sector_t avail; | 4395 | sector_t avail; |
4218 | avail = rdev->size * 2; | 4396 | avail = rdev->size * 2; |
4219 | 4397 | ||
@@ -4471,9 +4649,10 @@ static int md_ioctl(struct inode *inode, struct file *file, | |||
4471 | */ | 4649 | */ |
4472 | /* if we are not initialised yet, only ADD_NEW_DISK, STOP_ARRAY, | 4650 | /* if we are not initialised yet, only ADD_NEW_DISK, STOP_ARRAY, |
4473 | * RUN_ARRAY, and GET_ and SET_BITMAP_FILE are allowed */ | 4651 | * RUN_ARRAY, and GET_ and SET_BITMAP_FILE are allowed */ |
4474 | if (!mddev->raid_disks && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY | 4652 | if ((!mddev->raid_disks && !mddev->external) |
4475 | && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE | 4653 | && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY |
4476 | && cmd != GET_BITMAP_FILE) { | 4654 | && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE |
4655 | && cmd != GET_BITMAP_FILE) { | ||
4477 | err = -ENODEV; | 4656 | err = -ENODEV; |
4478 | goto abort_unlock; | 4657 | goto abort_unlock; |
4479 | } | 4658 | } |
@@ -4757,7 +4936,7 @@ static void status_unused(struct seq_file *seq) | |||
4757 | 4936 | ||
4758 | seq_printf(seq, "unused devices: "); | 4937 | seq_printf(seq, "unused devices: "); |
4759 | 4938 | ||
4760 | ITERATE_RDEV_PENDING(rdev,tmp) { | 4939 | rdev_for_each_list(rdev, tmp, pending_raid_disks) { |
4761 | char b[BDEVNAME_SIZE]; | 4940 | char b[BDEVNAME_SIZE]; |
4762 | i++; | 4941 | i++; |
4763 | seq_printf(seq, "%s ", | 4942 | seq_printf(seq, "%s ", |
@@ -4953,7 +5132,7 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
4953 | } | 5132 | } |
4954 | 5133 | ||
4955 | size = 0; | 5134 | size = 0; |
4956 | ITERATE_RDEV(mddev,rdev,tmp2) { | 5135 | rdev_for_each(rdev, tmp2, mddev) { |
4957 | char b[BDEVNAME_SIZE]; | 5136 | char b[BDEVNAME_SIZE]; |
4958 | seq_printf(seq, " %s[%d]", | 5137 | seq_printf(seq, " %s[%d]", |
4959 | bdevname(rdev->bdev,b), rdev->desc_nr); | 5138 | bdevname(rdev->bdev,b), rdev->desc_nr); |
@@ -4982,7 +5161,10 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
4982 | mddev->major_version, | 5161 | mddev->major_version, |
4983 | mddev->minor_version); | 5162 | mddev->minor_version); |
4984 | } | 5163 | } |
4985 | } else | 5164 | } else if (mddev->external) |
5165 | seq_printf(seq, " super external:%s", | ||
5166 | mddev->metadata_type); | ||
5167 | else | ||
4986 | seq_printf(seq, " super non-persistent"); | 5168 | seq_printf(seq, " super non-persistent"); |
4987 | 5169 | ||
4988 | if (mddev->pers) { | 5170 | if (mddev->pers) { |
@@ -5106,7 +5288,7 @@ static int is_mddev_idle(mddev_t *mddev) | |||
5106 | long curr_events; | 5288 | long curr_events; |
5107 | 5289 | ||
5108 | idle = 1; | 5290 | idle = 1; |
5109 | ITERATE_RDEV(mddev,rdev,tmp) { | 5291 | rdev_for_each(rdev, tmp, mddev) { |
5110 | struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; | 5292 | struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; |
5111 | curr_events = disk_stat_read(disk, sectors[0]) + | 5293 | curr_events = disk_stat_read(disk, sectors[0]) + |
5112 | disk_stat_read(disk, sectors[1]) - | 5294 | disk_stat_read(disk, sectors[1]) - |
@@ -5283,7 +5465,7 @@ void md_do_sync(mddev_t *mddev) | |||
5283 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | 5465 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
5284 | goto skip; | 5466 | goto skip; |
5285 | } | 5467 | } |
5286 | ITERATE_MDDEV(mddev2,tmp) { | 5468 | for_each_mddev(mddev2, tmp) { |
5287 | if (mddev2 == mddev) | 5469 | if (mddev2 == mddev) |
5288 | continue; | 5470 | continue; |
5289 | if (mddev2->curr_resync && | 5471 | if (mddev2->curr_resync && |
@@ -5333,7 +5515,7 @@ void md_do_sync(mddev_t *mddev) | |||
5333 | /* recovery follows the physical size of devices */ | 5515 | /* recovery follows the physical size of devices */ |
5334 | max_sectors = mddev->size << 1; | 5516 | max_sectors = mddev->size << 1; |
5335 | j = MaxSector; | 5517 | j = MaxSector; |
5336 | ITERATE_RDEV(mddev,rdev,rtmp) | 5518 | rdev_for_each(rdev, rtmp, mddev) |
5337 | if (rdev->raid_disk >= 0 && | 5519 | if (rdev->raid_disk >= 0 && |
5338 | !test_bit(Faulty, &rdev->flags) && | 5520 | !test_bit(Faulty, &rdev->flags) && |
5339 | !test_bit(In_sync, &rdev->flags) && | 5521 | !test_bit(In_sync, &rdev->flags) && |
@@ -5381,8 +5563,16 @@ void md_do_sync(mddev_t *mddev) | |||
5381 | sector_t sectors; | 5563 | sector_t sectors; |
5382 | 5564 | ||
5383 | skipped = 0; | 5565 | skipped = 0; |
5566 | if (j >= mddev->resync_max) { | ||
5567 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); | ||
5568 | wait_event(mddev->recovery_wait, | ||
5569 | mddev->resync_max > j | ||
5570 | || kthread_should_stop()); | ||
5571 | } | ||
5572 | if (kthread_should_stop()) | ||
5573 | goto interrupted; | ||
5384 | sectors = mddev->pers->sync_request(mddev, j, &skipped, | 5574 | sectors = mddev->pers->sync_request(mddev, j, &skipped, |
5385 | currspeed < speed_min(mddev)); | 5575 | currspeed < speed_min(mddev)); |
5386 | if (sectors == 0) { | 5576 | if (sectors == 0) { |
5387 | set_bit(MD_RECOVERY_ERR, &mddev->recovery); | 5577 | set_bit(MD_RECOVERY_ERR, &mddev->recovery); |
5388 | goto out; | 5578 | goto out; |
@@ -5424,15 +5614,9 @@ void md_do_sync(mddev_t *mddev) | |||
5424 | } | 5614 | } |
5425 | 5615 | ||
5426 | 5616 | ||
5427 | if (kthread_should_stop()) { | 5617 | if (kthread_should_stop()) |
5428 | /* | 5618 | goto interrupted; |
5429 | * got a signal, exit. | 5619 | |
5430 | */ | ||
5431 | printk(KERN_INFO | ||
5432 | "md: md_do_sync() got signal ... exiting\n"); | ||
5433 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | ||
5434 | goto out; | ||
5435 | } | ||
5436 | 5620 | ||
5437 | /* | 5621 | /* |
5438 | * this loop exits only if either when we are slower than | 5622 | * this loop exits only when either we are slower than |
@@ -5484,7 +5668,7 @@ void md_do_sync(mddev_t *mddev) | |||
5484 | } else { | 5668 | } else { |
5485 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) | 5669 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) |
5486 | mddev->curr_resync = MaxSector; | 5670 | mddev->curr_resync = MaxSector; |
5487 | ITERATE_RDEV(mddev,rdev,rtmp) | 5671 | rdev_for_each(rdev, rtmp, mddev) |
5488 | if (rdev->raid_disk >= 0 && | 5672 | if (rdev->raid_disk >= 0 && |
5489 | !test_bit(Faulty, &rdev->flags) && | 5673 | !test_bit(Faulty, &rdev->flags) && |
5490 | !test_bit(In_sync, &rdev->flags) && | 5674 | !test_bit(In_sync, &rdev->flags) && |
@@ -5496,9 +5680,22 @@ void md_do_sync(mddev_t *mddev) | |||
5496 | 5680 | ||
5497 | skip: | 5681 | skip: |
5498 | mddev->curr_resync = 0; | 5682 | mddev->curr_resync = 0; |
5683 | mddev->resync_max = MaxSector; | ||
5684 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); | ||
5499 | wake_up(&resync_wait); | 5685 | wake_up(&resync_wait); |
5500 | set_bit(MD_RECOVERY_DONE, &mddev->recovery); | 5686 | set_bit(MD_RECOVERY_DONE, &mddev->recovery); |
5501 | md_wakeup_thread(mddev->thread); | 5687 | md_wakeup_thread(mddev->thread); |
5688 | return; | ||
5689 | |||
5690 | interrupted: | ||
5691 | /* | ||
5692 | * got a signal, exit. | ||
5693 | */ | ||
5694 | printk(KERN_INFO | ||
5695 | "md: md_do_sync() got signal ... exiting\n"); | ||
5696 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | ||
5697 | goto out; | ||
5698 | |||
5502 | } | 5699 | } |
5503 | EXPORT_SYMBOL_GPL(md_do_sync); | 5700 | EXPORT_SYMBOL_GPL(md_do_sync); |
5504 | 5701 | ||
@@ -5509,8 +5706,9 @@ static int remove_and_add_spares(mddev_t *mddev) | |||
5509 | struct list_head *rtmp; | 5706 | struct list_head *rtmp; |
5510 | int spares = 0; | 5707 | int spares = 0; |
5511 | 5708 | ||
5512 | ITERATE_RDEV(mddev,rdev,rtmp) | 5709 | rdev_for_each(rdev, rtmp, mddev) |
5513 | if (rdev->raid_disk >= 0 && | 5710 | if (rdev->raid_disk >= 0 && |
5711 | !mddev->external && | ||
5514 | (test_bit(Faulty, &rdev->flags) || | 5712 | (test_bit(Faulty, &rdev->flags) || |
5515 | ! test_bit(In_sync, &rdev->flags)) && | 5713 | ! test_bit(In_sync, &rdev->flags)) && |
5516 | atomic_read(&rdev->nr_pending)==0) { | 5714 | atomic_read(&rdev->nr_pending)==0) { |
@@ -5524,7 +5722,7 @@ static int remove_and_add_spares(mddev_t *mddev) | |||
5524 | } | 5722 | } |
5525 | 5723 | ||
5526 | if (mddev->degraded) { | 5724 | if (mddev->degraded) { |
5527 | ITERATE_RDEV(mddev,rdev,rtmp) | 5725 | rdev_for_each(rdev, rtmp, mddev) |
5528 | if (rdev->raid_disk < 0 | 5726 | if (rdev->raid_disk < 0 |
5529 | && !test_bit(Faulty, &rdev->flags)) { | 5727 | && !test_bit(Faulty, &rdev->flags)) { |
5530 | rdev->recovery_offset = 0; | 5728 | rdev->recovery_offset = 0; |
@@ -5589,7 +5787,7 @@ void md_check_recovery(mddev_t *mddev) | |||
5589 | } | 5787 | } |
5590 | 5788 | ||
5591 | if ( ! ( | 5789 | if ( ! ( |
5592 | mddev->flags || | 5790 | (mddev->flags && !mddev->external) || |
5593 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || | 5791 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || |
5594 | test_bit(MD_RECOVERY_DONE, &mddev->recovery) || | 5792 | test_bit(MD_RECOVERY_DONE, &mddev->recovery) || |
5595 | (mddev->safemode == 1) || | 5793 | (mddev->safemode == 1) || |
@@ -5605,7 +5803,8 @@ void md_check_recovery(mddev_t *mddev) | |||
5605 | if (mddev->safemode && !atomic_read(&mddev->writes_pending) && | 5803 | if (mddev->safemode && !atomic_read(&mddev->writes_pending) && |
5606 | !mddev->in_sync && mddev->recovery_cp == MaxSector) { | 5804 | !mddev->in_sync && mddev->recovery_cp == MaxSector) { |
5607 | mddev->in_sync = 1; | 5805 | mddev->in_sync = 1; |
5608 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); | 5806 | if (mddev->persistent) |
5807 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); | ||
5609 | } | 5808 | } |
5610 | if (mddev->safemode == 1) | 5809 | if (mddev->safemode == 1) |
5611 | mddev->safemode = 0; | 5810 | mddev->safemode = 0; |
@@ -5637,7 +5836,7 @@ void md_check_recovery(mddev_t *mddev) | |||
5637 | * information must be scrapped | 5836 | * information must be scrapped |
5638 | */ | 5837 | */ |
5639 | if (!mddev->degraded) | 5838 | if (!mddev->degraded) |
5640 | ITERATE_RDEV(mddev,rdev,rtmp) | 5839 | rdev_for_each(rdev, rtmp, mddev) |
5641 | rdev->saved_raid_disk = -1; | 5840 | rdev->saved_raid_disk = -1; |
5642 | 5841 | ||
5643 | mddev->recovery = 0; | 5842 | mddev->recovery = 0; |
@@ -5714,7 +5913,7 @@ static int md_notify_reboot(struct notifier_block *this, | |||
5714 | 5913 | ||
5715 | printk(KERN_INFO "md: stopping all md devices.\n"); | 5914 | printk(KERN_INFO "md: stopping all md devices.\n"); |
5716 | 5915 | ||
5717 | ITERATE_MDDEV(mddev,tmp) | 5916 | for_each_mddev(mddev, tmp) |
5718 | if (mddev_trylock(mddev)) { | 5917 | if (mddev_trylock(mddev)) { |
5719 | do_md_stop (mddev, 1); | 5918 | do_md_stop (mddev, 1); |
5720 | mddev_unlock(mddev); | 5919 | mddev_unlock(mddev); |
@@ -5848,7 +6047,7 @@ static __exit void md_exit(void) | |||
5848 | unregister_reboot_notifier(&md_notifier); | 6047 | unregister_reboot_notifier(&md_notifier); |
5849 | unregister_sysctl_table(raid_table_header); | 6048 | unregister_sysctl_table(raid_table_header); |
5850 | remove_proc_entry("mdstat", NULL); | 6049 | remove_proc_entry("mdstat", NULL); |
5851 | ITERATE_MDDEV(mddev,tmp) { | 6050 | for_each_mddev(mddev, tmp) { |
5852 | struct gendisk *disk = mddev->gendisk; | 6051 | struct gendisk *disk = mddev->gendisk; |
5853 | if (!disk) | 6052 | if (!disk) |
5854 | continue; | 6053 | continue; |
diff --git a/drivers/md/mktables.c b/drivers/md/mktables.c index adef299908cf..b61d5767aae7 100644 --- a/drivers/md/mktables.c +++ b/drivers/md/mktables.c | |||
@@ -1,13 +1,10 @@ | |||
1 | #ident "$Id: mktables.c,v 1.2 2002/12/12 22:41:27 hpa Exp $" | 1 | /* -*- linux-c -*- ------------------------------------------------------- * |
2 | /* ----------------------------------------------------------------------- * | ||
3 | * | 2 | * |
4 | * Copyright 2002 H. Peter Anvin - All Rights Reserved | 3 | * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved |
5 | * | 4 | * |
6 | * This program is free software; you can redistribute it and/or modify | 5 | * This file is part of the Linux kernel, and is made available under |
7 | * it under the terms of the GNU General Public License as published by | 6 | * the terms of the GNU General Public License version 2 or (at your |
8 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | 7 | * option) any later version; incorporated herein by reference. |
9 | * Bostom MA 02111-1307, USA; either version 2 of the License, or | ||
10 | * (at your option) any later version; incorporated herein by reference. | ||
11 | * | 8 | * |
12 | * ----------------------------------------------------------------------- */ | 9 | * ----------------------------------------------------------------------- */ |
13 | 10 | ||
@@ -26,100 +23,98 @@ | |||
26 | 23 | ||
27 | static uint8_t gfmul(uint8_t a, uint8_t b) | 24 | static uint8_t gfmul(uint8_t a, uint8_t b) |
28 | { | 25 | { |
29 | uint8_t v = 0; | 26 | uint8_t v = 0; |
30 | 27 | ||
31 | while ( b ) { | 28 | while (b) { |
32 | if ( b & 1 ) v ^= a; | 29 | if (b & 1) |
33 | a = (a << 1) ^ (a & 0x80 ? 0x1d : 0); | 30 | v ^= a; |
34 | b >>= 1; | 31 | a = (a << 1) ^ (a & 0x80 ? 0x1d : 0); |
35 | } | 32 | b >>= 1; |
36 | return v; | 33 | } |
34 | |||
35 | return v; | ||
37 | } | 36 | } |
38 | 37 | ||
39 | static uint8_t gfpow(uint8_t a, int b) | 38 | static uint8_t gfpow(uint8_t a, int b) |
40 | { | 39 | { |
41 | uint8_t v = 1; | 40 | uint8_t v = 1; |
42 | 41 | ||
43 | b %= 255; | 42 | b %= 255; |
44 | if ( b < 0 ) | 43 | if (b < 0) |
45 | b += 255; | 44 | b += 255; |
46 | 45 | ||
47 | while ( b ) { | 46 | while (b) { |
48 | if ( b & 1 ) v = gfmul(v,a); | 47 | if (b & 1) |
49 | a = gfmul(a,a); | 48 | v = gfmul(v, a); |
50 | b >>= 1; | 49 | a = gfmul(a, a); |
51 | } | 50 | b >>= 1; |
52 | return v; | 51 | } |
52 | |||
53 | return v; | ||
53 | } | 54 | } |
54 | 55 | ||
55 | int main(int argc, char *argv[]) | 56 | int main(int argc, char *argv[]) |
56 | { | 57 | { |
57 | int i, j, k; | 58 | int i, j, k; |
58 | uint8_t v; | 59 | uint8_t v; |
59 | uint8_t exptbl[256], invtbl[256]; | 60 | uint8_t exptbl[256], invtbl[256]; |
60 | 61 | ||
61 | printf("#include \"raid6.h\"\n"); | 62 | printf("#include \"raid6.h\"\n"); |
62 | 63 | ||
63 | /* Compute multiplication table */ | 64 | /* Compute multiplication table */ |
64 | printf("\nconst u8 __attribute__((aligned(256)))\n" | 65 | printf("\nconst u8 __attribute__((aligned(256)))\n" |
65 | "raid6_gfmul[256][256] =\n" | 66 | "raid6_gfmul[256][256] =\n" |
66 | "{\n"); | 67 | "{\n"); |
67 | for ( i = 0 ; i < 256 ; i++ ) { | 68 | for (i = 0; i < 256; i++) { |
68 | printf("\t{\n"); | 69 | printf("\t{\n"); |
69 | for ( j = 0 ; j < 256 ; j += 8 ) { | 70 | for (j = 0; j < 256; j += 8) { |
70 | printf("\t\t"); | 71 | printf("\t\t"); |
71 | for ( k = 0 ; k < 8 ; k++ ) { | 72 | for (k = 0; k < 8; k++) |
72 | printf("0x%02x, ", gfmul(i,j+k)); | 73 | printf("0x%02x,%c", gfmul(i, j + k), |
73 | } | 74 | (k == 7) ? '\n' : ' '); |
74 | printf("\n"); | 75 | } |
75 | } | 76 | printf("\t},\n"); |
76 | printf("\t},\n"); | 77 | } |
77 | } | 78 | printf("};\n"); |
78 | printf("};\n"); | 79 | |
79 | 80 | /* Compute power-of-2 table (exponent) */ | |
80 | /* Compute power-of-2 table (exponent) */ | 81 | v = 1; |
81 | v = 1; | 82 | printf("\nconst u8 __attribute__((aligned(256)))\n" |
82 | printf("\nconst u8 __attribute__((aligned(256)))\n" | 83 | "raid6_gfexp[256] =\n" "{\n"); |
83 | "raid6_gfexp[256] =\n" | 84 | for (i = 0; i < 256; i += 8) { |
84 | "{\n"); | 85 | printf("\t"); |
85 | for ( i = 0 ; i < 256 ; i += 8 ) { | 86 | for (j = 0; j < 8; j++) { |
86 | printf("\t"); | 87 | exptbl[i + j] = v; |
87 | for ( j = 0 ; j < 8 ; j++ ) { | 88 | printf("0x%02x,%c", v, (j == 7) ? '\n' : ' '); |
88 | exptbl[i+j] = v; | 89 | v = gfmul(v, 2); |
89 | printf("0x%02x, ", v); | 90 | if (v == 1) |
90 | v = gfmul(v,2); | 91 | v = 0; /* For entry 255, not a real entry */ |
91 | if ( v == 1 ) v = 0; /* For entry 255, not a real entry */ | 92 | } |
92 | } | 93 | } |
93 | printf("\n"); | 94 | printf("};\n"); |
94 | } | 95 | |
95 | printf("};\n"); | 96 | /* Compute inverse table x^-1 == x^254 */ |
96 | 97 | printf("\nconst u8 __attribute__((aligned(256)))\n" | |
97 | /* Compute inverse table x^-1 == x^254 */ | 98 | "raid6_gfinv[256] =\n" "{\n"); |
98 | printf("\nconst u8 __attribute__((aligned(256)))\n" | 99 | for (i = 0; i < 256; i += 8) { |
99 | "raid6_gfinv[256] =\n" | 100 | printf("\t"); |
100 | "{\n"); | 101 | for (j = 0; j < 8; j++) { |
101 | for ( i = 0 ; i < 256 ; i += 8 ) { | 102 | invtbl[i + j] = v = gfpow(i + j, 254); |
102 | printf("\t"); | 103 | printf("0x%02x,%c", v, (j == 7) ? '\n' : ' '); |
103 | for ( j = 0 ; j < 8 ; j++ ) { | 104 | } |
104 | invtbl[i+j] = v = gfpow(i+j,254); | 105 | } |
105 | printf("0x%02x, ", v); | 106 | printf("};\n"); |
106 | } | 107 | |
107 | printf("\n"); | 108 | /* Compute inv(2^x + 1) (exponent-xor-inverse) table */ |
108 | } | 109 | printf("\nconst u8 __attribute__((aligned(256)))\n" |
109 | printf("};\n"); | 110 | "raid6_gfexi[256] =\n" "{\n"); |
110 | 111 | for (i = 0; i < 256; i += 8) { | |
111 | /* Compute inv(2^x + 1) (exponent-xor-inverse) table */ | 112 | printf("\t"); |
112 | printf("\nconst u8 __attribute__((aligned(256)))\n" | 113 | for (j = 0; j < 8; j++) |
113 | "raid6_gfexi[256] =\n" | 114 | printf("0x%02x,%c", invtbl[exptbl[i + j] ^ 1], |
114 | "{\n"); | 115 | (j == 7) ? '\n' : ' '); |
115 | for ( i = 0 ; i < 256 ; i += 8 ) { | 116 | } |
116 | printf("\t"); | 117 | printf("};\n"); |
117 | for ( j = 0 ; j < 8 ; j++ ) { | 118 | |
118 | printf("0x%02x, ", invtbl[exptbl[i+j]^1]); | 119 | return 0; |
119 | } | ||
120 | printf("\n"); | ||
121 | } | ||
122 | printf("};\n\n"); | ||
123 | |||
124 | return 0; | ||
125 | } | 120 | } |
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index eb631ebed686..3f299d835a2b 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c | |||
@@ -436,7 +436,7 @@ static int multipath_run (mddev_t *mddev) | |||
436 | } | 436 | } |
437 | 437 | ||
438 | conf->working_disks = 0; | 438 | conf->working_disks = 0; |
439 | ITERATE_RDEV(mddev,rdev,tmp) { | 439 | rdev_for_each(rdev, tmp, mddev) { |
440 | disk_idx = rdev->raid_disk; | 440 | disk_idx = rdev->raid_disk; |
441 | if (disk_idx < 0 || | 441 | if (disk_idx < 0 || |
442 | disk_idx >= mddev->raid_disks) | 442 | disk_idx >= mddev->raid_disks) |
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index f8e591708d1f..818b48284096 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c | |||
@@ -72,11 +72,11 @@ static int create_strip_zones (mddev_t *mddev) | |||
72 | */ | 72 | */ |
73 | conf->nr_strip_zones = 0; | 73 | conf->nr_strip_zones = 0; |
74 | 74 | ||
75 | ITERATE_RDEV(mddev,rdev1,tmp1) { | 75 | rdev_for_each(rdev1, tmp1, mddev) { |
76 | printk("raid0: looking at %s\n", | 76 | printk("raid0: looking at %s\n", |
77 | bdevname(rdev1->bdev,b)); | 77 | bdevname(rdev1->bdev,b)); |
78 | c = 0; | 78 | c = 0; |
79 | ITERATE_RDEV(mddev,rdev2,tmp2) { | 79 | rdev_for_each(rdev2, tmp2, mddev) { |
80 | printk("raid0: comparing %s(%llu)", | 80 | printk("raid0: comparing %s(%llu)", |
81 | bdevname(rdev1->bdev,b), | 81 | bdevname(rdev1->bdev,b), |
82 | (unsigned long long)rdev1->size); | 82 | (unsigned long long)rdev1->size); |
@@ -124,7 +124,7 @@ static int create_strip_zones (mddev_t *mddev) | |||
124 | cnt = 0; | 124 | cnt = 0; |
125 | smallest = NULL; | 125 | smallest = NULL; |
126 | zone->dev = conf->devlist; | 126 | zone->dev = conf->devlist; |
127 | ITERATE_RDEV(mddev, rdev1, tmp1) { | 127 | rdev_for_each(rdev1, tmp1, mddev) { |
128 | int j = rdev1->raid_disk; | 128 | int j = rdev1->raid_disk; |
129 | 129 | ||
130 | if (j < 0 || j >= mddev->raid_disks) { | 130 | if (j < 0 || j >= mddev->raid_disks) { |
@@ -293,7 +293,7 @@ static int raid0_run (mddev_t *mddev) | |||
293 | 293 | ||
294 | /* calculate array device size */ | 294 | /* calculate array device size */ |
295 | mddev->array_size = 0; | 295 | mddev->array_size = 0; |
296 | ITERATE_RDEV(mddev,rdev,tmp) | 296 | rdev_for_each(rdev, tmp, mddev) |
297 | mddev->array_size += rdev->size; | 297 | mddev->array_size += rdev->size; |
298 | 298 | ||
299 | printk("raid0 : md_size is %llu blocks.\n", | 299 | printk("raid0 : md_size is %llu blocks.\n", |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 4a69c416e045..5c7fef091cec 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -1684,6 +1684,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1684 | if (!go_faster && conf->nr_waiting) | 1684 | if (!go_faster && conf->nr_waiting) |
1685 | msleep_interruptible(1000); | 1685 | msleep_interruptible(1000); |
1686 | 1686 | ||
1687 | bitmap_cond_end_sync(mddev->bitmap, sector_nr); | ||
1687 | raise_barrier(conf); | 1688 | raise_barrier(conf); |
1688 | 1689 | ||
1689 | conf->next_resync = sector_nr; | 1690 | conf->next_resync = sector_nr; |
@@ -1766,6 +1767,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1766 | return rv; | 1767 | return rv; |
1767 | } | 1768 | } |
1768 | 1769 | ||
1770 | if (max_sector > mddev->resync_max) | ||
1771 | max_sector = mddev->resync_max; /* Don't do IO beyond here */ | ||
1769 | nr_sectors = 0; | 1772 | nr_sectors = 0; |
1770 | sync_blocks = 0; | 1773 | sync_blocks = 0; |
1771 | do { | 1774 | do { |
@@ -1884,7 +1887,7 @@ static int run(mddev_t *mddev) | |||
1884 | if (!conf->r1bio_pool) | 1887 | if (!conf->r1bio_pool) |
1885 | goto out_no_mem; | 1888 | goto out_no_mem; |
1886 | 1889 | ||
1887 | ITERATE_RDEV(mddev, rdev, tmp) { | 1890 | rdev_for_each(rdev, tmp, mddev) { |
1888 | disk_idx = rdev->raid_disk; | 1891 | disk_idx = rdev->raid_disk; |
1889 | if (disk_idx >= mddev->raid_disks | 1892 | if (disk_idx >= mddev->raid_disks |
1890 | || disk_idx < 0) | 1893 | || disk_idx < 0) |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 5cdcc9386200..017f58113c33 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -1657,6 +1657,9 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1657 | return (max_sector - sector_nr) + sectors_skipped; | 1657 | return (max_sector - sector_nr) + sectors_skipped; |
1658 | } | 1658 | } |
1659 | 1659 | ||
1660 | if (max_sector > mddev->resync_max) | ||
1661 | max_sector = mddev->resync_max; /* Don't do IO beyond here */ | ||
1662 | |||
1660 | /* make sure whole request will fit in a chunk - if chunks | 1663 | /* make sure whole request will fit in a chunk - if chunks |
1661 | * are meaningful | 1664 | * are meaningful |
1662 | */ | 1665 | */ |
@@ -1670,6 +1673,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1670 | if (!go_faster && conf->nr_waiting) | 1673 | if (!go_faster && conf->nr_waiting) |
1671 | msleep_interruptible(1000); | 1674 | msleep_interruptible(1000); |
1672 | 1675 | ||
1676 | bitmap_cond_end_sync(mddev->bitmap, sector_nr); | ||
1677 | |||
1673 | /* Again, very different code for resync and recovery. | 1678 | /* Again, very different code for resync and recovery. |
1674 | * Both must result in an r10bio with a list of bios that | 1679 | * Both must result in an r10bio with a list of bios that |
1675 | * have bi_end_io, bi_sector, bi_bdev set, | 1680 | * have bi_end_io, bi_sector, bi_bdev set, |
@@ -2021,7 +2026,7 @@ static int run(mddev_t *mddev) | |||
2021 | goto out_free_conf; | 2026 | goto out_free_conf; |
2022 | } | 2027 | } |
2023 | 2028 | ||
2024 | ITERATE_RDEV(mddev, rdev, tmp) { | 2029 | rdev_for_each(rdev, tmp, mddev) { |
2025 | disk_idx = rdev->raid_disk; | 2030 | disk_idx = rdev->raid_disk; |
2026 | if (disk_idx >= mddev->raid_disks | 2031 | if (disk_idx >= mddev->raid_disks |
2027 | || disk_idx < 0) | 2032 | || disk_idx < 0) |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index e8c8157b02fc..2d6f1a51359c 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -3159,7 +3159,8 @@ static void raid5_activate_delayed(raid5_conf_t *conf) | |||
3159 | atomic_inc(&conf->preread_active_stripes); | 3159 | atomic_inc(&conf->preread_active_stripes); |
3160 | list_add_tail(&sh->lru, &conf->handle_list); | 3160 | list_add_tail(&sh->lru, &conf->handle_list); |
3161 | } | 3161 | } |
3162 | } | 3162 | } else |
3163 | blk_plug_device(conf->mddev->queue); | ||
3163 | } | 3164 | } |
3164 | 3165 | ||
3165 | static void activate_bit_delay(raid5_conf_t *conf) | 3166 | static void activate_bit_delay(raid5_conf_t *conf) |
@@ -3549,7 +3550,8 @@ static int make_request(struct request_queue *q, struct bio * bi) | |||
3549 | goto retry; | 3550 | goto retry; |
3550 | } | 3551 | } |
3551 | finish_wait(&conf->wait_for_overlap, &w); | 3552 | finish_wait(&conf->wait_for_overlap, &w); |
3552 | handle_stripe(sh, NULL); | 3553 | set_bit(STRIPE_HANDLE, &sh->state); |
3554 | clear_bit(STRIPE_DELAYED, &sh->state); | ||
3553 | release_stripe(sh); | 3555 | release_stripe(sh); |
3554 | } else { | 3556 | } else { |
3555 | /* cannot get stripe for read-ahead, just give-up */ | 3557 | /* cannot get stripe for read-ahead, just give-up */ |
@@ -3698,6 +3700,25 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
3698 | release_stripe(sh); | 3700 | release_stripe(sh); |
3699 | first_sector += STRIPE_SECTORS; | 3701 | first_sector += STRIPE_SECTORS; |
3700 | } | 3702 | } |
3703 | /* If this takes us to the resync_max point where we have to pause, | ||
3704 | * then we need to write out the superblock. | ||
3705 | */ | ||
3706 | sector_nr += conf->chunk_size>>9; | ||
3707 | if (sector_nr >= mddev->resync_max) { | ||
3708 | /* Cannot proceed until we've updated the superblock... */ | ||
3709 | wait_event(conf->wait_for_overlap, | ||
3710 | atomic_read(&conf->reshape_stripes) == 0); | ||
3711 | mddev->reshape_position = conf->expand_progress; | ||
3712 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | ||
3713 | md_wakeup_thread(mddev->thread); | ||
3714 | wait_event(mddev->sb_wait, | ||
3715 | !test_bit(MD_CHANGE_DEVS, &mddev->flags) | ||
3716 | || kthread_should_stop()); | ||
3717 | spin_lock_irq(&conf->device_lock); | ||
3718 | conf->expand_lo = mddev->reshape_position; | ||
3719 | spin_unlock_irq(&conf->device_lock); | ||
3720 | wake_up(&conf->wait_for_overlap); | ||
3721 | } | ||
3701 | return conf->chunk_size>>9; | 3722 | return conf->chunk_size>>9; |
3702 | } | 3723 | } |
3703 | 3724 | ||
@@ -3734,6 +3755,12 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski | |||
3734 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) | 3755 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) |
3735 | return reshape_request(mddev, sector_nr, skipped); | 3756 | return reshape_request(mddev, sector_nr, skipped); |
3736 | 3757 | ||
3758 | /* No need to check resync_max as we never do more than one | ||
3759 | * stripe, and as resync_max will always be on a chunk boundary, | ||
3760 | * if the check in md_do_sync didn't fire, there is no chance | ||
3761 | * of overstepping resync_max here | ||
3762 | */ | ||
3763 | |||
3737 | /* if there is too many failed drives and we are trying | 3764 | /* if there is too many failed drives and we are trying |
3738 | * to resync, then assert that we are finished, because there is | 3765 | * to resync, then assert that we are finished, because there is |
3739 | * nothing we can do. | 3766 | * nothing we can do. |
@@ -3753,6 +3780,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski | |||
3753 | return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */ | 3780 | return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */ |
3754 | } | 3781 | } |
3755 | 3782 | ||
3783 | |||
3784 | bitmap_cond_end_sync(mddev->bitmap, sector_nr); | ||
3785 | |||
3756 | pd_idx = stripe_to_pdidx(sector_nr, conf, raid_disks); | 3786 | pd_idx = stripe_to_pdidx(sector_nr, conf, raid_disks); |
3757 | sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 1); | 3787 | sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 1); |
3758 | if (sh == NULL) { | 3788 | if (sh == NULL) { |
@@ -3864,7 +3894,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) | |||
3864 | * During the scan, completed stripes are saved for us by the interrupt | 3894 | * During the scan, completed stripes are saved for us by the interrupt |
3865 | * handler, so that they will not have to wait for our next wakeup. | 3895 | * handler, so that they will not have to wait for our next wakeup. |
3866 | */ | 3896 | */ |
3867 | static void raid5d (mddev_t *mddev) | 3897 | static void raid5d(mddev_t *mddev) |
3868 | { | 3898 | { |
3869 | struct stripe_head *sh; | 3899 | struct stripe_head *sh; |
3870 | raid5_conf_t *conf = mddev_to_conf(mddev); | 3900 | raid5_conf_t *conf = mddev_to_conf(mddev); |
@@ -3889,12 +3919,6 @@ static void raid5d (mddev_t *mddev) | |||
3889 | activate_bit_delay(conf); | 3919 | activate_bit_delay(conf); |
3890 | } | 3920 | } |
3891 | 3921 | ||
3892 | if (list_empty(&conf->handle_list) && | ||
3893 | atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD && | ||
3894 | !blk_queue_plugged(mddev->queue) && | ||
3895 | !list_empty(&conf->delayed_list)) | ||
3896 | raid5_activate_delayed(conf); | ||
3897 | |||
3898 | while ((bio = remove_bio_from_retry(conf))) { | 3922 | while ((bio = remove_bio_from_retry(conf))) { |
3899 | int ok; | 3923 | int ok; |
3900 | spin_unlock_irq(&conf->device_lock); | 3924 | spin_unlock_irq(&conf->device_lock); |
@@ -4108,7 +4132,7 @@ static int run(mddev_t *mddev) | |||
4108 | 4132 | ||
4109 | pr_debug("raid5: run(%s) called.\n", mdname(mddev)); | 4133 | pr_debug("raid5: run(%s) called.\n", mdname(mddev)); |
4110 | 4134 | ||
4111 | ITERATE_RDEV(mddev,rdev,tmp) { | 4135 | rdev_for_each(rdev, tmp, mddev) { |
4112 | raid_disk = rdev->raid_disk; | 4136 | raid_disk = rdev->raid_disk; |
4113 | if (raid_disk >= conf->raid_disks | 4137 | if (raid_disk >= conf->raid_disks |
4114 | || raid_disk < 0) | 4138 | || raid_disk < 0) |
@@ -4521,7 +4545,7 @@ static int raid5_start_reshape(mddev_t *mddev) | |||
4521 | if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) | 4545 | if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) |
4522 | return -EBUSY; | 4546 | return -EBUSY; |
4523 | 4547 | ||
4524 | ITERATE_RDEV(mddev, rdev, rtmp) | 4548 | rdev_for_each(rdev, rtmp, mddev) |
4525 | if (rdev->raid_disk < 0 && | 4549 | if (rdev->raid_disk < 0 && |
4526 | !test_bit(Faulty, &rdev->flags)) | 4550 | !test_bit(Faulty, &rdev->flags)) |
4527 | spares++; | 4551 | spares++; |
@@ -4543,7 +4567,7 @@ static int raid5_start_reshape(mddev_t *mddev) | |||
4543 | /* Add some new drives, as many as will fit. | 4567 | /* Add some new drives, as many as will fit. |
4544 | * We know there are enough to make the newly sized array work. | 4568 | * We know there are enough to make the newly sized array work. |
4545 | */ | 4569 | */ |
4546 | ITERATE_RDEV(mddev, rdev, rtmp) | 4570 | rdev_for_each(rdev, rtmp, mddev) |
4547 | if (rdev->raid_disk < 0 && | 4571 | if (rdev->raid_disk < 0 && |
4548 | !test_bit(Faulty, &rdev->flags)) { | 4572 | !test_bit(Faulty, &rdev->flags)) { |
4549 | if (raid5_add_disk(mddev, rdev)) { | 4573 | if (raid5_add_disk(mddev, rdev)) { |
diff --git a/drivers/md/raid6test/test.c b/drivers/md/raid6test/test.c index 0d5cd57accd7..559cc41b2585 100644 --- a/drivers/md/raid6test/test.c +++ b/drivers/md/raid6test/test.c | |||
@@ -1,12 +1,10 @@ | |||
1 | /* -*- linux-c -*- ------------------------------------------------------- * | 1 | /* -*- linux-c -*- ------------------------------------------------------- * |
2 | * | 2 | * |
3 | * Copyright 2002 H. Peter Anvin - All Rights Reserved | 3 | * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This file is part of the Linux kernel, and is made available under |
6 | * it under the terms of the GNU General Public License as published by | 6 | * the terms of the GNU General Public License version 2 or (at your |
7 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | 7 | * option) any later version; incorporated herein by reference. |
8 | * Bostom MA 02111-1307, USA; either version 2 of the License, or | ||
9 | * (at your option) any later version; incorporated herein by reference. | ||
10 | * | 8 | * |
11 | * ----------------------------------------------------------------------- */ | 9 | * ----------------------------------------------------------------------- */ |
12 | 10 | ||
@@ -30,67 +28,87 @@ char *dataptrs[NDISKS]; | |||
30 | char data[NDISKS][PAGE_SIZE]; | 28 | char data[NDISKS][PAGE_SIZE]; |
31 | char recovi[PAGE_SIZE], recovj[PAGE_SIZE]; | 29 | char recovi[PAGE_SIZE], recovj[PAGE_SIZE]; |
32 | 30 | ||
33 | void makedata(void) | 31 | static void makedata(void) |
34 | { | 32 | { |
35 | int i, j; | 33 | int i, j; |
36 | 34 | ||
37 | for ( i = 0 ; i < NDISKS ; i++ ) { | 35 | for (i = 0; i < NDISKS; i++) { |
38 | for ( j = 0 ; j < PAGE_SIZE ; j++ ) { | 36 | for (j = 0; j < PAGE_SIZE; j++) |
39 | data[i][j] = rand(); | 37 | data[i][j] = rand(); |
40 | } | 38 | |
41 | dataptrs[i] = data[i]; | 39 | dataptrs[i] = data[i]; |
42 | } | 40 | } |
43 | } | 41 | } |
44 | 42 | ||
43 | static char disk_type(int d) | ||
44 | { | ||
45 | switch (d) { | ||
46 | case NDISKS-2: | ||
47 | return 'P'; | ||
48 | case NDISKS-1: | ||
49 | return 'Q'; | ||
50 | default: | ||
51 | return 'D'; | ||
52 | } | ||
53 | } | ||
54 | |||
55 | static int test_disks(int i, int j) | ||
56 | { | ||
57 | int erra, errb; | ||
58 | |||
59 | memset(recovi, 0xf0, PAGE_SIZE); | ||
60 | memset(recovj, 0xba, PAGE_SIZE); | ||
61 | |||
62 | dataptrs[i] = recovi; | ||
63 | dataptrs[j] = recovj; | ||
64 | |||
65 | raid6_dual_recov(NDISKS, PAGE_SIZE, i, j, (void **)&dataptrs); | ||
66 | |||
67 | erra = memcmp(data[i], recovi, PAGE_SIZE); | ||
68 | errb = memcmp(data[j], recovj, PAGE_SIZE); | ||
69 | |||
70 | if (i < NDISKS-2 && j == NDISKS-1) { | ||
71 | /* We don't implement the DQ failure scenario, since it's | ||
72 | equivalent to a RAID-5 failure (XOR, then recompute Q) */ | ||
73 | erra = errb = 0; | ||
74 | } else { | ||
75 | printf("algo=%-8s faila=%3d(%c) failb=%3d(%c) %s\n", | ||
76 | raid6_call.name, | ||
77 | i, disk_type(i), | ||
78 | j, disk_type(j), | ||
79 | (!erra && !errb) ? "OK" : | ||
80 | !erra ? "ERRB" : | ||
81 | !errb ? "ERRA" : "ERRAB"); | ||
82 | } | ||
83 | |||
84 | dataptrs[i] = data[i]; | ||
85 | dataptrs[j] = data[j]; | ||
86 | |||
87 | return erra || errb; | ||
88 | } | ||
89 | |||
45 | int main(int argc, char *argv[]) | 90 | int main(int argc, char *argv[]) |
46 | { | 91 | { |
47 | const struct raid6_calls * const * algo; | 92 | const struct raid6_calls *const *algo; |
48 | int i, j; | 93 | int i, j; |
49 | int erra, errb; | 94 | int err = 0; |
50 | 95 | ||
51 | makedata(); | 96 | makedata(); |
52 | 97 | ||
53 | for ( algo = raid6_algos ; *algo ; algo++ ) { | 98 | for (algo = raid6_algos; *algo; algo++) { |
54 | if ( !(*algo)->valid || (*algo)->valid() ) { | 99 | if (!(*algo)->valid || (*algo)->valid()) { |
55 | raid6_call = **algo; | 100 | raid6_call = **algo; |
56 | 101 | ||
57 | /* Nuke syndromes */ | 102 | /* Nuke syndromes */ |
58 | memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); | 103 | memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); |
59 | 104 | ||
60 | /* Generate assumed good syndrome */ | 105 | /* Generate assumed good syndrome */ |
61 | raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, (void **)&dataptrs); | 106 | raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, |
62 | 107 | (void **)&dataptrs); | |
63 | for ( i = 0 ; i < NDISKS-1 ; i++ ) { | 108 | |
64 | for ( j = i+1 ; j < NDISKS ; j++ ) { | 109 | for (i = 0; i < NDISKS-1; i++) |
65 | memset(recovi, 0xf0, PAGE_SIZE); | 110 | for (j = i+1; j < NDISKS; j++) |
66 | memset(recovj, 0xba, PAGE_SIZE); | 111 | err += test_disks(i, j); |
67 | |||
68 | dataptrs[i] = recovi; | ||
69 | dataptrs[j] = recovj; | ||
70 | |||
71 | raid6_dual_recov(NDISKS, PAGE_SIZE, i, j, (void **)&dataptrs); | ||
72 | |||
73 | erra = memcmp(data[i], recovi, PAGE_SIZE); | ||
74 | errb = memcmp(data[j], recovj, PAGE_SIZE); | ||
75 | |||
76 | if ( i < NDISKS-2 && j == NDISKS-1 ) { | ||
77 | /* We don't implement the DQ failure scenario, since it's | ||
78 | equivalent to a RAID-5 failure (XOR, then recompute Q) */ | ||
79 | } else { | ||
80 | printf("algo=%-8s faila=%3d(%c) failb=%3d(%c) %s\n", | ||
81 | raid6_call.name, | ||
82 | i, (i==NDISKS-2)?'P':'D', | ||
83 | j, (j==NDISKS-1)?'Q':(j==NDISKS-2)?'P':'D', | ||
84 | (!erra && !errb) ? "OK" : | ||
85 | !erra ? "ERRB" : | ||
86 | !errb ? "ERRA" : | ||
87 | "ERRAB"); | ||
88 | } | ||
89 | |||
90 | dataptrs[i] = data[i]; | ||
91 | dataptrs[j] = data[j]; | ||
92 | } | ||
93 | } | ||
94 | } | 112 | } |
95 | printf("\n"); | 113 | printf("\n"); |
96 | } | 114 | } |
@@ -99,5 +117,8 @@ int main(int argc, char *argv[]) | |||
99 | /* Pick the best algorithm test */ | 117 | /* Pick the best algorithm test */ |
100 | raid6_select_algo(); | 118 | raid6_select_algo(); |
101 | 119 | ||
102 | return 0; | 120 | if (err) |
121 | printf("\n*** ERRORS FOUND ***\n"); | ||
122 | |||
123 | return err; | ||
103 | } | 124 | } |